John Zulauf | 79f0658 | 2021-02-27 18:38:39 -0700 | [diff] [blame^] | 1 | /* Copyright (c) 2020-2021 The Khronos Group Inc. |
| 2 | * Copyright (c) 2020-2021 Valve Corporation |
| 3 | * Copyright (c) 2020-2021 LunarG, Inc. |
| 4 | * Copyright (C) 2020-2021 Google Inc. |
John Zulauf | f660ad6 | 2019-03-23 07:16:05 -0600 | [diff] [blame] | 5 | * |
| 6 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 7 | * you may not use this file except in compliance with the License. |
| 8 | * You may obtain a copy of the License at |
| 9 | * |
| 10 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | * |
| 12 | * Unless required by applicable law or agreed to in writing, software |
| 13 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | * See the License for the specific language governing permissions and |
| 16 | * limitations under the License. |
| 17 | * |
| 18 | * John Zulauf <jzulauf@lunarg.com> |
| 19 | * |
| 20 | */ |
| 21 | #ifndef SPARSE_CONTAINERS_H_ |
| 22 | #define SPARSE_CONTAINERS_H_ |
John Zulauf | f660ad6 | 2019-03-23 07:16:05 -0600 | [diff] [blame] | 23 | #include <cassert> |
| 24 | #include <memory> |
| 25 | #include <unordered_map> |
| 26 | #include <vector> |
| 27 | |
| 28 | namespace sparse_container { |
| 29 | // SparseVector: |
| 30 | // |
| 31 | // Defines a sparse single-dimensional container which is targeted for three distinct use cases |
| 32 | // 1) Large range of indices sparsely populated ("Sparse access" below) |
| 33 | // 2) Large range of indices where all values are the same ("Sparse access" below) |
| 34 | // 3) Large range of values densely populated (more that 1/4 full) ("Dense access" below) |
| 35 | // 4) Small range of values where direct access is most efficient ("Dense access" below) |
| 36 | // |
| 37 | // To update semantics are supported bases on kSetReplaces: |
| 38 | // true -- updates to already set (valid) indices replace current value |
| 39 | // false -- updates to already set (valid) indicies are ignored. |
| 40 | // |
| 41 | // Theory of operation: |
| 42 | // |
| 43 | // When created, a sparse vector is created (based on size relative to |
| 44 | // the kSparseThreshold) in either Sparse or Dense access mode. |
| 45 | // |
| 46 | // In "Sparse access" mode individual values are stored in a map keyed |
| 47 | // by the index. A "full range" value (if set) defines the value of all |
| 48 | // entries not present in the map. Setting a full range value via |
| 49 | // |
| 50 | // SetRange(range_min, range_max, full_range_value ) |
| 51 | // |
| 52 | // either clears the map (kSetReplaces==true) or prevents further |
| 53 | // updates to the vector (kSetReplaces==false). If the map becomes |
Petr Kraus | 6c4bdce | 2019-08-27 17:35:01 +0200 | [diff] [blame] | 54 | // more than 1/kConversionThreshold (=4) full, the SparseVector is |
| 55 | // converted into "Dense access" mode. Entries are copied from map, |
John Zulauf | f660ad6 | 2019-03-23 07:16:05 -0600 | [diff] [blame] | 56 | // with non-present indices set to the default value (kDefaultValue) |
| 57 | // or the full range value (if present). |
| 58 | // |
| 59 | // In "Dense access" mode, values are stored in a vector the size of |
| 60 | // the valid range indexed by the incoming index value minus range_min_. |
| 61 | // The same upate semantic applies bases on kSetReplaces. |
| 62 | // |
Mark Lobodzinski | 0875f0c | 2019-09-18 14:15:17 -0600 | [diff] [blame] | 63 | // Note that when kSparseThreshold is zero, the map is always in "Dense access" mode. |
John Zulauf | f660ad6 | 2019-03-23 07:16:05 -0600 | [diff] [blame] | 64 | // |
| 65 | // Access: |
| 66 | // |
| 67 | // NOTE all "end" indices (in construction or access) are *exclusive*. |
| 68 | // |
| 69 | // Given the variable semantics and effective compression of Sparse |
| 70 | // access mode, all access is through Get, Set, and SetRange functions |
| 71 | // and a constant iterator. Get return either the value found (using |
Petr Kraus | 6c4bdce | 2019-08-27 17:35:01 +0200 | [diff] [blame] | 72 | // the current access mode) or the kDefaultValue. Set and SetRange |
John Zulauf | f660ad6 | 2019-03-23 07:16:05 -0600 | [diff] [blame] | 73 | // return whether or not state was updated, in order to support dirty |
| 74 | // bit updates for any dependent state. |
| 75 | // |
| 76 | // The iterator ConstIterator provides basic, "by value" access. The |
| 77 | // "by value" nature of the access reflect the compressed nature |
| 78 | // operators *, ++, ==, and != are provided, with the latter two only |
Petr Kraus | 6c4bdce | 2019-08-27 17:35:01 +0200 | [diff] [blame] | 79 | // suitable for comparisons vs. cend. The iterator skips all |
John Zulauf | f660ad6 | 2019-03-23 07:16:05 -0600 | [diff] [blame] | 80 | // kDefaultValue entries in either access mode, returning a std::pair |
| 81 | // containing {IndexType, ValueType}. The multiple access modes give |
| 82 | // the iterator a bit more complexity than is optimal, but hides the |
| 83 | // underlying complexity from the callers. |
| 84 | // |
| 85 | // TODO: Update iterator to use a reference (likely using |
| 86 | // reference_wrapper...) |
| 87 | |
| 88 | template <typename IndexType_, typename T, bool kSetReplaces, T kDefaultValue = T(), size_t kSparseThreshold = 16> |
| 89 | class SparseVector { |
Petr Kraus | 4ed81e3 | 2019-09-02 23:41:19 +0200 | [diff] [blame] | 90 | public: |
John Zulauf | f660ad6 | 2019-03-23 07:16:05 -0600 | [diff] [blame] | 91 | typedef IndexType_ IndexType; |
| 92 | typedef T value_type; |
| 93 | typedef value_type ValueType; |
| 94 | typedef std::unordered_map<IndexType, ValueType> SparseType; |
| 95 | typedef std::vector<ValueType> DenseType; |
| 96 | |
| 97 | SparseVector(IndexType start, IndexType end) |
| 98 | : range_min_(start), range_max_(end), threshold_((end - start) / kConversionThreshold) { |
| 99 | assert(end > start); |
| 100 | Reset(); |
| 101 | } |
| 102 | |
| 103 | // Initial access mode is set based on range size vs. kSparseThreshold. Either sparse_ or dense_ is always set, but only |
| 104 | // ever one at a time |
| 105 | void Reset() { |
| 106 | has_full_range_value_ = false; |
| 107 | full_range_value_ = kDefaultValue; |
| 108 | size_t count = range_max_ - range_min_; |
Petr Kraus | 6c4bdce | 2019-08-27 17:35:01 +0200 | [diff] [blame] | 109 | if (kSparseThreshold > 0 && (count > kSparseThreshold)) { |
John Zulauf | f660ad6 | 2019-03-23 07:16:05 -0600 | [diff] [blame] | 110 | sparse_.reset(new SparseType()); |
| 111 | dense_.reset(); |
| 112 | } else { |
| 113 | sparse_.reset(); |
| 114 | dense_.reset(new DenseType(count, kDefaultValue)); |
| 115 | } |
| 116 | } |
| 117 | |
| 118 | const ValueType &Get(const IndexType index) const { |
| 119 | // Note that here (and similarly below, the 'IsSparse' clause is |
| 120 | // eliminated as dead code in release builds if kSparseThreshold==0 |
| 121 | if (IsSparse()) { |
| 122 | if (!sparse_->empty()) { // Don't attempt lookup in empty map |
| 123 | auto it = sparse_->find(index); |
| 124 | if (it != sparse_->cend()) { |
| 125 | return it->second; |
| 126 | } |
| 127 | } |
| 128 | // If there is a full_range_value, return it, but there isn't a full_range_value_, it's set to kDefaultValue |
| 129 | // so it's still the correct this to return |
| 130 | return full_range_value_; |
| 131 | } else { |
| 132 | // Direct access |
| 133 | assert(dense_.get()); |
| 134 | const ValueType &value = (*dense_)[index - range_min_]; |
| 135 | return value; |
| 136 | } |
| 137 | } |
| 138 | |
| 139 | // Set a indexes value, based on the access mode, update semantics are enforced within the access mode specific function |
| 140 | bool Set(const IndexType index, const ValueType &value) { |
| 141 | bool updated = false; |
| 142 | if (IsSparse()) { |
| 143 | updated = SetSparse(index, value); |
| 144 | } else { |
| 145 | assert(dense_.get()); |
| 146 | updated = SetDense(index, value); |
| 147 | } |
| 148 | return updated; |
| 149 | } |
| 150 | |
| 151 | // Set a range of values based on access mode, with some update semantics applied a the range level |
| 152 | bool SetRange(const IndexType start, IndexType end, ValueType value) { |
| 153 | bool updated = false; |
| 154 | if (IsSparse()) { |
| 155 | if (!kSetReplaces && HasFullRange()) return false; // We have full coverage, we can change this no more |
| 156 | |
| 157 | bool is_full_range = IsFullRange(start, end); |
| 158 | if (kSetReplaces && is_full_range) { |
| 159 | updated = value != full_range_value_; |
| 160 | full_range_value_ = value; |
| 161 | if (HasSparseSubranges()) { |
| 162 | updated = true; |
| 163 | sparse_->clear(); // full range replaces all subranges |
| 164 | } |
| 165 | // has_full_range_value_ state of the full_range_value_ to avoid ValueType comparisons |
| 166 | has_full_range_value_ = value != kDefaultValue; |
| 167 | } else if (!kSetReplaces && (value != kDefaultValue) && is_full_range && !HasFullRange()) { |
| 168 | // With the update only invalid semantics, the value becomes the fallback, and will prevent other updates |
| 169 | full_range_value_ = value; |
| 170 | has_full_range_value_ = true; |
| 171 | updated = true; |
| 172 | // Clean up the sparse map a bit |
| 173 | for (auto it = sparse_->begin(); it != sparse_->end();) { // no increment clause because of erase below |
| 174 | if (it->second == value) { |
| 175 | it = sparse_->erase(it); // remove redundant entries |
| 176 | } else { |
| 177 | ++it; |
| 178 | } |
| 179 | } |
| 180 | } else { |
| 181 | for (IndexType index = start; index < end; ++index) { |
| 182 | // NOTE: We can't use SetSparse here, because this may be converted to dense access mid update |
| 183 | updated |= Set(index, value); |
| 184 | } |
| 185 | } |
| 186 | } else { |
| 187 | // Note that "Dense Access" does away with the full_range_value_ logic, storing empty entries using kDefaultValue |
| 188 | assert(dense_); |
| 189 | for (IndexType index = start; index < end; ++index) { |
| 190 | updated = SetDense(index, value); |
| 191 | } |
| 192 | } |
| 193 | return updated; |
| 194 | } |
| 195 | |
| 196 | // Set only the non-default values from another sparse vector |
| 197 | bool Merge(const SparseVector &from) { |
| 198 | // Must not set from Sparse arracy with larger bounds... |
| 199 | assert((range_min_ <= from.range_min_) && (range_max_ >= from.range_max_)); |
| 200 | bool updated = false; |
| 201 | if (from.IsSparse()) { |
| 202 | if (from.HasFullRange() && !from.HasSparseSubranges()) { |
| 203 | // Short cut to copy a full range if that's all we have |
| 204 | updated |= SetRange(from.range_min_, from.range_max_, from.full_range_value_); |
| 205 | } else { |
| 206 | // Have to do it the complete (potentially) slow way |
| 207 | // TODO add sorted keys to iterator to reduce hash lookups |
| 208 | for (auto it = from.cbegin(); it != from.cend(); ++it) { |
| 209 | const IndexType index = (*it).first; |
| 210 | const ValueType &value = (*it).second; |
| 211 | Set(index, value); |
| 212 | } |
| 213 | } |
| 214 | } else { |
| 215 | assert(from.dense_); |
| 216 | DenseType &ray = *from.dense_; |
| 217 | for (IndexType entry = from.range_min_; entry < from.range_max_; ++entry) { |
| 218 | IndexType index = entry - from.range_min_; |
| 219 | if (ray[index] != kDefaultValue) { |
| 220 | updated |= Set(entry, ray[index]); |
| 221 | } |
| 222 | } |
| 223 | } |
| 224 | return updated; |
| 225 | } |
| 226 | |
| 227 | friend class ConstIterator; |
| 228 | class ConstIterator { |
Petr Kraus | 4ed81e3 | 2019-09-02 23:41:19 +0200 | [diff] [blame] | 229 | public: |
John Zulauf | f660ad6 | 2019-03-23 07:16:05 -0600 | [diff] [blame] | 230 | using SparseType = typename SparseVector::SparseType; |
| 231 | using SparseIterator = typename SparseType::const_iterator; |
| 232 | using IndexType = typename SparseVector::IndexType; |
| 233 | using ValueType = typename SparseVector::ValueType; |
| 234 | using IteratorValueType = std::pair<IndexType, ValueType>; |
| 235 | const IteratorValueType &operator*() const { return current_value_; } |
| 236 | |
| 237 | ConstIterator &operator++() { |
| 238 | if (delegated_) { // implies sparse |
| 239 | ++it_sparse_; |
| 240 | if (it_sparse_ == vec_->sparse_->cend()) { |
| 241 | the_end_ = true; |
| 242 | current_value_.first = vec_->range_max_; |
| 243 | current_value_.second = SparseVector::DefaultValue(); |
| 244 | } else { |
| 245 | current_value_.first = it_sparse_->first; |
| 246 | current_value_.second = it_sparse_->second; |
| 247 | } |
| 248 | } else { |
| 249 | index_++; |
| 250 | SetCurrentValue(); |
| 251 | } |
| 252 | return *this; |
| 253 | } |
| 254 | bool operator!=(const ConstIterator &rhs) const { |
| 255 | return (the_end_ != rhs.the_end_); // Just good enough for cend checks |
| 256 | } |
| 257 | |
| 258 | bool operator==(const ConstIterator &rhs) const { |
| 259 | return (the_end_ == rhs.the_end_); // Just good enough for cend checks |
| 260 | } |
| 261 | |
| 262 | // The iterator has two modes: |
| 263 | // delegated: |
| 264 | // where we are in sparse access mode and have no full_range_value |
| 265 | // and thus can delegate our iteration to underlying map |
| 266 | // non-delegated: |
| 267 | // either dense mode or we have a full range value and thus |
| 268 | // must iterate over the whole range |
| 269 | ConstIterator(const SparseVector &vec) : vec_(&vec) { |
| 270 | if (!vec_->IsSparse() || vec_->HasFullRange()) { |
| 271 | // Must iterated over entire ranges skipping (in the case of dense access), invalid entries |
| 272 | delegated_ = false; |
| 273 | index_ = vec_->range_min_; |
| 274 | SetCurrentValue(); // Skips invalid and sets the_end_ |
| 275 | } else if (vec_->HasSparseSubranges()) { |
| 276 | // The subranges store the non-default values... and their is no full range value |
| 277 | delegated_ = true; |
| 278 | it_sparse_ = vec_->sparse_->cbegin(); |
| 279 | current_value_.first = it_sparse_->first; |
| 280 | current_value_.second = it_sparse_->second; |
| 281 | the_end_ = false; // the sparse map is non-empty (per HasSparseSubranges() above) |
| 282 | } else { |
| 283 | // Sparse, but with no subranges |
| 284 | the_end_ = true; |
| 285 | } |
| 286 | } |
| 287 | |
| 288 | ConstIterator() : vec_(nullptr), the_end_(true) {} |
| 289 | |
Petr Kraus | 4ed81e3 | 2019-09-02 23:41:19 +0200 | [diff] [blame] | 290 | protected: |
John Zulauf | f660ad6 | 2019-03-23 07:16:05 -0600 | [diff] [blame] | 291 | const SparseVector *vec_; |
| 292 | bool the_end_; |
| 293 | SparseIterator it_sparse_; |
| 294 | bool delegated_; |
| 295 | IndexType index_; |
| 296 | ValueType value_; |
| 297 | |
| 298 | IteratorValueType current_value_; |
| 299 | |
| 300 | // in the non-delegated case we use normal accessors and skip default values. |
| 301 | void SetCurrentValue() { |
| 302 | the_end_ = true; |
| 303 | while (index_ < vec_->range_max_) { |
| 304 | value_ = vec_->Get(index_); |
| 305 | if (value_ != SparseVector::DefaultValue()) { |
| 306 | the_end_ = false; |
| 307 | current_value_ = IteratorValueType(index_, value_); |
| 308 | break; |
| 309 | } |
| 310 | index_++; |
| 311 | } |
| 312 | } |
| 313 | }; |
| 314 | typedef ConstIterator const_iterator; |
| 315 | |
| 316 | ConstIterator cbegin() const { return ConstIterator(*this); } |
| 317 | ConstIterator cend() const { return ConstIterator(); } |
| 318 | |
| 319 | IndexType RangeMax() const { return range_max_; } |
| 320 | IndexType RangeMin() const { return range_min_; } |
| 321 | |
| 322 | static const unsigned kConversionThreshold = 4; |
| 323 | const IndexType range_min_; // exclusive |
| 324 | const IndexType range_max_; // exclusive |
| 325 | const IndexType threshold_; // exclusive |
| 326 | |
| 327 | // Data for sparse mode |
| 328 | // We have a short cut for full range values when in sparse mode |
| 329 | bool has_full_range_value_; |
| 330 | ValueType full_range_value_; |
| 331 | std::unique_ptr<SparseType> sparse_; |
| 332 | |
| 333 | // Data for dense mode |
| 334 | std::unique_ptr<DenseType> dense_; |
| 335 | |
| 336 | static const ValueType &DefaultValue() { |
| 337 | static ValueType value = kDefaultValue; |
| 338 | return value; |
| 339 | } |
| 340 | // Note that IsSparse is compile-time reducible if kSparseThreshold is zero... |
Petr Kraus | 6c4bdce | 2019-08-27 17:35:01 +0200 | [diff] [blame] | 341 | inline bool IsSparse() const { return kSparseThreshold > 0 && sparse_.get(); } |
John Zulauf | f660ad6 | 2019-03-23 07:16:05 -0600 | [diff] [blame] | 342 | bool IsFullRange(IndexType start, IndexType end) const { return (start == range_min_) && (end == range_max_); } |
| 343 | bool IsFullRangeValue(const ValueType &value) const { return has_full_range_value_ && (value == full_range_value_); } |
| 344 | bool HasFullRange() const { return IsSparse() && has_full_range_value_; } |
| 345 | bool HasSparseSubranges() const { return IsSparse() && !sparse_->empty(); } |
| 346 | |
| 347 | // This is called unconditionally, to encapsulate the conversion criteria and logic here |
| 348 | void SparseToDenseConversion() { |
| 349 | // If we're using more threshold of the sparse range, convert to dense_ |
| 350 | if (IsSparse() && (sparse_->size() > threshold_)) { |
| 351 | ValueType default_value = HasFullRange() ? full_range_value_ : kDefaultValue; |
| 352 | dense_.reset(new DenseType((range_max_ - range_min_), default_value)); |
| 353 | DenseType &ray = *dense_; |
John Zulauf | 79f0658 | 2021-02-27 18:38:39 -0700 | [diff] [blame^] | 354 | for (const auto &item : *sparse_) { |
John Zulauf | f660ad6 | 2019-03-23 07:16:05 -0600 | [diff] [blame] | 355 | ray[item.first - range_min_] = item.second; |
| 356 | } |
| 357 | sparse_.reset(); |
| 358 | has_full_range_value_ = false; |
| 359 | } |
| 360 | } |
| 361 | |
| 362 | // Dense access mode setter with update semantics implemented |
| 363 | bool SetDense(IndexType index, const ValueType &value) { |
| 364 | bool updated = false; |
| 365 | ValueType ¤t_value = (*dense_)[index - range_min_]; |
| 366 | if ((kSetReplaces || current_value == kDefaultValue) && (value != current_value)) { |
| 367 | current_value = value; |
| 368 | updated = true; |
| 369 | } |
| 370 | return updated; |
| 371 | } |
| 372 | |
| 373 | // Sparse access mode setter with update full range and update semantics implemented |
| 374 | bool SetSparse(IndexType index, const ValueType &value) { |
| 375 | if (!kSetReplaces && HasFullRange()) { |
| 376 | return false; // We have full coverage, we can change this no more |
| 377 | } |
| 378 | |
| 379 | if (kSetReplaces && IsFullRangeValue(value) && HasSparseSubranges()) { |
| 380 | auto erasure = sparse_->erase(index); // Remove duplicate record from map |
| 381 | return erasure > 0; |
| 382 | } |
| 383 | |
| 384 | // Use insert to reduce the number of hash lookups |
| 385 | auto map_pair = std::make_pair(index, value); |
| 386 | auto insert_pair = sparse_->insert(map_pair); |
| 387 | auto &it = insert_pair.first; // use references to avoid nested pair accesses |
| 388 | const bool inserted = insert_pair.second; |
| 389 | bool updated = false; |
| 390 | if (inserted) { |
| 391 | updated = true; |
| 392 | SparseToDenseConversion(); |
| 393 | } else if (kSetReplaces && value != it->second) { |
| 394 | // Only replace value if semantics allow it and it has changed. |
| 395 | it->second = value; |
| 396 | updated = true; |
| 397 | } |
| 398 | return updated; |
| 399 | } |
| 400 | }; |
| 401 | |
| 402 | } // namespace sparse_container |
| 403 | #endif |