blob: 19f6a2aa7cad1ae06f25f100ca5c707cce5d4dce [file] [log] [blame]
John Zulauff660ad62019-03-23 07:16:05 -06001/* Copyright (c) 2019 The Khronos Group Inc.
2 * Copyright (c) 2019 Valve Corporation
3 * Copyright (c) 2019 LunarG, Inc.
4 * Copyright (C) 2019 Google Inc.
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * John Zulauf <jzulauf@lunarg.com>
19 *
20 */
21#ifndef SPARSE_CONTAINERS_H_
22#define SPARSE_CONTAINERS_H_
23#define NOMINMAX
24#include <cassert>
25#include <memory>
26#include <unordered_map>
27#include <vector>
28
29namespace sparse_container {
30// SparseVector:
31//
32// Defines a sparse single-dimensional container which is targeted for three distinct use cases
33// 1) Large range of indices sparsely populated ("Sparse access" below)
34// 2) Large range of indices where all values are the same ("Sparse access" below)
35// 3) Large range of values densely populated (more that 1/4 full) ("Dense access" below)
36// 4) Small range of values where direct access is most efficient ("Dense access" below)
37//
38// To update semantics are supported bases on kSetReplaces:
39// true -- updates to already set (valid) indices replace current value
40// false -- updates to already set (valid) indicies are ignored.
41//
42// Theory of operation:
43//
44// When created, a sparse vector is created (based on size relative to
45// the kSparseThreshold) in either Sparse or Dense access mode.
46//
47// In "Sparse access" mode individual values are stored in a map keyed
48// by the index. A "full range" value (if set) defines the value of all
49// entries not present in the map. Setting a full range value via
50//
51// SetRange(range_min, range_max, full_range_value )
52//
53// either clears the map (kSetReplaces==true) or prevents further
54// updates to the vector (kSetReplaces==false). If the map becomes
55// more than // 1/kConversionThreshold (4) full, the SparseVector is
56// converted into "Dense access" mode. Entries are copied from map,
57// with non-present indices set to the default value (kDefaultValue)
58// or the full range value (if present).
59//
60// In "Dense access" mode, values are stored in a vector the size of
61// the valid range indexed by the incoming index value minus range_min_.
62// The same upate semantic applies bases on kSetReplaces.
63//
64// Note that when kSparseThreshold
65//
66// Access:
67//
68// NOTE all "end" indices (in construction or access) are *exclusive*.
69//
70// Given the variable semantics and effective compression of Sparse
71// access mode, all access is through Get, Set, and SetRange functions
72// and a constant iterator. Get return either the value found (using
73// the current access mode) or the kDefaultValue. Set and SetRange
74// return whether or not state was updated, in order to support dirty
75// bit updates for any dependent state.
76//
77// The iterator ConstIterator provides basic, "by value" access. The
78// "by value" nature of the access reflect the compressed nature
79// operators *, ++, ==, and != are provided, with the latter two only
80// suitable for comparisons vs. cend. The iterator skips all
81// kDefaultValue entries in either access mode, returning a std::pair
82// containing {IndexType, ValueType}. The multiple access modes give
83// the iterator a bit more complexity than is optimal, but hides the
84// underlying complexity from the callers.
85//
86// TODO: Update iterator to use a reference (likely using
87// reference_wrapper...)
88
89template <typename IndexType_, typename T, bool kSetReplaces, T kDefaultValue = T(), size_t kSparseThreshold = 16>
90class SparseVector {
91 public:
92 typedef IndexType_ IndexType;
93 typedef T value_type;
94 typedef value_type ValueType;
95 typedef std::unordered_map<IndexType, ValueType> SparseType;
96 typedef std::vector<ValueType> DenseType;
97
98 SparseVector(IndexType start, IndexType end)
99 : range_min_(start), range_max_(end), threshold_((end - start) / kConversionThreshold) {
100 assert(end > start);
101 Reset();
102 }
103
104 // Initial access mode is set based on range size vs. kSparseThreshold. Either sparse_ or dense_ is always set, but only
105 // ever one at a time
106 void Reset() {
107 has_full_range_value_ = false;
108 full_range_value_ = kDefaultValue;
109 size_t count = range_max_ - range_min_;
110 if (kSparseThreshold && (count > kSparseThreshold)) {
111 sparse_.reset(new SparseType());
112 dense_.reset();
113 } else {
114 sparse_.reset();
115 dense_.reset(new DenseType(count, kDefaultValue));
116 }
117 }
118
119 const ValueType &Get(const IndexType index) const {
120 // Note that here (and similarly below, the 'IsSparse' clause is
121 // eliminated as dead code in release builds if kSparseThreshold==0
122 if (IsSparse()) {
123 if (!sparse_->empty()) { // Don't attempt lookup in empty map
124 auto it = sparse_->find(index);
125 if (it != sparse_->cend()) {
126 return it->second;
127 }
128 }
129 // If there is a full_range_value, return it, but there isn't a full_range_value_, it's set to kDefaultValue
130 // so it's still the correct this to return
131 return full_range_value_;
132 } else {
133 // Direct access
134 assert(dense_.get());
135 const ValueType &value = (*dense_)[index - range_min_];
136 return value;
137 }
138 }
139
140 // Set a indexes value, based on the access mode, update semantics are enforced within the access mode specific function
141 bool Set(const IndexType index, const ValueType &value) {
142 bool updated = false;
143 if (IsSparse()) {
144 updated = SetSparse(index, value);
145 } else {
146 assert(dense_.get());
147 updated = SetDense(index, value);
148 }
149 return updated;
150 }
151
152 // Set a range of values based on access mode, with some update semantics applied a the range level
153 bool SetRange(const IndexType start, IndexType end, ValueType value) {
154 bool updated = false;
155 if (IsSparse()) {
156 if (!kSetReplaces && HasFullRange()) return false; // We have full coverage, we can change this no more
157
158 bool is_full_range = IsFullRange(start, end);
159 if (kSetReplaces && is_full_range) {
160 updated = value != full_range_value_;
161 full_range_value_ = value;
162 if (HasSparseSubranges()) {
163 updated = true;
164 sparse_->clear(); // full range replaces all subranges
165 }
166 // has_full_range_value_ state of the full_range_value_ to avoid ValueType comparisons
167 has_full_range_value_ = value != kDefaultValue;
168 } else if (!kSetReplaces && (value != kDefaultValue) && is_full_range && !HasFullRange()) {
169 // With the update only invalid semantics, the value becomes the fallback, and will prevent other updates
170 full_range_value_ = value;
171 has_full_range_value_ = true;
172 updated = true;
173 // Clean up the sparse map a bit
174 for (auto it = sparse_->begin(); it != sparse_->end();) { // no increment clause because of erase below
175 if (it->second == value) {
176 it = sparse_->erase(it); // remove redundant entries
177 } else {
178 ++it;
179 }
180 }
181 } else {
182 for (IndexType index = start; index < end; ++index) {
183 // NOTE: We can't use SetSparse here, because this may be converted to dense access mid update
184 updated |= Set(index, value);
185 }
186 }
187 } else {
188 // Note that "Dense Access" does away with the full_range_value_ logic, storing empty entries using kDefaultValue
189 assert(dense_);
190 for (IndexType index = start; index < end; ++index) {
191 updated = SetDense(index, value);
192 }
193 }
194 return updated;
195 }
196
197 // Set only the non-default values from another sparse vector
198 bool Merge(const SparseVector &from) {
199 // Must not set from Sparse arracy with larger bounds...
200 assert((range_min_ <= from.range_min_) && (range_max_ >= from.range_max_));
201 bool updated = false;
202 if (from.IsSparse()) {
203 if (from.HasFullRange() && !from.HasSparseSubranges()) {
204 // Short cut to copy a full range if that's all we have
205 updated |= SetRange(from.range_min_, from.range_max_, from.full_range_value_);
206 } else {
207 // Have to do it the complete (potentially) slow way
208 // TODO add sorted keys to iterator to reduce hash lookups
209 for (auto it = from.cbegin(); it != from.cend(); ++it) {
210 const IndexType index = (*it).first;
211 const ValueType &value = (*it).second;
212 Set(index, value);
213 }
214 }
215 } else {
216 assert(from.dense_);
217 DenseType &ray = *from.dense_;
218 for (IndexType entry = from.range_min_; entry < from.range_max_; ++entry) {
219 IndexType index = entry - from.range_min_;
220 if (ray[index] != kDefaultValue) {
221 updated |= Set(entry, ray[index]);
222 }
223 }
224 }
225 return updated;
226 }
227
228 friend class ConstIterator;
229 class ConstIterator {
230 public:
231 using SparseType = typename SparseVector::SparseType;
232 using SparseIterator = typename SparseType::const_iterator;
233 using IndexType = typename SparseVector::IndexType;
234 using ValueType = typename SparseVector::ValueType;
235 using IteratorValueType = std::pair<IndexType, ValueType>;
236 const IteratorValueType &operator*() const { return current_value_; }
237
238 ConstIterator &operator++() {
239 if (delegated_) { // implies sparse
240 ++it_sparse_;
241 if (it_sparse_ == vec_->sparse_->cend()) {
242 the_end_ = true;
243 current_value_.first = vec_->range_max_;
244 current_value_.second = SparseVector::DefaultValue();
245 } else {
246 current_value_.first = it_sparse_->first;
247 current_value_.second = it_sparse_->second;
248 }
249 } else {
250 index_++;
251 SetCurrentValue();
252 }
253 return *this;
254 }
255 bool operator!=(const ConstIterator &rhs) const {
256 return (the_end_ != rhs.the_end_); // Just good enough for cend checks
257 }
258
259 bool operator==(const ConstIterator &rhs) const {
260 return (the_end_ == rhs.the_end_); // Just good enough for cend checks
261 }
262
263 // The iterator has two modes:
264 // delegated:
265 // where we are in sparse access mode and have no full_range_value
266 // and thus can delegate our iteration to underlying map
267 // non-delegated:
268 // either dense mode or we have a full range value and thus
269 // must iterate over the whole range
270 ConstIterator(const SparseVector &vec) : vec_(&vec) {
271 if (!vec_->IsSparse() || vec_->HasFullRange()) {
272 // Must iterated over entire ranges skipping (in the case of dense access), invalid entries
273 delegated_ = false;
274 index_ = vec_->range_min_;
275 SetCurrentValue(); // Skips invalid and sets the_end_
276 } else if (vec_->HasSparseSubranges()) {
277 // The subranges store the non-default values... and their is no full range value
278 delegated_ = true;
279 it_sparse_ = vec_->sparse_->cbegin();
280 current_value_.first = it_sparse_->first;
281 current_value_.second = it_sparse_->second;
282 the_end_ = false; // the sparse map is non-empty (per HasSparseSubranges() above)
283 } else {
284 // Sparse, but with no subranges
285 the_end_ = true;
286 }
287 }
288
289 ConstIterator() : vec_(nullptr), the_end_(true) {}
290
291 protected:
292 const SparseVector *vec_;
293 bool the_end_;
294 SparseIterator it_sparse_;
295 bool delegated_;
296 IndexType index_;
297 ValueType value_;
298
299 IteratorValueType current_value_;
300
301 // in the non-delegated case we use normal accessors and skip default values.
302 void SetCurrentValue() {
303 the_end_ = true;
304 while (index_ < vec_->range_max_) {
305 value_ = vec_->Get(index_);
306 if (value_ != SparseVector::DefaultValue()) {
307 the_end_ = false;
308 current_value_ = IteratorValueType(index_, value_);
309 break;
310 }
311 index_++;
312 }
313 }
314 };
315 typedef ConstIterator const_iterator;
316
317 ConstIterator cbegin() const { return ConstIterator(*this); }
318 ConstIterator cend() const { return ConstIterator(); }
319
320 IndexType RangeMax() const { return range_max_; }
321 IndexType RangeMin() const { return range_min_; }
322
323 static const unsigned kConversionThreshold = 4;
324 const IndexType range_min_; // exclusive
325 const IndexType range_max_; // exclusive
326 const IndexType threshold_; // exclusive
327
328 // Data for sparse mode
329 // We have a short cut for full range values when in sparse mode
330 bool has_full_range_value_;
331 ValueType full_range_value_;
332 std::unique_ptr<SparseType> sparse_;
333
334 // Data for dense mode
335 std::unique_ptr<DenseType> dense_;
336
337 static const ValueType &DefaultValue() {
338 static ValueType value = kDefaultValue;
339 return value;
340 }
341 // Note that IsSparse is compile-time reducible if kSparseThreshold is zero...
342 inline bool IsSparse() const { return kSparseThreshold && sparse_.get(); }
343 bool IsFullRange(IndexType start, IndexType end) const { return (start == range_min_) && (end == range_max_); }
344 bool IsFullRangeValue(const ValueType &value) const { return has_full_range_value_ && (value == full_range_value_); }
345 bool HasFullRange() const { return IsSparse() && has_full_range_value_; }
346 bool HasSparseSubranges() const { return IsSparse() && !sparse_->empty(); }
347
348 // This is called unconditionally, to encapsulate the conversion criteria and logic here
349 void SparseToDenseConversion() {
350 // If we're using more threshold of the sparse range, convert to dense_
351 if (IsSparse() && (sparse_->size() > threshold_)) {
352 ValueType default_value = HasFullRange() ? full_range_value_ : kDefaultValue;
353 dense_.reset(new DenseType((range_max_ - range_min_), default_value));
354 DenseType &ray = *dense_;
355 for (auto const &item : *sparse_) {
356 ray[item.first - range_min_] = item.second;
357 }
358 sparse_.reset();
359 has_full_range_value_ = false;
360 }
361 }
362
363 // Dense access mode setter with update semantics implemented
364 bool SetDense(IndexType index, const ValueType &value) {
365 bool updated = false;
366 ValueType &current_value = (*dense_)[index - range_min_];
367 if ((kSetReplaces || current_value == kDefaultValue) && (value != current_value)) {
368 current_value = value;
369 updated = true;
370 }
371 return updated;
372 }
373
374 // Sparse access mode setter with update full range and update semantics implemented
375 bool SetSparse(IndexType index, const ValueType &value) {
376 if (!kSetReplaces && HasFullRange()) {
377 return false; // We have full coverage, we can change this no more
378 }
379
380 if (kSetReplaces && IsFullRangeValue(value) && HasSparseSubranges()) {
381 auto erasure = sparse_->erase(index); // Remove duplicate record from map
382 return erasure > 0;
383 }
384
385 // Use insert to reduce the number of hash lookups
386 auto map_pair = std::make_pair(index, value);
387 auto insert_pair = sparse_->insert(map_pair);
388 auto &it = insert_pair.first; // use references to avoid nested pair accesses
389 const bool inserted = insert_pair.second;
390 bool updated = false;
391 if (inserted) {
392 updated = true;
393 SparseToDenseConversion();
394 } else if (kSetReplaces && value != it->second) {
395 // Only replace value if semantics allow it and it has changed.
396 it->second = value;
397 updated = true;
398 }
399 return updated;
400 }
401};
402
403} // namespace sparse_container
404#endif