blob: e37751300df584100ca4f37ebd93e05874c4f615 [file] [log] [blame]
jorlow@chromium.orgf67e15e2011-03-18 22:37:00 +00001// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file. See the AUTHORS file for names of contributors.
4//
5// The representation of a DBImpl consists of a set of Versions. The
6// newest version is called "current". Older versions may be kept
7// around to provide a consistent view to live iterators.
8//
9// Each Version keeps track of a set of Table files per level. The
10// entire set of versions is maintained in a VersionSet.
11//
12// Version,VersionSet are thread-compatible, but require external
13// synchronization on all accesses.
14
15#ifndef STORAGE_LEVELDB_DB_VERSION_SET_H_
16#define STORAGE_LEVELDB_DB_VERSION_SET_H_
17
18#include <map>
19#include <set>
20#include <vector>
21#include "db/dbformat.h"
22#include "db/version_edit.h"
23#include "port/port.h"
24
25namespace leveldb {
26
jorlow@chromium.orgf67e15e2011-03-18 22:37:00 +000027namespace log { class Writer; }
28
29class Compaction;
30class Iterator;
31class MemTable;
32class TableBuilder;
33class TableCache;
34class Version;
35class VersionSet;
36class WritableFile;
37
38class Version {
39 public:
40 // Append to *iters a sequence of iterators that will
41 // yield the contents of this Version when merged together.
42 // REQUIRES: This version has been saved (see VersionSet::SaveTo)
43 void AddIterators(const ReadOptions&, std::vector<Iterator*>* iters);
44
45 // Reference count management (so Versions do not disappear out from
46 // under live iterators)
47 void Ref();
48 void Unref();
49
50 // Return a human readable string that describes this version's contents.
51 std::string DebugString() const;
52
53 private:
54 friend class Compaction;
55 friend class VersionSet;
56
57 class LevelFileNumIterator;
58 Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const;
59
60 VersionSet* vset_; // VersionSet to which this Version belongs
61 Version* next_; // Next version in linked list
62 int refs_; // Number of live refs to this version
63 MemTable* cleanup_mem_; // NULL, or table to delete when version dropped
64
65 // List of files per level
66 std::vector<FileMetaData*> files_[config::kNumLevels];
67
68 // Level that should be compacted next and its compaction score.
69 // Score < 1 means compaction is not strictly needed. These fields
70 // are initialized by Finalize().
71 double compaction_score_;
72 int compaction_level_;
73
74 explicit Version(VersionSet* vset)
75 : vset_(vset), next_(NULL), refs_(0),
76 cleanup_mem_(NULL),
77 compaction_score_(-1),
78 compaction_level_(-1) {
79 }
80
81 ~Version();
82
83 // No copying allowed
84 Version(const Version&);
85 void operator=(const Version&);
86};
87
88class VersionSet {
89 public:
90 VersionSet(const std::string& dbname,
91 const Options* options,
92 TableCache* table_cache,
93 const InternalKeyComparator*);
94 ~VersionSet();
95
96 // Apply *edit to the current version to form a new descriptor that
97 // is both saved to persistent state and installed as the new
98 // current version. Iff Apply() returns OK, arrange to delete
99 // cleanup_mem (if cleanup_mem != NULL) when it is no longer needed
100 // by older versions.
101 Status LogAndApply(VersionEdit* edit, MemTable* cleanup_mem);
102
103 // Recover the last saved descriptor from persistent storage.
dgrogan@chromium.orgf779e7a2011-04-12 19:38:58 +0000104 Status Recover();
jorlow@chromium.orgf67e15e2011-03-18 22:37:00 +0000105
106 // Save current contents to *log
107 Status WriteSnapshot(log::Writer* log);
108
109 // Return the current version.
110 Version* current() const { return current_; }
111
112 // Return the current manifest file number
113 uint64_t ManifestFileNumber() const { return manifest_file_number_; }
114
115 // Allocate and return a new file number
116 uint64_t NewFileNumber() { return next_file_number_++; }
117
118 // Return the number of Table files at the specified level.
119 int NumLevelFiles(int level) const;
120
dgrogan@chromium.orgf779e7a2011-04-12 19:38:58 +0000121 // Return the combined file size of all files at the specified level.
122 int64_t NumLevelBytes(int level) const;
123
124 // Return the last sequence number.
125 uint64_t LastSequence() const { return last_sequence_; }
126
127 // Set the last sequence number to s.
128 void SetLastSequence(uint64_t s) {
129 assert(s >= last_sequence_);
130 last_sequence_ = s;
131 }
132
133 // Return the current log file number.
134 uint64_t LogNumber() const { return log_number_; }
135
136 // Return the log file number for the log file that is currently
137 // being compacted, or zero if there is no such log file.
138 uint64_t PrevLogNumber() const { return prev_log_number_; }
139
jorlow@chromium.orgf67e15e2011-03-18 22:37:00 +0000140 // Pick level and inputs for a new compaction.
141 // Returns NULL if there is no compaction to be done.
142 // Otherwise returns a pointer to a heap-allocated object that
143 // describes the compaction. Caller should delete the result.
144 Compaction* PickCompaction();
145
146 // Return a compaction object for compacting the range [begin,end] in
147 // the specified level. Returns NULL if there is nothing in that
148 // level that overlaps the specified range. Caller should delete
149 // the result.
150 Compaction* CompactRange(
151 int level,
152 const InternalKey& begin,
153 const InternalKey& end);
154
jorlow@chromium.org13b72af2011-03-22 18:32:49 +0000155 // Return the maximum overlapping data (in bytes) at next level for any
156 // file at a level >= 1.
jorlow@chromium.org8303bb12011-03-22 23:24:02 +0000157 int64_t MaxNextLevelOverlappingBytes();
jorlow@chromium.org13b72af2011-03-22 18:32:49 +0000158
jorlow@chromium.orgf67e15e2011-03-18 22:37:00 +0000159 // Create an iterator that reads over the compaction inputs for "*c".
160 // The caller should delete the iterator when no longer needed.
161 Iterator* MakeInputIterator(Compaction* c);
162
163 // Returns true iff some level needs a compaction.
164 bool NeedsCompaction() const { return current_->compaction_score_ >= 1; }
165
166 // Add all files listed in any live version to *live.
167 // May also mutate some internal state.
168 void AddLiveFiles(std::set<uint64_t>* live);
169
170 // Return the approximate offset in the database of the data for
171 // "key" as of version "v".
172 uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key);
173
jorlow@chromium.orgf67e15e2011-03-18 22:37:00 +0000174 private:
175 class Builder;
176
177 friend class Compaction;
178 friend class Version;
179
180 Status Finalize(Version* v);
181
182 // Delete any old versions that are no longer needed.
183 void MaybeDeleteOldVersions();
184
185 struct BySmallestKey;
186 Status SortLevel(Version* v, uint64_t level);
187
188 void GetOverlappingInputs(
189 int level,
190 const InternalKey& begin,
191 const InternalKey& end,
192 std::vector<FileMetaData*>* inputs);
193
194 void GetRange(const std::vector<FileMetaData*>& inputs,
195 InternalKey* smallest,
196 InternalKey* largest);
197
jorlow@chromium.org13b72af2011-03-22 18:32:49 +0000198 void GetRange2(const std::vector<FileMetaData*>& inputs1,
199 const std::vector<FileMetaData*>& inputs2,
200 InternalKey* smallest,
201 InternalKey* largest);
202
203 void SetupOtherInputs(Compaction* c);
204
jorlow@chromium.orgf67e15e2011-03-18 22:37:00 +0000205 Env* const env_;
206 const std::string dbname_;
207 const Options* const options_;
208 TableCache* const table_cache_;
209 const InternalKeyComparator icmp_;
210 uint64_t next_file_number_;
211 uint64_t manifest_file_number_;
dgrogan@chromium.orgf779e7a2011-04-12 19:38:58 +0000212 uint64_t last_sequence_;
213 uint64_t log_number_;
214 uint64_t prev_log_number_; // 0 or backing store for memtable being compacted
jorlow@chromium.orgf67e15e2011-03-18 22:37:00 +0000215
216 // Opened lazily
217 WritableFile* descriptor_file_;
218 log::Writer* descriptor_log_;
219
220 // Versions are kept in a singly linked list that is never empty
221 Version* current_; // Pointer to the last (newest) list entry
222 Version* oldest_; // Pointer to the first (oldest) list entry
223
jorlow@chromium.orgf67e15e2011-03-18 22:37:00 +0000224 // Per-level key at which the next compaction at that level should start.
225 // Either an empty string, or a valid InternalKey.
226 std::string compact_pointer_[config::kNumLevels];
227
228 // No copying allowed
229 VersionSet(const VersionSet&);
230 void operator=(const VersionSet&);
231};
232
233// A Compaction encapsulates information about a compaction.
234class Compaction {
235 public:
236 ~Compaction();
237
238 // Return the level that is being compacted. Inputs from "level"
239 // and "level+1" will be merged to produce a set of "level+1" files.
240 int level() const { return level_; }
241
242 // Return the object that holds the edits to the descriptor done
243 // by this compaction.
244 VersionEdit* edit() { return &edit_; }
245
246 // "which" must be either 0 or 1
247 int num_input_files(int which) const { return inputs_[which].size(); }
248
249 // Return the ith input file at "level()+which" ("which" must be 0 or 1).
250 FileMetaData* input(int which, int i) const { return inputs_[which][i]; }
251
252 // Maximum size of files to build during this compaction.
253 uint64_t MaxOutputFileSize() const { return max_output_file_size_; }
254
jorlow@chromium.org13b72af2011-03-22 18:32:49 +0000255 // Is this a trivial compaction that can be implemented by just
256 // moving a single input file to the next level (no merging or splitting)
257 bool IsTrivialMove() const;
258
jorlow@chromium.orgf67e15e2011-03-18 22:37:00 +0000259 // Add all inputs to this compaction as delete operations to *edit.
260 void AddInputDeletions(VersionEdit* edit);
261
262 // Returns true if the information we have available guarantees that
263 // the compaction is producing data in "level+1" for which no data exists
264 // in levels greater than "level+1".
265 bool IsBaseLevelForKey(const Slice& user_key);
266
jorlow@chromium.org13b72af2011-03-22 18:32:49 +0000267 // Returns true iff we should stop building the current output
268 // before processing "key".
269 bool ShouldStopBefore(const InternalKey& key);
270
jorlow@chromium.orgf67e15e2011-03-18 22:37:00 +0000271 // Release the input version for the compaction, once the compaction
272 // is successful.
273 void ReleaseInputs();
274
275 private:
276 friend class Version;
277 friend class VersionSet;
278
279 explicit Compaction(int level);
280
281 int level_;
282 uint64_t max_output_file_size_;
283 Version* input_version_;
284 VersionEdit edit_;
285
286 // Each compaction reads inputs from "level_" and "level_+1"
287 std::vector<FileMetaData*> inputs_[2]; // The two sets of inputs
288
jorlow@chromium.org13b72af2011-03-22 18:32:49 +0000289 // State used to check for number of of overlapping grandparent files
290 // (parent == level_ + 1, grandparent == level_ + 2)
291 std::vector<FileMetaData*> grandparents_;
dgrogan@chromium.orgba6dac02011-04-20 22:48:11 +0000292 size_t grandparent_index_; // Index in grandparent_starts_
jorlow@chromium.org8303bb12011-03-22 23:24:02 +0000293 bool seen_key_; // Some output key has been seen
294 int64_t overlapped_bytes_; // Bytes of overlap between current output
295 // and grandparent files
jorlow@chromium.org13b72af2011-03-22 18:32:49 +0000296
jorlow@chromium.orgf67e15e2011-03-18 22:37:00 +0000297 // State for implementing IsBaseLevelForKey
298
299 // level_ptrs_ holds indices into input_version_->levels_: our state
300 // is that we are positioned at one of the file ranges for each
301 // higher level than the ones involved in this compaction (i.e. for
302 // all L >= level_ + 2).
dgrogan@chromium.orgba6dac02011-04-20 22:48:11 +0000303 size_t level_ptrs_[config::kNumLevels];
jorlow@chromium.orgf67e15e2011-03-18 22:37:00 +0000304};
305
306}
307
308#endif // STORAGE_LEVELDB_DB_VERSION_SET_H_