jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
| 4 | |
| 5 | #ifndef STORAGE_LEVELDB_DB_DB_IMPL_H_ |
| 6 | #define STORAGE_LEVELDB_DB_DB_IMPL_H_ |
| 7 | |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 8 | #include <atomic> |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 9 | #include <deque> |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 10 | #include <set> |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 11 | #include <string> |
| 12 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 13 | #include "db/dbformat.h" |
| 14 | #include "db/log_writer.h" |
| 15 | #include "db/snapshot.h" |
jorlow@chromium.org | 4671a69 | 2011-03-30 18:35:40 +0000 | [diff] [blame] | 16 | #include "leveldb/db.h" |
| 17 | #include "leveldb/env.h" |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 18 | #include "port/port.h" |
David Grogan | 946e5b5 | 2012-10-12 11:53:12 -0700 | [diff] [blame] | 19 | #include "port/thread_annotations.h" |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 20 | |
| 21 | namespace leveldb { |
| 22 | |
// Forward declarations.  This header only refers to these types through
// pointers, so their full definitions are not required here.
class MemTable;
class TableCache;
class Version;
class VersionEdit;
class VersionSet;
| 28 | |
| 29 | class DBImpl : public DB { |
| 30 | public: |
| 31 | DBImpl(const Options& options, const std::string& dbname); |
| 32 | virtual ~DBImpl(); |
| 33 | |
| 34 | // Implementations of the DB interface |
| 35 | virtual Status Put(const WriteOptions&, const Slice& key, const Slice& value); |
| 36 | virtual Status Delete(const WriteOptions&, const Slice& key); |
| 37 | virtual Status Write(const WriteOptions& options, WriteBatch* updates); |
| 38 | virtual Status Get(const ReadOptions& options, |
| 39 | const Slice& key, |
| 40 | std::string* value); |
| 41 | virtual Iterator* NewIterator(const ReadOptions&); |
| 42 | virtual const Snapshot* GetSnapshot(); |
| 43 | virtual void ReleaseSnapshot(const Snapshot* snapshot); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 44 | virtual bool GetProperty(const Slice& property, std::string* value); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 45 | virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes); |
Gabor Cselle | 299cced | 2011-10-05 16:30:28 -0700 | [diff] [blame] | 46 | virtual void CompactRange(const Slice* begin, const Slice* end); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 47 | |
| 48 | // Extra methods (for testing) that are not in the public DB interface |
| 49 | |
Gabor Cselle | 299cced | 2011-10-05 16:30:28 -0700 | [diff] [blame] | 50 | // Compact any files in the named level that overlap [*begin,*end] |
| 51 | void TEST_CompactRange(int level, const Slice* begin, const Slice* end); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 52 | |
| 53 | // Force current memtable contents to be compacted. |
| 54 | Status TEST_CompactMemTable(); |
| 55 | |
| 56 | // Return an internal iterator over the current state of the database. |
| 57 | // The keys of this iterator are internal keys (see format.h). |
| 58 | // The returned iterator should be deleted when no longer needed. |
| 59 | Iterator* TEST_NewInternalIterator(); |
| 60 | |
jorlow@chromium.org | 13b72af | 2011-03-22 18:32:49 +0000 | [diff] [blame] | 61 | // Return the maximum overlapping data (in bytes) at next level for any |
| 62 | // file at a level >= 1. |
jorlow@chromium.org | 8303bb1 | 2011-03-22 23:24:02 +0000 | [diff] [blame] | 63 | int64_t TEST_MaxNextLevelOverlappingBytes(); |
jorlow@chromium.org | 13b72af | 2011-03-22 18:32:49 +0000 | [diff] [blame] | 64 | |
David Grogan | 748539c | 2013-08-21 11:12:47 -0700 | [diff] [blame] | 65 | // Record a sample of bytes read at the specified internal key. |
| 66 | // Samples are taken approximately once every config::kReadBytesPeriod |
| 67 | // bytes. |
| 68 | void RecordReadSample(Slice key); |
| 69 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 70 | private: |
| 71 | friend class DB; |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 72 | struct CompactionState; |
| 73 | struct Writer; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 74 | |
| 75 | Iterator* NewInternalIterator(const ReadOptions&, |
David Grogan | 748539c | 2013-08-21 11:12:47 -0700 | [diff] [blame] | 76 | SequenceNumber* latest_snapshot, |
| 77 | uint32_t* seed); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 78 | |
| 79 | Status NewDB(); |
| 80 | |
| 81 | // Recover the descriptor from persistent storage. May do a significant |
| 82 | // amount of work to recover recently logged updates. Any changes to |
| 83 | // be made to the descriptor are added to *edit. |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 84 | Status Recover(VersionEdit* edit, bool* save_manifest) |
| 85 | EXCLUSIVE_LOCKS_REQUIRED(mutex_); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 86 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 87 | void MaybeIgnoreError(Status* s) const; |
| 88 | |
| 89 | // Delete any unneeded files and stale in-memory entries. |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 90 | void DeleteObsoleteFiles() EXCLUSIVE_LOCKS_REQUIRED(mutex_); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 91 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 92 | // Compact the in-memory write buffer to disk. Switches to a new |
| 93 | // log-file/memtable and writes a new descriptor iff successful. |
David Grogan | 0cfb990 | 2013-12-10 10:36:31 -0800 | [diff] [blame] | 94 | // Errors are recorded in bg_error_. |
| 95 | void CompactMemTable() EXCLUSIVE_LOCKS_REQUIRED(mutex_); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 96 | |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 97 | Status RecoverLogFile(uint64_t log_number, bool last_log, bool* save_manifest, |
| 98 | VersionEdit* edit, SequenceNumber* max_sequence) |
David Grogan | 946e5b5 | 2012-10-12 11:53:12 -0700 | [diff] [blame] | 99 | EXCLUSIVE_LOCKS_REQUIRED(mutex_); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 100 | |
David Grogan | 946e5b5 | 2012-10-12 11:53:12 -0700 | [diff] [blame] | 101 | Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base) |
| 102 | EXCLUSIVE_LOCKS_REQUIRED(mutex_); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 103 | |
David Grogan | 946e5b5 | 2012-10-12 11:53:12 -0700 | [diff] [blame] | 104 | Status MakeRoomForWrite(bool force /* compact even if there is room? */) |
| 105 | EXCLUSIVE_LOCKS_REQUIRED(mutex_); |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 106 | WriteBatch* BuildBatchGroup(Writer** last_writer) |
| 107 | EXCLUSIVE_LOCKS_REQUIRED(mutex_); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 108 | |
David Grogan | 0cfb990 | 2013-12-10 10:36:31 -0800 | [diff] [blame] | 109 | void RecordBackgroundError(const Status& s); |
| 110 | |
David Grogan | 946e5b5 | 2012-10-12 11:53:12 -0700 | [diff] [blame] | 111 | void MaybeScheduleCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 112 | static void BGWork(void* db); |
| 113 | void BackgroundCall(); |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 114 | void BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_); |
David Grogan | 946e5b5 | 2012-10-12 11:53:12 -0700 | [diff] [blame] | 115 | void CleanupCompaction(CompactionState* compact) |
| 116 | EXCLUSIVE_LOCKS_REQUIRED(mutex_); |
| 117 | Status DoCompactionWork(CompactionState* compact) |
| 118 | EXCLUSIVE_LOCKS_REQUIRED(mutex_); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 119 | |
| 120 | Status OpenCompactionOutputFile(CompactionState* compact); |
| 121 | Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input); |
David Grogan | 946e5b5 | 2012-10-12 11:53:12 -0700 | [diff] [blame] | 122 | Status InstallCompactionResults(CompactionState* compact) |
| 123 | EXCLUSIVE_LOCKS_REQUIRED(mutex_); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 124 | |
| 125 | // Constant after construction |
| 126 | Env* const env_; |
| 127 | const InternalKeyComparator internal_comparator_; |
Sanjay Ghemawat | 85584d4 | 2012-04-17 08:36:46 -0700 | [diff] [blame] | 128 | const InternalFilterPolicy internal_filter_policy_; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 129 | const Options options_; // options_.comparator == &internal_comparator_ |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 130 | const bool owns_info_log_; |
| 131 | const bool owns_cache_; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 132 | const std::string dbname_; |
| 133 | |
| 134 | // table_cache_ provides its own synchronization |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 135 | TableCache* const table_cache_; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 136 | |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 137 | // Lock over the persistent DB state. Non-null iff successfully acquired. |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 138 | FileLock* db_lock_; |
| 139 | |
| 140 | // State below is protected by mutex_ |
| 141 | port::Mutex mutex_; |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 142 | std::atomic<bool> shutting_down_; |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 143 | port::CondVar background_work_finished_signal_ GUARDED_BY(mutex_); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 144 | MemTable* mem_; |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 145 | MemTable* imm_ GUARDED_BY(mutex_); // Memtable being compacted |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 146 | std::atomic<bool> has_imm_; // So bg thread can detect non-null imm_ |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 147 | WritableFile* logfile_; |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 148 | uint64_t logfile_number_ GUARDED_BY(mutex_); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 149 | log::Writer* log_; |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 150 | uint32_t seed_ GUARDED_BY(mutex_); // For sampling. |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 151 | |
| 152 | // Queue of writers. |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 153 | std::deque<Writer*> writers_ GUARDED_BY(mutex_); |
| 154 | WriteBatch* tmp_batch_ GUARDED_BY(mutex_); |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 155 | |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 156 | SnapshotList snapshots_ GUARDED_BY(mutex_); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 157 | |
| 158 | // Set of table files to protect from deletion because they are |
| 159 | // part of ongoing compactions. |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 160 | std::set<uint64_t> pending_outputs_ GUARDED_BY(mutex_); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 161 | |
| 162 | // Has a background compaction been scheduled or is running? |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 163 | bool background_compaction_scheduled_ GUARDED_BY(mutex_); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 164 | |
hans@chromium.org | 80e5b0d | 2011-06-07 14:40:26 +0000 | [diff] [blame] | 165 | // Information for a manual compaction |
| 166 | struct ManualCompaction { |
| 167 | int level; |
Gabor Cselle | 299cced | 2011-10-05 16:30:28 -0700 | [diff] [blame] | 168 | bool done; |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 169 | const InternalKey* begin; // null means beginning of key range |
| 170 | const InternalKey* end; // null means end of key range |
Gabor Cselle | 299cced | 2011-10-05 16:30:28 -0700 | [diff] [blame] | 171 | InternalKey tmp_storage; // Used to keep track of compaction progress |
hans@chromium.org | 80e5b0d | 2011-06-07 14:40:26 +0000 | [diff] [blame] | 172 | }; |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 173 | ManualCompaction* manual_compaction_ GUARDED_BY(mutex_); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 174 | |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 175 | VersionSet* const versions_; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 176 | |
| 177 | // Have we encountered a background error in paranoid mode? |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 178 | Status bg_error_ GUARDED_BY(mutex_); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 179 | |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 180 | // Per level compaction stats. stats_[level] stores the stats for |
| 181 | // compactions that produced data for the specified "level". |
| 182 | struct CompactionStats { |
| 183 | int64_t micros; |
| 184 | int64_t bytes_read; |
| 185 | int64_t bytes_written; |
| 186 | |
| 187 | CompactionStats() : micros(0), bytes_read(0), bytes_written(0) { } |
| 188 | |
| 189 | void Add(const CompactionStats& c) { |
| 190 | this->micros += c.micros; |
| 191 | this->bytes_read += c.bytes_read; |
| 192 | this->bytes_written += c.bytes_written; |
| 193 | } |
| 194 | }; |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 195 | CompactionStats stats_[config::kNumLevels] GUARDED_BY(mutex_); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 196 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 197 | // No copying allowed |
| 198 | DBImpl(const DBImpl&); |
| 199 | void operator=(const DBImpl&); |
| 200 | |
| 201 | const Comparator* user_comparator() const { |
| 202 | return internal_comparator_.user_comparator(); |
| 203 | } |
| 204 | }; |
| 205 | |
| 206 | // Sanitize db options. The caller should delete result.info_log if |
| 207 | // it is not equal to src.info_log. |
costan | aece206 | 2018-03-12 09:14:44 -0700 | [diff] [blame] | 208 | Options SanitizeOptions(const std::string& db, |
| 209 | const InternalKeyComparator* icmp, |
| 210 | const InternalFilterPolicy* ipolicy, |
| 211 | const Options& src); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 212 | |
Hans Wennborg | 36a5f8e | 2011-10-31 17:22:06 +0000 | [diff] [blame] | 213 | } // namespace leveldb |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 214 | |
| 215 | #endif // STORAGE_LEVELDB_DB_DB_IMPL_H_ |