// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
| 4 | |
| 5 | #ifndef STORAGE_LEVELDB_DB_DB_IMPL_H_ |
| 6 | #define STORAGE_LEVELDB_DB_DB_IMPL_H_ |
| 7 | |
#include <stdint.h>

#include <set>
#include <string>

#include "db/dbformat.h"
#include "db/log_writer.h"
#include "db/snapshot.h"
#include "include/db.h"
#include "include/env.h"
#include "port/port.h"
| 15 | |
| 16 | namespace leveldb { |
| 17 | |
// Forward declarations. DBImpl below refers to these types, where it
// uses them at all, only through pointers, so complete definitions are
// not required in this header.
class MemTable;
class TableCache;
class Version;
class VersionEdit;
class VersionSet;
| 23 | |
| 24 | class DBImpl : public DB { |
| 25 | public: |
| 26 | DBImpl(const Options& options, const std::string& dbname); |
| 27 | virtual ~DBImpl(); |
| 28 | |
| 29 | // Implementations of the DB interface |
| 30 | virtual Status Put(const WriteOptions&, const Slice& key, const Slice& value); |
| 31 | virtual Status Delete(const WriteOptions&, const Slice& key); |
| 32 | virtual Status Write(const WriteOptions& options, WriteBatch* updates); |
| 33 | virtual Status Get(const ReadOptions& options, |
| 34 | const Slice& key, |
| 35 | std::string* value); |
| 36 | virtual Iterator* NewIterator(const ReadOptions&); |
| 37 | virtual const Snapshot* GetSnapshot(); |
| 38 | virtual void ReleaseSnapshot(const Snapshot* snapshot); |
| 39 | virtual bool GetProperty(const Slice& property, uint64_t* value); |
| 40 | virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes); |
| 41 | |
| 42 | // Extra methods (for testing) that are not in the public DB interface |
| 43 | |
| 44 | // Compact any files in the named level that overlap [begin,end] |
| 45 | void TEST_CompactRange( |
| 46 | int level, |
| 47 | const std::string& begin, |
| 48 | const std::string& end); |
| 49 | |
| 50 | // Force current memtable contents to be compacted. |
| 51 | Status TEST_CompactMemTable(); |
| 52 | |
| 53 | // Return an internal iterator over the current state of the database. |
| 54 | // The keys of this iterator are internal keys (see format.h). |
| 55 | // The returned iterator should be deleted when no longer needed. |
| 56 | Iterator* TEST_NewInternalIterator(); |
| 57 | |
| 58 | private: |
| 59 | friend class DB; |
| 60 | |
| 61 | Iterator* NewInternalIterator(const ReadOptions&, |
| 62 | SequenceNumber* latest_snapshot); |
| 63 | |
| 64 | Status NewDB(); |
| 65 | |
| 66 | // Recover the descriptor from persistent storage. May do a significant |
| 67 | // amount of work to recover recently logged updates. Any changes to |
| 68 | // be made to the descriptor are added to *edit. |
| 69 | Status Recover(VersionEdit* edit); |
| 70 | |
| 71 | // Apply the specified updates and save the resulting descriptor to |
| 72 | // persistent storage. If cleanup_mem is non-NULL, arrange to |
| 73 | // delete it when all existing snapshots have gone away iff Install() |
| 74 | // returns OK. |
| 75 | Status Install(VersionEdit* edit, |
| 76 | uint64_t new_log_number, |
| 77 | MemTable* cleanup_mem); |
| 78 | |
| 79 | void MaybeIgnoreError(Status* s) const; |
| 80 | |
| 81 | // Delete any unneeded files and stale in-memory entries. |
| 82 | void DeleteObsoleteFiles(); |
| 83 | |
| 84 | // Called when an iterator over a particular version of the |
| 85 | // descriptor goes away. |
| 86 | static void Unref(void* arg1, void* arg2); |
| 87 | |
| 88 | // Compact the in-memory write buffer to disk. Switches to a new |
| 89 | // log-file/memtable and writes a new descriptor iff successful. |
| 90 | Status CompactMemTable(); |
| 91 | |
| 92 | Status RecoverLogFile(uint64_t log_number, |
| 93 | VersionEdit* edit, |
| 94 | SequenceNumber* max_sequence); |
| 95 | |
| 96 | Status WriteLevel0Table(MemTable* mem, VersionEdit* edit); |
| 97 | |
| 98 | bool HasLargeValues(const WriteBatch& batch) const; |
| 99 | |
| 100 | // Process data in "*updates" and return a status. "assigned_seq" |
| 101 | // is the sequence number assigned to the first mod in "*updates". |
| 102 | // If no large values are encountered, "*final" is set to "updates". |
| 103 | // If large values were encountered, registers the references of the |
| 104 | // large values with the VersionSet, writes the large values to |
| 105 | // files (if appropriate), and allocates a new WriteBatch with the |
| 106 | // large values replaced with indirect references and stores a |
| 107 | // pointer to the new WriteBatch in *final. If *final != updates on |
| 108 | // return, then the client should delete *final when no longer |
| 109 | // needed. Returns OK on success, and an appropriate error |
| 110 | // otherwise. |
| 111 | Status HandleLargeValues(SequenceNumber assigned_seq, |
| 112 | WriteBatch* updates, |
| 113 | WriteBatch** final); |
| 114 | |
| 115 | // Helper routine for HandleLargeValues |
| 116 | void MaybeCompressLargeValue( |
| 117 | const Slice& raw_value, |
| 118 | Slice* file_bytes, |
| 119 | std::string* scratch, |
| 120 | LargeValueRef* ref); |
| 121 | |
| 122 | struct CompactionState; |
| 123 | |
| 124 | void MaybeScheduleCompaction(); |
| 125 | static void BGWork(void* db); |
| 126 | void BackgroundCall(); |
| 127 | void BackgroundCompaction(); |
| 128 | void CleanupCompaction(CompactionState* compact); |
| 129 | Status DoCompactionWork(CompactionState* compact); |
| 130 | |
| 131 | Status OpenCompactionOutputFile(CompactionState* compact); |
| 132 | Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input); |
| 133 | Status InstallCompactionResults(CompactionState* compact); |
| 134 | |
| 135 | // Constant after construction |
| 136 | Env* const env_; |
| 137 | const InternalKeyComparator internal_comparator_; |
| 138 | const Options options_; // options_.comparator == &internal_comparator_ |
| 139 | bool owns_info_log_; |
| 140 | const std::string dbname_; |
| 141 | |
| 142 | // table_cache_ provides its own synchronization |
| 143 | TableCache* table_cache_; |
| 144 | |
| 145 | // Lock over the persistent DB state. Non-NULL iff successfully acquired. |
| 146 | FileLock* db_lock_; |
| 147 | |
| 148 | // State below is protected by mutex_ |
| 149 | port::Mutex mutex_; |
| 150 | port::AtomicPointer shutting_down_; |
| 151 | port::CondVar bg_cv_; // Signalled when !bg_compaction_scheduled_ |
| 152 | port::CondVar compacting_cv_; // Signalled when !compacting_ |
| 153 | SequenceNumber last_sequence_; |
| 154 | MemTable* mem_; |
| 155 | WritableFile* logfile_; |
| 156 | log::Writer* log_; |
| 157 | uint64_t log_number_; |
| 158 | SnapshotList snapshots_; |
| 159 | |
| 160 | // Set of table files to protect from deletion because they are |
| 161 | // part of ongoing compactions. |
| 162 | std::set<uint64_t> pending_outputs_; |
| 163 | |
| 164 | // Has a background compaction been scheduled or is running? |
| 165 | bool bg_compaction_scheduled_; |
| 166 | |
| 167 | // Is there a compaction running? |
| 168 | bool compacting_; |
| 169 | |
| 170 | VersionSet* versions_; |
| 171 | |
| 172 | // Have we encountered a background error in paranoid mode? |
| 173 | Status bg_error_; |
| 174 | |
| 175 | // No copying allowed |
| 176 | DBImpl(const DBImpl&); |
| 177 | void operator=(const DBImpl&); |
| 178 | |
| 179 | const Comparator* user_comparator() const { |
| 180 | return internal_comparator_.user_comparator(); |
| 181 | } |
| 182 | }; |
| 183 | |
| 184 | // Sanitize db options. The caller should delete result.info_log if |
| 185 | // it is not equal to src.info_log. |
| 186 | extern Options SanitizeOptions(const std::string& db, |
| 187 | const InternalKeyComparator* icmp, |
| 188 | const Options& src); |
| 189 | |
| 190 | } |
| 191 | |
| 192 | #endif // STORAGE_LEVELDB_DB_DB_IMPL_H_ |