blob: fc3d3f2295a2ea6a5e01adafc81f5581900d072c [file] [log] [blame]
jorlow@chromium.orgf67e15e2011-03-18 22:37:00 +00001// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file. See the AUTHORS file for names of contributors.
4
5#ifndef STORAGE_LEVELDB_DB_DB_IMPL_H_
6#define STORAGE_LEVELDB_DB_DB_IMPL_H_
7
8#include <set>
9#include "db/dbformat.h"
10#include "db/log_writer.h"
11#include "db/snapshot.h"
12#include "include/db.h"
13#include "include/env.h"
14#include "port/port.h"
15
16namespace leveldb {
17
// Forward declarations: this header names these classes without including
// their definitions. Within this header MemTable, TableCache, VersionEdit and
// VersionSet appear only as pointer types; Version is declared but not
// otherwise referenced here.
18class MemTable;
19class TableCache;
20class Version;
21class VersionEdit;
22class VersionSet;
23
// DBImpl derives from DB and declares the DB interface methods, a few TEST_*
// hooks that are not part of that interface, and the private helpers and state
// used for recovery, writes and background compaction. Only declarations live
// in this header; the notes below are limited to what the declarations and
// their original comments state, with anything inferred marked for review.
24class DBImpl : public DB {
25 public:
 // Both arguments are taken by const reference.
 // NOTE(review): dbname is presumably the name/path of the database, kept in
 // dbname_ below -- confirm in the constructor definition.
26 DBImpl(const Options& options, const std::string& dbname);
27 virtual ~DBImpl();
28
29 // Implementations of the DB interface
 // The signatures below are expected to match the virtual methods declared
 // by DB (include/db.h); see that header for the caller-facing contract.
30 virtual Status Put(const WriteOptions&, const Slice& key, const Slice& value);
31 virtual Status Delete(const WriteOptions&, const Slice& key);
32 virtual Status Write(const WriteOptions& options, WriteBatch* updates);
 // The result is delivered through the "value" pointer; the Status return
 // reports success or failure.
33 virtual Status Get(const ReadOptions& options,
34 const Slice& key,
35 std::string* value);
36 virtual Iterator* NewIterator(const ReadOptions&);
37 virtual const Snapshot* GetSnapshot();
38 virtual void ReleaseSnapshot(const Snapshot* snapshot);
 // Writes through "value" and returns a bool.
 // NOTE(review): presumably false means the property was not recognised --
 // confirm against include/db.h.
39 virtual bool GetProperty(const Slice& property, uint64_t* value);
 // NOTE(review): presumably "range" and "sizes" are both arrays of n
 // elements, with sizes[i] filled in for range[i] -- confirm against
 // include/db.h.
40 virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes);
41
42 // Extra methods (for testing) that are not in the public DB interface
43
44 // Compact any files in the named level that overlap [begin,end]
45 void TEST_CompactRange(
46 int level,
47 const std::string& begin,
48 const std::string& end);
49
50 // Force current memtable contents to be compacted.
51 Status TEST_CompactMemTable();
52
53 // Return an internal iterator over the current state of the database.
54 // The keys of this iterator are internal keys (presumably described in
 // db/dbformat.h, which this header includes; no "format.h" is included
 // here -- confirm).
55 // The returned iterator should be deleted when no longer needed.
56 Iterator* TEST_NewInternalIterator();
57
58 private:
 // Gives the base class DB access to the private members declared below.
 // NOTE(review): presumably needed by a factory function on DB that
 // constructs and recovers a DBImpl -- confirm in db_impl.cc.
59 friend class DB;
60
 // Returns an Iterator* and writes a sequence number through
 // "latest_snapshot".
 // NOTE(review): the exact meaning of *latest_snapshot is defined in the
 // .cc file -- confirm there.
61 Iterator* NewInternalIterator(const ReadOptions&,
62 SequenceNumber* latest_snapshot);
63
64 Status NewDB();
65
66 // Recover the descriptor from persistent storage. May do a significant
67 // amount of work to recover recently logged updates. Any changes to
68 // be made to the descriptor are added to *edit.
69 Status Recover(VersionEdit* edit);
70
71 // Apply the specified updates and save the resulting descriptor to
72 // persistent storage. If cleanup_mem is non-NULL, arrange to
73 // delete it when all existing snapshots have gone away iff Install()
74 // returns OK.
75 Status Install(VersionEdit* edit,
76 uint64_t new_log_number,
77 MemTable* cleanup_mem);
78
 // Takes the status by pointer, so it may modify *s in place; being const,
 // it does not modify this object's members.
79 void MaybeIgnoreError(Status* s) const;
80
81 // Delete any unneeded files and stale in-memory entries.
82 void DeleteObsoleteFiles();
83
84 // Called when an iterator over a particular version of the
85 // descriptor goes away.
 // Static with two untyped arguments, i.e. usable as a plain function
 // pointer callback.
 // NOTE(review): what arg1 and arg2 point to is not visible in this
 // header -- confirm at the registration site.
86 static void Unref(void* arg1, void* arg2);
87
88 // Compact the in-memory write buffer to disk. Switches to a new
89 // log-file/memtable and writes a new descriptor iff successful.
90 Status CompactMemTable();
91
 // NOTE(review): presumably replays the log file numbered log_number,
 // recording changes in *edit and raising *max_sequence -- confirm in
 // db_impl.cc.
92 Status RecoverLogFile(uint64_t log_number,
93 VersionEdit* edit,
94 SequenceNumber* max_sequence);
95
 // NOTE(review): presumably writes the contents of *mem out as a level-0
 // table and records the new file in *edit -- confirm in db_impl.cc.
96 Status WriteLevel0Table(MemTable* mem, VersionEdit* edit);
97
98 bool HasLargeValues(const WriteBatch& batch) const;
99
100 // Process data in "*updates" and return a status. "assigned_seq"
101 // is the sequence number assigned to the first mod in "*updates".
102 // If no large values are encountered, "*final" is set to "updates".
103 // If large values were encountered, registers the references of the
104 // large values with the VersionSet, writes the large values to
105 // files (if appropriate), and allocates a new WriteBatch with the
106 // large values replaced with indirect references and stores a
107 // pointer to the new WriteBatch in *final. If *final != updates on
108 // return, then the client should delete *final when no longer
109 // needed. Returns OK on success, and an appropriate error
110 // otherwise.
111 Status HandleLargeValues(SequenceNumber assigned_seq,
112 WriteBatch* updates,
113 WriteBatch** final);
114
115 // Helper routine for HandleLargeValues
 // raw_value is the read-only input; file_bytes, scratch and ref are
 // passed by pointer so the helper can write to them.
 // NOTE(review): presumably *file_bytes may refer to storage held in
 // *scratch, so *scratch must outlive it -- confirm in db_impl.cc.
116 void MaybeCompressLargeValue(
117 const Slice& raw_value,
118 Slice* file_bytes,
119 std::string* scratch,
120 LargeValueRef* ref);
121
 // Declared here, defined elsewhere; this header only uses it through
 // pointers in the compaction helpers below.
122 struct CompactionState;
123
124 void MaybeScheduleCompaction();
 // Static and taking void*, i.e. usable as a plain function pointer.
 // NOTE(review): "db" is presumably the DBImpl* to run the background call
 // on -- confirm in db_impl.cc.
125 static void BGWork(void* db);
126 void BackgroundCall();
127 void BackgroundCompaction();
128 void CleanupCompaction(CompactionState* compact);
129 Status DoCompactionWork(CompactionState* compact);
130
131 Status OpenCompactionOutputFile(CompactionState* compact);
132 Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input);
133 Status InstallCompactionResults(CompactionState* compact);
134
 // C++ initializes non-static data members in declaration order, so the
 // order of the members below is significant and should not be rearranged
 // casually.
135 // Constant after construction
136 Env* const env_;
137 const InternalKeyComparator internal_comparator_;
138 const Options options_; // options_.comparator == &internal_comparator_
 // NOTE(review): listed under "Constant after construction" but, unlike its
 // neighbours, not declared const. Presumably records whether this object
 // must delete options_.info_log (see the SanitizeOptions comment at the
 // end of this header) -- confirm in db_impl.cc.
139 bool owns_info_log_;
140 const std::string dbname_;
141
142 // table_cache_ provides its own synchronization
143 TableCache* table_cache_;
144
145 // Lock over the persistent DB state. Non-NULL iff successfully acquired.
146 FileLock* db_lock_;
147
148 // State below is protected by mutex_
149 port::Mutex mutex_;
 // NOTE(review): shutting_down_ is a port::AtomicPointer, so it is
 // presumably read without holding mutex_ despite sitting in this group --
 // confirm in db_impl.cc.
150 port::AtomicPointer shutting_down_;
151 port::CondVar bg_cv_; // Signalled when !bg_compaction_scheduled_
152 port::CondVar compacting_cv_; // Signalled when !compacting_
153 SequenceNumber last_sequence_;
154 MemTable* mem_;
155 WritableFile* logfile_;
156 log::Writer* log_;
157 uint64_t log_number_;
158 SnapshotList snapshots_;
159
160 // Set of table files to protect from deletion because they are
161 // part of ongoing compactions.
162 std::set<uint64_t> pending_outputs_;
163
164 // Has a background compaction been scheduled or is running?
165 bool bg_compaction_scheduled_;
166
167 // Is there a compaction running?
168 bool compacting_;
169
170 VersionSet* versions_;
171
172 // Have we encountered a background error in paranoid mode?
173 Status bg_error_;
174
175 // No copying allowed
 // The copy constructor and copy assignment are declared in the private
 // section and given no definition in this header.
176 DBImpl(const DBImpl&);
177 void operator=(const DBImpl&);
178
 // Convenience accessor: forwards to internal_comparator_.user_comparator().
179 const Comparator* user_comparator() const {
180 return internal_comparator_.user_comparator();
181 }
182};
183
184// Sanitize db options. The caller should delete result.info_log if
185// it is not equal to src.info_log.
// Returns an Options object by value; "src" is taken by const reference, so
// the caller's options are not modified through it.
// NOTE(review): "db" is presumably the database name and "icmp" the internal
// key comparator the result should use; what exactly gets adjusted is defined
// in the .cc file, not in this header -- confirm there.
186extern Options SanitizeOptions(const std::string& db,
187 const InternalKeyComparator* icmp,
188 const Options& src);
189
190}
191
192#endif // STORAGE_LEVELDB_DB_DB_IMPL_H_