jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
| 4 | |
| 5 | #include "db/db_impl.h" |
| 6 | |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 7 | #include <stdint.h> |
| 8 | #include <stdio.h> |
| 9 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 10 | #include <algorithm> |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 11 | #include <atomic> |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 12 | #include <set> |
| 13 | #include <string> |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 14 | #include <vector> |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 15 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 16 | #include "db/builder.h" |
| 17 | #include "db/db_iter.h" |
| 18 | #include "db/dbformat.h" |
| 19 | #include "db/filename.h" |
| 20 | #include "db/log_reader.h" |
| 21 | #include "db/log_writer.h" |
| 22 | #include "db/memtable.h" |
| 23 | #include "db/table_cache.h" |
| 24 | #include "db/version_set.h" |
| 25 | #include "db/write_batch_internal.h" |
jorlow@chromium.org | 4671a69 | 2011-03-30 18:35:40 +0000 | [diff] [blame] | 26 | #include "leveldb/db.h" |
| 27 | #include "leveldb/env.h" |
| 28 | #include "leveldb/status.h" |
| 29 | #include "leveldb/table.h" |
| 30 | #include "leveldb/table_builder.h" |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 31 | #include "port/port.h" |
| 32 | #include "table/block.h" |
| 33 | #include "table/merger.h" |
| 34 | #include "table/two_level_iterator.h" |
| 35 | #include "util/coding.h" |
| 36 | #include "util/logging.h" |
| 37 | #include "util/mutexlock.h" |
| 38 | |
| 39 | namespace leveldb { |
| 40 | |
// Number of open-file slots set aside for uses other than the TableCache.
// SanitizeOptions() adds it to the smallest permitted max_open_files, and
// TableCacheSize() subtracts it from max_open_files.
const int kNumNonTableCacheFiles = 10;
| 42 | |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 43 | // Information kept for every waiting writer |
| 44 | struct DBImpl::Writer { |
| 45 | Status status; |
| 46 | WriteBatch* batch; |
| 47 | bool sync; |
| 48 | bool done; |
| 49 | port::CondVar cv; |
| 50 | |
| 51 | explicit Writer(port::Mutex* mu) : cv(mu) { } |
| 52 | }; |
| 53 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 54 | struct DBImpl::CompactionState { |
| 55 | Compaction* const compaction; |
| 56 | |
| 57 | // Sequence numbers < smallest_snapshot are not significant since we |
| 58 | // will never have to service a snapshot below smallest_snapshot. |
| 59 | // Therefore if we have seen a sequence number S <= smallest_snapshot, |
| 60 | // we can drop all entries for the same key with sequence numbers < S. |
| 61 | SequenceNumber smallest_snapshot; |
| 62 | |
| 63 | // Files produced by compaction |
| 64 | struct Output { |
| 65 | uint64_t number; |
| 66 | uint64_t file_size; |
| 67 | InternalKey smallest, largest; |
| 68 | }; |
| 69 | std::vector<Output> outputs; |
| 70 | |
| 71 | // State kept for output being generated |
| 72 | WritableFile* outfile; |
| 73 | TableBuilder* builder; |
| 74 | |
| 75 | uint64_t total_bytes; |
| 76 | |
| 77 | Output* current_output() { return &outputs[outputs.size()-1]; } |
| 78 | |
| 79 | explicit CompactionState(Compaction* c) |
| 80 | : compaction(c), |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 81 | outfile(nullptr), |
| 82 | builder(nullptr), |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 83 | total_bytes(0) { |
| 84 | } |
| 85 | }; |
| 86 | |
// Fix user-supplied options to be reasonable.
//
// Clamps *ptr into [minvalue, maxvalue].  The stored value is converted to V
// for each comparison; the upper bound is applied first, then the lower one.
template <class T, class V>
static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
  T& value = *ptr;
  if (maxvalue < static_cast<V>(value)) {
    value = maxvalue;
  }
  if (minvalue > static_cast<V>(value)) {
    value = minvalue;
  }
}
| 93 | Options SanitizeOptions(const std::string& dbname, |
| 94 | const InternalKeyComparator* icmp, |
Sanjay Ghemawat | 85584d4 | 2012-04-17 08:36:46 -0700 | [diff] [blame] | 95 | const InternalFilterPolicy* ipolicy, |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 96 | const Options& src) { |
| 97 | Options result = src; |
| 98 | result.comparator = icmp; |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 99 | result.filter_policy = (src.filter_policy != nullptr) ? ipolicy : nullptr; |
David Grogan | 7b094f1 | 2013-06-13 16:14:06 -0700 | [diff] [blame] | 100 | ClipToRange(&result.max_open_files, 64 + kNumNonTableCacheFiles, 50000); |
| 101 | ClipToRange(&result.write_buffer_size, 64<<10, 1<<30); |
corrado | a2fb086 | 2016-09-27 04:50:38 -0700 | [diff] [blame] | 102 | ClipToRange(&result.max_file_size, 1<<20, 1<<30); |
David Grogan | 7b094f1 | 2013-06-13 16:14:06 -0700 | [diff] [blame] | 103 | ClipToRange(&result.block_size, 1<<10, 4<<20); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 104 | if (result.info_log == nullptr) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 105 | // Open a log file in the same directory as the db |
| 106 | src.env->CreateDir(dbname); // In case it does not exist |
| 107 | src.env->RenameFile(InfoLogFileName(dbname), OldInfoLogFileName(dbname)); |
gabor@google.com | 60bd801 | 2011-07-21 02:40:18 +0000 | [diff] [blame] | 108 | Status s = src.env->NewLogger(InfoLogFileName(dbname), &result.info_log); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 109 | if (!s.ok()) { |
| 110 | // No place suitable for logging |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 111 | result.info_log = nullptr; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 112 | } |
| 113 | } |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 114 | if (result.block_cache == nullptr) { |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 115 | result.block_cache = NewLRUCache(8 << 20); |
| 116 | } |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 117 | return result; |
| 118 | } |
| 119 | |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 120 | static int TableCacheSize(const Options& sanitized_options) { |
| 121 | // Reserve ten files or so for other uses and give the rest to TableCache. |
| 122 | return sanitized_options.max_open_files - kNumNonTableCacheFiles; |
| 123 | } |
| 124 | |
David Grogan | 748539c | 2013-08-21 11:12:47 -0700 | [diff] [blame] | 125 | DBImpl::DBImpl(const Options& raw_options, const std::string& dbname) |
| 126 | : env_(raw_options.env), |
| 127 | internal_comparator_(raw_options.comparator), |
| 128 | internal_filter_policy_(raw_options.filter_policy), |
| 129 | options_(SanitizeOptions(dbname, &internal_comparator_, |
| 130 | &internal_filter_policy_, raw_options)), |
| 131 | owns_info_log_(options_.info_log != raw_options.info_log), |
| 132 | owns_cache_(options_.block_cache != raw_options.block_cache), |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 133 | dbname_(dbname), |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 134 | table_cache_(new TableCache(dbname_, options_, TableCacheSize(options_))), |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 135 | db_lock_(nullptr), |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 136 | shutting_down_(false), |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 137 | background_work_finished_signal_(&mutex_), |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 138 | mem_(nullptr), |
| 139 | imm_(nullptr), |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 140 | has_imm_(false), |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 141 | logfile_(nullptr), |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 142 | logfile_number_(0), |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 143 | log_(nullptr), |
David Grogan | 748539c | 2013-08-21 11:12:47 -0700 | [diff] [blame] | 144 | seed_(0), |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 145 | tmp_batch_(new WriteBatch), |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 146 | background_compaction_scheduled_(false), |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 147 | manual_compaction_(nullptr), |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 148 | versions_(new VersionSet(dbname_, &options_, table_cache_, |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 149 | &internal_comparator_)) {} |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 150 | |
| 151 | DBImpl::~DBImpl() { |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 152 | // Wait for background work to finish. |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 153 | mutex_.Lock(); |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 154 | shutting_down_.store(true, std::memory_order_release); |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 155 | while (background_compaction_scheduled_) { |
| 156 | background_work_finished_signal_.Wait(); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 157 | } |
| 158 | mutex_.Unlock(); |
| 159 | |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 160 | if (db_lock_ != nullptr) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 161 | env_->UnlockFile(db_lock_); |
| 162 | } |
| 163 | |
| 164 | delete versions_; |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 165 | if (mem_ != nullptr) mem_->Unref(); |
| 166 | if (imm_ != nullptr) imm_->Unref(); |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 167 | delete tmp_batch_; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 168 | delete log_; |
| 169 | delete logfile_; |
| 170 | delete table_cache_; |
| 171 | |
| 172 | if (owns_info_log_) { |
| 173 | delete options_.info_log; |
| 174 | } |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 175 | if (owns_cache_) { |
| 176 | delete options_.block_cache; |
| 177 | } |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 178 | } |
| 179 | |
| 180 | Status DBImpl::NewDB() { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 181 | VersionEdit new_db; |
| 182 | new_db.SetComparatorName(user_comparator()->Name()); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 183 | new_db.SetLogNumber(0); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 184 | new_db.SetNextFile(2); |
| 185 | new_db.SetLastSequence(0); |
| 186 | |
| 187 | const std::string manifest = DescriptorFileName(dbname_, 1); |
| 188 | WritableFile* file; |
| 189 | Status s = env_->NewWritableFile(manifest, &file); |
| 190 | if (!s.ok()) { |
| 191 | return s; |
| 192 | } |
| 193 | { |
| 194 | log::Writer log(file); |
| 195 | std::string record; |
| 196 | new_db.EncodeTo(&record); |
| 197 | s = log.AddRecord(record); |
| 198 | if (s.ok()) { |
| 199 | s = file->Close(); |
| 200 | } |
| 201 | } |
| 202 | delete file; |
| 203 | if (s.ok()) { |
| 204 | // Make "CURRENT" file that points to the new manifest file. |
| 205 | s = SetCurrentFile(env_, dbname_, 1); |
| 206 | } else { |
| 207 | env_->DeleteFile(manifest); |
| 208 | } |
| 209 | return s; |
| 210 | } |
| 211 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 212 | void DBImpl::MaybeIgnoreError(Status* s) const { |
| 213 | if (s->ok() || options_.paranoid_checks) { |
| 214 | // No change needed |
| 215 | } else { |
gabor@google.com | 60bd801 | 2011-07-21 02:40:18 +0000 | [diff] [blame] | 216 | Log(options_.info_log, "Ignoring error %s", s->ToString().c_str()); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 217 | *s = Status::OK(); |
| 218 | } |
| 219 | } |
| 220 | |
| 221 | void DBImpl::DeleteObsoleteFiles() { |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 222 | mutex_.AssertHeld(); |
| 223 | |
David Grogan | 0cfb990 | 2013-12-10 10:36:31 -0800 | [diff] [blame] | 224 | if (!bg_error_.ok()) { |
| 225 | // After a background error, we don't know whether a new version may |
| 226 | // or may not have been committed, so we cannot safely garbage collect. |
| 227 | return; |
| 228 | } |
| 229 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 230 | // Make a set of all of the live files |
| 231 | std::set<uint64_t> live = pending_outputs_; |
| 232 | versions_->AddLiveFiles(&live); |
| 233 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 234 | std::vector<std::string> filenames; |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 235 | env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 236 | uint64_t number; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 237 | FileType type; |
dgrogan@chromium.org | ba6dac0 | 2011-04-20 22:48:11 +0000 | [diff] [blame] | 238 | for (size_t i = 0; i < filenames.size(); i++) { |
| 239 | if (ParseFileName(filenames[i], &number, &type)) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 240 | bool keep = true; |
| 241 | switch (type) { |
| 242 | case kLogFile: |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 243 | keep = ((number >= versions_->LogNumber()) || |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 244 | (number == versions_->PrevLogNumber())); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 245 | break; |
| 246 | case kDescriptorFile: |
| 247 | // Keep my manifest file, and any newer incarnations' |
| 248 | // (in case there is a race that allows other incarnations) |
| 249 | keep = (number >= versions_->ManifestFileNumber()); |
| 250 | break; |
| 251 | case kTableFile: |
| 252 | keep = (live.find(number) != live.end()); |
| 253 | break; |
| 254 | case kTempFile: |
| 255 | // Any temp files that are currently being written to must |
| 256 | // be recorded in pending_outputs_, which is inserted into "live" |
| 257 | keep = (live.find(number) != live.end()); |
| 258 | break; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 259 | case kCurrentFile: |
| 260 | case kDBLockFile: |
| 261 | case kInfoLogFile: |
| 262 | keep = true; |
| 263 | break; |
| 264 | } |
| 265 | |
| 266 | if (!keep) { |
| 267 | if (type == kTableFile) { |
| 268 | table_cache_->Evict(number); |
| 269 | } |
gabor@google.com | 60bd801 | 2011-07-21 02:40:18 +0000 | [diff] [blame] | 270 | Log(options_.info_log, "Delete type=%d #%lld\n", |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 271 | static_cast<int>(type), |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 272 | static_cast<unsigned long long>(number)); |
| 273 | env_->DeleteFile(dbname_ + "/" + filenames[i]); |
| 274 | } |
| 275 | } |
| 276 | } |
| 277 | } |
| 278 | |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 279 | Status DBImpl::Recover(VersionEdit* edit, bool *save_manifest) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 280 | mutex_.AssertHeld(); |
| 281 | |
| 282 | // Ignore error from CreateDir since the creation of the DB is |
| 283 | // committed only when the descriptor is created, and this directory |
| 284 | // may already exist from a previous failed creation attempt. |
| 285 | env_->CreateDir(dbname_); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 286 | assert(db_lock_ == nullptr); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 287 | Status s = env_->LockFile(LockFileName(dbname_), &db_lock_); |
| 288 | if (!s.ok()) { |
| 289 | return s; |
| 290 | } |
| 291 | |
| 292 | if (!env_->FileExists(CurrentFileName(dbname_))) { |
| 293 | if (options_.create_if_missing) { |
| 294 | s = NewDB(); |
| 295 | if (!s.ok()) { |
| 296 | return s; |
| 297 | } |
| 298 | } else { |
| 299 | return Status::InvalidArgument( |
| 300 | dbname_, "does not exist (create_if_missing is false)"); |
| 301 | } |
| 302 | } else { |
| 303 | if (options_.error_if_exists) { |
| 304 | return Status::InvalidArgument( |
| 305 | dbname_, "exists (error_if_exists is true)"); |
| 306 | } |
| 307 | } |
| 308 | |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 309 | s = versions_->Recover(save_manifest); |
| 310 | if (!s.ok()) { |
| 311 | return s; |
| 312 | } |
| 313 | SequenceNumber max_sequence(0); |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 314 | |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 315 | // Recover from all newer log files than the ones named in the |
| 316 | // descriptor (new log files may have been added by the previous |
| 317 | // incarnation without registering them in the descriptor). |
| 318 | // |
| 319 | // Note that PrevLogNumber() is no longer used, but we pay |
| 320 | // attention to it in case we are recovering a database |
| 321 | // produced by an older version of leveldb. |
| 322 | const uint64_t min_log = versions_->LogNumber(); |
| 323 | const uint64_t prev_log = versions_->PrevLogNumber(); |
| 324 | std::vector<std::string> filenames; |
| 325 | s = env_->GetChildren(dbname_, &filenames); |
| 326 | if (!s.ok()) { |
| 327 | return s; |
| 328 | } |
| 329 | std::set<uint64_t> expected; |
| 330 | versions_->AddLiveFiles(&expected); |
| 331 | uint64_t number; |
| 332 | FileType type; |
| 333 | std::vector<uint64_t> logs; |
| 334 | for (size_t i = 0; i < filenames.size(); i++) { |
| 335 | if (ParseFileName(filenames[i], &number, &type)) { |
| 336 | expected.erase(number); |
| 337 | if (type == kLogFile && ((number >= min_log) || (number == prev_log))) |
| 338 | logs.push_back(number); |
| 339 | } |
| 340 | } |
| 341 | if (!expected.empty()) { |
| 342 | char buf[50]; |
| 343 | snprintf(buf, sizeof(buf), "%d missing files; e.g.", |
| 344 | static_cast<int>(expected.size())); |
| 345 | return Status::Corruption(buf, TableFileName(dbname_, *(expected.begin()))); |
| 346 | } |
| 347 | |
| 348 | // Recover in the order in which the logs were generated |
| 349 | std::sort(logs.begin(), logs.end()); |
| 350 | for (size_t i = 0; i < logs.size(); i++) { |
| 351 | s = RecoverLogFile(logs[i], (i == logs.size() - 1), save_manifest, edit, |
| 352 | &max_sequence); |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 353 | if (!s.ok()) { |
| 354 | return s; |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 355 | } |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 356 | |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 357 | // The previous incarnation may not have written any MANIFEST |
| 358 | // records after allocating this log number. So we manually |
| 359 | // update the file number allocation counter in VersionSet. |
| 360 | versions_->MarkFileNumberUsed(logs[i]); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 361 | } |
| 362 | |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 363 | if (versions_->LastSequence() < max_sequence) { |
| 364 | versions_->SetLastSequence(max_sequence); |
| 365 | } |
| 366 | |
| 367 | return Status::OK(); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 368 | } |
| 369 | |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 370 | Status DBImpl::RecoverLogFile(uint64_t log_number, bool last_log, |
| 371 | bool* save_manifest, VersionEdit* edit, |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 372 | SequenceNumber* max_sequence) { |
| 373 | struct LogReporter : public log::Reader::Reporter { |
| 374 | Env* env; |
gabor@google.com | 60bd801 | 2011-07-21 02:40:18 +0000 | [diff] [blame] | 375 | Logger* info_log; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 376 | const char* fname; |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 377 | Status* status; // null if options_.paranoid_checks==false |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 378 | virtual void Corruption(size_t bytes, const Status& s) { |
gabor@google.com | 60bd801 | 2011-07-21 02:40:18 +0000 | [diff] [blame] | 379 | Log(info_log, "%s%s: dropping %d bytes; %s", |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 380 | (this->status == nullptr ? "(ignoring error) " : ""), |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 381 | fname, static_cast<int>(bytes), s.ToString().c_str()); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 382 | if (this->status != nullptr && this->status->ok()) *this->status = s; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 383 | } |
| 384 | }; |
| 385 | |
| 386 | mutex_.AssertHeld(); |
| 387 | |
| 388 | // Open the log file |
| 389 | std::string fname = LogFileName(dbname_, log_number); |
| 390 | SequentialFile* file; |
| 391 | Status status = env_->NewSequentialFile(fname, &file); |
| 392 | if (!status.ok()) { |
| 393 | MaybeIgnoreError(&status); |
| 394 | return status; |
| 395 | } |
| 396 | |
| 397 | // Create the log reader. |
| 398 | LogReporter reporter; |
| 399 | reporter.env = env_; |
| 400 | reporter.info_log = options_.info_log; |
| 401 | reporter.fname = fname.c_str(); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 402 | reporter.status = (options_.paranoid_checks ? &status : nullptr); |
Chris Mumford | 803d692 | 2014-09-16 14:19:52 -0700 | [diff] [blame] | 403 | // We intentionally make log::Reader do checksumming even if |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 404 | // paranoid_checks==false so that corruptions cause entire commits |
| 405 | // to be skipped instead of propagating bad information (like overly |
| 406 | // large sequence numbers). |
dgrogan@chromium.org | da79909 | 2011-05-21 02:17:43 +0000 | [diff] [blame] | 407 | log::Reader reader(file, &reporter, true/*checksum*/, |
| 408 | 0/*initial_offset*/); |
gabor@google.com | 60bd801 | 2011-07-21 02:40:18 +0000 | [diff] [blame] | 409 | Log(options_.info_log, "Recovering log #%llu", |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 410 | (unsigned long long) log_number); |
| 411 | |
| 412 | // Read all the records and add to a memtable |
| 413 | std::string scratch; |
| 414 | Slice record; |
| 415 | WriteBatch batch; |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 416 | int compactions = 0; |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 417 | MemTable* mem = nullptr; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 418 | while (reader.ReadRecord(&record, &scratch) && |
| 419 | status.ok()) { |
| 420 | if (record.size() < 12) { |
| 421 | reporter.Corruption( |
| 422 | record.size(), Status::Corruption("log record too small")); |
| 423 | continue; |
| 424 | } |
| 425 | WriteBatchInternal::SetContents(&batch, record); |
| 426 | |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 427 | if (mem == nullptr) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 428 | mem = new MemTable(internal_comparator_); |
dgrogan@chromium.org | da79909 | 2011-05-21 02:17:43 +0000 | [diff] [blame] | 429 | mem->Ref(); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 430 | } |
| 431 | status = WriteBatchInternal::InsertInto(&batch, mem); |
| 432 | MaybeIgnoreError(&status); |
| 433 | if (!status.ok()) { |
| 434 | break; |
| 435 | } |
| 436 | const SequenceNumber last_seq = |
| 437 | WriteBatchInternal::Sequence(&batch) + |
| 438 | WriteBatchInternal::Count(&batch) - 1; |
| 439 | if (last_seq > *max_sequence) { |
| 440 | *max_sequence = last_seq; |
| 441 | } |
| 442 | |
| 443 | if (mem->ApproximateMemoryUsage() > options_.write_buffer_size) { |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 444 | compactions++; |
| 445 | *save_manifest = true; |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 446 | status = WriteLevel0Table(mem, edit, nullptr); |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 447 | mem->Unref(); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 448 | mem = nullptr; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 449 | if (!status.ok()) { |
| 450 | // Reflect errors immediately so that conditions like full |
| 451 | // file-systems cause the DB::Open() to fail. |
| 452 | break; |
| 453 | } |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 454 | } |
| 455 | } |
| 456 | |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 457 | delete file; |
| 458 | |
| 459 | // See if we should keep reusing the last log file. |
| 460 | if (status.ok() && options_.reuse_logs && last_log && compactions == 0) { |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 461 | assert(logfile_ == nullptr); |
| 462 | assert(log_ == nullptr); |
| 463 | assert(mem_ == nullptr); |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 464 | uint64_t lfile_size; |
| 465 | if (env_->GetFileSize(fname, &lfile_size).ok() && |
| 466 | env_->NewAppendableFile(fname, &logfile_).ok()) { |
| 467 | Log(options_.info_log, "Reusing old log %s \n", fname.c_str()); |
| 468 | log_ = new log::Writer(logfile_, lfile_size); |
| 469 | logfile_number_ = log_number; |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 470 | if (mem != nullptr) { |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 471 | mem_ = mem; |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 472 | mem = nullptr; |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 473 | } else { |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 474 | // mem can be nullptr if lognum exists but was empty. |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 475 | mem_ = new MemTable(internal_comparator_); |
| 476 | mem_->Ref(); |
| 477 | } |
| 478 | } |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 479 | } |
| 480 | |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 481 | if (mem != nullptr) { |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 482 | // mem did not get reused; compact it. |
| 483 | if (status.ok()) { |
| 484 | *save_manifest = true; |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 485 | status = WriteLevel0Table(mem, edit, nullptr); |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 486 | } |
| 487 | mem->Unref(); |
| 488 | } |
| 489 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 490 | return status; |
| 491 | } |
| 492 | |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 493 | Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit, |
| 494 | Version* base) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 495 | mutex_.AssertHeld(); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 496 | const uint64_t start_micros = env_->NowMicros(); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 497 | FileMetaData meta; |
| 498 | meta.number = versions_->NewFileNumber(); |
| 499 | pending_outputs_.insert(meta.number); |
| 500 | Iterator* iter = mem->NewIterator(); |
gabor@google.com | 60bd801 | 2011-07-21 02:40:18 +0000 | [diff] [blame] | 501 | Log(options_.info_log, "Level-0 table #%llu: started", |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 502 | (unsigned long long) meta.number); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 503 | |
| 504 | Status s; |
| 505 | { |
| 506 | mutex_.Unlock(); |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 507 | s = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 508 | mutex_.Lock(); |
| 509 | } |
| 510 | |
gabor@google.com | 60bd801 | 2011-07-21 02:40:18 +0000 | [diff] [blame] | 511 | Log(options_.info_log, "Level-0 table #%llu: %lld bytes %s", |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 512 | (unsigned long long) meta.number, |
| 513 | (unsigned long long) meta.file_size, |
| 514 | s.ToString().c_str()); |
| 515 | delete iter; |
| 516 | pending_outputs_.erase(meta.number); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 517 | |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 518 | |
| 519 | // Note that if file_size is zero, the file has been deleted and |
| 520 | // should not be added to the manifest. |
| 521 | int level = 0; |
| 522 | if (s.ok() && meta.file_size > 0) { |
gabor@google.com | 6699c7e | 2011-07-15 00:20:57 +0000 | [diff] [blame] | 523 | const Slice min_user_key = meta.smallest.user_key(); |
| 524 | const Slice max_user_key = meta.largest.user_key(); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 525 | if (base != nullptr) { |
Gabor Cselle | 299cced | 2011-10-05 16:30:28 -0700 | [diff] [blame] | 526 | level = base->PickLevelForMemTableOutput(min_user_key, max_user_key); |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 527 | } |
| 528 | edit->AddFile(level, meta.number, meta.file_size, |
| 529 | meta.smallest, meta.largest); |
| 530 | } |
| 531 | |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 532 | CompactionStats stats; |
| 533 | stats.micros = env_->NowMicros() - start_micros; |
| 534 | stats.bytes_written = meta.file_size; |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 535 | stats_[level].Add(stats); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 536 | return s; |
| 537 | } |
| 538 | |
David Grogan | 0cfb990 | 2013-12-10 10:36:31 -0800 | [diff] [blame] | 539 | void DBImpl::CompactMemTable() { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 540 | mutex_.AssertHeld(); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 541 | assert(imm_ != nullptr); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 542 | |
| 543 | // Save the contents of the memtable as a new Table |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 544 | VersionEdit edit; |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 545 | Version* base = versions_->current(); |
| 546 | base->Ref(); |
| 547 | Status s = WriteLevel0Table(imm_, &edit, base); |
| 548 | base->Unref(); |
| 549 | |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 550 | if (s.ok() && shutting_down_.load(std::memory_order_acquire)) { |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 551 | s = Status::IOError("Deleting DB during memtable compaction"); |
| 552 | } |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 553 | |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 554 | // Replace immutable memtable with the generated Table |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 555 | if (s.ok()) { |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 556 | edit.SetPrevLogNumber(0); |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 557 | edit.SetLogNumber(logfile_number_); // Earlier logs no longer needed |
gabor@google.com | 7263023 | 2011-09-01 19:08:02 +0000 | [diff] [blame] | 558 | s = versions_->LogAndApply(&edit, &mutex_); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 559 | } |
| 560 | |
| 561 | if (s.ok()) { |
| 562 | // Commit to the new state |
dgrogan@chromium.org | da79909 | 2011-05-21 02:17:43 +0000 | [diff] [blame] | 563 | imm_->Unref(); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 564 | imm_ = nullptr; |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 565 | has_imm_.store(false, std::memory_order_release); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 566 | DeleteObsoleteFiles(); |
David Grogan | 0cfb990 | 2013-12-10 10:36:31 -0800 | [diff] [blame] | 567 | } else { |
| 568 | RecordBackgroundError(s); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 569 | } |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 570 | } |
| 571 | |
Gabor Cselle | 299cced | 2011-10-05 16:30:28 -0700 | [diff] [blame] | 572 | void DBImpl::CompactRange(const Slice* begin, const Slice* end) { |
| 573 | int max_level_with_files = 1; |
| 574 | { |
| 575 | MutexLock l(&mutex_); |
| 576 | Version* base = versions_->current(); |
| 577 | for (int level = 1; level < config::kNumLevels; level++) { |
| 578 | if (base->OverlapInLevel(level, begin, end)) { |
| 579 | max_level_with_files = level; |
| 580 | } |
| 581 | } |
| 582 | } |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 583 | TEST_CompactMemTable(); // TODO(sanjay): Skip if memtable does not overlap |
Gabor Cselle | 299cced | 2011-10-05 16:30:28 -0700 | [diff] [blame] | 584 | for (int level = 0; level < max_level_with_files; level++) { |
| 585 | TEST_CompactRange(level, begin, end); |
| 586 | } |
| 587 | } |
| 588 | |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 589 | void DBImpl::TEST_CompactRange(int level, const Slice* begin, |
| 590 | const Slice* end) { |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 591 | assert(level >= 0); |
| 592 | assert(level + 1 < config::kNumLevels); |
| 593 | |
Gabor Cselle | 299cced | 2011-10-05 16:30:28 -0700 | [diff] [blame] | 594 | InternalKey begin_storage, end_storage; |
| 595 | |
hans@chromium.org | 80e5b0d | 2011-06-07 14:40:26 +0000 | [diff] [blame] | 596 | ManualCompaction manual; |
| 597 | manual.level = level; |
Gabor Cselle | 299cced | 2011-10-05 16:30:28 -0700 | [diff] [blame] | 598 | manual.done = false; |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 599 | if (begin == nullptr) { |
| 600 | manual.begin = nullptr; |
Gabor Cselle | 299cced | 2011-10-05 16:30:28 -0700 | [diff] [blame] | 601 | } else { |
| 602 | begin_storage = InternalKey(*begin, kMaxSequenceNumber, kValueTypeForSeek); |
| 603 | manual.begin = &begin_storage; |
| 604 | } |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 605 | if (end == nullptr) { |
| 606 | manual.end = nullptr; |
Gabor Cselle | 299cced | 2011-10-05 16:30:28 -0700 | [diff] [blame] | 607 | } else { |
| 608 | end_storage = InternalKey(*end, 0, static_cast<ValueType>(0)); |
| 609 | manual.end = &end_storage; |
| 610 | } |
| 611 | |
| 612 | MutexLock l(&mutex_); |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 613 | while (!manual.done && !shutting_down_.load(std::memory_order_acquire) && |
| 614 | bg_error_.ok()) { |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 615 | if (manual_compaction_ == nullptr) { // Idle |
David Grogan | 0cfb990 | 2013-12-10 10:36:31 -0800 | [diff] [blame] | 616 | manual_compaction_ = &manual; |
| 617 | MaybeScheduleCompaction(); |
| 618 | } else { // Running either my compaction or another compaction. |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 619 | background_work_finished_signal_.Wait(); |
Gabor Cselle | 299cced | 2011-10-05 16:30:28 -0700 | [diff] [blame] | 620 | } |
David Grogan | 0cfb990 | 2013-12-10 10:36:31 -0800 | [diff] [blame] | 621 | } |
| 622 | if (manual_compaction_ == &manual) { |
| 623 | // Cancel my manual compaction since we aborted early for some reason. |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 624 | manual_compaction_ = nullptr; |
hans@chromium.org | 80e5b0d | 2011-06-07 14:40:26 +0000 | [diff] [blame] | 625 | } |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 626 | } |
| 627 | |
| 628 | Status DBImpl::TEST_CompactMemTable() { |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 629 | // nullptr batch means just wait for earlier writes to be done |
| 630 | Status s = Write(WriteOptions(), nullptr); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 631 | if (s.ok()) { |
| 632 | // Wait until the compaction completes |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 633 | MutexLock l(&mutex_); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 634 | while (imm_ != nullptr && bg_error_.ok()) { |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 635 | background_work_finished_signal_.Wait(); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 636 | } |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 637 | if (imm_ != nullptr) { |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 638 | s = bg_error_; |
| 639 | } |
| 640 | } |
| 641 | return s; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 642 | } |
| 643 | |
David Grogan | 0cfb990 | 2013-12-10 10:36:31 -0800 | [diff] [blame] | 644 | void DBImpl::RecordBackgroundError(const Status& s) { |
| 645 | mutex_.AssertHeld(); |
| 646 | if (bg_error_.ok()) { |
| 647 | bg_error_ = s; |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 648 | background_work_finished_signal_.SignalAll(); |
David Grogan | 0cfb990 | 2013-12-10 10:36:31 -0800 | [diff] [blame] | 649 | } |
| 650 | } |
| 651 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 652 | void DBImpl::MaybeScheduleCompaction() { |
| 653 | mutex_.AssertHeld(); |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 654 | if (background_compaction_scheduled_) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 655 | // Already scheduled |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 656 | } else if (shutting_down_.load(std::memory_order_acquire)) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 657 | // DB is being deleted; no more background compactions |
David Grogan | 0cfb990 | 2013-12-10 10:36:31 -0800 | [diff] [blame] | 658 | } else if (!bg_error_.ok()) { |
| 659 | // Already got an error; no more changes |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 660 | } else if (imm_ == nullptr && |
| 661 | manual_compaction_ == nullptr && |
hans@chromium.org | 80e5b0d | 2011-06-07 14:40:26 +0000 | [diff] [blame] | 662 | !versions_->NeedsCompaction()) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 663 | // No work to be done |
| 664 | } else { |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 665 | background_compaction_scheduled_ = true; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 666 | env_->Schedule(&DBImpl::BGWork, this); |
| 667 | } |
| 668 | } |
| 669 | |
| 670 | void DBImpl::BGWork(void* db) { |
| 671 | reinterpret_cast<DBImpl*>(db)->BackgroundCall(); |
| 672 | } |
| 673 | |
| 674 | void DBImpl::BackgroundCall() { |
| 675 | MutexLock l(&mutex_); |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 676 | assert(background_compaction_scheduled_); |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 677 | if (shutting_down_.load(std::memory_order_acquire)) { |
David Grogan | 0cfb990 | 2013-12-10 10:36:31 -0800 | [diff] [blame] | 678 | // No more background work when shutting down. |
| 679 | } else if (!bg_error_.ok()) { |
| 680 | // No more background work after a background error. |
| 681 | } else { |
| 682 | BackgroundCompaction(); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 683 | } |
Sanjay Ghemawat | 075a35a | 2012-05-30 09:45:46 -0700 | [diff] [blame] | 684 | |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 685 | background_compaction_scheduled_ = false; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 686 | |
| 687 | // Previous compaction may have produced too many files in a level, |
| 688 | // so reschedule another compaction if needed. |
| 689 | MaybeScheduleCompaction(); |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 690 | background_work_finished_signal_.SignalAll(); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 691 | } |
| 692 | |
David Grogan | 0cfb990 | 2013-12-10 10:36:31 -0800 | [diff] [blame] | 693 | void DBImpl::BackgroundCompaction() { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 694 | mutex_.AssertHeld(); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 695 | |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 696 | if (imm_ != nullptr) { |
David Grogan | 0cfb990 | 2013-12-10 10:36:31 -0800 | [diff] [blame] | 697 | CompactMemTable(); |
| 698 | return; |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 699 | } |
| 700 | |
hans@chromium.org | 80e5b0d | 2011-06-07 14:40:26 +0000 | [diff] [blame] | 701 | Compaction* c; |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 702 | bool is_manual = (manual_compaction_ != nullptr); |
Gabor Cselle | 299cced | 2011-10-05 16:30:28 -0700 | [diff] [blame] | 703 | InternalKey manual_end; |
hans@chromium.org | 80e5b0d | 2011-06-07 14:40:26 +0000 | [diff] [blame] | 704 | if (is_manual) { |
Gabor Cselle | 299cced | 2011-10-05 16:30:28 -0700 | [diff] [blame] | 705 | ManualCompaction* m = manual_compaction_; |
| 706 | c = versions_->CompactRange(m->level, m->begin, m->end); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 707 | m->done = (c == nullptr); |
| 708 | if (c != nullptr) { |
Gabor Cselle | 299cced | 2011-10-05 16:30:28 -0700 | [diff] [blame] | 709 | manual_end = c->input(0, c->num_input_files(0) - 1)->largest; |
| 710 | } |
| 711 | Log(options_.info_log, |
| 712 | "Manual compaction at level-%d from %s .. %s; will stop at %s\n", |
hans@chromium.org | 80e5b0d | 2011-06-07 14:40:26 +0000 | [diff] [blame] | 713 | m->level, |
Gabor Cselle | 299cced | 2011-10-05 16:30:28 -0700 | [diff] [blame] | 714 | (m->begin ? m->begin->DebugString().c_str() : "(begin)"), |
| 715 | (m->end ? m->end->DebugString().c_str() : "(end)"), |
| 716 | (m->done ? "(end)" : manual_end.DebugString().c_str())); |
hans@chromium.org | 80e5b0d | 2011-06-07 14:40:26 +0000 | [diff] [blame] | 717 | } else { |
| 718 | c = versions_->PickCompaction(); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 719 | } |
| 720 | |
| 721 | Status status; |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 722 | if (c == nullptr) { |
hans@chromium.org | 80e5b0d | 2011-06-07 14:40:26 +0000 | [diff] [blame] | 723 | // Nothing to do |
| 724 | } else if (!is_manual && c->IsTrivialMove()) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 725 | // Move file to next level |
jorlow@chromium.org | 13b72af | 2011-03-22 18:32:49 +0000 | [diff] [blame] | 726 | assert(c->num_input_files(0) == 1); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 727 | FileMetaData* f = c->input(0, 0); |
| 728 | c->edit()->DeleteFile(c->level(), f->number); |
| 729 | c->edit()->AddFile(c->level() + 1, f->number, f->file_size, |
| 730 | f->smallest, f->largest); |
gabor@google.com | 7263023 | 2011-09-01 19:08:02 +0000 | [diff] [blame] | 731 | status = versions_->LogAndApply(c->edit(), &mutex_); |
David Grogan | 0cfb990 | 2013-12-10 10:36:31 -0800 | [diff] [blame] | 732 | if (!status.ok()) { |
| 733 | RecordBackgroundError(status); |
| 734 | } |
hans@chromium.org | 80e5b0d | 2011-06-07 14:40:26 +0000 | [diff] [blame] | 735 | VersionSet::LevelSummaryStorage tmp; |
gabor@google.com | 60bd801 | 2011-07-21 02:40:18 +0000 | [diff] [blame] | 736 | Log(options_.info_log, "Moved #%lld to level-%d %lld bytes %s: %s\n", |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 737 | static_cast<unsigned long long>(f->number), |
| 738 | c->level() + 1, |
| 739 | static_cast<unsigned long long>(f->file_size), |
hans@chromium.org | 80e5b0d | 2011-06-07 14:40:26 +0000 | [diff] [blame] | 740 | status.ToString().c_str(), |
| 741 | versions_->LevelSummary(&tmp)); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 742 | } else { |
| 743 | CompactionState* compact = new CompactionState(c); |
| 744 | status = DoCompactionWork(compact); |
David Grogan | 0cfb990 | 2013-12-10 10:36:31 -0800 | [diff] [blame] | 745 | if (!status.ok()) { |
| 746 | RecordBackgroundError(status); |
| 747 | } |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 748 | CleanupCompaction(compact); |
Sanjay Ghemawat | 3c8be10 | 2012-01-25 14:56:52 -0800 | [diff] [blame] | 749 | c->ReleaseInputs(); |
| 750 | DeleteObsoleteFiles(); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 751 | } |
| 752 | delete c; |
| 753 | |
| 754 | if (status.ok()) { |
| 755 | // Done |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 756 | } else if (shutting_down_.load(std::memory_order_acquire)) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 757 | // Ignore compaction errors found during shutting down |
| 758 | } else { |
gabor@google.com | 60bd801 | 2011-07-21 02:40:18 +0000 | [diff] [blame] | 759 | Log(options_.info_log, |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 760 | "Compaction error: %s", status.ToString().c_str()); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 761 | } |
hans@chromium.org | 80e5b0d | 2011-06-07 14:40:26 +0000 | [diff] [blame] | 762 | |
| 763 | if (is_manual) { |
Gabor Cselle | 299cced | 2011-10-05 16:30:28 -0700 | [diff] [blame] | 764 | ManualCompaction* m = manual_compaction_; |
Sanjay Ghemawat | 3c8be10 | 2012-01-25 14:56:52 -0800 | [diff] [blame] | 765 | if (!status.ok()) { |
| 766 | m->done = true; |
| 767 | } |
Gabor Cselle | 299cced | 2011-10-05 16:30:28 -0700 | [diff] [blame] | 768 | if (!m->done) { |
| 769 | // We only compacted part of the requested range. Update *m |
| 770 | // to the range that is left to be compacted. |
| 771 | m->tmp_storage = manual_end; |
| 772 | m->begin = &m->tmp_storage; |
| 773 | } |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 774 | manual_compaction_ = nullptr; |
hans@chromium.org | 80e5b0d | 2011-06-07 14:40:26 +0000 | [diff] [blame] | 775 | } |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 776 | } |
| 777 | |
| 778 | void DBImpl::CleanupCompaction(CompactionState* compact) { |
| 779 | mutex_.AssertHeld(); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 780 | if (compact->builder != nullptr) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 781 | // May happen if we get a shutdown call in the middle of compaction |
| 782 | compact->builder->Abandon(); |
| 783 | delete compact->builder; |
| 784 | } else { |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 785 | assert(compact->outfile == nullptr); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 786 | } |
| 787 | delete compact->outfile; |
dgrogan@chromium.org | ba6dac0 | 2011-04-20 22:48:11 +0000 | [diff] [blame] | 788 | for (size_t i = 0; i < compact->outputs.size(); i++) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 789 | const CompactionState::Output& out = compact->outputs[i]; |
| 790 | pending_outputs_.erase(out.number); |
| 791 | } |
| 792 | delete compact; |
| 793 | } |
| 794 | |
| 795 | Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) { |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 796 | assert(compact != nullptr); |
| 797 | assert(compact->builder == nullptr); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 798 | uint64_t file_number; |
| 799 | { |
| 800 | mutex_.Lock(); |
| 801 | file_number = versions_->NewFileNumber(); |
| 802 | pending_outputs_.insert(file_number); |
| 803 | CompactionState::Output out; |
| 804 | out.number = file_number; |
| 805 | out.smallest.Clear(); |
| 806 | out.largest.Clear(); |
| 807 | compact->outputs.push_back(out); |
| 808 | mutex_.Unlock(); |
| 809 | } |
| 810 | |
| 811 | // Make the output file |
| 812 | std::string fname = TableFileName(dbname_, file_number); |
| 813 | Status s = env_->NewWritableFile(fname, &compact->outfile); |
| 814 | if (s.ok()) { |
| 815 | compact->builder = new TableBuilder(options_, compact->outfile); |
| 816 | } |
| 817 | return s; |
| 818 | } |
| 819 | |
| 820 | Status DBImpl::FinishCompactionOutputFile(CompactionState* compact, |
| 821 | Iterator* input) { |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 822 | assert(compact != nullptr); |
| 823 | assert(compact->outfile != nullptr); |
| 824 | assert(compact->builder != nullptr); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 825 | |
| 826 | const uint64_t output_number = compact->current_output()->number; |
| 827 | assert(output_number != 0); |
| 828 | |
| 829 | // Check for iterator errors |
| 830 | Status s = input->status(); |
| 831 | const uint64_t current_entries = compact->builder->NumEntries(); |
| 832 | if (s.ok()) { |
| 833 | s = compact->builder->Finish(); |
| 834 | } else { |
| 835 | compact->builder->Abandon(); |
| 836 | } |
| 837 | const uint64_t current_bytes = compact->builder->FileSize(); |
| 838 | compact->current_output()->file_size = current_bytes; |
| 839 | compact->total_bytes += current_bytes; |
| 840 | delete compact->builder; |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 841 | compact->builder = nullptr; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 842 | |
| 843 | // Finish and check for file errors |
| 844 | if (s.ok()) { |
| 845 | s = compact->outfile->Sync(); |
| 846 | } |
| 847 | if (s.ok()) { |
| 848 | s = compact->outfile->Close(); |
| 849 | } |
| 850 | delete compact->outfile; |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 851 | compact->outfile = nullptr; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 852 | |
| 853 | if (s.ok() && current_entries > 0) { |
| 854 | // Verify that the table is usable |
jorlow@chromium.org | e2da744 | 2011-03-28 20:43:44 +0000 | [diff] [blame] | 855 | Iterator* iter = table_cache_->NewIterator(ReadOptions(), |
| 856 | output_number, |
| 857 | current_bytes); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 858 | s = iter->status(); |
| 859 | delete iter; |
| 860 | if (s.ok()) { |
gabor@google.com | 60bd801 | 2011-07-21 02:40:18 +0000 | [diff] [blame] | 861 | Log(options_.info_log, |
ideawu | 8fcceb2 | 2015-04-20 12:39:14 +0800 | [diff] [blame] | 862 | "Generated table #%llu@%d: %lld keys, %lld bytes", |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 863 | (unsigned long long) output_number, |
ideawu | 76bba13 | 2015-04-20 12:41:01 +0800 | [diff] [blame] | 864 | compact->compaction->level(), |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 865 | (unsigned long long) current_entries, |
| 866 | (unsigned long long) current_bytes); |
| 867 | } |
| 868 | } |
| 869 | return s; |
| 870 | } |
| 871 | |
| 872 | |
| 873 | Status DBImpl::InstallCompactionResults(CompactionState* compact) { |
| 874 | mutex_.AssertHeld(); |
gabor@google.com | 60bd801 | 2011-07-21 02:40:18 +0000 | [diff] [blame] | 875 | Log(options_.info_log, "Compacted %d@%d + %d@%d files => %lld bytes", |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 876 | compact->compaction->num_input_files(0), |
| 877 | compact->compaction->level(), |
| 878 | compact->compaction->num_input_files(1), |
| 879 | compact->compaction->level() + 1, |
| 880 | static_cast<long long>(compact->total_bytes)); |
| 881 | |
| 882 | // Add compaction outputs |
| 883 | compact->compaction->AddInputDeletions(compact->compaction->edit()); |
| 884 | const int level = compact->compaction->level(); |
dgrogan@chromium.org | ba6dac0 | 2011-04-20 22:48:11 +0000 | [diff] [blame] | 885 | for (size_t i = 0; i < compact->outputs.size(); i++) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 886 | const CompactionState::Output& out = compact->outputs[i]; |
| 887 | compact->compaction->edit()->AddFile( |
| 888 | level + 1, |
| 889 | out.number, out.file_size, out.smallest, out.largest); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 890 | } |
Sanjay Ghemawat | 3c8be10 | 2012-01-25 14:56:52 -0800 | [diff] [blame] | 891 | return versions_->LogAndApply(compact->compaction->edit(), &mutex_); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 892 | } |
| 893 | |
| 894 | Status DBImpl::DoCompactionWork(CompactionState* compact) { |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 895 | const uint64_t start_micros = env_->NowMicros(); |
| 896 | int64_t imm_micros = 0; // Micros spent doing imm_ compactions |
| 897 | |
gabor@google.com | 60bd801 | 2011-07-21 02:40:18 +0000 | [diff] [blame] | 898 | Log(options_.info_log, "Compacting %d@%d + %d@%d files", |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 899 | compact->compaction->num_input_files(0), |
| 900 | compact->compaction->level(), |
| 901 | compact->compaction->num_input_files(1), |
| 902 | compact->compaction->level() + 1); |
| 903 | |
| 904 | assert(versions_->NumLevelFiles(compact->compaction->level()) > 0); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 905 | assert(compact->builder == nullptr); |
| 906 | assert(compact->outfile == nullptr); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 907 | if (snapshots_.empty()) { |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 908 | compact->smallest_snapshot = versions_->LastSequence(); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 909 | } else { |
costan | 1868398 | 2018-04-30 15:11:03 -0700 | [diff] [blame] | 910 | compact->smallest_snapshot = snapshots_.oldest()->sequence_number(); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 911 | } |
| 912 | |
| 913 | // Release mutex while we're actually doing the compaction work |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 914 | mutex_.Unlock(); |
| 915 | |
| 916 | Iterator* input = versions_->MakeInputIterator(compact->compaction); |
| 917 | input->SeekToFirst(); |
| 918 | Status status; |
| 919 | ParsedInternalKey ikey; |
| 920 | std::string current_user_key; |
| 921 | bool has_current_user_key = false; |
| 922 | SequenceNumber last_sequence_for_key = kMaxSequenceNumber; |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 923 | for (; input->Valid() && !shutting_down_.load(std::memory_order_acquire); ) { |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 924 | // Prioritize immutable compaction work |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 925 | if (has_imm_.load(std::memory_order_relaxed)) { |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 926 | const uint64_t imm_start = env_->NowMicros(); |
| 927 | mutex_.Lock(); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 928 | if (imm_ != nullptr) { |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 929 | CompactMemTable(); |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 930 | // Wake up MakeRoomForWrite() if necessary. |
| 931 | background_work_finished_signal_.SignalAll(); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 932 | } |
| 933 | mutex_.Unlock(); |
| 934 | imm_micros += (env_->NowMicros() - imm_start); |
| 935 | } |
| 936 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 937 | Slice key = input->key(); |
dgrogan@chromium.org | da79909 | 2011-05-21 02:17:43 +0000 | [diff] [blame] | 938 | if (compact->compaction->ShouldStopBefore(key) && |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 939 | compact->builder != nullptr) { |
jorlow@chromium.org | 13b72af | 2011-03-22 18:32:49 +0000 | [diff] [blame] | 940 | status = FinishCompactionOutputFile(compact, input); |
| 941 | if (!status.ok()) { |
| 942 | break; |
| 943 | } |
| 944 | } |
| 945 | |
| 946 | // Handle key/value, add to state, etc. |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 947 | bool drop = false; |
| 948 | if (!ParseInternalKey(key, &ikey)) { |
| 949 | // Do not hide error keys |
| 950 | current_user_key.clear(); |
| 951 | has_current_user_key = false; |
| 952 | last_sequence_for_key = kMaxSequenceNumber; |
| 953 | } else { |
| 954 | if (!has_current_user_key || |
| 955 | user_comparator()->Compare(ikey.user_key, |
| 956 | Slice(current_user_key)) != 0) { |
| 957 | // First occurrence of this user key |
| 958 | current_user_key.assign(ikey.user_key.data(), ikey.user_key.size()); |
| 959 | has_current_user_key = true; |
| 960 | last_sequence_for_key = kMaxSequenceNumber; |
| 961 | } |
| 962 | |
| 963 | if (last_sequence_for_key <= compact->smallest_snapshot) { |
| 964 | // Hidden by an newer entry for same user key |
| 965 | drop = true; // (A) |
| 966 | } else if (ikey.type == kTypeDeletion && |
| 967 | ikey.sequence <= compact->smallest_snapshot && |
| 968 | compact->compaction->IsBaseLevelForKey(ikey.user_key)) { |
| 969 | // For this user key: |
| 970 | // (1) there is no data in higher levels |
| 971 | // (2) data in lower levels will have larger sequence numbers |
| 972 | // (3) data in layers that are being compacted here and have |
| 973 | // smaller sequence numbers will be dropped in the next |
| 974 | // few iterations of this loop (by rule (A) above). |
| 975 | // Therefore this deletion marker is obsolete and can be dropped. |
| 976 | drop = true; |
| 977 | } |
| 978 | |
| 979 | last_sequence_for_key = ikey.sequence; |
| 980 | } |
| 981 | #if 0 |
gabor@google.com | 60bd801 | 2011-07-21 02:40:18 +0000 | [diff] [blame] | 982 | Log(options_.info_log, |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 983 | " Compact: %s, seq %d, type: %d %d, drop: %d, is_base: %d, " |
| 984 | "%d smallest_snapshot: %d", |
| 985 | ikey.user_key.ToString().c_str(), |
dgrogan@chromium.org | ba6dac0 | 2011-04-20 22:48:11 +0000 | [diff] [blame] | 986 | (int)ikey.sequence, ikey.type, kTypeValue, drop, |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 987 | compact->compaction->IsBaseLevelForKey(ikey.user_key), |
| 988 | (int)last_sequence_for_key, (int)compact->smallest_snapshot); |
| 989 | #endif |
| 990 | |
| 991 | if (!drop) { |
| 992 | // Open output file if necessary |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 993 | if (compact->builder == nullptr) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 994 | status = OpenCompactionOutputFile(compact); |
| 995 | if (!status.ok()) { |
| 996 | break; |
| 997 | } |
| 998 | } |
| 999 | if (compact->builder->NumEntries() == 0) { |
| 1000 | compact->current_output()->smallest.DecodeFrom(key); |
| 1001 | } |
| 1002 | compact->current_output()->largest.DecodeFrom(key); |
dgrogan@chromium.org | ba6dac0 | 2011-04-20 22:48:11 +0000 | [diff] [blame] | 1003 | compact->builder->Add(key, input->value()); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1004 | |
| 1005 | // Close output file if it is big enough |
| 1006 | if (compact->builder->FileSize() >= |
| 1007 | compact->compaction->MaxOutputFileSize()) { |
| 1008 | status = FinishCompactionOutputFile(compact, input); |
| 1009 | if (!status.ok()) { |
| 1010 | break; |
| 1011 | } |
| 1012 | } |
| 1013 | } |
| 1014 | |
| 1015 | input->Next(); |
| 1016 | } |
| 1017 | |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 1018 | if (status.ok() && shutting_down_.load(std::memory_order_acquire)) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1019 | status = Status::IOError("Deleting DB during compaction"); |
| 1020 | } |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1021 | if (status.ok() && compact->builder != nullptr) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1022 | status = FinishCompactionOutputFile(compact, input); |
| 1023 | } |
| 1024 | if (status.ok()) { |
| 1025 | status = input->status(); |
| 1026 | } |
| 1027 | delete input; |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1028 | input = nullptr; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1029 | |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1030 | CompactionStats stats; |
| 1031 | stats.micros = env_->NowMicros() - start_micros - imm_micros; |
| 1032 | for (int which = 0; which < 2; which++) { |
| 1033 | for (int i = 0; i < compact->compaction->num_input_files(which); i++) { |
| 1034 | stats.bytes_read += compact->compaction->input(which, i)->file_size; |
| 1035 | } |
| 1036 | } |
dgrogan@chromium.org | ba6dac0 | 2011-04-20 22:48:11 +0000 | [diff] [blame] | 1037 | for (size_t i = 0; i < compact->outputs.size(); i++) { |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1038 | stats.bytes_written += compact->outputs[i].file_size; |
| 1039 | } |
| 1040 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1041 | mutex_.Lock(); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1042 | stats_[compact->compaction->level() + 1].Add(stats); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1043 | |
| 1044 | if (status.ok()) { |
| 1045 | status = InstallCompactionResults(compact); |
| 1046 | } |
David Grogan | 0cfb990 | 2013-12-10 10:36:31 -0800 | [diff] [blame] | 1047 | if (!status.ok()) { |
| 1048 | RecordBackgroundError(status); |
| 1049 | } |
dgrogan@chromium.org | da79909 | 2011-05-21 02:17:43 +0000 | [diff] [blame] | 1050 | VersionSet::LevelSummaryStorage tmp; |
gabor@google.com | 60bd801 | 2011-07-21 02:40:18 +0000 | [diff] [blame] | 1051 | Log(options_.info_log, |
dgrogan@chromium.org | da79909 | 2011-05-21 02:17:43 +0000 | [diff] [blame] | 1052 | "compacted to: %s", versions_->LevelSummary(&tmp)); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1053 | return status; |
| 1054 | } |
| 1055 | |
dgrogan@chromium.org | 740d8b3 | 2011-05-28 00:53:58 +0000 | [diff] [blame] | 1056 | namespace { |
costan | 0db3041 | 2018-03-23 12:50:14 -0700 | [diff] [blame] | 1057 | |
dgrogan@chromium.org | 740d8b3 | 2011-05-28 00:53:58 +0000 | [diff] [blame] | 1058 | struct IterState { |
costan | 0db3041 | 2018-03-23 12:50:14 -0700 | [diff] [blame] | 1059 | port::Mutex* const mu; |
| 1060 | Version* const version GUARDED_BY(mu); |
| 1061 | MemTable* const mem GUARDED_BY(mu); |
| 1062 | MemTable* const imm GUARDED_BY(mu); |
| 1063 | |
| 1064 | IterState(port::Mutex* mutex, MemTable* mem, MemTable* imm, Version* version) |
| 1065 | : mu(mutex), version(version), mem(mem), imm(imm) { } |
dgrogan@chromium.org | 740d8b3 | 2011-05-28 00:53:58 +0000 | [diff] [blame] | 1066 | }; |
| 1067 | |
| 1068 | static void CleanupIteratorState(void* arg1, void* arg2) { |
| 1069 | IterState* state = reinterpret_cast<IterState*>(arg1); |
| 1070 | state->mu->Lock(); |
| 1071 | state->mem->Unref(); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1072 | if (state->imm != nullptr) state->imm->Unref(); |
dgrogan@chromium.org | 740d8b3 | 2011-05-28 00:53:58 +0000 | [diff] [blame] | 1073 | state->version->Unref(); |
| 1074 | state->mu->Unlock(); |
| 1075 | delete state; |
| 1076 | } |
costan | 0db3041 | 2018-03-23 12:50:14 -0700 | [diff] [blame] | 1077 | |
| 1078 | } // anonymous namespace |
dgrogan@chromium.org | 740d8b3 | 2011-05-28 00:53:58 +0000 | [diff] [blame] | 1079 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1080 | Iterator* DBImpl::NewInternalIterator(const ReadOptions& options, |
David Grogan | 748539c | 2013-08-21 11:12:47 -0700 | [diff] [blame] | 1081 | SequenceNumber* latest_snapshot, |
| 1082 | uint32_t* seed) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1083 | mutex_.Lock(); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1084 | *latest_snapshot = versions_->LastSequence(); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1085 | |
| 1086 | // Collect together all needed child iterators |
| 1087 | std::vector<Iterator*> list; |
| 1088 | list.push_back(mem_->NewIterator()); |
dgrogan@chromium.org | 740d8b3 | 2011-05-28 00:53:58 +0000 | [diff] [blame] | 1089 | mem_->Ref(); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1090 | if (imm_ != nullptr) { |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1091 | list.push_back(imm_->NewIterator()); |
dgrogan@chromium.org | 740d8b3 | 2011-05-28 00:53:58 +0000 | [diff] [blame] | 1092 | imm_->Ref(); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1093 | } |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1094 | versions_->current()->AddIterators(options, &list); |
| 1095 | Iterator* internal_iter = |
| 1096 | NewMergingIterator(&internal_comparator_, &list[0], list.size()); |
| 1097 | versions_->current()->Ref(); |
dgrogan@chromium.org | 740d8b3 | 2011-05-28 00:53:58 +0000 | [diff] [blame] | 1098 | |
costan | 0db3041 | 2018-03-23 12:50:14 -0700 | [diff] [blame] | 1099 | IterState* cleanup = new IterState(&mutex_, mem_, imm_, versions_->current()); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1100 | internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, nullptr); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1101 | |
David Grogan | 748539c | 2013-08-21 11:12:47 -0700 | [diff] [blame] | 1102 | *seed = ++seed_; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1103 | mutex_.Unlock(); |
| 1104 | return internal_iter; |
| 1105 | } |
| 1106 | |
| 1107 | Iterator* DBImpl::TEST_NewInternalIterator() { |
| 1108 | SequenceNumber ignored; |
David Grogan | 748539c | 2013-08-21 11:12:47 -0700 | [diff] [blame] | 1109 | uint32_t ignored_seed; |
| 1110 | return NewInternalIterator(ReadOptions(), &ignored, &ignored_seed); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1111 | } |
| 1112 | |
jorlow@chromium.org | 8303bb1 | 2011-03-22 23:24:02 +0000 | [diff] [blame] | 1113 | int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes() { |
jorlow@chromium.org | 13b72af | 2011-03-22 18:32:49 +0000 | [diff] [blame] | 1114 | MutexLock l(&mutex_); |
| 1115 | return versions_->MaxNextLevelOverlappingBytes(); |
| 1116 | } |
| 1117 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1118 | Status DBImpl::Get(const ReadOptions& options, |
| 1119 | const Slice& key, |
| 1120 | std::string* value) { |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 1121 | Status s; |
| 1122 | MutexLock l(&mutex_); |
| 1123 | SequenceNumber snapshot; |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1124 | if (options.snapshot != nullptr) { |
costan | 1868398 | 2018-04-30 15:11:03 -0700 | [diff] [blame] | 1125 | snapshot = |
| 1126 | static_cast<const SnapshotImpl*>(options.snapshot)->sequence_number(); |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 1127 | } else { |
| 1128 | snapshot = versions_->LastSequence(); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1129 | } |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 1130 | |
gabor@google.com | e3584f9 | 2011-08-22 21:08:51 +0000 | [diff] [blame] | 1131 | MemTable* mem = mem_; |
| 1132 | MemTable* imm = imm_; |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 1133 | Version* current = versions_->current(); |
gabor@google.com | e3584f9 | 2011-08-22 21:08:51 +0000 | [diff] [blame] | 1134 | mem->Ref(); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1135 | if (imm != nullptr) imm->Ref(); |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 1136 | current->Ref(); |
gabor@google.com | e3584f9 | 2011-08-22 21:08:51 +0000 | [diff] [blame] | 1137 | |
| 1138 | bool have_stat_update = false; |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 1139 | Version::GetStats stats; |
gabor@google.com | e3584f9 | 2011-08-22 21:08:51 +0000 | [diff] [blame] | 1140 | |
| 1141 | // Unlock while reading from files and memtables |
| 1142 | { |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 1143 | mutex_.Unlock(); |
gabor@google.com | e3584f9 | 2011-08-22 21:08:51 +0000 | [diff] [blame] | 1144 | // First look in the memtable, then in the immutable memtable (if any). |
| 1145 | LookupKey lkey(key, snapshot); |
gabor@google.com | 7263023 | 2011-09-01 19:08:02 +0000 | [diff] [blame] | 1146 | if (mem->Get(lkey, value, &s)) { |
gabor@google.com | e3584f9 | 2011-08-22 21:08:51 +0000 | [diff] [blame] | 1147 | // Done |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1148 | } else if (imm != nullptr && imm->Get(lkey, value, &s)) { |
gabor@google.com | e3584f9 | 2011-08-22 21:08:51 +0000 | [diff] [blame] | 1149 | // Done |
| 1150 | } else { |
| 1151 | s = current->Get(options, lkey, value, &stats); |
| 1152 | have_stat_update = true; |
| 1153 | } |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 1154 | mutex_.Lock(); |
| 1155 | } |
gabor@google.com | e3584f9 | 2011-08-22 21:08:51 +0000 | [diff] [blame] | 1156 | |
| 1157 | if (have_stat_update && current->UpdateStats(stats)) { |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 1158 | MaybeScheduleCompaction(); |
| 1159 | } |
gabor@google.com | e3584f9 | 2011-08-22 21:08:51 +0000 | [diff] [blame] | 1160 | mem->Unref(); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1161 | if (imm != nullptr) imm->Unref(); |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 1162 | current->Unref(); |
| 1163 | return s; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1164 | } |
| 1165 | |
| 1166 | Iterator* DBImpl::NewIterator(const ReadOptions& options) { |
| 1167 | SequenceNumber latest_snapshot; |
David Grogan | 748539c | 2013-08-21 11:12:47 -0700 | [diff] [blame] | 1168 | uint32_t seed; |
| 1169 | Iterator* iter = NewInternalIterator(options, &latest_snapshot, &seed); |
dgrogan@chromium.org | da79909 | 2011-05-21 02:17:43 +0000 | [diff] [blame] | 1170 | return NewDBIterator( |
David Grogan | 748539c | 2013-08-21 11:12:47 -0700 | [diff] [blame] | 1171 | this, user_comparator(), iter, |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1172 | (options.snapshot != nullptr |
costan | 1868398 | 2018-04-30 15:11:03 -0700 | [diff] [blame] | 1173 | ? static_cast<const SnapshotImpl*>(options.snapshot)->sequence_number() |
David Grogan | 748539c | 2013-08-21 11:12:47 -0700 | [diff] [blame] | 1174 | : latest_snapshot), |
| 1175 | seed); |
| 1176 | } |
| 1177 | |
| 1178 | void DBImpl::RecordReadSample(Slice key) { |
| 1179 | MutexLock l(&mutex_); |
| 1180 | if (versions_->current()->RecordReadSample(key)) { |
| 1181 | MaybeScheduleCompaction(); |
| 1182 | } |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1183 | } |
| 1184 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1185 | const Snapshot* DBImpl::GetSnapshot() { |
| 1186 | MutexLock l(&mutex_); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1187 | return snapshots_.New(versions_->LastSequence()); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1188 | } |
| 1189 | |
costan | 1868398 | 2018-04-30 15:11:03 -0700 | [diff] [blame] | 1190 | void DBImpl::ReleaseSnapshot(const Snapshot* snapshot) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1191 | MutexLock l(&mutex_); |
costan | 1868398 | 2018-04-30 15:11:03 -0700 | [diff] [blame] | 1192 | snapshots_.Delete(static_cast<const SnapshotImpl*>(snapshot)); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1193 | } |
| 1194 | |
| 1195 | // Convenience methods |
| 1196 | Status DBImpl::Put(const WriteOptions& o, const Slice& key, const Slice& val) { |
| 1197 | return DB::Put(o, key, val); |
| 1198 | } |
| 1199 | |
| 1200 | Status DBImpl::Delete(const WriteOptions& options, const Slice& key) { |
| 1201 | return DB::Delete(options, key); |
| 1202 | } |
| 1203 | |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 1204 | Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) { |
| 1205 | Writer w(&mutex_); |
| 1206 | w.batch = my_batch; |
| 1207 | w.sync = options.sync; |
| 1208 | w.done = false; |
gabor@google.com | 7263023 | 2011-09-01 19:08:02 +0000 | [diff] [blame] | 1209 | |
dgrogan@chromium.org | ba6dac0 | 2011-04-20 22:48:11 +0000 | [diff] [blame] | 1210 | MutexLock l(&mutex_); |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 1211 | writers_.push_back(&w); |
| 1212 | while (!w.done && &w != writers_.front()) { |
| 1213 | w.cv.Wait(); |
| 1214 | } |
| 1215 | if (w.done) { |
| 1216 | return w.status; |
| 1217 | } |
| 1218 | |
| 1219 | // May temporarily unlock and wait. |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1220 | Status status = MakeRoomForWrite(my_batch == nullptr); |
dgrogan@chromium.org | ba6dac0 | 2011-04-20 22:48:11 +0000 | [diff] [blame] | 1221 | uint64_t last_sequence = versions_->LastSequence(); |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 1222 | Writer* last_writer = &w; |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1223 | if (status.ok() && my_batch != nullptr) { // nullptr batch is for compactions |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 1224 | WriteBatch* updates = BuildBatchGroup(&last_writer); |
dgrogan@chromium.org | ba6dac0 | 2011-04-20 22:48:11 +0000 | [diff] [blame] | 1225 | WriteBatchInternal::SetSequence(updates, last_sequence + 1); |
| 1226 | last_sequence += WriteBatchInternal::Count(updates); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1227 | |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 1228 | // Add to log and apply to memtable. We can release the lock |
| 1229 | // during this phase since &w is currently responsible for logging |
| 1230 | // and protects against concurrent loggers and concurrent writes |
| 1231 | // into mem_. |
gabor@google.com | 7263023 | 2011-09-01 19:08:02 +0000 | [diff] [blame] | 1232 | { |
gabor@google.com | 7263023 | 2011-09-01 19:08:02 +0000 | [diff] [blame] | 1233 | mutex_.Unlock(); |
| 1234 | status = log_->AddRecord(WriteBatchInternal::Contents(updates)); |
David Grogan | 0cfb990 | 2013-12-10 10:36:31 -0800 | [diff] [blame] | 1235 | bool sync_error = false; |
gabor@google.com | 7263023 | 2011-09-01 19:08:02 +0000 | [diff] [blame] | 1236 | if (status.ok() && options.sync) { |
| 1237 | status = logfile_->Sync(); |
David Grogan | 0cfb990 | 2013-12-10 10:36:31 -0800 | [diff] [blame] | 1238 | if (!status.ok()) { |
| 1239 | sync_error = true; |
| 1240 | } |
gabor@google.com | 7263023 | 2011-09-01 19:08:02 +0000 | [diff] [blame] | 1241 | } |
| 1242 | if (status.ok()) { |
| 1243 | status = WriteBatchInternal::InsertInto(updates, mem_); |
| 1244 | } |
| 1245 | mutex_.Lock(); |
David Grogan | 0cfb990 | 2013-12-10 10:36:31 -0800 | [diff] [blame] | 1246 | if (sync_error) { |
| 1247 | // The state of the log file is indeterminate: the log record we |
| 1248 | // just added may or may not show up when the DB is re-opened. |
| 1249 | // So we force the DB into a mode where all future writes fail. |
| 1250 | RecordBackgroundError(status); |
| 1251 | } |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1252 | } |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 1253 | if (updates == tmp_batch_) tmp_batch_->Clear(); |
gabor@google.com | 7263023 | 2011-09-01 19:08:02 +0000 | [diff] [blame] | 1254 | |
| 1255 | versions_->SetLastSequence(last_sequence); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1256 | } |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 1257 | |
| 1258 | while (true) { |
| 1259 | Writer* ready = writers_.front(); |
| 1260 | writers_.pop_front(); |
| 1261 | if (ready != &w) { |
| 1262 | ready->status = status; |
| 1263 | ready->done = true; |
| 1264 | ready->cv.Signal(); |
| 1265 | } |
| 1266 | if (ready == last_writer) break; |
| 1267 | } |
| 1268 | |
| 1269 | // Notify new head of write queue |
| 1270 | if (!writers_.empty()) { |
| 1271 | writers_.front()->cv.Signal(); |
| 1272 | } |
| 1273 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1274 | return status; |
| 1275 | } |
| 1276 | |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 1277 | // REQUIRES: Writer list must be non-empty |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1278 | // REQUIRES: First writer must have a non-null batch |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 1279 | WriteBatch* DBImpl::BuildBatchGroup(Writer** last_writer) { |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 1280 | mutex_.AssertHeld(); |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 1281 | assert(!writers_.empty()); |
| 1282 | Writer* first = writers_.front(); |
| 1283 | WriteBatch* result = first->batch; |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1284 | assert(result != nullptr); |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 1285 | |
| 1286 | size_t size = WriteBatchInternal::ByteSize(first->batch); |
| 1287 | |
| 1288 | // Allow the group to grow up to a maximum size, but if the |
| 1289 | // original write is small, limit the growth so we do not slow |
| 1290 | // down the small write too much. |
| 1291 | size_t max_size = 1 << 20; |
| 1292 | if (size <= (128<<10)) { |
| 1293 | max_size = size + (128<<10); |
| 1294 | } |
| 1295 | |
| 1296 | *last_writer = first; |
| 1297 | std::deque<Writer*>::iterator iter = writers_.begin(); |
| 1298 | ++iter; // Advance past "first" |
| 1299 | for (; iter != writers_.end(); ++iter) { |
| 1300 | Writer* w = *iter; |
| 1301 | if (w->sync && !first->sync) { |
| 1302 | // Do not include a sync write into a batch handled by a non-sync write. |
| 1303 | break; |
| 1304 | } |
| 1305 | |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1306 | if (w->batch != nullptr) { |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 1307 | size += WriteBatchInternal::ByteSize(w->batch); |
| 1308 | if (size > max_size) { |
| 1309 | // Do not make batch too big |
| 1310 | break; |
| 1311 | } |
| 1312 | |
Chris Mumford | 803d692 | 2014-09-16 14:19:52 -0700 | [diff] [blame] | 1313 | // Append to *result |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 1314 | if (result == first->batch) { |
| 1315 | // Switch to temporary batch instead of disturbing caller's batch |
| 1316 | result = tmp_batch_; |
| 1317 | assert(WriteBatchInternal::Count(result) == 0); |
| 1318 | WriteBatchInternal::Append(result, first->batch); |
| 1319 | } |
| 1320 | WriteBatchInternal::Append(result, w->batch); |
| 1321 | } |
| 1322 | *last_writer = w; |
| 1323 | } |
| 1324 | return result; |
| 1325 | } |
| 1326 | |
gabor@google.com | 7263023 | 2011-09-01 19:08:02 +0000 | [diff] [blame] | 1327 | // REQUIRES: mutex_ is held |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 1328 | // REQUIRES: this thread is currently at the front of the writer queue |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1329 | Status DBImpl::MakeRoomForWrite(bool force) { |
| 1330 | mutex_.AssertHeld(); |
Sanjay Ghemawat | d79762e | 2012-03-08 16:23:21 -0800 | [diff] [blame] | 1331 | assert(!writers_.empty()); |
dgrogan@chromium.org | da79909 | 2011-05-21 02:17:43 +0000 | [diff] [blame] | 1332 | bool allow_delay = !force; |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1333 | Status s; |
| 1334 | while (true) { |
| 1335 | if (!bg_error_.ok()) { |
| 1336 | // Yield previous error |
| 1337 | s = bg_error_; |
| 1338 | break; |
dgrogan@chromium.org | da79909 | 2011-05-21 02:17:43 +0000 | [diff] [blame] | 1339 | } else if ( |
| 1340 | allow_delay && |
| 1341 | versions_->NumLevelFiles(0) >= config::kL0_SlowdownWritesTrigger) { |
| 1342 | // We are getting close to hitting a hard limit on the number of |
| 1343 | // L0 files. Rather than delaying a single write by several |
| 1344 | // seconds when we hit the hard limit, start delaying each |
| 1345 | // individual write by 1ms to reduce latency variance. Also, |
| 1346 | // this delay hands over some CPU to the compaction thread in |
| 1347 | // case it is sharing the same core as the writer. |
| 1348 | mutex_.Unlock(); |
| 1349 | env_->SleepForMicroseconds(1000); |
| 1350 | allow_delay = false; // Do not delay a single write more than once |
| 1351 | mutex_.Lock(); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1352 | } else if (!force && |
| 1353 | (mem_->ApproximateMemoryUsage() <= options_.write_buffer_size)) { |
| 1354 | // There is room in current memtable |
| 1355 | break; |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1356 | } else if (imm_ != nullptr) { |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1357 | // We have filled up the current memtable, but the previous |
| 1358 | // one is still being compacted, so we wait. |
David Grogan | 28dad91 | 2013-05-14 16:52:56 -0700 | [diff] [blame] | 1359 | Log(options_.info_log, "Current memtable full; waiting...\n"); |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 1360 | background_work_finished_signal_.Wait(); |
dgrogan@chromium.org | da79909 | 2011-05-21 02:17:43 +0000 | [diff] [blame] | 1361 | } else if (versions_->NumLevelFiles(0) >= config::kL0_StopWritesTrigger) { |
| 1362 | // There are too many level-0 files. |
David Grogan | 28dad91 | 2013-05-14 16:52:56 -0700 | [diff] [blame] | 1363 | Log(options_.info_log, "Too many L0 files; waiting...\n"); |
costan | 0fa5a4f | 2018-03-16 10:06:35 -0700 | [diff] [blame] | 1364 | background_work_finished_signal_.Wait(); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1365 | } else { |
| 1366 | // Attempt to switch to a new memtable and trigger compaction of old |
| 1367 | assert(versions_->PrevLogNumber() == 0); |
| 1368 | uint64_t new_log_number = versions_->NewFileNumber(); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1369 | WritableFile* lfile = nullptr; |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1370 | s = env_->NewWritableFile(LogFileName(dbname_, new_log_number), &lfile); |
| 1371 | if (!s.ok()) { |
Sanjay Ghemawat | 075a35a | 2012-05-30 09:45:46 -0700 | [diff] [blame] | 1372 | // Avoid chewing through file number space in a tight loop. |
| 1373 | versions_->ReuseFileNumber(new_log_number); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1374 | break; |
| 1375 | } |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1376 | delete log_; |
| 1377 | delete logfile_; |
| 1378 | logfile_ = lfile; |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 1379 | logfile_number_ = new_log_number; |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1380 | log_ = new log::Writer(lfile); |
| 1381 | imm_ = mem_; |
costan | 7d8e41e | 2019-03-11 13:04:53 -0700 | [diff] [blame] | 1382 | has_imm_.store(true, std::memory_order_release); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1383 | mem_ = new MemTable(internal_comparator_); |
dgrogan@chromium.org | da79909 | 2011-05-21 02:17:43 +0000 | [diff] [blame] | 1384 | mem_->Ref(); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1385 | force = false; // Do not force another compaction if have room |
| 1386 | MaybeScheduleCompaction(); |
| 1387 | } |
| 1388 | } |
| 1389 | return s; |
| 1390 | } |
| 1391 | |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1392 | bool DBImpl::GetProperty(const Slice& property, std::string* value) { |
| 1393 | value->clear(); |
| 1394 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1395 | MutexLock l(&mutex_); |
| 1396 | Slice in = property; |
| 1397 | Slice prefix("leveldb."); |
| 1398 | if (!in.starts_with(prefix)) return false; |
| 1399 | in.remove_prefix(prefix.size()); |
| 1400 | |
| 1401 | if (in.starts_with("num-files-at-level")) { |
| 1402 | in.remove_prefix(strlen("num-files-at-level")); |
| 1403 | uint64_t level; |
| 1404 | bool ok = ConsumeDecimalNumber(&in, &level) && in.empty(); |
dgrogan@chromium.org | a05525d | 2011-08-06 00:19:37 +0000 | [diff] [blame] | 1405 | if (!ok || level >= config::kNumLevels) { |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1406 | return false; |
| 1407 | } else { |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1408 | char buf[100]; |
dgrogan@chromium.org | ba6dac0 | 2011-04-20 22:48:11 +0000 | [diff] [blame] | 1409 | snprintf(buf, sizeof(buf), "%d", |
| 1410 | versions_->NumLevelFiles(static_cast<int>(level))); |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1411 | *value = buf; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1412 | return true; |
| 1413 | } |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1414 | } else if (in == "stats") { |
| 1415 | char buf[200]; |
| 1416 | snprintf(buf, sizeof(buf), |
| 1417 | " Compactions\n" |
| 1418 | "Level Files Size(MB) Time(sec) Read(MB) Write(MB)\n" |
| 1419 | "--------------------------------------------------\n" |
| 1420 | ); |
| 1421 | value->append(buf); |
| 1422 | for (int level = 0; level < config::kNumLevels; level++) { |
| 1423 | int files = versions_->NumLevelFiles(level); |
| 1424 | if (stats_[level].micros > 0 || files > 0) { |
| 1425 | snprintf( |
| 1426 | buf, sizeof(buf), |
| 1427 | "%3d %8d %8.0f %9.0f %8.0f %9.0f\n", |
| 1428 | level, |
| 1429 | files, |
| 1430 | versions_->NumLevelBytes(level) / 1048576.0, |
| 1431 | stats_[level].micros / 1e6, |
| 1432 | stats_[level].bytes_read / 1048576.0, |
| 1433 | stats_[level].bytes_written / 1048576.0); |
| 1434 | value->append(buf); |
| 1435 | } |
| 1436 | } |
| 1437 | return true; |
Gabor Cselle | 299cced | 2011-10-05 16:30:28 -0700 | [diff] [blame] | 1438 | } else if (in == "sstables") { |
| 1439 | *value = versions_->current()->DebugString(); |
| 1440 | return true; |
ssid | 528c2bc | 2015-09-29 11:52:21 -0700 | [diff] [blame] | 1441 | } else if (in == "approximate-memory-usage") { |
| 1442 | size_t total_usage = options_.block_cache->TotalCharge(); |
| 1443 | if (mem_) { |
| 1444 | total_usage += mem_->ApproximateMemoryUsage(); |
| 1445 | } |
| 1446 | if (imm_) { |
| 1447 | total_usage += imm_->ApproximateMemoryUsage(); |
| 1448 | } |
| 1449 | char buf[50]; |
| 1450 | snprintf(buf, sizeof(buf), "%llu", |
| 1451 | static_cast<unsigned long long>(total_usage)); |
| 1452 | value->append(buf); |
| 1453 | return true; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1454 | } |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1455 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1456 | return false; |
| 1457 | } |
| 1458 | |
| 1459 | void DBImpl::GetApproximateSizes( |
| 1460 | const Range* range, int n, |
| 1461 | uint64_t* sizes) { |
| 1462 | // TODO(opt): better implementation |
| 1463 | Version* v; |
| 1464 | { |
| 1465 | MutexLock l(&mutex_); |
| 1466 | versions_->current()->Ref(); |
| 1467 | v = versions_->current(); |
| 1468 | } |
| 1469 | |
| 1470 | for (int i = 0; i < n; i++) { |
| 1471 | // Convert user_key into a corresponding internal key. |
| 1472 | InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek); |
| 1473 | InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek); |
| 1474 | uint64_t start = versions_->ApproximateOffsetOf(v, k1); |
| 1475 | uint64_t limit = versions_->ApproximateOffsetOf(v, k2); |
| 1476 | sizes[i] = (limit >= start ? limit - start : 0); |
| 1477 | } |
| 1478 | |
| 1479 | { |
| 1480 | MutexLock l(&mutex_); |
| 1481 | v->Unref(); |
| 1482 | } |
| 1483 | } |
| 1484 | |
| 1485 | // Default implementations of convenience methods that subclasses of DB |
| 1486 | // can call if they wish |
| 1487 | Status DB::Put(const WriteOptions& opt, const Slice& key, const Slice& value) { |
| 1488 | WriteBatch batch; |
| 1489 | batch.Put(key, value); |
| 1490 | return Write(opt, &batch); |
| 1491 | } |
| 1492 | |
| 1493 | Status DB::Delete(const WriteOptions& opt, const Slice& key) { |
| 1494 | WriteBatch batch; |
| 1495 | batch.Delete(key); |
| 1496 | return Write(opt, &batch); |
| 1497 | } |
| 1498 | |
| 1499 | DB::~DB() { } |
| 1500 | |
| 1501 | Status DB::Open(const Options& options, const std::string& dbname, |
| 1502 | DB** dbptr) { |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1503 | *dbptr = nullptr; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1504 | |
| 1505 | DBImpl* impl = new DBImpl(options, dbname); |
| 1506 | impl->mutex_.Lock(); |
| 1507 | VersionEdit edit; |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 1508 | // Recover handles create_if_missing, error_if_exists |
| 1509 | bool save_manifest = false; |
| 1510 | Status s = impl->Recover(&edit, &save_manifest); |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1511 | if (s.ok() && impl->mem_ == nullptr) { |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 1512 | // Create new log and a corresponding memtable. |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1513 | uint64_t new_log_number = impl->versions_->NewFileNumber(); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1514 | WritableFile* lfile; |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1515 | s = options.env->NewWritableFile(LogFileName(dbname, new_log_number), |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1516 | &lfile); |
| 1517 | if (s.ok()) { |
dgrogan@chromium.org | f779e7a | 2011-04-12 19:38:58 +0000 | [diff] [blame] | 1518 | edit.SetLogNumber(new_log_number); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1519 | impl->logfile_ = lfile; |
gabor@google.com | ccf0fcd | 2011-06-22 02:36:45 +0000 | [diff] [blame] | 1520 | impl->logfile_number_ = new_log_number; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1521 | impl->log_ = new log::Writer(lfile); |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 1522 | impl->mem_ = new MemTable(impl->internal_comparator_); |
| 1523 | impl->mem_->Ref(); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1524 | } |
Sanjay Ghemawat | ac1d69d | 2014-12-11 08:13:18 -0800 | [diff] [blame] | 1525 | } |
| 1526 | if (s.ok() && save_manifest) { |
| 1527 | edit.SetPrevLogNumber(0); // No older logs needed after recovery. |
| 1528 | edit.SetLogNumber(impl->logfile_number_); |
| 1529 | s = impl->versions_->LogAndApply(&edit, &impl->mutex_); |
| 1530 | } |
| 1531 | if (s.ok()) { |
| 1532 | impl->DeleteObsoleteFiles(); |
| 1533 | impl->MaybeScheduleCompaction(); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1534 | } |
| 1535 | impl->mutex_.Unlock(); |
| 1536 | if (s.ok()) { |
costan | 09217fd | 2018-04-10 16:18:06 -0700 | [diff] [blame] | 1537 | assert(impl->mem_ != nullptr); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1538 | *dbptr = impl; |
| 1539 | } else { |
| 1540 | delete impl; |
| 1541 | } |
| 1542 | return s; |
| 1543 | } |
| 1544 | |
dgrogan@chromium.org | da79909 | 2011-05-21 02:17:43 +0000 | [diff] [blame] | 1545 | Snapshot::~Snapshot() { |
| 1546 | } |
| 1547 | |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1548 | Status DestroyDB(const std::string& dbname, const Options& options) { |
| 1549 | Env* env = options.env; |
| 1550 | std::vector<std::string> filenames; |
cmumford | 0509414 | 2017-10-17 13:05:47 -0700 | [diff] [blame] | 1551 | Status result = env->GetChildren(dbname, &filenames); |
| 1552 | if (!result.ok()) { |
| 1553 | // Ignore error in case directory does not exist |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1554 | return Status::OK(); |
| 1555 | } |
| 1556 | |
| 1557 | FileLock* lock; |
gabor@google.com | 6699c7e | 2011-07-15 00:20:57 +0000 | [diff] [blame] | 1558 | const std::string lockname = LockFileName(dbname); |
cmumford | 0509414 | 2017-10-17 13:05:47 -0700 | [diff] [blame] | 1559 | result = env->LockFile(lockname, &lock); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1560 | if (result.ok()) { |
| 1561 | uint64_t number; |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1562 | FileType type; |
dgrogan@chromium.org | ba6dac0 | 2011-04-20 22:48:11 +0000 | [diff] [blame] | 1563 | for (size_t i = 0; i < filenames.size(); i++) { |
gabor@google.com | 6699c7e | 2011-07-15 00:20:57 +0000 | [diff] [blame] | 1564 | if (ParseFileName(filenames[i], &number, &type) && |
Sanjay Ghemawat | 583f149 | 2012-03-09 07:51:04 -0800 | [diff] [blame] | 1565 | type != kDBLockFile) { // Lock file will be deleted at end |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1566 | Status del = env->DeleteFile(dbname + "/" + filenames[i]); |
| 1567 | if (result.ok() && !del.ok()) { |
| 1568 | result = del; |
| 1569 | } |
| 1570 | } |
| 1571 | } |
| 1572 | env->UnlockFile(lock); // Ignore error since state is already gone |
gabor@google.com | 6699c7e | 2011-07-15 00:20:57 +0000 | [diff] [blame] | 1573 | env->DeleteFile(lockname); |
jorlow@chromium.org | f67e15e | 2011-03-18 22:37:00 +0000 | [diff] [blame] | 1574 | env->DeleteDir(dbname); // Ignore error in case dir contains other files |
| 1575 | } |
| 1576 | return result; |
| 1577 | } |
| 1578 | |
Hans Wennborg | 36a5f8e | 2011-10-31 17:22:06 +0000 | [diff] [blame] | 1579 | } // namespace leveldb |