Squashed 'src/leveldb/' changes from aca1ffc..ae6c262
ae6c262 Merge branch 'leveldb' into ripple-fork 28fa222 Looks like a bit more delay is needed to smooth the latency. a18f3e6 Tidy up JobQueue, add ripple_core module ab82e57 Release leveldb 1.12 02c6259 Release leveldb 1.11 5bbb544 Rate limit compactions with a 25ms pause after each complete file. 8c29c47 LevelDB issue 178 fix: cannot resize a level 0 compaction set 18b245c Added GNU/kFreeBSD kernel name (TARGET_OS) 8be9d12 CondVar::SignalAll was broken, leading to deadlocks on Windows builds. http://code.google.com/p/leveldb/issues/detail?id=149 c9fc070 Upgrade LevelDB to 1.10.0, mostly for better write stall logging. 8215b15 Tweak to variable name to support unity build git-subtree-dir: src/leveldb git-subtree-split: ae6c2620b2ef3d5c69e63dc0eda865d6a39fa061
This commit is contained in:
@@ -35,6 +35,8 @@
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
const int kNumNonTableCacheFiles = 10;
|
||||
|
||||
// Information kept for every waiting writer
|
||||
struct DBImpl::Writer {
|
||||
Status status;
|
||||
@@ -92,9 +94,9 @@ Options SanitizeOptions(const std::string& dbname,
|
||||
Options result = src;
|
||||
result.comparator = icmp;
|
||||
result.filter_policy = (src.filter_policy != NULL) ? ipolicy : NULL;
|
||||
ClipToRange(&result.max_open_files, 20, 50000);
|
||||
ClipToRange(&result.write_buffer_size, 64<<10, 1<<30);
|
||||
ClipToRange(&result.block_size, 1<<10, 4<<20);
|
||||
ClipToRange(&result.max_open_files, 64 + kNumNonTableCacheFiles, 50000);
|
||||
ClipToRange(&result.write_buffer_size, 64<<10, 1<<30);
|
||||
ClipToRange(&result.block_size, 1<<10, 4<<20);
|
||||
if (result.info_log == NULL) {
|
||||
// Open a log file in the same directory as the db
|
||||
src.env->CreateDir(dbname); // In case it does not exist
|
||||
@@ -130,12 +132,13 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
|
||||
log_(NULL),
|
||||
tmp_batch_(new WriteBatch),
|
||||
bg_compaction_scheduled_(false),
|
||||
manual_compaction_(NULL) {
|
||||
manual_compaction_(NULL),
|
||||
consecutive_compaction_errors_(0) {
|
||||
mem_->Ref();
|
||||
has_imm_.Release_Store(NULL);
|
||||
|
||||
// Reserve ten files or so for other uses and give the rest to TableCache.
|
||||
const int table_cache_size = options.max_open_files - 10;
|
||||
const int table_cache_size = options.max_open_files - kNumNonTableCacheFiles;
|
||||
table_cache_ = new TableCache(dbname_, &options_, table_cache_size);
|
||||
|
||||
versions_ = new VersionSet(dbname_, &options_, table_cache_,
|
||||
@@ -310,16 +313,24 @@ Status DBImpl::Recover(VersionEdit* edit) {
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
std::set<uint64_t> expected;
|
||||
versions_->AddLiveFiles(&expected);
|
||||
uint64_t number;
|
||||
FileType type;
|
||||
std::vector<uint64_t> logs;
|
||||
for (size_t i = 0; i < filenames.size(); i++) {
|
||||
if (ParseFileName(filenames[i], &number, &type)
|
||||
&& type == kLogFile
|
||||
&& ((number >= min_log) || (number == prev_log))) {
|
||||
if (ParseFileName(filenames[i], &number, &type)) {
|
||||
expected.erase(number);
|
||||
if (type == kLogFile && ((number >= min_log) || (number == prev_log)))
|
||||
logs.push_back(number);
|
||||
}
|
||||
}
|
||||
if (!expected.empty()) {
|
||||
char buf[50];
|
||||
snprintf(buf, sizeof(buf), "%d missing files; e.g.",
|
||||
static_cast<int>(expected.size()));
|
||||
return Status::Corruption(buf, TableFileName(dbname_, *(expected.begin())));
|
||||
}
|
||||
|
||||
// Recover in the order in which the logs were generated
|
||||
std::sort(logs.begin(), logs.end());
|
||||
@@ -611,6 +622,7 @@ void DBImpl::BackgroundCall() {
|
||||
Status s = BackgroundCompaction();
|
||||
if (s.ok()) {
|
||||
// Success
|
||||
consecutive_compaction_errors_ = 0;
|
||||
} else if (shutting_down_.Acquire_Load()) {
|
||||
// Error most likely due to shutdown; do not wait
|
||||
} else {
|
||||
@@ -622,7 +634,12 @@ void DBImpl::BackgroundCall() {
|
||||
Log(options_.info_log, "Waiting after background compaction error: %s",
|
||||
s.ToString().c_str());
|
||||
mutex_.Unlock();
|
||||
env_->SleepForMicroseconds(1000000);
|
||||
++consecutive_compaction_errors_;
|
||||
int seconds_to_sleep = 1;
|
||||
for (int i = 0; i < 3 && i < consecutive_compaction_errors_ - 1; ++i) {
|
||||
seconds_to_sleep *= 2;
|
||||
}
|
||||
env_->SleepForMicroseconds(seconds_to_sleep * 1000000);
|
||||
mutex_.Lock();
|
||||
}
|
||||
}
|
||||
@@ -805,6 +822,9 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
|
||||
(unsigned long long) output_number,
|
||||
(unsigned long long) current_entries,
|
||||
(unsigned long long) current_bytes);
|
||||
|
||||
// rate-limit compaction file creation with a 100ms pause
|
||||
env_->SleepForMicroseconds(100000);
|
||||
}
|
||||
}
|
||||
return s;
|
||||
@@ -1268,10 +1288,11 @@ Status DBImpl::MakeRoomForWrite(bool force) {
|
||||
} else if (imm_ != NULL) {
|
||||
// We have filled up the current memtable, but the previous
|
||||
// one is still being compacted, so we wait.
|
||||
Log(options_.info_log, "Current memtable full; waiting...\n");
|
||||
bg_cv_.Wait();
|
||||
} else if (versions_->NumLevelFiles(0) >= config::kL0_StopWritesTrigger) {
|
||||
// There are too many level-0 files.
|
||||
Log(options_.info_log, "waiting...\n");
|
||||
Log(options_.info_log, "Too many L0 files; waiting...\n");
|
||||
bg_cv_.Wait();
|
||||
} else {
|
||||
// Attempt to switch to a new memtable and trigger compaction of old
|
||||
|
||||
@@ -163,6 +163,7 @@ class DBImpl : public DB {
|
||||
|
||||
// Have we encountered a background error in paranoid mode?
|
||||
Status bg_error_;
|
||||
int consecutive_compaction_errors_;
|
||||
|
||||
// Per level compaction stats. stats_[level] stores the stats for
|
||||
// compactions that produced data for the specified "level".
|
||||
|
||||
@@ -33,8 +33,11 @@ class AtomicCounter {
|
||||
public:
|
||||
AtomicCounter() : count_(0) { }
|
||||
void Increment() {
|
||||
IncrementBy(1);
|
||||
}
|
||||
void IncrementBy(int count) {
|
||||
MutexLock l(&mu_);
|
||||
count_++;
|
||||
count_ += count;
|
||||
}
|
||||
int Read() {
|
||||
MutexLock l(&mu_);
|
||||
@@ -45,6 +48,10 @@ class AtomicCounter {
|
||||
count_ = 0;
|
||||
}
|
||||
};
|
||||
|
||||
void DelayMilliseconds(int millis) {
|
||||
Env::Default()->SleepForMicroseconds(millis * 1000);
|
||||
}
|
||||
}
|
||||
|
||||
// Special Env used to delay background operations
|
||||
@@ -69,6 +76,7 @@ class SpecialEnv : public EnvWrapper {
|
||||
AtomicCounter random_read_counter_;
|
||||
|
||||
AtomicCounter sleep_counter_;
|
||||
AtomicCounter sleep_time_counter_;
|
||||
|
||||
explicit SpecialEnv(Env* base) : EnvWrapper(base) {
|
||||
delay_sstable_sync_.Release_Store(NULL);
|
||||
@@ -103,7 +111,7 @@ class SpecialEnv : public EnvWrapper {
|
||||
Status Flush() { return base_->Flush(); }
|
||||
Status Sync() {
|
||||
while (env_->delay_sstable_sync_.Acquire_Load() != NULL) {
|
||||
env_->SleepForMicroseconds(100000);
|
||||
DelayMilliseconds(100);
|
||||
}
|
||||
return base_->Sync();
|
||||
}
|
||||
@@ -174,8 +182,9 @@ class SpecialEnv : public EnvWrapper {
|
||||
|
||||
virtual void SleepForMicroseconds(int micros) {
|
||||
sleep_counter_.Increment();
|
||||
target()->SleepForMicroseconds(micros);
|
||||
sleep_time_counter_.IncrementBy(micros);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
class DBTest {
|
||||
@@ -461,6 +470,20 @@ class DBTest {
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
bool DeleteAnSSTFile() {
|
||||
std::vector<std::string> filenames;
|
||||
ASSERT_OK(env_->GetChildren(dbname_, &filenames));
|
||||
uint64_t number;
|
||||
FileType type;
|
||||
for (size_t i = 0; i < filenames.size(); i++) {
|
||||
if (ParseFileName(filenames[i], &number, &type) && type == kTableFile) {
|
||||
ASSERT_OK(env_->DeleteFile(TableFileName(dbname_, number)));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
TEST(DBTest, Empty) {
|
||||
@@ -611,7 +634,7 @@ TEST(DBTest, GetEncountersEmptyLevel) {
|
||||
}
|
||||
|
||||
// Step 4: Wait for compaction to finish
|
||||
env_->SleepForMicroseconds(1000000);
|
||||
DelayMilliseconds(1000);
|
||||
|
||||
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
|
||||
} while (ChangeOptions());
|
||||
@@ -1295,7 +1318,7 @@ TEST(DBTest, L0_CompactionBug_Issue44_a) {
|
||||
Reopen();
|
||||
Reopen();
|
||||
ASSERT_EQ("(a->v)", Contents());
|
||||
env_->SleepForMicroseconds(1000000); // Wait for compaction to finish
|
||||
DelayMilliseconds(1000); // Wait for compaction to finish
|
||||
ASSERT_EQ("(a->v)", Contents());
|
||||
}
|
||||
|
||||
@@ -1311,7 +1334,7 @@ TEST(DBTest, L0_CompactionBug_Issue44_b) {
|
||||
Put("","");
|
||||
Reopen();
|
||||
Put("","");
|
||||
env_->SleepForMicroseconds(1000000); // Wait for compaction to finish
|
||||
DelayMilliseconds(1000); // Wait for compaction to finish
|
||||
Reopen();
|
||||
Put("d","dv");
|
||||
Reopen();
|
||||
@@ -1321,7 +1344,7 @@ TEST(DBTest, L0_CompactionBug_Issue44_b) {
|
||||
Delete("b");
|
||||
Reopen();
|
||||
ASSERT_EQ("(->)(c->cv)", Contents());
|
||||
env_->SleepForMicroseconds(1000000); // Wait for compaction to finish
|
||||
DelayMilliseconds(1000); // Wait for compaction to finish
|
||||
ASSERT_EQ("(->)(c->cv)", Contents());
|
||||
}
|
||||
|
||||
@@ -1506,6 +1529,30 @@ TEST(DBTest, NoSpace) {
|
||||
ASSERT_GE(env_->sleep_counter_.Read(), 5);
|
||||
}
|
||||
|
||||
TEST(DBTest, ExponentialBackoff) {
|
||||
Options options = CurrentOptions();
|
||||
options.env = env_;
|
||||
Reopen(&options);
|
||||
|
||||
ASSERT_OK(Put("foo", "v1"));
|
||||
ASSERT_EQ("v1", Get("foo"));
|
||||
Compact("a", "z");
|
||||
env_->non_writable_.Release_Store(env_); // Force errors for new files
|
||||
env_->sleep_counter_.Reset();
|
||||
env_->sleep_time_counter_.Reset();
|
||||
for (int i = 0; i < 5; i++) {
|
||||
dbfull()->TEST_CompactRange(2, NULL, NULL);
|
||||
}
|
||||
env_->non_writable_.Release_Store(NULL);
|
||||
|
||||
// Wait for compaction to finish
|
||||
DelayMilliseconds(1000);
|
||||
|
||||
ASSERT_GE(env_->sleep_counter_.Read(), 5);
|
||||
ASSERT_LT(env_->sleep_counter_.Read(), 10);
|
||||
ASSERT_GE(env_->sleep_time_counter_.Read(), 10e6);
|
||||
}
|
||||
|
||||
TEST(DBTest, NonWritableFileSystem) {
|
||||
Options options = CurrentOptions();
|
||||
options.write_buffer_size = 1000;
|
||||
@@ -1519,7 +1566,7 @@ TEST(DBTest, NonWritableFileSystem) {
|
||||
fprintf(stderr, "iter %d; errors %d\n", i, errors);
|
||||
if (!Put("foo", big).ok()) {
|
||||
errors++;
|
||||
env_->SleepForMicroseconds(100000);
|
||||
DelayMilliseconds(100);
|
||||
}
|
||||
}
|
||||
ASSERT_GT(errors, 0);
|
||||
@@ -1567,6 +1614,24 @@ TEST(DBTest, ManifestWriteError) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(DBTest, MissingSSTFile) {
|
||||
ASSERT_OK(Put("foo", "bar"));
|
||||
ASSERT_EQ("bar", Get("foo"));
|
||||
|
||||
// Dump the memtable to disk.
|
||||
dbfull()->TEST_CompactMemTable();
|
||||
ASSERT_EQ("bar", Get("foo"));
|
||||
|
||||
Close();
|
||||
ASSERT_TRUE(DeleteAnSSTFile());
|
||||
Options options = CurrentOptions();
|
||||
options.paranoid_checks = true;
|
||||
Status s = TryReopen(&options);
|
||||
ASSERT_TRUE(!s.ok());
|
||||
ASSERT_TRUE(s.ToString().find("issing") != std::string::npos)
|
||||
<< s.ToString();
|
||||
}
|
||||
|
||||
TEST(DBTest, FilesDeletedAfterCompaction) {
|
||||
ASSERT_OK(Put("foo", "v2"));
|
||||
Compact("a", "z");
|
||||
@@ -1711,13 +1776,13 @@ TEST(DBTest, MultiThreaded) {
|
||||
}
|
||||
|
||||
// Let them run for a while
|
||||
env_->SleepForMicroseconds(kTestSeconds * 1000000);
|
||||
DelayMilliseconds(kTestSeconds * 1000);
|
||||
|
||||
// Stop the threads and wait for them to finish
|
||||
mt.stop.Release_Store(&mt);
|
||||
for (int id = 0; id < kNumThreads; id++) {
|
||||
while (mt.thread_done[id].Acquire_Load() == NULL) {
|
||||
env_->SleepForMicroseconds(100000);
|
||||
DelayMilliseconds(100);
|
||||
}
|
||||
}
|
||||
} while (ChangeOptions());
|
||||
|
||||
@@ -26,7 +26,7 @@ std::string ParsedInternalKey::DebugString() const {
|
||||
(unsigned long long) sequence,
|
||||
int(type));
|
||||
std::string result = "'";
|
||||
result += user_key.ToString();
|
||||
result += EscapeString(user_key.ToString());
|
||||
result += buf;
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -70,7 +70,7 @@ TEST(FileNameTest, Parse) {
|
||||
for (int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) {
|
||||
std::string f = errors[i];
|
||||
ASSERT_TRUE(!ParseFileName(f, &number, &type)) << f;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
TEST(FileNameTest, Construction) {
|
||||
|
||||
@@ -1331,14 +1331,19 @@ Compaction* VersionSet::CompactRange(
|
||||
}
|
||||
|
||||
// Avoid compacting too much in one shot in case the range is large.
|
||||
const uint64_t limit = MaxFileSizeForLevel(level);
|
||||
uint64_t total = 0;
|
||||
for (size_t i = 0; i < inputs.size(); i++) {
|
||||
uint64_t s = inputs[i]->file_size;
|
||||
total += s;
|
||||
if (total >= limit) {
|
||||
inputs.resize(i + 1);
|
||||
break;
|
||||
// But we cannot do this for level-0 since level-0 files can overlap
|
||||
// and we must not pick one file and drop another older file if the
|
||||
// two files overlap.
|
||||
if (level > 0) {
|
||||
const uint64_t limit = MaxFileSizeForLevel(level);
|
||||
uint64_t total = 0;
|
||||
for (size_t i = 0; i < inputs.size(); i++) {
|
||||
uint64_t s = inputs[i]->file_size;
|
||||
total += s;
|
||||
if (total >= limit) {
|
||||
inputs.resize(i + 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user