Merge src/leveldb changes for LevelDB 1.15

This commit is contained in:
Pieter Wuille
2013-12-12 22:08:18 +01:00
31 changed files with 449 additions and 366 deletions

View File

@@ -176,147 +176,43 @@ class PosixMmapReadableFile: public RandomAccessFile {
}
};
// We preallocate up to an extra megabyte and use memcpy to append new
// data to the file. This is safe since we either properly close the
// file before reading from it, or for log files, the reading code
// knows enough to skip zero suffixes.
class PosixMmapFile : public WritableFile {
class PosixWritableFile : public WritableFile {
private:
std::string filename_;
int fd_;
size_t page_size_;
size_t map_size_; // How much extra memory to map at a time
char* base_; // The mapped region
char* limit_; // Limit of the mapped region
char* dst_; // Where to write next (in range [base_,limit_])
char* last_sync_; // Where have we synced up to
uint64_t file_offset_; // Offset of base_ in file
// Have we done an munmap of unsynced data?
bool pending_sync_;
// Roundup x to a multiple of y
static size_t Roundup(size_t x, size_t y) {
return ((x + y - 1) / y) * y;
}
size_t TruncateToPageBoundary(size_t s) {
s -= (s & (page_size_ - 1));
assert((s % page_size_) == 0);
return s;
}
bool UnmapCurrentRegion() {
bool result = true;
if (base_ != NULL) {
if (last_sync_ < limit_) {
// Defer syncing this data until next Sync() call, if any
pending_sync_ = true;
}
if (munmap(base_, limit_ - base_) != 0) {
result = false;
}
file_offset_ += limit_ - base_;
base_ = NULL;
limit_ = NULL;
last_sync_ = NULL;
dst_ = NULL;
// Increase the amount we map the next time, but capped at 1MB
if (map_size_ < (1<<20)) {
map_size_ *= 2;
}
}
return result;
}
bool MapNewRegion() {
assert(base_ == NULL);
if (ftruncate(fd_, file_offset_ + map_size_) < 0) {
return false;
}
void* ptr = mmap(NULL, map_size_, PROT_READ | PROT_WRITE, MAP_SHARED,
fd_, file_offset_);
if (ptr == MAP_FAILED) {
return false;
}
base_ = reinterpret_cast<char*>(ptr);
limit_ = base_ + map_size_;
dst_ = base_;
last_sync_ = base_;
return true;
}
FILE* file_;
public:
PosixMmapFile(const std::string& fname, int fd, size_t page_size)
: filename_(fname),
fd_(fd),
page_size_(page_size),
map_size_(Roundup(65536, page_size)),
base_(NULL),
limit_(NULL),
dst_(NULL),
last_sync_(NULL),
file_offset_(0),
pending_sync_(false) {
assert((page_size & (page_size - 1)) == 0);
}
PosixWritableFile(const std::string& fname, FILE* f)
: filename_(fname), file_(f) { }
~PosixMmapFile() {
if (fd_ >= 0) {
PosixMmapFile::Close();
~PosixWritableFile() {
if (file_ != NULL) {
// Ignoring any potential errors
fclose(file_);
}
}
virtual Status Append(const Slice& data) {
const char* src = data.data();
size_t left = data.size();
while (left > 0) {
assert(base_ <= dst_);
assert(dst_ <= limit_);
size_t avail = limit_ - dst_;
if (avail == 0) {
if (!UnmapCurrentRegion() ||
!MapNewRegion()) {
return IOError(filename_, errno);
}
}
size_t n = (left <= avail) ? left : avail;
memcpy(dst_, src, n);
dst_ += n;
src += n;
left -= n;
size_t r = fwrite_unlocked(data.data(), 1, data.size(), file_);
if (r != data.size()) {
return IOError(filename_, errno);
}
return Status::OK();
}
virtual Status Close() {
Status s;
size_t unused = limit_ - dst_;
if (!UnmapCurrentRegion()) {
s = IOError(filename_, errno);
} else if (unused > 0) {
// Trim the extra space at the end of the file
if (ftruncate(fd_, file_offset_ - unused) < 0) {
s = IOError(filename_, errno);
}
Status result;
if (fclose(file_) != 0) {
result = IOError(filename_, errno);
}
if (close(fd_) < 0) {
if (s.ok()) {
s = IOError(filename_, errno);
}
}
fd_ = -1;
base_ = NULL;
limit_ = NULL;
return s;
file_ = NULL;
return result;
}
virtual Status Flush() {
if (fflush_unlocked(file_) != 0) {
return IOError(filename_, errno);
}
return Status::OK();
}
@@ -353,26 +249,10 @@ class PosixMmapFile : public WritableFile {
if (!s.ok()) {
return s;
}
if (pending_sync_) {
// Some unmapped data was not synced
pending_sync_ = false;
if (fdatasync(fd_) < 0) {
s = IOError(filename_, errno);
}
if (fflush_unlocked(file_) != 0 ||
fdatasync(fileno(file_)) != 0) {
s = Status::IOError(filename_, strerror(errno));
}
if (dst_ > last_sync_) {
// Find the beginnings of the pages that contain the first and last
// bytes to be synced.
size_t p1 = TruncateToPageBoundary(last_sync_ - base_);
size_t p2 = TruncateToPageBoundary(dst_ - base_ - 1);
last_sync_ = dst_;
if (msync(base_ + p1, p2 - p1 + page_size_, MS_SYNC) < 0) {
s = IOError(filename_, errno);
}
}
return s;
}
};
@@ -463,12 +343,12 @@ class PosixEnv : public Env {
virtual Status NewWritableFile(const std::string& fname,
WritableFile** result) {
Status s;
const int fd = open(fname.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644);
if (fd < 0) {
FILE* f = fopen(fname.c_str(), "w");
if (f == NULL) {
*result = NULL;
s = IOError(fname, errno);
} else {
*result = new PosixMmapFile(fname, fd, page_size_);
*result = new PosixWritableFile(fname, f);
}
return s;
}
@@ -631,7 +511,6 @@ class PosixEnv : public Env {
return NULL;
}
size_t page_size_;
pthread_mutex_t mu_;
pthread_cond_t bgsignal_;
pthread_t bgthread_;
@@ -646,8 +525,7 @@ class PosixEnv : public Env {
MmapLimiter mmap_limit_;
};
PosixEnv::PosixEnv() : page_size_(getpagesize()),
started_bgthread_(false) {
PosixEnv::PosixEnv() : started_bgthread_(false) {
PthreadCall("mutex_init", pthread_mutex_init(&mu_, NULL));
PthreadCall("cvar_init", pthread_cond_init(&bgsignal_, NULL));
}