8000 Support GetFileSize API in FSRandomAccessFile by xingbowang · Pull Request #13676 · facebook/rocksdb · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Support GetFileSize API in FSRandomAccessFile #13676

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions db/c_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -2230,10 +2230,6 @@ int main(int argc, char** argv) {
rocksdb_options_set_skip_stats_update_on_db_open(o, 1);
CheckCondition(1 == rocksdb_options_get_skip_stats_update_on_db_open(o));

rocksdb_options_set_skip_checking_sst_file_sizes_on_db_open(o, 1);
CheckCondition(
1 == rocksdb_options_get_skip_checking_sst_file_sizes_on_db_open(o));

rocksdb_options_set_max_write_buffer_number(o, 97);
CheckCondition(97 == rocksdb_options_get_max_write_buffer_number(o));

Expand Down Expand Up @@ -2493,8 +2489,6 @@ int main(int argc, char** argv) {
CheckCondition(2.0 ==
rocksdb_options_get_max_bytes_for_level_multiplier(copy));
CheckCondition(1 == rocksdb_options_get_skip_stats_update_on_db_open(copy));
CheckCondition(
1 == rocksdb_options_get_skip_checking_sst_file_sizes_on_db_open(copy));
CheckCondition(97 == rocksdb_options_get_max_write_buffer_number(copy));
CheckCondition(23 ==
rocksdb_options_get_min_write_buffer_number_to_merge(copy));
Expand Down Expand Up @@ -2681,12 +2675,6 @@ int main(int argc, char** argv) {
CheckCondition(0 == rocksdb_options_get_skip_stats_update_on_db_open(copy));
CheckCondition(1 == rocksdb_options_get_skip_stats_update_on_db_open(o));

rocksdb_options_set_skip_checking_sst_file_sizes_on_db_open(copy, 0);
CheckCondition(
0 == rocksdb_options_get_skip_checking_sst_file_sizes_on_db_open(copy));
CheckCondition(
1 == rocksdb_options_get_skip_checking_sst_file_sizes_on_db_open(o));

rocksdb_options_set_max_write_buffer_number(copy, 2000);
CheckCondition(2000 == rocksdb_options_get_max_write_buffer_number(copy));
CheckCondition(97 == rocksdb_options_get_max_write_buffer_number(o));
Expand Down
4 changes: 2 additions & 2 deletions db/corruption_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -579,9 +579,9 @@ TEST_F(CorruptionTest, TableFileWrongSize) {
// DB actually accepts this without paranoid checks, relying on size
// recorded in manifest to locate the SST footer.
options_.paranoid_checks = false;
options_.skip_checking_sst_file_sizes_on_db_open = false;
Reopen();
Check(100, 100);
// As footer could not be extraced, file is completely unreadable
Check(0, 0);

// But reports the issue with paranoid checks
options_.paranoid_checks = true;
Expand Down
57 changes: 53 additions & 4 deletions db/db_encryption_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ class DBEncryptionTest : public DBTestBase {
public:
DBEncryptionTest()
: DBTestBase("db_encryption_test", /*env_do_fsync=*/true) {}
Env* GetTargetEnv() {
Env* GetNonEncryptedEnv() {
if (encrypted_env_ != < 6D4E span class="pl-c1">nullptr) {
return (static_cast<EnvWrapper*>(encrypted_env_))->target();
return (dynamic_cast<CompositeEnvWrapper*>(encrypted_env_))->env_target();
} else {
return env_;
}
Expand All @@ -38,7 +38,7 @@ TEST_F(DBEncryptionTest, CheckEncrypted) {
auto status = env_->GetChildren(dbname_, &fileNames);
ASSERT_OK(status);

Env* target = GetTargetEnv();
Env* target = GetNonEncryptedEnv();
int hits = 0;
for (auto it = fileNames.begin(); it != fileNames.end(); ++it) {
if (*it == "LOCK") {
Expand Down Expand Up @@ -89,7 +89,7 @@ TEST_F(DBEncryptionTest, CheckEncrypted) {
}

TEST_F(DBEncryptionTest, ReadEmptyFile) {
auto defaultEnv = GetTargetEnv();
auto defaultEnv = GetNonEncryptedEnv();

// create empty file for reading it back in later
auto envOptions = EnvOptions(CurrentOptions());
Expand All @@ -116,6 +116,55 @@ TEST_F(DBEncryptionTest, ReadEmptyFile) {
ASSERT_TRUE(data.empty());
}

TEST_F(DBEncryptionTest, NotSupportedGetFileSize) {
// Encrypted envs do not support GetFileSize.
// Validate ReadFooterFromFile allowing not supported GetFileSize file system.
std::shared_ptr<FileSystem> fs;
if (encrypted_env_) {
fs = encrypted_env_->GetFileSystem();
} else {
fs = env_->GetFileSystem();
}

// create empty file for reading it back in later
auto envOptions = EnvOptions(CurrentOptions());
auto filePath = dbname_ + "/empty.empty";

Status status;
{
// create an empty file
std::unique_ptr<WritableFile> writableFile;
status = env_->NewWritableFile(filePath, &writableFile, envOptions);
ASSERT_OK(status);
}

// Open it for reading footer
std::unique_ptr<FSRandomAccessFile> randomAccessFile;
status = fs->NewRandomAccessFile(filePath, FileOptions(), &randomAccessFile,
nullptr);
ASSERT_OK(status);

auto randomAccessFileReader = std::make_unique<RandomAccessFileReader>(
std::move(randomAccessFile), filePath);

status = ReadFooterFromFile(IOOptions(), randomAccessFileReader.get(), *fs,
nullptr, 16 /* Wrong file size */, nullptr);
if (encrypted_env_) {
// For encrypted env, the GetFileSize API is not supported, so file size
// matching check is skipped. Instead it will return file size too short for
// footer validation.
ASSERT_TRUE(status.IsCorruption());
ASSERT_TRUE(status.ToString().find("file is too short") !=
std::string::npos);
} else {
// For non encrypted env, the GetFileSize API is supported, and should
// return value 0, which triggers file size not match error.
ASSERT_TRUE(status.IsCorruption());
ASSERT_TRUE(status.ToString().find("file size mismatch") !=
std::string::npos);
}
}

} // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
Expand Down
79 changes: 0 additions & 79 deletions db/db_impl/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5044,85 +5044,6 @@ void DBImpl::GetAllColumnFamilyMetaData(
}
}

Status DBImpl::CheckConsistency() {
mutex_.AssertHeld();
std::vector<LiveFileMetaData> metadata;
versions_->GetLiveFilesMetaData(&metadata);
TEST_SYNC_POINT("DBImpl::CheckConsistency:AfterGetLiveFilesMetaData");

std::string corruption_messages;

if (immutable_db_options_.skip_checking_sst_file_sizes_on_db_open) {
// Instead of calling GetFileSize() for each expected file, call
// GetChildren() for the DB directory and check that all expected files
// are listed, without checking their sizes.
// Since sst files might be in different directories, do it for each
// directory separately.
std::map<std::string, std::vector<std::string>> files_by_directory;
for (const auto& md : metadata) {
// md.name has a leading "/". Remove it.
std::string fname = md.name;
if (!fname.empty() && fname[0] == '/') {
fname = fname.substr(1);
}
files_by_directory[md.db_path].push_back(fname);
}

IOOptions io_opts;
io_opts.do_not_recurse = true;
for (const auto& dir_files : files_by_directory) {
std::string directory = dir_files.first;
std::vector<std::string> existing_files;
Status s = fs_->GetChildren(directory, io_opts, &existing_files,
/*IODebugContext*=*/nullptr);
if (!s.ok()) {
corruption_messages +=
"Can't list files in " + directory + ": " + s.ToString() + "\n";
continue;
}
std::sort(existing_files.begin(), existing_files.end());

for (const std::string& fname : dir_files.second) {
if (!std::binary_search(existing_files.begin(), existing_files.end(),
fname) &&
!std::binary_search(existing_files.begin(), existing_files.end(),
Rocks2LevelTableFileName(fname))) {
corruption_messages +=
"Missing sst file " + fname + " in " + directory + "\n";
}
}
}
} else {
for (const auto& md : metadata) {
// md.name has a leading "/".
std::string file_path = md.db_path + md.name;

uint64_t fsize = 0;
TEST_SYNC_POINT("DBImpl::CheckConsistency:BeforeGetFileSize");
Status s = env_->GetFileSize(file_path, &fsize);
if (!s.ok() &&
env_->GetFileSize(Rocks2LevelTableFileName(file_path), &fsize).ok()) {
s = Status::OK();
}
if (!s.ok()) {
corruption_messages +=
"Can't access " + md.name + ": " + s.ToString() + "\n";
} else if (fsize != md.size) {
corruption_messages += "Sst file size mismatch: " + file_path +
". Size recorded in manifest " +
std::to_string(md.size) + ", actual size " +
std::to_string(fsize) + "\n";
}
}
}

if (corruption_messages.size() == 0) {
return Status::OK();
} else {
return Status::Corruption(corruption_messages);
}
}

Status DBImpl::GetDbIdentity(std::string& identity) const {
identity.assign(db_id_);
return Status::OK();
Expand Down
4 changes: 0 additions & 4 deletions db/db_impl/db_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -804,10 +804,6 @@ class DBImpl : public DB {
// being detected.
const Snapshot* GetSnapshotForWriteConflictBoundary();

// checks if all live files exist on file system and that their file sizes
// match to our in-memory records
virtual Status CheckConsistency();

// max_file_num_to_ignore allows bottom level compaction to filter out newly
// compacted SST files. Setting max_file_num_to_ignore to kMaxUint64 will
// disable the filtering
Expand Down
3 changes: 0 additions & 3 deletions db/db_impl/db_impl_follower.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,6 @@ Status DBImplFollower::Recover(
}
return s;
}
if (immutable_db_options_.paranoid_checks && s.ok()) {
s = CheckConsistency();
}
if (s.ok()) {
default_cf_handle_ = new ColumnFamilyHandleImpl(
versions_->GetColumnFamilySet()->GetDefault(), this, &mutex_);
Expand Down
9 changes: 0 additions & 9 deletions db/db_impl/db_impl_open.cc
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,6 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src,
"wal_compression is disabled since only zstd is supported");
}

if (!result.paranoid_checks) {
result.skip_checking_sst_file_sizes_on_db_open = true;
ROCKS_LOG_INFO(result.info_log,
"file size check will be skipped during open.");
}

return result;
}

Expand Down Expand Up @@ -694,9 +688,6 @@ Status DBImpl::Recover(
s = MaybeUpdateNextFileNumber(recovery_ctx);
}

if (immutable_db_options_.paranoid_checks && s.ok()) {
s = CheckConsistency();
}
if (s.ok() && !read_only) {
// TODO: share file descriptors (FSDirectory) with SetDirectories above
std::map<std::string, std::shared_ptr<FSDirectory>> created_dirs;
Expand Down
46 changes: 0 additions & 46 deletions db/db_impl/db_impl_secondary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@ Status DBImplSecondary::Recover(
}
return s;
}
if (immutable_db_options_.paranoid_checks && s.ok()) {
s = CheckConsistency();
}
// Initial max_total_in_memory_state_ before recovery logs.
max_total_in_memory_state_ = 0;
for (auto cfd : *versions_->GetColumnFamilySet()) {
Expand Down Expand Up @@ -653,49 +650,6 @@ Status DBImplSecondary::NewIterators(
return Status::OK();
}

Status DBImplSecondary::CheckConsistency() {
mutex_.AssertHeld();
Status s = DBImpl::CheckConsistency();
// If DBImpl::CheckConsistency() which is stricter returns success, then we
// do not need to give a second chance.
if (s.ok()) {
return s;
}
// It's possible that DBImpl::CheckConssitency() can fail because the primary
// may have removed certain files, causing the GetFileSize(name) call to
// fail and returning a PathNotFound. In this case, we take a best-effort
// approach and just proceed.
TEST_SYNC_POINT_CALLBACK(
"DBImplSecondary::CheckConsistency:AfterFirstAttempt", &s);

if (immutable_db_options_.skip_checking_sst_file_sizes_on_db_open) {
return Status::OK();
}

std::vector<LiveFileMetaData> metadata;
versions_->GetLiveFilesMetaData(&metadata);

std::string corruption_messages;
for (const auto& md : metadata) {
// md.name has a leading "/".
std::string file_path = md.db_path + md.name;

uint64_t fsize = 0;
s = env_->GetFileSize(file_path, &fsize);
if (!s.ok() &&
(env_->GetFileSize(Rocks2LevelTableFileName(file_path), &fsize).ok() ||
s.IsPathNotFound())) {
s = Status::OK();
}
if (!s.ok()) {
corruption_messages +=
"Can't access " + md.name + ": " + s.ToString() + "\n";
}
}
return corruption_messages.empty() ? Status::OK()
: Status::Corruption(corruption_messages);
}

Status DBImplSecondary::TryCatchUpWithPrimary() {
assert(versions_.get() != nullptr);
Status s;
Expand Down
6 changes: 0 additions & 6 deletions db/db_impl/db_impl_secondary.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,12 +248,6 @@ class DBImplSecondary : public DBImpl {
Status MaybeInitLogReader(uint64_t log_number,
log::FragmentBufferedReader** log_reader);

// Check if all live files exist on file system and that their file sizes
// matche to the in-memory records. It is possible that some live files may
// have been deleted by the primary. In this case, CheckConsistency() does
// not flag the missing file as inconsistency.
Status CheckConsistency() override;

#ifndef NDEBUG
Status TEST_CompactWithoutInstallation(const OpenAndCompactOptions& options,
ColumnFamilyHandle* cfh,
Expand Down
43 changes: 3 additions & 40 deletions db/db_secondary_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,9 @@ class TraceFileEnv : public EnvWrapper {
char* scratch) const override {
return target_->Read(offset, n, result, scratch);
}
Status GetFileSize(uint64_t* file_size) override {
return target_->GetFileSize(file_size);
}

private:
std::unique_ptr<RandomAccessFile> target_;
Expand Down Expand Up @@ -1291,46 +1294,6 @@ TEST_F(DBSecondaryTest, CatchUpAfterFlush) {
ASSERT_OK(iter3->status());
}

TEST_F(DBSecondaryTest, CheckConsistencyWhenOpen) {
bool called = false;
Options options;
options.env = env_;
options.disable_auto_compactions = true;
Reopen(options);
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
SyncPoint::GetInstance()->SetCallBack(
"DBImplSecondary::CheckConsistency:AfterFirstAttempt", [&](void* arg) {
ASSERT_NE(nullptr, arg);
called = true;
auto* s = static_cast<Status*>(arg);
ASSERT_NOK(*s);
});
SyncPoint::GetInstance()->LoadDependency(
{{"DBImpl::CheckConsistency:AfterGetLiveFilesMetaData",
"BackgroundCallCompaction:0"},
{"DBImpl::BackgroundCallCompaction:PurgedObsoleteFiles",
"DBImpl::CheckConsistency:BeforeGetFileSize"}});
SyncPoint::GetInstance()->EnableProcessing();

ASSERT_OK(Put("a", "value0"));
ASSERT_OK(Put("c", "value0"));
ASSERT_OK(Flush());
ASSERT_OK(Put("b", "value1"));
ASSERT_OK(Put("d", "value1"));
ASSERT_OK(Flush());
port::Thread thread([this]() {
Options opts;
opts.env = env_;
opts.max_open_files = -1;
OpenSecondary(opts);
});
ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
ASSERT_OK(dbfull()->TEST_WaitForCompact());
thread.join();
ASSERT_TRUE(called);
}

TEST_F(DBSecondaryTest, StartFromInconsistent) {
Options options = CurrentOptions();
DestroyAndReopen(options);
Expand Down
Loading
Loading
0