// Note that if file_size is zero, the file has been deleted and // should not be added to the manifest. int level = 0; if (s.ok() && meta.file_size > 0) { const Slice min_user_key = meta.smallest.user_key(); const Slice max_user_key = meta.largest.user_key(); if (base != nullptr) { // 在所有无文件和该 SSTable 不发生 key 重叠的 Level 中 // 选择一个最大的 Level 插入该文件 level = base->PickLevelForMemTableOutput(min_user_key, max_user_key); } // 将文件加入到文件列表中 edit->AddFile(level, meta.number, meta.file_size, meta.smallest, meta.largest); }
// table.cc Status Table::Open(const Options& options, RandomAccessFile* file, uint64_t size, Table** table){ *table = nullptr; if (size < Footer::kEncodedLength) { return Status::Corruption("file is too short to be an sstable"); }
// 读取 footer char footer_space[Footer::kEncodedLength]; Slice footer_input; Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength, &footer_input, footer_space); if (!s.ok()) return s;
Footer footer; s = footer.DecodeFrom(&footer_input); if (!s.ok()) return s;
// 读取 index block BlockContents index_block_contents; ReadOptions opt; if (options.paranoid_checks) { opt.verify_checksums = true; } s = ReadBlock(file, opt, footer.index_handle(), &index_block_contents);
if (s.ok()) { // We've successfully read the footer and the index block: we're // ready to serve requests. Block* index_block = newBlock(index_block_contents); Rep* rep = new Table::Rep; rep->options = options; rep->file = file; rep->metaindex_handle = footer.metaindex_handle(); rep->index_block = index_block; rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0); rep->filter_data = nullptr; rep->filter = nullptr; // 生成新的 Table 对象,并且读取 BloomFilter *table = newTable(rep); (*table)->ReadMeta(footer); }
return s; }
上面是在 Open 函数中读取文件 Footer 和 IndexBlock 的代码,在 table->ReadMeta 中会读取文件的 MetaIndexBlock(如果存在的话)。
// block.cc voidSeek(const Slice& target)override{ // Binary search in restart array to find the last restart point // with a key < target uint32_t left = 0; uint32_t right = num_restarts_ - 1; int current_key_compare = 0;
// 如果已经进行过查找了,那么就检查上一轮查找最终结束的位置是在当前 key 的左边还是右边 if (Valid()) { // If we're already scanning, use the current position as a starting // point. This is beneficial if the key we're seeking to is ahead of the // current position. current_key_compare = Compare(key_, target); if (current_key_compare < 0) { // key_ is smaller than target left = restart_index_; } elseif (current_key_compare > 0) { right = restart_index_; } else { // We're seeking to the key we're already at. return; } }
// 使用二分查找寻找这个 key 所在的位置 while (left < right) { uint32_t mid = (left + right + 1) / 2; uint32_t region_offset = GetRestartPoint(mid); uint32_t shared, non_shared, value_length; constchar* key_ptr = DecodeEntry(data_ + region_offset, data_ + restarts_, &shared, &non_shared, &value_length); if (key_ptr == nullptr || (shared != 0)) { CorruptionError(); return; } Slice mid_key(key_ptr, non_shared); if (Compare(mid_key, target) < 0) { // Key at "mid" is smaller than "target". Therefore all // blocks before "mid" are uninteresting. left = mid; } else { // Key at "mid" is >= "target". Therefore all blocks at or // after "mid" are uninteresting. right = mid - 1; } }
// We might be able to use our current position within the restart block. // This is true if we determined the key we desire is in the current block // and is after than the current key. assert(current_key_compare == 0 || Valid()); // 如果我们当前已经在这个 RestartPoint 指向的 Entry 里了(上一次查找) // 并且我们要找的键就在这个 Entry 中,那么我们就可以不用 seek bool skip_seek = left == restart_index_ && current_key_compare < 0; if (!skip_seek) { // 跳转到 key 所在的 RestartPoint 指向的 Entry SeekToRestartPoint(left); } // 在一个 Entry 中从头往后读 // Linear search (within restart block) for first key >= target while (true) { if (!ParseNextKey()) { return; } if (Compare(key_, target) >= 0) { return; } } }