/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/plain_table_index.cc

Source (jump to first uncovered line)
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under the BSD-style license found in the
//  LICENSE file in the root directory of this source tree. An additional grant
//  of patent rights can be found in the PATENTS file in the same directory.
//
// The following only applies to changes made to this file as part of YugaByte development.
//
// Portions Copyright (c) YugaByte, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// in compliance with the License.  You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied.  See the License for the specific language governing permissions and limitations
// under the License.
//

#ifndef ROCKSDB_LITE

#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif

#include <inttypes.h>

#include "yb/rocksdb/table/plain_table_index.h"
#include "yb/rocksdb/env.h"
#include "yb/rocksdb/util/coding.h"
#include "yb/rocksdb/util/hash.h"

namespace rocksdb {

namespace {
inline uint32_t GetBucketIdFromHash(uint32_t hash, uint32_t num_buckets) {
  assert(num_buckets > 0);
  return hash % num_buckets;
}
}

Status PlainTableIndex::InitFromRawData(Slice data) {
  if (!GetVarint32(&data, &index_size_)) {
    return STATUS(Corruption, "Couldn't read the index size!");
  }
  assert(index_size_ > 0);
  if (!GetVarint32(&data, &num_prefixes_)) {
    return STATUS(Corruption, "Couldn't read the index size!");
  }
  sub_index_size_ =
      static_cast<uint32_t>(data.size()) - index_size_ * kOffsetLen;

  char* index_data_begin = const_cast<char*>(data.cdata());
  index_ = reinterpret_cast<uint32_t*>(index_data_begin);
  sub_index_ = reinterpret_cast<char*>(index_ + index_size_);
  return Status::OK();
}

PlainTableIndex::IndexSearchResult PlainTableIndex::GetOffset(
    uint32_t prefix_hash, uint32_t* bucket_value) const {
  int bucket = GetBucketIdFromHash(prefix_hash, index_size_);
  *bucket_value = index_[bucket];
  if ((*bucket_value & kSubIndexMask) == kSubIndexMask) {
    *bucket_value ^= kSubIndexMask;
    return kSubindex;
  }
  if (*bucket_value >= kMaxFileSize) {
    return kNoPrefixForBucket;
  } else {
    // point directly to the file
    return kDirectToFile;
  }
}

void PlainTableIndexBuilder::IndexRecordList::AddRecord(uint32_t hash,
                                                        uint32_t offset) {
  if (num_records_in_current_group_ == kNumRecordsPerGroup) {
    current_group_ = AllocateNewGroup();
    num_records_in_current_group_ = 0;
  }
  auto& new_record = current_group_[num_records_in_current_group_++];
  new_record.hash = hash;
  new_record.offset = offset;
  new_record.next = nullptr;
}

void PlainTableIndexBuilder::AddKeyPrefix(Slice key_prefix_slice,
                                          uint32_t key_offset) {
  if (is_first_record_ || prev_key_prefix_ != key_prefix_slice.ToString()972k) {
    ++num_prefixes_;
    if (!is_first_record_) {
      keys_per_prefix_hist_.Add(num_keys_per_prefix_);
    }
    num_keys_per_prefix_ = 0;
    prev_key_prefix_ = key_prefix_slice.ToString();
    prev_key_prefix_hash_ = GetSliceHash(key_prefix_slice);
    due_index_ = true;
  }

  if (due_index_) {
    // Add an index key for every kIndexIntervalForSamePrefixKeys keys
    record_list_.AddRecord(prev_key_prefix_hash_, key_offset);
    due_index_ = false;
  }

  num_keys_per_prefix_++;
  if (index_sparseness_ == 0 || num_keys_per_prefix_ % index_sparseness_ == 0975k) {
    due_index_ = true;
  }
  is_first_record_ = false;
}

Slice PlainTableIndexBuilder::Finish() {
  AllocateIndex();
  std::vector<IndexRecord*> hash_to_offsets(index_size_, nullptr);
  std::vector<uint32_t> entries_per_bucket(index_size_, 0);
  BucketizeIndexes(&hash_to_offsets, &entries_per_bucket);

  keys_per_prefix_hist_.Add(num_keys_per_prefix_);
  RLOG(InfoLogLevel::INFO_LEVEL, ioptions_.info_log,
      "Number of Keys per prefix Histogram: %s",
      keys_per_prefix_hist_.ToString().c_str());

  // From the temp data structure, populate indexes.
  return FillIndexes(hash_to_offsets, entries_per_bucket);
}

void PlainTableIndexBuilder::AllocateIndex() {
  if (prefix_extractor_ == nullptr || hash_table_ratio_ <= 02.75k) {
    // Fall back to pure binary search if the user fails to specify a prefix
    // extractor.
    index_size_ = 1;
  } else {
    double hash_table_size_multipier = 1.0 / hash_table_ratio_;
    index_size_ =
      static_cast<uint32_t>(num_prefixes_ * hash_table_size_multipier) + 1;
    assert(index_size_ > 0);
  }
}

void PlainTableIndexBuilder::BucketizeIndexes(
    std::vector<IndexRecord*>* hash_to_offsets,
    std::vector<uint32_t>* entries_per_bucket) {
  bool first = true;
  uint32_t prev_hash = 0;
  size_t num_records = record_list_.GetNumRecords();
  for (size_t i = 0; i < num_records; i++690k) {
    IndexRecord* index_record = record_list_.At(i);
    uint32_t cur_hash = index_record->hash;
    if (first || prev_hash != cur_hash687k) {
      prev_hash = cur_hash;
      first = false;
    }
    uint32_t bucket = GetBucketIdFromHash(cur_hash, index_size_);
    IndexRecord* prev_bucket_head = (*hash_to_offsets)[bucket];
    index_record->next = prev_bucket_head;
    (*hash_to_offsets)[bucket] = index_record;
    (*entries_per_bucket)[bucket]++;
  }

  sub_index_size_ = 0;
  for (auto entry_count : *entries_per_bucket) {
    if (entry_count <= 1) {
      continue;
    }
    // Only buckets with more than 1 entry will have subindex.
    sub_index_size_ += VarintLength(entry_count);
    // total bytes needed to store these entries' in-file offsets.
    sub_index_size_ += entry_count * PlainTableIndex::kOffsetLen;
  }
}

Slice PlainTableIndexBuilder::FillIndexes(
    const std::vector<IndexRecord*>& hash_to_offsets,
    const std::vector<uint32_t>& entries_per_bucket) {
  RLOG(InfoLogLevel::DEBUG_LEVEL, ioptions_.info_log,
      "Reserving %" PRIu32 " bytes for plain table's sub_index",
      sub_index_size_);
  auto total_allocate_size = GetTotalSize();
  char* allocated = arena_->AllocateAligned(
      total_allocate_size, huge_page_tlb_size_, ioptions_.info_log);

  auto temp_ptr = EncodeVarint32(allocated, index_size_);
  uint32_t* index =
      reinterpret_cast<uint32_t*>(EncodeVarint32(temp_ptr, num_prefixes_));
  char* sub_index = reinterpret_cast<char*>(index + index_size_);

  uint32_t sub_index_offset = 0;
  for (uint32_t i = 0; i < index_size_; i++900k) {
    uint32_t num_keys_for_bucket = entries_per_bucket[i];
    switch (num_keys_for_bucket) {
      case 0:
        // No key for bucket
        index[i] = PlainTableIndex::kMaxFileSize;
        break;
      case 1:
        // point directly to the file offset
        index[i] = hash_to_offsets[i]->offset;
        break;
      default:
        // point to second level indexes.
        index[i] = sub_index_offset | PlainTableIndex::kSubIndexMask;
        char* prev_ptr = &sub_index[sub_index_offset];
        char* cur_ptr = EncodeVarint32(prev_ptr, num_keys_for_bucket);
        sub_index_offset += static_cast<uint32_t>(cur_ptr - prev_ptr);
        char* sub_index_pos = &sub_index[sub_index_offset];
        IndexRecord* record = hash_to_offsets[i];
        int j;
        for (j = num_keys_for_bucket - 1; j >= 0 && record375k;
             j--, record = record->next) {
          EncodeFixed32(sub_index_pos + j * sizeof(uint32_t), record->offset);
        }
        assert(j == -1 && record == nullptr);
        sub_index_offset += PlainTableIndex::kOffsetLen * num_keys_for_bucket;
        assert(sub_index_offset <= sub_index_size_);
        break;
    }
  }
  assert(sub_index_offset == sub_index_size_);

  RLOG(InfoLogLevel::DEBUG_LEVEL, ioptions_.info_log,
      "hash table size: %d, suffix_map length %" ROCKSDB_PRIszt, index_size_,
      sub_index_size_);
  return Slice(allocated, GetTotalSize());
}

const std::string PlainTableIndexBuilder::kPlainTableIndexBlock =
    "PlainTableIndexBlock";
};  // namespace rocksdb

#endif  // ROCKSDB_LITE

YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

Line	Count	Source (jump to first uncovered line)
1		// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2		// This source code is licensed under the BSD-style license found in the
3		// LICENSE file in the root directory of this source tree. An additional grant
4		// of patent rights can be found in the PATENTS file in the same directory.
5		//
6		// The following only applies to changes made to this file as part of YugaByte development.
7		//
8		// Portions Copyright (c) YugaByte, Inc.
9		//
10		// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
11		// in compliance with the License. You may obtain a copy of the License at
12		//
13		// http://www.apache.org/licenses/LICENSE-2.0
14		//
15		// Unless required by applicable law or agreed to in writing, software distributed under the License
16		// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
17		// or implied. See the License for the specific language governing permissions and limitations
18		// under the License.
19		//
20
21		#ifndef ROCKSDB_LITE
22
23		#ifndef __STDC_FORMAT_MACROS
24		#define __STDC_FORMAT_MACROS
25		#endif
26
27		#include <inttypes.h>
28
29		#include "yb/rocksdb/table/plain_table_index.h"
30		#include "yb/rocksdb/env.h"
31		#include "yb/rocksdb/util/coding.h"
32		#include "yb/rocksdb/util/hash.h"
33
34		namespace rocksdb {
35
36		namespace {
37	814k	inline uint32_t GetBucketIdFromHash(uint32_t hash, uint32_t num_buckets) {
38	814k	assert(num_buckets > 0);
39	0	return hash % num_buckets;
40	814k	}
41		}
42
43	3.10k	Status PlainTableIndex::InitFromRawData(Slice data) {
44	3.10k	if (!GetVarint32(&data, &index_size_)) {
45	0	return STATUS(Corruption, "Couldn't read the index size!");
46	0	}
47	3.10k	assert(index_size_ > 0);
48	3.10k	if (!GetVarint32(&data, &num_prefixes_)) {
49	0	return STATUS(Corruption, "Couldn't read the index size!");
50	0	}
51	3.10k	sub_index_size_ =
52	3.10k	static_cast<uint32_t>(data.size()) - index_size_ * kOffsetLen;
53
54	3.10k	char* index_data_begin = const_cast<char*>(data.cdata());
55	3.10k	index_ = reinterpret_cast<uint32_t*>(index_data_begin);
56	3.10k	sub_index_ = reinterpret_cast<char*>(index_ + index_size_);
57	3.10k	return Status::OK();
58	3.10k	}
59
60		PlainTableIndex::IndexSearchResult PlainTableIndex::GetOffset(
61	123k	uint32_t prefix_hash, uint32_t* bucket_value) const {
62	123k	int bucket = GetBucketIdFromHash(prefix_hash, index_size_);
63	123k	*bucket_value = index_[bucket];
64	123k	if ((*bucket_value & kSubIndexMask) == kSubIndexMask) {
65	87.0k	*bucket_value ^= kSubIndexMask;
66	87.0k	return kSubindex;
67	87.0k	}
68	36.2k	if (*bucket_value >= kMaxFileSize) {
69	876	return kNoPrefixForBucket;
70	35.4k	} else {
71		// point directly to the file
72	35.4k	return kDirectToFile;
73	35.4k	}
74	36.2k	}
75
76		void PlainTableIndexBuilder::IndexRecordList::AddRecord(uint32_t hash,
77	691k	uint32_t offset) {
78	691k	if (num_records_in_current_group_ == kNumRecordsPerGroup) {
79	5.18k	current_group_ = AllocateNewGroup();
80	5.18k	num_records_in_current_group_ = 0;
81	5.18k	}
82	691k	auto& new_record = current_group_[num_records_in_current_group_++];
83	691k	new_record.hash = hash;
84	691k	new_record.offset = offset;
85	691k	new_record.next = nullptr;
86	691k	}
87
88		void PlainTableIndexBuilder::AddKeyPrefix(Slice key_prefix_slice,
89	975k	uint32_t key_offset) {
90	975k	if (is_first_record_ \|\| prev_key_prefix_ != key_prefix_slice.ToString()972k ) {
91	674k	++num_prefixes_;
92	674k	if (!is_first_record_) {
93	671k	keys_per_prefix_hist_.Add(num_keys_per_prefix_);
94	671k	}
95	674k	num_keys_per_prefix_ = 0;
96	674k	prev_key_prefix_ = key_prefix_slice.ToString();
97	674k	prev_key_prefix_hash_ = GetSliceHash(key_prefix_slice);
98	674k	due_index_ = true;
99	674k	}
100
101	975k	if (due_index_) {
102		// Add an index key for every kIndexIntervalForSamePrefixKeys keys
103	691k	record_list_.AddRecord(prev_key_prefix_hash_, key_offset);
104	691k	due_index_ = false;
105	691k	}
106
107	975k	num_keys_per_prefix_++;
108	975k	if (index_sparseness_ == 0 \|\| num_keys_per_prefix_ % index_sparseness_ == 0975k ) {
109	17.0k	due_index_ = true;
110	17.0k	}
111	975k	is_first_record_ = false;
112	975k	}
113
114	3.10k	Slice PlainTableIndexBuilder::Finish() {
115	3.10k	AllocateIndex();
116	3.10k	std::vector<IndexRecord*> hash_to_offsets(index_size_, nullptr);
117	3.10k	std::vector<uint32_t> entries_per_bucket(index_size_, 0);
118	3.10k	BucketizeIndexes(&hash_to_offsets, &entries_per_bucket);
119
120	3.10k	keys_per_prefix_hist_.Add(num_keys_per_prefix_);
121	3.10k	RLOG(InfoLogLevel::INFO_LEVEL, ioptions_.info_log,
122	3.10k	"Number of Keys per prefix Histogram: %s",
123	3.10k	keys_per_prefix_hist_.ToString().c_str());
124
125		// From the temp data structure, populate indexes.
126	3.10k	return FillIndexes(hash_to_offsets, entries_per_bucket);
127	3.10k	}
128
129	3.10k	void PlainTableIndexBuilder::AllocateIndex() {
130	3.10k	if (prefix_extractor_ == nullptr \|\| hash_table_ratio_ <= 02.75k ) {
131		// Fall back to pure binary search if the user fails to specify a prefix
132		// extractor.
133	370	index_size_ = 1;
134	2.73k	} else {
135	2.73k	double hash_table_size_multipier = 1.0 / hash_table_ratio_;
136	2.73k	index_size_ =
137	2.73k	static_cast<uint32_t>(num_prefixes_ * hash_table_size_multipier) + 1;
138	2.73k	assert(index_size_ > 0);
139	2.73k	}
140	3.10k	}
141
142		void PlainTableIndexBuilder::BucketizeIndexes(
143		std::vector<IndexRecord> hash_to_offsets,
144	3.10k	std::vector<uint32_t>* entries_per_bucket) {
145	3.10k	bool first = true;
146	3.10k	uint32_t prev_hash = 0;
147	3.10k	size_t num_records = record_list_.GetNumRecords();
148	694k	for (size_t i = 0; i < num_records; i++690k ) {
149	690k	IndexRecord* index_record = record_list_.At(i);
150	690k	uint32_t cur_hash = index_record->hash;
151	690k	if (first \|\| prev_hash != cur_hash687k ) {
152	674k	prev_hash = cur_hash;
153	674k	first = false;
154	674k	}
155	690k	uint32_t bucket = GetBucketIdFromHash(cur_hash, index_size_);
156	690k	IndexRecord* prev_bucket_head = (*hash_to_offsets)[bucket];
157	690k	index_record->next = prev_bucket_head;
158	690k	(*hash_to_offsets)[bucket] = index_record;
159	690k	(*entries_per_bucket)[bucket]++;
160	690k	}
161
162	3.10k	sub_index_size_ = 0;
163	900k	for (auto entry_count : *entries_per_bucket) {
164	900k	if (entry_count <= 1) {
165	744k	continue;
166	744k	}
167		// Only buckets with more than 1 entry will have subindex.
168	156k	sub_index_size_ += VarintLength(entry_count);
169		// total bytes needed to store these entries' in-file offsets.
170	156k	sub_index_size_ += entry_count * PlainTableIndex::kOffsetLen;
171	156k	}
172	3.10k	}
173
174		Slice PlainTableIndexBuilder::FillIndexes(
175		const std::vector<IndexRecord*>& hash_to_offsets,
176	3.10k	const std::vector<uint32_t>& entries_per_bucket) {
177	3.10k	RLOG(InfoLogLevel::DEBUG_LEVEL, ioptions_.info_log,
178	3.10k	"Reserving %" PRIu32 " bytes for plain table's sub_index",
179	3.10k	sub_index_size_);
180	3.10k	auto total_allocate_size = GetTotalSize();
181	3.10k	char* allocated = arena_->AllocateAligned(
182	3.10k	total_allocate_size, huge_page_tlb_size_, ioptions_.info_log);
183
184	3.10k	auto temp_ptr = EncodeVarint32(allocated, index_size_);
185	3.10k	uint32_t* index =
186	3.10k	reinterpret_cast<uint32_t*>(EncodeVarint32(temp_ptr, num_prefixes_));
187	3.10k	char* sub_index = reinterpret_cast<char*>(index + index_size_);
188
189	3.10k	uint32_t sub_index_offset = 0;
190	903k	for (uint32_t i = 0; i < index_size_; i++900k ) {
191	900k	uint32_t num_keys_for_bucket = entries_per_bucket[i];
192	900k	switch (num_keys_for_bucket) {
193	428k	case 0:
194		// No key for bucket
195	428k	index[i] = PlainTableIndex::kMaxFileSize;
196	428k	break;
197	315k	case 1:
198		// point directly to the file offset
199	315k	index[i] = hash_to_offsets[i]->offset;
200	315k	break;
201	156k	default:
202		// point to second level indexes.
203	156k	index[i] = sub_index_offset \| PlainTableIndex::kSubIndexMask;
204	156k	char* prev_ptr = &sub_index[sub_index_offset];
205	156k	char* cur_ptr = EncodeVarint32(prev_ptr, num_keys_for_bucket);
206	156k	sub_index_offset += static_cast<uint32_t>(cur_ptr - prev_ptr);
207	156k	char* sub_index_pos = &sub_index[sub_index_offset];
208	156k	IndexRecord* record = hash_to_offsets[i];
209	156k	int j;
210	531k	for (j = num_keys_for_bucket - 1; j >= 0 && record375k ;
211	375k	j--, record = record->next) {
212	375k	EncodeFixed32(sub_index_pos + j * sizeof(uint32_t), record->offset);
213	375k	}
214	156k	assert(j == -1 && record == nullptr);
215	0	sub_index_offset += PlainTableIndex::kOffsetLen * num_keys_for_bucket;
216	156k	assert(sub_index_offset <= sub_index_size_);
217	0	break;
218	900k	}
219	900k	}
220	3.09k	assert(sub_index_offset == sub_index_size_);
221
222	3.09k	RLOG(InfoLogLevel::DEBUG_LEVEL, ioptions_.info_log,
223	3.09k	"hash table size: %d, suffix_map length %" ROCKSDB_PRIszt, index_size_,
224	3.09k	sub_index_size_);
225	3.09k	return Slice(allocated, GetTotalSize());
226	3.10k	}
227
228		const std::string PlainTableIndexBuilder::kPlainTableIndexBlock =
229		"PlainTableIndexBlock";
230		}; // namespace rocksdb
231
232		#endif // ROCKSDB_LITE