YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/plain_table_index.cc
Line
Count
Source (jump to first uncovered line)
1
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under the BSD-style license found in the
3
//  LICENSE file in the root directory of this source tree. An additional grant
4
//  of patent rights can be found in the PATENTS file in the same directory.
5
//
6
// The following only applies to changes made to this file as part of YugaByte development.
7
//
8
// Portions Copyright (c) YugaByte, Inc.
9
//
10
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
11
// in compliance with the License.  You may obtain a copy of the License at
12
//
13
// http://www.apache.org/licenses/LICENSE-2.0
14
//
15
// Unless required by applicable law or agreed to in writing, software distributed under the License
16
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
17
// or implied.  See the License for the specific language governing permissions and limitations
18
// under the License.
19
//
20
21
#ifndef ROCKSDB_LITE
22
23
#ifndef __STDC_FORMAT_MACROS
24
#define __STDC_FORMAT_MACROS
25
#endif
26
27
#include <inttypes.h>
28
29
#include "yb/rocksdb/table/plain_table_index.h"
30
#include "yb/rocksdb/env.h"
31
#include "yb/rocksdb/util/coding.h"
32
#include "yb/rocksdb/util/hash.h"
33
34
namespace rocksdb {
35
36
namespace {
37
814k
inline uint32_t GetBucketIdFromHash(uint32_t hash, uint32_t num_buckets) {
38
814k
  assert(num_buckets > 0);
39
0
  return hash % num_buckets;
40
814k
}
41
}
42
43
3.10k
Status PlainTableIndex::InitFromRawData(Slice data) {
44
3.10k
  if (!GetVarint32(&data, &index_size_)) {
45
0
    return STATUS(Corruption, "Couldn't read the index size!");
46
0
  }
47
3.10k
  assert(index_size_ > 0);
48
3.10k
  if (!GetVarint32(&data, &num_prefixes_)) {
49
0
    return STATUS(Corruption, "Couldn't read the index size!");
50
0
  }
51
3.10k
  sub_index_size_ =
52
3.10k
      static_cast<uint32_t>(data.size()) - index_size_ * kOffsetLen;
53
54
3.10k
  char* index_data_begin = const_cast<char*>(data.cdata());
55
3.10k
  index_ = reinterpret_cast<uint32_t*>(index_data_begin);
56
3.10k
  sub_index_ = reinterpret_cast<char*>(index_ + index_size_);
57
3.10k
  return Status::OK();
58
3.10k
}
59
60
PlainTableIndex::IndexSearchResult PlainTableIndex::GetOffset(
61
123k
    uint32_t prefix_hash, uint32_t* bucket_value) const {
62
123k
  int bucket = GetBucketIdFromHash(prefix_hash, index_size_);
63
123k
  *bucket_value = index_[bucket];
64
123k
  if ((*bucket_value & kSubIndexMask) == kSubIndexMask) {
65
87.0k
    *bucket_value ^= kSubIndexMask;
66
87.0k
    return kSubindex;
67
87.0k
  }
68
36.2k
  if (*bucket_value >= kMaxFileSize) {
69
876
    return kNoPrefixForBucket;
70
35.4k
  } else {
71
    // point directly to the file
72
35.4k
    return kDirectToFile;
73
35.4k
  }
74
36.2k
}
75
76
void PlainTableIndexBuilder::IndexRecordList::AddRecord(uint32_t hash,
77
691k
                                                        uint32_t offset) {
78
691k
  if (num_records_in_current_group_ == kNumRecordsPerGroup) {
79
5.18k
    current_group_ = AllocateNewGroup();
80
5.18k
    num_records_in_current_group_ = 0;
81
5.18k
  }
82
691k
  auto& new_record = current_group_[num_records_in_current_group_++];
83
691k
  new_record.hash = hash;
84
691k
  new_record.offset = offset;
85
691k
  new_record.next = nullptr;
86
691k
}
87
88
void PlainTableIndexBuilder::AddKeyPrefix(Slice key_prefix_slice,
89
975k
                                          uint32_t key_offset) {
90
975k
  if (is_first_record_ || 
prev_key_prefix_ != key_prefix_slice.ToString()972k
) {
91
674k
    ++num_prefixes_;
92
674k
    if (!is_first_record_) {
93
671k
      keys_per_prefix_hist_.Add(num_keys_per_prefix_);
94
671k
    }
95
674k
    num_keys_per_prefix_ = 0;
96
674k
    prev_key_prefix_ = key_prefix_slice.ToString();
97
674k
    prev_key_prefix_hash_ = GetSliceHash(key_prefix_slice);
98
674k
    due_index_ = true;
99
674k
  }
100
101
975k
  if (due_index_) {
102
    // Add an index key for every kIndexIntervalForSamePrefixKeys keys
103
691k
    record_list_.AddRecord(prev_key_prefix_hash_, key_offset);
104
691k
    due_index_ = false;
105
691k
  }
106
107
975k
  num_keys_per_prefix_++;
108
975k
  if (index_sparseness_ == 0 || 
num_keys_per_prefix_ % index_sparseness_ == 0975k
) {
109
17.0k
    due_index_ = true;
110
17.0k
  }
111
975k
  is_first_record_ = false;
112
975k
}
113
114
3.10k
Slice PlainTableIndexBuilder::Finish() {
115
3.10k
  AllocateIndex();
116
3.10k
  std::vector<IndexRecord*> hash_to_offsets(index_size_, nullptr);
117
3.10k
  std::vector<uint32_t> entries_per_bucket(index_size_, 0);
118
3.10k
  BucketizeIndexes(&hash_to_offsets, &entries_per_bucket);
119
120
3.10k
  keys_per_prefix_hist_.Add(num_keys_per_prefix_);
121
3.10k
  RLOG(InfoLogLevel::INFO_LEVEL, ioptions_.info_log,
122
3.10k
      "Number of Keys per prefix Histogram: %s",
123
3.10k
      keys_per_prefix_hist_.ToString().c_str());
124
125
  // From the temp data structure, populate indexes.
126
3.10k
  return FillIndexes(hash_to_offsets, entries_per_bucket);
127
3.10k
}
128
129
3.10k
void PlainTableIndexBuilder::AllocateIndex() {
130
3.10k
  if (prefix_extractor_ == nullptr || 
hash_table_ratio_ <= 02.75k
) {
131
    // Fall back to pure binary search if the user fails to specify a prefix
132
    // extractor.
133
370
    index_size_ = 1;
134
2.73k
  } else {
135
2.73k
    double hash_table_size_multipier = 1.0 / hash_table_ratio_;
136
2.73k
    index_size_ =
137
2.73k
      static_cast<uint32_t>(num_prefixes_ * hash_table_size_multipier) + 1;
138
2.73k
    assert(index_size_ > 0);
139
2.73k
  }
140
3.10k
}
141
142
void PlainTableIndexBuilder::BucketizeIndexes(
143
    std::vector<IndexRecord*>* hash_to_offsets,
144
3.10k
    std::vector<uint32_t>* entries_per_bucket) {
145
3.10k
  bool first = true;
146
3.10k
  uint32_t prev_hash = 0;
147
3.10k
  size_t num_records = record_list_.GetNumRecords();
148
694k
  for (size_t i = 0; i < num_records; 
i++690k
) {
149
690k
    IndexRecord* index_record = record_list_.At(i);
150
690k
    uint32_t cur_hash = index_record->hash;
151
690k
    if (first || 
prev_hash != cur_hash687k
) {
152
674k
      prev_hash = cur_hash;
153
674k
      first = false;
154
674k
    }
155
690k
    uint32_t bucket = GetBucketIdFromHash(cur_hash, index_size_);
156
690k
    IndexRecord* prev_bucket_head = (*hash_to_offsets)[bucket];
157
690k
    index_record->next = prev_bucket_head;
158
690k
    (*hash_to_offsets)[bucket] = index_record;
159
690k
    (*entries_per_bucket)[bucket]++;
160
690k
  }
161
162
3.10k
  sub_index_size_ = 0;
163
900k
  for (auto entry_count : *entries_per_bucket) {
164
900k
    if (entry_count <= 1) {
165
744k
      continue;
166
744k
    }
167
    // Only buckets with more than 1 entry will have subindex.
168
156k
    sub_index_size_ += VarintLength(entry_count);
169
    // total bytes needed to store these entries' in-file offsets.
170
156k
    sub_index_size_ += entry_count * PlainTableIndex::kOffsetLen;
171
156k
  }
172
3.10k
}
173
174
Slice PlainTableIndexBuilder::FillIndexes(
175
    const std::vector<IndexRecord*>& hash_to_offsets,
176
3.10k
    const std::vector<uint32_t>& entries_per_bucket) {
177
3.10k
  RLOG(InfoLogLevel::DEBUG_LEVEL, ioptions_.info_log,
178
3.10k
      "Reserving %" PRIu32 " bytes for plain table's sub_index",
179
3.10k
      sub_index_size_);
180
3.10k
  auto total_allocate_size = GetTotalSize();
181
3.10k
  char* allocated = arena_->AllocateAligned(
182
3.10k
      total_allocate_size, huge_page_tlb_size_, ioptions_.info_log);
183
184
3.10k
  auto temp_ptr = EncodeVarint32(allocated, index_size_);
185
3.10k
  uint32_t* index =
186
3.10k
      reinterpret_cast<uint32_t*>(EncodeVarint32(temp_ptr, num_prefixes_));
187
3.10k
  char* sub_index = reinterpret_cast<char*>(index + index_size_);
188
189
3.10k
  uint32_t sub_index_offset = 0;
190
903k
  for (uint32_t i = 0; i < index_size_; 
i++900k
) {
191
900k
    uint32_t num_keys_for_bucket = entries_per_bucket[i];
192
900k
    switch (num_keys_for_bucket) {
193
428k
      case 0:
194
        // No key for bucket
195
428k
        index[i] = PlainTableIndex::kMaxFileSize;
196
428k
        break;
197
315k
      case 1:
198
        // point directly to the file offset
199
315k
        index[i] = hash_to_offsets[i]->offset;
200
315k
        break;
201
156k
      default:
202
        // point to second level indexes.
203
156k
        index[i] = sub_index_offset | PlainTableIndex::kSubIndexMask;
204
156k
        char* prev_ptr = &sub_index[sub_index_offset];
205
156k
        char* cur_ptr = EncodeVarint32(prev_ptr, num_keys_for_bucket);
206
156k
        sub_index_offset += static_cast<uint32_t>(cur_ptr - prev_ptr);
207
156k
        char* sub_index_pos = &sub_index[sub_index_offset];
208
156k
        IndexRecord* record = hash_to_offsets[i];
209
156k
        int j;
210
531k
        for (j = num_keys_for_bucket - 1; j >= 0 && 
record375k
;
211
375k
             j--, record = record->next) {
212
375k
          EncodeFixed32(sub_index_pos + j * sizeof(uint32_t), record->offset);
213
375k
        }
214
156k
        assert(j == -1 && record == nullptr);
215
0
        sub_index_offset += PlainTableIndex::kOffsetLen * num_keys_for_bucket;
216
156k
        assert(sub_index_offset <= sub_index_size_);
217
0
        break;
218
900k
    }
219
900k
  }
220
3.09k
  assert(sub_index_offset == sub_index_size_);
221
222
3.09k
  RLOG(InfoLogLevel::DEBUG_LEVEL, ioptions_.info_log,
223
3.09k
      "hash table size: %d, suffix_map length %" ROCKSDB_PRIszt, index_size_,
224
3.09k
      sub_index_size_);
225
3.09k
  return Slice(allocated, GetTotalSize());
226
3.10k
}
227
228
const std::string PlainTableIndexBuilder::kPlainTableIndexBlock =
229
    "PlainTableIndexBlock";
230
};  // namespace rocksdb
231
232
#endif  // ROCKSDB_LITE