/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/plain_table_factory.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
2 | | // Use of this source code is governed by a BSD-style license that can be |
3 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
4 | | // |
5 | | // The following only applies to changes made to this file as part of YugaByte development. |
6 | | // |
7 | | // Portions Copyright (c) YugaByte, Inc. |
8 | | // |
9 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
10 | | // in compliance with the License. You may obtain a copy of the License at |
11 | | // |
12 | | // http://www.apache.org/licenses/LICENSE-2.0 |
13 | | // |
14 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
15 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
16 | | // or implied. See the License for the specific language governing permissions and limitations |
17 | | // under the License. |
18 | | // |
19 | | |
20 | | #ifndef YB_ROCKSDB_TABLE_PLAIN_TABLE_FACTORY_H |
21 | | #define YB_ROCKSDB_TABLE_PLAIN_TABLE_FACTORY_H |
22 | | |
23 | | #ifndef ROCKSDB_LITE |
24 | | #include <stdint.h> |
25 | | #include <memory> |
26 | | #include <string> |
27 | | |
28 | | #include "yb/rocksdb/options.h" |
29 | | #include "yb/rocksdb/status.h" |
30 | | #include "yb/rocksdb/table.h" |
31 | | |
32 | | namespace rocksdb { |
33 | | |
34 | | struct EnvOptions; |
35 | | |
36 | | using std::unique_ptr; |
37 | | class WritableFile; |
38 | | class Table; |
39 | | class TableBuilder; |
40 | | |
41 | | // IndexedTable requires fixed length key, configured as a constructor |
42 | | // parameter of the factory class. Output file format: |
43 | | // +-------------+-----------------+ |
44 | | // | version | user_key_length | |
45 | | // +------------++------------+-----------------+ <= key1 offset |
46 | | // | encoded key1 | value_size | | |
47 | | // +------------+-------------+-------------+ | |
48 | | // | value1 | |
49 | | // | | |
50 | | // +--------------------------+-------------+---+ <= key2 offset |
51 | | // | encoded key2 | value_size | | |
52 | | // +------------+-------------+-------------+ | |
53 | | // | value2 | |
54 | | // | | |
55 | | // | ...... | |
56 | | // +-----------------+--------------------------+ |
57 | | // |
58 | | // When the key encoding type is kPlain, the key part is encoded as: |
59 | | // +------------+--------------------+ |
60 | | // | [key_size] | internal key | |
61 | | // +------------+--------------------+ |
62 | | // for the case of user_key_len = kPlainTableVariableLength, |
63 | | // and simply: |
64 | | // +----------------------+ |
65 | | // | internal key | |
66 | | // +----------------------+ |
67 | | // for user_key_len != kPlainTableVariableLength case. |
68 | | // |
69 | | // If the key encoding type is kPrefix, keys are encoded in this format. |
70 | | // There are three ways to encode a key: |
71 | | // (1) Full Key |
72 | | // +---------------+---------------+-------------------+ |
73 | | // | Full Key Flag | Full Key Size | Full Internal Key | |
74 | | // +---------------+---------------+-------------------+ |
75 | | // which simply encodes a full key |
76 | | // |
77 | | // (2) A key that shares a prefix with the previous key, where the previous key |
78 | | // is encoded in the format of (1). |
79 | | // +-------------+-------------+-------------+-------------+------------+ |
80 | | // | Prefix Flag | Prefix Size | Suffix Flag | Suffix Size | Key Suffix | |
81 | | // +-------------+-------------+-------------+-------------+------------+ |
82 | | // where Key Suffix is the suffix part of the key, including the internal bytes. |
83 | | // The actual key will be constructed by concatenating the prefix part of the |
84 | | // previous key with the suffix part of the key here, with the sizes given here. |
85 | | // |
86 | | // (3) A key that shares a prefix with the previous key, where the previous key |
87 | | // is encoded in the format of (2). |
88 | | // +-----------------+-----------------+------------------------+ |
89 | | // | Key Suffix Flag | Key Suffix Size | Suffix of Internal Key | |
90 | | // +-----------------+-----------------+------------------------+ |
91 | | // The key will be constructed by concatenating the previous key's prefix (which is |
92 | | // also a prefix of the last key encoded in the format of (1)) and the |
93 | | // suffix given here. |
94 | | // |
95 | | // For example, the following keys (prefix and suffix are separated by |
96 | | // spaces): |
97 | | // 0000 0001 |
98 | | // 0000 00021 |
99 | | // 0000 0002 |
100 | | // 00011 00 |
101 | | // 0002 0001 |
102 | | // Will be encoded like this: |
103 | | // FK 8 00000001 |
104 | | // PF 4 SF 5 00021 |
105 | | // SF 4 0002 |
106 | | // FK 7 0001100 |
107 | | // FK 8 00020001 |
108 | | // (where FK means full key flag, PF means prefix flag and SF means suffix flag) |
109 | | // |
110 | | // All those "key flag + key size" shown above are in this format: |
111 | | // The 8 bits of the first byte: |
112 | | // +----+----+----+----+----+----+----+----+ |
113 | | // | Type | Size | |
114 | | // +----+----+----+----+----+----+----+----+ |
115 | | // Type indicates: full key, prefix, or suffix. |
116 | | // The last 6 bits are for size. If the size bits are not all 1, they hold the |
117 | | // size of the key. Otherwise, a varint32 is read after this byte. This varint |
118 | | // value + 0x3F (the value of all 1) will be the key size. |
119 | | // |
120 | | // For example, full key with length 16 will be encoded as (binary): |
121 | | // 00 010000 |
122 | | // (00 means full key) |
123 | | // and a prefix with 100 bytes will be encoded as: |
124 | | // 01 111111 00100101 |
125 | | // (63) (37) |
126 | | // (01 means prefix) |
127 | | // |
128 | | // All the internal keys above (including kPlain and kPrefix) are encoded in |
129 | | // this format: |
130 | | // There are two types: |
131 | | // (1) normal internal key format |
132 | | // +----------- ...... -------------+----+---+---+---+---+---+---+---+ |
133 | | // | user key |type| sequence ID | |
134 | | // +----------- ..... --------------+----+---+---+---+---+---+---+---+ |
135 | | // (2) Special case for keys whose sequence ID is 0 and whose type is value |
136 | | // +----------- ...... -------------+----+ |
137 | | // | user key |0xFF| |
138 | | // +----------- ..... --------------+----+ |
139 | | // To save 7 bytes for the special case where sequence ID = 0. |
140 | | // |
141 | | // |
142 | | class PlainTableFactory : public TableFactory { |
143 | | public: |
144 | 4.02k | ~PlainTableFactory() {} |
145 | | // user_key_len is the length of the user key. If it is set to be |
146 | | // kPlainTableVariableLength, then it means variable length. Otherwise, all |
147 | | // the keys need to have the fixed length of this value. bloom_bits_per_key is |
148 | | // the number of bits used for the bloom filter per key. hash_table_ratio is |
149 | | // the desired utilization of the hash table used for prefix hashing. |
150 | | // hash_table_ratio = number of prefixes / #buckets in the hash table |
151 | | // hash_table_ratio = 0 means skip the hash table and rely only on binary |
152 | | // search. |
153 | | // index_sparseness determines the index interval for keys |
154 | | // inside the same prefix. It will be the maximum number of linear search steps |
155 | | // required after hash and binary search. |
156 | | // index_sparseness = 0 means index for every key. |
157 | | // huge_page_tlb_size determines whether to allocate hash indexes from huge |
158 | | // page TLB and the page size if allocating from there. See comments of |
159 | | // Arena::AllocateAligned() for details. |
160 | | explicit PlainTableFactory( |
161 | | const PlainTableOptions& _table_options = PlainTableOptions()) |
162 | 4.02k | : table_options_(_table_options) {} |
163 | | |
164 | 11.1k | const char* Name() const override { return "PlainTable"; } |
165 | | Status NewTableReader(const TableReaderOptions& table_reader_options, |
166 | | unique_ptr<RandomAccessFileReader>&& file, |
167 | | uint64_t file_size, |
168 | | unique_ptr<TableReader>* table) const override; |
169 | | |
170 | 1.49k | bool IsSplitSstForWriteSupported() const override { return false; } |
171 | | |
172 | | TableBuilder *NewTableBuilder(const TableBuilderOptions &table_builder_options, |
173 | | uint32_t column_family_id, WritableFileWriter *base_file, |
174 | | WritableFileWriter *data_file = nullptr) const override; |
175 | | |
176 | | std::string GetPrintableTableOptions() const override; |
177 | | |
178 | | const PlainTableOptions& table_options() const; |
179 | | |
180 | | static const char kValueTypeSeqId0 = '\xff'; |
181 | | |
182 | | // Sanitizes the specified DB Options; this override performs no checks and always returns Status::OK(). |
183 | | Status SanitizeOptions(const DBOptions& db_opts, |
184 | 1.20k | const ColumnFamilyOptions& cf_opts) const override { |
185 | 1.20k | return Status::OK(); |
186 | 1.20k | } |
187 | | |
188 | 0 | void* GetOptions() override { return &table_options_; } |
189 | | |
190 | | private: |
191 | | PlainTableOptions table_options_; |
192 | | }; |
193 | | |
194 | | } // namespace rocksdb |
195 | | #endif // ROCKSDB_LITE |
196 | | |
197 | | #endif // YB_ROCKSDB_TABLE_PLAIN_TABLE_FACTORY_H |