/Users/deen/code/yugabyte-db/src/yb/docdb/docdb_rocksdb_util.h
Line | Count | Source |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #ifndef YB_DOCDB_DOCDB_ROCKSDB_UTIL_H_ |
15 | | #define YB_DOCDB_DOCDB_ROCKSDB_UTIL_H_ |
16 | | |
17 | | #include <boost/optional.hpp> |
18 | | |
19 | | #include "yb/docdb/bounded_rocksdb_iterator.h" |
20 | | |
21 | | #include "yb/rocksdb/cache.h" |
22 | | #include "yb/rocksdb/db.h" |
23 | | #include "yb/rocksdb/options.h" |
24 | | #include "yb/rocksdb/rate_limiter.h" |
25 | | #include "yb/rocksdb/table.h" |
26 | | |
27 | | #include "yb/tablet/tablet_options.h" |
28 | | |
29 | | #include "yb/util/slice.h" |
30 | | |
31 | | namespace yb { |
32 | | namespace docdb { |
33 | | |
34 | | class IntentAwareIterator; |
35 | | |
36 | | // Seek to a rocksdb point that is at least the given key (sub_doc_key), passed as `slice` or |
37 | | // `key_bytes`. If the iterator is already positioned far enough, does not perform a seek. |
38 | | void SeekForward(const rocksdb::Slice& slice, rocksdb::Iterator *iter); |
39 | | |
40 | | void SeekForward(const KeyBytes& key_bytes, rocksdb::Iterator *iter); |
41 | | |
42 | | // When we replace HybridTime::kMin at the end of the seek key, the next seek will skip older |
43 | | // versions of this key, but will not skip any subkeys in its subtree. If the iterator is already |
44 | | // positioned far enough, does not perform a seek. |
45 | | void SeekPastSubKey(const Slice& key, rocksdb::Iterator* iter); |
46 | | |
47 | | // Seek out of the given SubDocKey. For efficiency, the method that takes a non-const KeyBytes |
48 | | // pointer avoids memory allocation by using the KeyBytes buffer to prepare the key to seek to by |
49 | | // appending an extra byte. The appended byte is removed when the method returns. |
50 | | void SeekOutOfSubKey(KeyBytes* key_bytes, rocksdb::Iterator* iter); |
51 | | |
52 | | KeyBytes AppendDocHt(const Slice& key, const DocHybridTime& doc_ht); |
53 | | |
54 | | // Wraps the RocksDB seek operation: uses Next() up to the configured number of times to avoid |
55 | | // invalidating iterator state, and in debug mode allows printing detailed information about seeks. |
56 | | // file_name and line identify the call site (ROCKSDB_SEEK passes __FILE__ and __LINE__). |
57 | | void PerformRocksDBSeek( |
58 | | rocksdb::Iterator *iter, |
59 | | const rocksdb::Slice &seek_key, |
60 | | const char* file_name, |
61 | | int line); |
62 | | |
63 | | // Seeks via PerformRocksDBSeek with the caller's __FILE__ and __LINE__. TODO: is there too much overhead in passing file name and line here in release mode? |
64 | | #define ROCKSDB_SEEK(iter, key) \ |
65 | 539M | do { \ |
66 | 271M | PerformRocksDBSeek((iter), (key), __FILE__, __LINE__); \ |
67 | 271M | } while (0) |
68 | | |
69 | | enum class BloomFilterMode { |
70 | | USE_BLOOM_FILTER, |
71 | | DONT_USE_BLOOM_FILTER, |
72 | | }; |
73 | | |
74 | | // It is only allowed to use bloom filters on scans within the same hashed components of the key, |
75 | | // because BloomFilterAwareIterator relies on it and ignores an SST file completely if there are no |
76 | | // keys with the same hashed components as the key specified for the seek operation. |
77 | | // Note: bloom_filter_mode should be specified explicitly to avoid using it incorrectly by default. |
78 | | // user_key_for_filter is used with BloomFilterMode::USE_BLOOM_FILTER to exclude SST files which |
79 | | // do not contain keys with the same hashed components as (Sub)DocKey encoded in user_key_for_filter. |
80 | | BoundedRocksDbIterator CreateRocksDBIterator( |
81 | | rocksdb::DB* rocksdb, |
82 | | const KeyBounds* docdb_key_bounds, |
83 | | BloomFilterMode bloom_filter_mode, |
84 | | const boost::optional<const Slice>& user_key_for_filter, |
85 | | const rocksdb::QueryId query_id, |
86 | | std::shared_ptr<rocksdb::ReadFileFilter> file_filter = nullptr, |
87 | | const Slice* iterate_upper_bound = nullptr); |
88 | | |
89 | | // Values and transactions committed later than high_ht (presumably taken from read_time; confirm) |
90 | | // can be skipped, so we do not re-request a pending transaction's status if we already know it wasn't committed at high_ht. |
91 | | std::unique_ptr<IntentAwareIterator> CreateIntentAwareIterator( |
92 | | const DocDB& doc_db, |
93 | | BloomFilterMode bloom_filter_mode, |
94 | | const boost::optional<const Slice>& user_key_for_filter, |
95 | | const rocksdb::QueryId query_id, |
96 | | const TransactionOperationContext& transaction_context, |
97 | | CoarseTimePoint deadline, |
98 | | const ReadHybridTime& read_time, |
99 | | std::shared_ptr<rocksdb::ReadFileFilter> file_filter = nullptr, |
100 | | const Slice* iterate_upper_bound = nullptr); |
101 | | |
102 | | // Request RocksDB compaction and wait until it completes. |
103 | | CHECKED_STATUS ForceRocksDBCompact(rocksdb::DB* db); |
104 | | |
105 | | rocksdb::Options TEST_AutoInitFromRocksDBFlags(); |
106 | | |
107 | | rocksdb::BlockBasedTableOptions TEST_AutoInitFromRocksDbTableFlags(); |
108 | | |
109 | | Result<rocksdb::KeyValueEncodingFormat> GetConfiguredKeyValueEncodingFormat( |
110 | | const std::string& flag_value); |
111 | | |
112 | | // Defines how rate limiter is shared across a node |
113 | | YB_DEFINE_ENUM(RateLimiterSharingMode, (NONE)(TSERVER)); |
114 | | |
115 | | // Extracts rate limiter's sharing mode depending on the value of |
116 | | // flag `FLAGS_rocksdb_compact_flush_rate_limit_sharing_mode`; |
117 | | // `RateLimiterSharingMode::NONE` is returned if extraction failed |
118 | | RateLimiterSharingMode GetRocksDBRateLimiterSharingMode(); |
119 | | |
120 | | // Creates `rocksdb::RateLimiter` taking into account related GFlags, |
121 | | // calls `rocksdb::NewGenericRateLimiter` internally |
122 | | std::shared_ptr<rocksdb::RateLimiter> CreateRocksDBRateLimiter(); |
123 | | |
124 | | // Initialize the RocksDB 'options'. |
125 | | // The 'statistics' object provided by the caller will be used by RocksDB to maintain the stats for |
126 | | // the tablet. |
127 | | void InitRocksDBOptions( |
128 | | rocksdb::Options* options, const std::string& log_prefix, |
129 | | const std::shared_ptr<rocksdb::Statistics>& statistics, |
130 | | const tablet::TabletOptions& tablet_options, |
131 | | rocksdb::BlockBasedTableOptions table_options = rocksdb::BlockBasedTableOptions()); |
132 | | |
133 | | // Sets the log prefix for RocksDB options. This will also reinitialize options->info_log. |
134 | | void SetLogPrefix(rocksdb::Options* options, const std::string& log_prefix); |
135 | | |
136 | | // Gets the configured size of the node-global RocksDB priority thread pool. |
137 | | int32_t GetGlobalRocksDBPriorityThreadPoolSize(); |
138 | | |
139 | | // Class to edit the RocksDB manifest without fully loading the DB into memory. |
140 | | class RocksDBPatcher { |
141 | | public: |
142 | | explicit RocksDBPatcher(const std::string& dbpath, const rocksdb::Options& options); |
143 | | ~RocksDBPatcher(); |
144 | | |
145 | | // Loads the DB into the patcher. |
146 | | CHECKED_STATUS Load(); |
147 | | |
148 | | // Sets the hybrid time filter for the DB. |
149 | | CHECKED_STATUS SetHybridTimeFilter(HybridTime value); |
150 | | |
151 | | // Modifies the flushed frontier and cleans up the smallest/largest op id in per-SST file metadata. |
152 | | CHECKED_STATUS ModifyFlushedFrontier(const ConsensusFrontier& frontier); |
153 | | |
154 | | // Updates file sizes in the manifest if the actual file size was changed because of direct |
155 | | // manipulation of .sst files. |
156 | | // Like all other methods in this class, it updates the manifest file. |
157 | | CHECKED_STATUS UpdateFileSizes(); |
158 | | |
159 | | private: |
160 | | class Impl; |
161 | | std::unique_ptr<Impl> impl_; |
162 | | }; |
163 | | |
164 | | } // namespace docdb |
165 | | } // namespace yb |
166 | | |
167 | | #endif // YB_DOCDB_DOCDB_ROCKSDB_UTIL_H_ |