YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/docdb/doc_write_batch.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#ifndef YB_DOCDB_DOC_WRITE_BATCH_H
15
#define YB_DOCDB_DOC_WRITE_BATCH_H
16
17
#include "yb/bfql/tserver_opcodes.h"
18
19
#include "yb/common/constants.h"
20
#include "yb/common/hybrid_time.h"
21
#include "yb/common/read_hybrid_time.h"
22
23
#include "yb/docdb/doc_write_batch_cache.h"
24
#include "yb/docdb/docdb_types.h"
25
#include "yb/docdb/intent_aware_iterator.h"
26
#include "yb/docdb/key_bounds.h"
27
#include "yb/docdb/value.h"
28
29
#include "yb/rocksdb/cache.h"
30
31
#include "yb/rocksutil/write_batch_formatter.h"
32
33
#include "yb/util/enums.h"
34
#include "yb/util/monotime.h"
35
36
namespace rocksdb {
37
class DB;
38
}
39
40
namespace yb {
41
namespace docdb {
42
43
class KeyValueWriteBatchPB;
44
class IntentAwareIterator;
45
46
struct LazyIterator {
47
  std::function<std::unique_ptr<IntentAwareIterator>()>* creator;
48
  std::unique_ptr<IntentAwareIterator> iterator;
49
50
  explicit LazyIterator(std::function<std::unique_ptr<IntentAwareIterator>()>* c)
51
71.6M
    : iterator(nullptr) {
52
71.6M
    creator = c;
53
71.6M
  }
54
55
13
  explicit LazyIterator(std::unique_ptr<IntentAwareIterator> i) {
56
13
    iterator = std::move(i);
57
13
  }
58
59
71.6M
  ~LazyIterator() {}
60
61
411k
  IntentAwareIterator* Iterator() {
62
411k
    if (!iterator)
63
270k
      iterator = (*creator)();
64
411k
    return iterator.get();
65
411k
  }
66
};
67
68
// Enum with the members kPb and kValueType, declared through the YB_DEFINE_ENUM macro.
// NOTE(review): presumably tells whether a ValueRef carries a QLValuePB or only a ValueType;
// nothing in this header uses it directly, so confirm against the callers.
YB_DEFINE_ENUM(ValueRefType, (kPb)(kValueType));
69
70
// This class references value that should be inserted to DocWriteBatch.
71
// Also it contains various options for this value.
72
class ValueRef {
73
 public:
74
  explicit ValueRef(const QLValuePB& value_pb,
75
                    SortingType sorting_type = SortingType::kNotSpecified,
76
                    bfql::TSOpcode write_instruction = bfql::TSOpcode::kScalarInsert)
77
      : value_pb_(&value_pb), sorting_type_(sorting_type), write_instruction_(write_instruction),
78
55.2M
        list_extend_order_(ListExtendOrder::APPEND), value_type_(ValueType::kLowest) {
79
55.2M
  }
80
81
  explicit ValueRef(const QLValuePB& value_pb,
82
                    const ValueRef& value_ref)
83
      : value_pb_(&value_pb), sorting_type_(value_ref.sorting_type_),
84
        write_instruction_(value_ref.write_instruction_),
85
217k
        list_extend_order_(value_ref.list_extend_order_), value_type_(ValueType::kLowest) {
86
217k
  }
87
88
  explicit ValueRef(const QLValuePB& value_pb,
89
                    ListExtendOrder list_extend_order)
90
      : value_pb_(&value_pb), sorting_type_(SortingType::kNotSpecified),
91
        write_instruction_(bfql::TSOpcode::kScalarInsert),
92
        list_extend_order_(list_extend_order),
93
        value_type_(ValueType::kLowest) {
94
  }
95
96
  explicit ValueRef(ValueType value_type);
97
98
71.7M
  const QLValuePB& value_pb() const {
99
71.7M
    return *value_pb_;
100
71.7M
  }
101
102
0
  void set_sorting_type(SortingType value) {
103
0
    sorting_type_ = value;
104
0
  }
105
106
71.8M
  SortingType sorting_type() const {
107
71.8M
    return sorting_type_;
108
71.8M
  }
109
110
218k
  ListExtendOrder list_extend_order() const {
111
218k
    return list_extend_order_;
112
218k
  }
113
114
201
  void set_list_extend_order(ListExtendOrder value) {
115
201
    list_extend_order_ = value;
116
201
  }
117
118
21.3k
  void set_custom_value_type(ValueType value) {
119
21.3k
    value_type_ = value;
120
21.3k
  }
121
122
160M
  ValueType custom_value_type() const {
123
160M
    return value_type_;
124
160M
  }
125
126
217k
  bfql::TSOpcode write_instruction() const {
127
217k
    return write_instruction_;
128
217k
  }
129
130
16
  void set_write_instruction(bfql::TSOpcode value) {
131
16
    write_instruction_ = value;
132
16
  }
133
134
  bool is_array() const;
135
136
  bool is_set() const;
137
138
  bool is_map() const;
139
140
  ValueType ContainerValueType() const;
141
142
  bool IsTombstoneOrPrimitive() const;
143
144
  std::string ToString() const;
145
146
 private:
147
  const QLValuePB* value_pb_;
148
  SortingType sorting_type_;
149
  bfql::TSOpcode write_instruction_;
150
  ListExtendOrder list_extend_order_;
151
  ValueType value_type_;
152
};
153
154
// This controls whether "init markers" are required at all intermediate levels.
// Declared through YB_DEFINE_ENUM with two members, kRequired and kOptional. DocWriteBatch stores
// one of these values and compares against it in required_init_markers() /
// optional_init_markers().
155
YB_DEFINE_ENUM(InitMarkerBehavior,
156
               // This is used in Redis. We need to keep track of document types such as strings,
157
               // hashes, sets, because there is no schema and due to Redis's error checking.
158
               (kRequired)
159
160
               // This is used in CQL. Existence of "a.b.c" implies existence of "a" and "a.b",
161
               // unless there are delete markers / TTL expiration involved.
162
               (kOptional));
163
164
// The DocWriteBatch class is used to build a RocksDB write batch for a DocDB batch of operations
165
// that may include a mix of write (set) or delete operations. It may read from RocksDB while
166
// writing, and builds up an internal rocksdb::WriteBatch while handling the operations.
167
// When all the operations are applied, the rocksdb::WriteBatch should be taken as output.
168
// Take ownership of it using std::move if it needs to live longer than this DocWriteBatch.
169
class DocWriteBatch {
170
 public:
171
  explicit DocWriteBatch(const DocDB& doc_db,
172
                         InitMarkerBehavior init_marker_behavior,
173
                         std::atomic<int64_t>* monotonic_counter = nullptr);
174
175
  Status SeekToKeyPrefix(LazyIterator* doc_iter, bool has_ancestor = false);
176
  Status SeekToKeyPrefix(IntentAwareIterator* doc_iter, bool has_ancestor);
177
178
  // Set the primitive at the given path to the given value. Intermediate subdocuments are created
179
  // if necessary and possible.
180
  CHECKED_STATUS SetPrimitive(
181
      const DocPath& doc_path,
182
      const ValueControlFields& control_fields,
183
      const ValueRef& value,
184
      LazyIterator* doc_iter);
185
186
  CHECKED_STATUS SetPrimitive(
187
      const DocPath& doc_path,
188
      const ValueControlFields& control_fields,
189
      const ValueRef& value,
190
      const ReadHybridTime& read_ht = ReadHybridTime::Max(),
191
      const CoarseTimePoint deadline = CoarseTimePoint::max(),
192
      rocksdb::QueryId query_id = rocksdb::kDefaultQueryId);
193
194
  CHECKED_STATUS SetPrimitive(
195
      const DocPath& doc_path,
196
      const ValueControlFields& control_fields,
197
      const ValueRef& value,
198
13
      std::unique_ptr<IntentAwareIterator> intent_iter) {
199
13
    LazyIterator iter(std::move(intent_iter));
200
13
    return SetPrimitive(doc_path, control_fields, value, &iter);
201
13
  }
202
203
204
  CHECKED_STATUS SetPrimitive(
205
      const DocPath& doc_path,
206
      const ValueRef& value,
207
      const ReadHybridTime& read_ht = ReadHybridTime::Max(),
208
      const CoarseTimePoint deadline = CoarseTimePoint::max(),
209
      rocksdb::QueryId query_id = rocksdb::kDefaultQueryId,
210
1.54M
      UserTimeMicros user_timestamp = ValueControlFields::kInvalidUserTimestamp) {
211
1.54M
    return SetPrimitive(
212
1.54M
        doc_path, ValueControlFields { .user_timestamp = user_timestamp }, value, read_ht,
213
1.54M
        deadline, query_id);
214
1.54M
  }
215
216
  // Extend the SubDocument in the given key. We'll support List with Append and Prepend mode later.
217
  // TODO(akashnil): 03/20/17 ENG-1107
218
  // In each SetPrimitive call, some common work is repeated. It may be made more
219
  // efficient by not calling SetPrimitive internally.
220
  CHECKED_STATUS ExtendSubDocument(
221
      const DocPath& doc_path,
222
      const ValueRef& value,
223
      const ReadHybridTime& read_ht = ReadHybridTime::Max(),
224
      const CoarseTimePoint deadline = CoarseTimePoint::max(),
225
      rocksdb::QueryId query_id = rocksdb::kDefaultQueryId,
226
      MonoDelta ttl = ValueControlFields::kMaxTtl,
227
      UserTimeMicros user_timestamp = ValueControlFields::kInvalidUserTimestamp);
228
229
  CHECKED_STATUS InsertSubDocument(
230
      const DocPath& doc_path,
231
      const ValueRef& value,
232
      const ReadHybridTime& read_ht = ReadHybridTime::Max(),
233
      const CoarseTimePoint deadline = CoarseTimePoint::max(),
234
      rocksdb::QueryId query_id = rocksdb::kDefaultQueryId,
235
      MonoDelta ttl = ValueControlFields::kMaxTtl,
236
      UserTimeMicros user_timestamp = ValueControlFields::kInvalidUserTimestamp,
237
      bool init_marker_ttl = true);
238
239
  CHECKED_STATUS ExtendList(
240
      const DocPath& doc_path,
241
      const ValueRef& value,
242
      const ReadHybridTime& read_ht = ReadHybridTime::Max(),
243
      const CoarseTimePoint deadline = CoarseTimePoint::max(),
244
      rocksdb::QueryId query_id = rocksdb::kDefaultQueryId,
245
      MonoDelta ttl = ValueControlFields::kMaxTtl,
246
      UserTimeMicros user_timestamp = ValueControlFields::kInvalidUserTimestamp);
247
248
  // 'indices' must be sorted. List indexes are not zero indexed, the first element is list[1].
249
  CHECKED_STATUS ReplaceRedisInList(
250
      const DocPath& doc_path,
251
      int64_t index,
252
      const ValueRef& value,
253
      const ReadHybridTime& read_ht,
254
      const CoarseTimePoint deadline,
255
      const rocksdb::QueryId query_id,
256
      const Direction dir = Direction::kForward,
257
      const int64_t start_index = 0,
258
      std::vector<string>* results = nullptr,
259
      MonoDelta default_ttl = ValueControlFields::kMaxTtl,
260
      MonoDelta write_ttl = ValueControlFields::kMaxTtl);
261
262
  CHECKED_STATUS ReplaceCqlInList(
263
      const DocPath &doc_path,
264
      const int index,
265
      const ValueRef& value,
266
      const ReadHybridTime& read_ht,
267
      const CoarseTimePoint deadline,
268
      const rocksdb::QueryId query_id,
269
      MonoDelta default_ttl = ValueControlFields::kMaxTtl,
270
      MonoDelta write_ttl = ValueControlFields::kMaxTtl);
271
272
  CHECKED_STATUS DeleteSubDoc(
273
      const DocPath& doc_path,
274
      const ReadHybridTime& read_ht = ReadHybridTime::Max(),
275
      const CoarseTimePoint deadline = CoarseTimePoint::max(),
276
      rocksdb::QueryId query_id = rocksdb::kDefaultQueryId,
277
      UserTimeMicros user_timestamp = ValueControlFields::kInvalidUserTimestamp);
278
279
  void Clear();
280
762k
  bool IsEmpty() const { return put_batch_.empty(); }
281
282
0
  size_t size() const { return put_batch_.size(); }
283
284
1.52M
  const std::vector<std::pair<std::string, std::string>>& key_value_pairs() const {
285
1.52M
    return put_batch_;
286
1.52M
  }
287
288
  void MoveToWriteBatchPB(KeyValueWriteBatchPB *kv_pb);
289
290
  // This method has worse performance comparing to MoveToWriteBatchPB and intented to be used in
291
  // testing. Consider using MoveToWriteBatchPB in production code.
292
  void TEST_CopyToWriteBatchPB(KeyValueWriteBatchPB *kv_pb) const;
293
294
  // This is used in tests when measuring the number of seeks that a given update to this batch
295
  // performs. The internal seek count is reset.
296
  int GetAndResetNumRocksDBSeeks();
297
298
6.31M
  const DocDB& doc_db() { return doc_db_; }
299
300
41.3k
  boost::optional<DocWriteBatchCache::Entry> LookupCache(const KeyBytes& encoded_key_prefix) {
301
41.3k
    return cache_.Get(encoded_key_prefix);
302
41.3k
  }
303
304
474
  std::pair<std::string, std::string>& AddRaw() {
305
474
    put_batch_.emplace_back();
306
474
    return put_batch_.back();
307
474
  }
308
309
  void UpdateMaxValueTtl(const MonoDelta& ttl);
310
311
446
  int64_t ttl_ns() const {
312
446
    return ttl_.ToNanoseconds();
313
446
  }
314
315
3.11M
  bool has_ttl() const {
316
3.11M
    return ttl_.Initialized();
317
3.11M
  }
318
319
 private:
320
  // This member function performs the necessary operations to set a primitive value for a given
321
  // docpath assuming the appropriate operations have been taken care of for subkeys with index <
322
  // subkey_index. This method assumes responsibility of ensuring the proper DocDB structure
323
  // (e.g: init markers) is maintained for subdocuments starting at the given subkey_index.
324
  CHECKED_STATUS SetPrimitiveInternal(
325
      const DocPath& doc_path,
326
      const ValueControlFields& control_fields,
327
      const ValueRef& value,
328
      LazyIterator* doc_iter,
329
      bool is_deletion,
330
      size_t num_subkeys);
331
332
  // Handle the user provided timestamp during writes.
333
  Result<bool> SetPrimitiveInternalHandleUserTimestamp(const ValueControlFields& control_fields,
334
                                                       LazyIterator* doc_iter);
335
336
142M
  bool required_init_markers() {
337
142M
    return init_marker_behavior_ == InitMarkerBehavior::kRequired;
338
142M
  }
339
340
141M
  bool optional_init_markers() {
341
141M
    return init_marker_behavior_ == InitMarkerBehavior::kOptional;
342
141M
  }
343
344
  DocWriteBatchCache cache_;
345
346
  DocDB doc_db_;
347
348
  InitMarkerBehavior init_marker_behavior_;
349
  std::atomic<int64_t>* monotonic_counter_;
350
  std::vector<std::pair<std::string, std::string>> put_batch_;
351
352
  // Taken from internal_doc_iterator
353
  KeyBytes key_prefix_;
354
  bool subdoc_exists_ = true;
355
  DocWriteBatchCache::Entry current_entry_;
356
357
  MonoDelta ttl_;
358
};
359
360
// Converts a RocksDB WriteBatch to a string.
361
// line_prefix is the prefix to be added to each line of the result. Could be used for indentation.
// The text is returned wrapped in a Result<std::string>.
// NOTE(review): storage_db_type, binary_output_format and batch_output_format presumably select
// how the batch entries are decoded and rendered; the definition is not in this header, so
// confirm there.
362
Result<std::string> WriteBatchToString(
363
    const rocksdb::WriteBatch& write_batch,
364
    StorageDbType storage_db_type,
365
    BinaryOutputFormat binary_output_format,
366
    WriteBatchOutputFormat batch_output_format,
367
    const std::string& line_prefix);
368
369
}  // namespace docdb
370
}  // namespace yb
371
372
#endif // YB_DOCDB_DOC_WRITE_BATCH_H