YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/docdb/docdb.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#ifndef YB_DOCDB_DOCDB_H_
15
#define YB_DOCDB_DOCDB_H_
16
17
#include <cstdint>
18
#include <ostream>
19
#include <string>
20
#include <vector>
21
22
#include <boost/function.hpp>
23
24
#include "yb/common/doc_hybrid_time.h"
25
#include "yb/common/hybrid_time.h"
26
#include "yb/common/read_hybrid_time.h"
27
#include "yb/common/transaction.h"
28
29
#include "yb/docdb/docdb_fwd.h"
30
#include "yb/docdb/shared_lock_manager_fwd.h"
31
#include "yb/docdb/doc_path.h"
32
#include "yb/docdb/doc_write_batch.h"
33
#include "yb/docdb/docdb.pb.h"
34
#include "yb/docdb/docdb_types.h"
35
#include "yb/docdb/lock_batch.h"
36
#include "yb/docdb/subdocument.h"
37
#include "yb/docdb/value.h"
38
39
#include "yb/rocksdb/rocksdb_fwd.h"
40
41
#include "yb/util/result.h"
42
#include "yb/util/strongly_typed_bool.h"
43
44
// DocDB mapping on top of the key-value map in RocksDB:
45
// <document_key> <hybrid_time> -> <doc_type>
46
// <document_key> <hybrid_time> <key_a> <gen_ts_a> -> <subdoc_a_type_or_value>
47
//
48
// Assuming the type of subdocument corresponding to key_a in the above example is "object", the
49
// contents of that subdocument are stored in a similar way:
50
// <document_key> <hybrid_time> <key_a> <gen_ts_a> <key_aa> <gen_ts_aa> -> <subdoc_aa_type_or_value>
51
// <document_key> <hybrid_time> <key_a> <gen_ts_a> <key_ab> <gen_ts_ab> -> <subdoc_ab_type_or_value>
52
// ...
53
//
54
// See doc_key.h for the encoding of the <document_key> part.
55
//
56
// <key_a>, <key_aa> are subkeys indicating a path inside a document.
57
// Their encoding is as follows:
58
//   <value_type> -- one byte, see the ValueType enum.
59
//   <value_specific_encoding> -- e.g. a big-endian 8-byte integer, or a string in a "zero encoded"
60
//                                format. This is empty for null or true/false values.
61
//
62
// <hybrid_time>, <gen_ts_a>, <gen_ts_ab> are "generation hybrid_times" corresponding to hybrid
63
// clock hybrid_times of the last time a particular top-level document / subdocument was fully
64
// overwritten or deleted.
65
//
66
// <subdoc_a_type_or_value>, <subdoc_aa_type_or_value>, <subdoc_ab_type_or_value> are values of the
67
// following form:
68
//   - One-byte value type (see the ValueType enum).
69
//   - For primitive values, the encoded value. Note: the value encoding may be different from the
70
//     key encoding for the same data type. E.g. we only flip the sign bit for signed 64-bit
71
//     integers when encoded as part of a RocksDB key, not value.
72
//
73
// Also see this document for a high-level overview of how we lay out JSON documents on top of
74
// RocksDB:
75
// https://docs.google.com/document/d/1uEOHUqGBVkijw_CGD568FMt8UOJdHtiE3JROUOppYBU/edit
76
77
namespace yb {
78
79
class Histogram;
80
81
namespace docdb {
82
83
class DocOperation;
84
85
// This function prepares the transaction by taking locks. The locks taken are returned to the
86
// caller via the lock_batch field of the result (because they need to be saved and unlocked when
87
// the transaction commits). The need_read_snapshot flag indicates whether any of the write
88
// operations requires a clean read snapshot to be taken before being applied (see DocOperation).
89
//
90
// Example: doc_write_ops might consist of the following operations:
91
// a.b = {}, a.b.c = 1, a.b.d = 2, e.d = 3
92
// We will generate all the lock_prefixes for the keys with lock types
93
// a - shared, a.b - exclusive, a - shared, a.b - shared, a.b.c - exclusive ...
94
// Then we will deduplicate the keys and promote shared locks to exclusive, and sort them.
95
// Finally, the locks taken will be in order:
96
// a - shared, a.b - exclusive, a.b.c - exclusive, a.b.d - exclusive, e - shared, e.d - exclusive.
97
// Then the sorted lock key list will be returned. (Type is not returned because it is not needed
98
// for unlocking)
99
// TODO(akashnil): If a.b is exclusive, we don't need to lock any sub-paths under it.
100
//
101
// Input: doc_write_ops
102
// Context: lock_manager
103
104
struct PrepareDocWriteOperationResult {  // Payload returned by PrepareDocWriteOperation().
105
  LockBatch lock_batch;  // Locks taken while preparing the write operation.
106
  bool need_read_snapshot = false;  // Whether a write operation needs a clean read snapshot.
107
};
108
109
Result<PrepareDocWriteOperationResult> PrepareDocWriteOperation(
110
    const std::vector<std::unique_ptr<DocOperation>>& doc_write_ops,
111
    const google::protobuf::RepeatedPtrField<KeyValuePairPB>& read_pairs,
112
    const scoped_refptr<Histogram>& write_lock_latency,
113
    const IsolationLevel isolation_level,
114
    const OperationKind operation_kind,
115
    const RowMarkType row_mark_type,
116
    bool transactional_table,
117
    CoarseTimePoint deadline,
118
    PartialRangeKeyIntents partial_range_key_intents,
119
    SharedLockManager *lock_manager);
120
121
// This constructs a DocWriteBatch using the given list of DocOperations, reading the previous
122
// state of data from RocksDB when necessary.
123
//
124
// Input: doc_write_ops, read snapshot hybrid_time if requested in PrepareDocWriteOperation().
125
// Context: rocksdb
126
// Outputs: write_batch, restart_read_ht
127
CHECKED_STATUS AssembleDocWriteBatch(
128
    const std::vector<std::unique_ptr<DocOperation>>& doc_write_ops,
129
    CoarseTimePoint deadline,
130
    const ReadHybridTime& read_time,
131
    const DocDB& doc_db,
132
    KeyValueWriteBatchPB* write_batch,
133
    InitMarkerBehavior init_marker_behavior,
134
    std::atomic<int64_t>* monotonic_counter,
135
    HybridTime* restart_read_ht,
136
    const std::string& table_name);
137
138
struct ExternalTxnApplyStateData {  // Mapped value type of ExternalTxnApplyState.
139
  HybridTime commit_ht;  // presumably the commit hybrid time of the transaction - TODO confirm
140
  IntraTxnWriteId write_id = 0;  // Intra-transaction write id; defaults to 0.
141
142
0
  std::string ToString() const {  // Formats commit_ht and write_id via YB_STRUCT_TO_STRING.
143
0
    return YB_STRUCT_TO_STRING(commit_ht, write_id);
144
0
  }
145
};
146
147
using ExternalTxnApplyState = std::map<TransactionId, ExternalTxnApplyStateData>;
148
149
// Adds external pair to write batch.
150
// Returns true if add was skipped because pair is a regular (non external) record.
151
bool AddExternalPairToWriteBatch(
152
    const KeyValuePairPB& kv_pair,
153
    HybridTime hybrid_time,
154
    int write_id,
155
    ExternalTxnApplyState* apply_external_transactions,
156
    rocksdb::WriteBatch* regular_write_batch,
157
    rocksdb::WriteBatch* intents_write_batch);
158
159
// Prepares the external part of a non-transactional write batch.
160
// The batch could contain intents for external transactions; in this case those intents
161
// will be added to intents_write_batch.
162
//
163
// Returns true if batch contains regular entries.
164
bool PrepareExternalWriteBatch(
165
    const docdb::KeyValueWriteBatchPB& put_batch,
166
    HybridTime hybrid_time,
167
    rocksdb::DB* intents_db,
168
    rocksdb::WriteBatch* regular_write_batch,
169
    rocksdb::WriteBatch* intents_write_batch);
170
171
YB_STRONGLY_TYPED_BOOL(LastKey);
172
173
// Enumerates intents corresponding to provided key value pairs.
174
// For each key it generates a strong intent, and for each of its parents a weak one.
175
// functor should accept the following arguments, in order (see EnumerateIntentsCallback):
176
// intent_strength - whether the intent is weak or strong
177
// full_doc_key - whether the intent contains a full document key (see FullDocKey below)
178
// value_slice - value of intent; key - pointer to key in format of SubDocKey (no ht)
179
// last_key - whether it is last strong key in enumeration
180
181
// Indicates that the intent contains a full document key, i.e. it does not omit any final range
182
// components of the document key. This flag is also true for intents that include subdocument keys.
183
YB_STRONGLY_TYPED_BOOL(FullDocKey);
184
185
// TODO(dtxn) don't expose this method outside of DocDB if TransactionConflictResolver is moved
186
// inside DocDB.
187
// Note: From https://stackoverflow.com/a/17278470/461529:
188
// "As of GCC 4.8.1, the std::function in libstdc++ optimizes only for pointers to functions and
189
// methods. So regardless the size of your functor (lambdas included), initializing a std::function
190
// from it triggers heap allocation."
191
// So, we use boost::function which doesn't have such issue:
192
// http://www.boost.org/doc/libs/1_65_1/doc/html/function/misc.html
193
typedef boost::function<
194
    Status(IntentStrength, FullDocKey, Slice, KeyBytes*, LastKey)> EnumerateIntentsCallback;
195
196
CHECKED_STATUS EnumerateIntents(
197
    const google::protobuf::RepeatedPtrField<yb::docdb::KeyValuePairPB>& kv_pairs,
198
    const EnumerateIntentsCallback& functor, PartialRangeKeyIntents partial_range_key_intents);
199
200
CHECKED_STATUS EnumerateIntents(
201
    Slice key, const Slice& intent_value, const EnumerateIntentsCallback& functor,
202
    KeyBytes* encoded_key_buffer, PartialRangeKeyIntents partial_range_key_intents,
203
    LastKey last_key = LastKey::kFalse);
204
205
// replicated_batches_state format does not matter at this point, because it is just
206
// appended to the appropriate value.
207
void PrepareTransactionWriteBatch(
208
    const docdb::KeyValueWriteBatchPB& put_batch,
209
    HybridTime hybrid_time,
210
    rocksdb::WriteBatch* rocksdb_write_batch,
211
    const TransactionId& transaction_id,
212
    IsolationLevel isolation_level,
213
    PartialRangeKeyIntents partial_range_key_intents,
214
    const Slice& replicated_batches_state,
215
    IntraTxnWriteId* write_id);
216
217
218
struct IntentKeyValueForCDC {  // Key/value of an intent plus its reverse index key and write id.
219
  Slice key;
220
  Slice value;
221
  std::string key_buf, value_buf;  // presumably backing storage for key/value - TODO confirm
222
  std::string reverse_index_key;
223
  IntraTxnWriteId write_id = 0;
224
225
  std::string ToString() const;
226
227
  template <class PB>
228
  void ToPB(PB* pb) const {  // Sets key, value, reverse_index_key and write_id on *pb.
229
    pb->set_key(key);
230
    pb->set_value(value);
231
    pb->set_reverse_index_key(reverse_index_key);
232
    pb->set_write_id(write_id);
233
  }
234
235
  template <class PB>
236
  static IntentKeyValueForCDC FromPB(const PB& pb) {  // Builds an instance from the fields of pb.
237
    return IntentKeyValueForCDC {
238
        .key = pb.key(),  // NOTE(review): if Slice is a non-owning view this aliases pb - confirm
239
        .value = pb.value(),  // NOTE(review): same aliasing question as for key
240
        .reverse_index_key = pb.reverse_index_key(),
241
        .write_id = pb.write_id(),
242
    };  // key_buf and value_buf are not listed above, so they stay default-constructed (empty).
243
  }
244
};
245
246
// See ApplyTransactionStatePB for details.
247
struct ApplyTransactionState {
248
  std::string key;  // An empty key means the state is not active (see active()).
249
  IntraTxnWriteId write_id = 0;
250
  AbortedSubTransactionSet aborted;
251
252
1.70M
  bool active() const {  // True iff key is non-empty.
253
1.70M
    return !key.empty();
254
1.70M
  }
255
256
  std::string ToString() const;
257
258
  template <class PB>
259
1
  void ToPB(PB* pb) const {  // Writes key, write_id and the aborted set into *pb.
260
1
    pb->set_key(key);
261
1
    pb->set_write_id(write_id);
262
1
    aborted.ToPB(pb->mutable_aborted()->mutable_set());
263
1
  }
264
265
  template <class PB>
266
0
  static Result<ApplyTransactionState> FromPB(const PB& pb) {  // Reads the fields ToPB writes.
267
0
    return ApplyTransactionState {
268
0
      .key = pb.key(),
269
0
      .write_id = pb.write_id(),
270
0
      .aborted = VERIFY_RESULT(AbortedSubTransactionSet::FromPB(pb.aborted().set())),
271
0
    };  // NOTE(review): presumably VERIFY_RESULT returns the error early on failure - confirm.
272
0
  }
273
};
274
275
Result<ApplyTransactionState> GetIntentsBatch(
276
    const TransactionId& transaction_id,
277
    const KeyBounds* key_bounds,
278
    const ApplyTransactionState* stream_state,
279
    rocksdb::DB* intents_db,
280
    std::vector<IntentKeyValueForCDC>* keyValueIntents);
281
282
void AppendTransactionKeyPrefix(const TransactionId& transaction_id, docdb::KeyBytes* out);
283
284
// Class that is used while combining external intents into single key value pair.
285
class ExternalIntentsProvider {  // Abstract interface; every member below is pure virtual.
286
 public:
287
  // Set output key.
288
  virtual void SetKey(const Slice& slice) = 0;
289
290
  // Set output value.
291
  virtual void SetValue(const Slice& slice) = 0;
292
293
  // Get next external intent, returns boost::none when there are no more intents.
294
  virtual boost::optional<std::pair<Slice, Slice>> Next() = 0;  // presumably (key, value) - confirm
295
296
  virtual const Uuid& InvolvedTablet() = 0;  // NOTE(review): presumably the intents' tablet - confirm
297
298
0
  virtual ~ExternalIntentsProvider() = default;  // Virtual so deletion through a base pointer is safe.
299
};
300
301
// Combine external intents into single key value pair.
302
void CombineExternalIntents(
303
    const TransactionId& txn_id,
304
    ExternalIntentsProvider* provider);
305
306
}  // namespace docdb
307
}  // namespace yb
308
309
#endif  // YB_DOCDB_DOCDB_H_