YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/docdb/compaction_file_filter.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#include "yb/docdb/compaction_file_filter.h"
15
16
#include <algorithm>
17
18
#include "yb/common/hybrid_time.h"
19
20
#include "yb/docdb/consensus_frontier.h"
21
#include "yb/docdb/doc_ttl_util.h"
22
#include "yb/docdb/docdb_compaction_filter.h"
23
24
#include "yb/gutil/casts.h"
25
26
#include "yb/rocksdb/compaction_filter.h"
27
#include "yb/rocksdb/db/version_edit.h"
28
29
#include "yb/util/flag_tags.h"
30
31
// Flag tagged both unsafe and runtime. When it is set, CurrentExpiryMode() selects
// EXP_TABLE_ONLY, so file expiration is decided without the value-level TTL metadata.
DEFINE_bool(file_expiration_ignore_value_ttl, false,
            "When deciding whether a file has expired, assume that it is safe to ignore "
            "value-level TTL and expire based on table TTL only. CAUTION - Should only be "
            "used for expiration of older SST files without value-level TTL metadata, or "
            "for expiring files with incorrect value-level expiration. Misuse can result "
            "in the deletion of live data!");
TAG_FLAG(file_expiration_ignore_value_ttl, unsafe);
TAG_FLAG(file_expiration_ignore_value_ttl, runtime);
39
40
// Flag tagged both unsafe and runtime. When it is set (and file_expiration_ignore_value_ttl is
// not), CurrentExpiryMode() selects EXP_TRUST_VALUE.
DEFINE_bool(file_expiration_value_ttl_overrides_table_ttl, false,
            "When deciding whether a file has expired, assume that any file with value-level TTL "
            "metadata can be expired solely on that metadata. "
            "Useful for the expiration of files earlier than the table-level TTL that is set. "
            "CAUTION - Should only be used in workloads where the user is certain all data is "
            "written with a value-level TTL. "
            "Misuse can result in the deletion of live data!");
TAG_FLAG(file_expiration_value_ttl_overrides_table_ttl, unsafe);
TAG_FLAG(file_expiration_value_ttl_overrides_table_ttl, runtime);
48
49
namespace yb {
50
namespace docdb {
51
52
using rocksdb::CompactionFileFilter;
53
using rocksdb::FileMetaData;
54
using rocksdb::FilterDecision;
55
using std::unique_ptr;
56
using std::vector;
57
58
namespace {
59
0
  const ExpiryMode CurrentExpiryMode() {
60
0
    if (FLAGS_file_expiration_ignore_value_ttl) {
61
0
      return EXP_TABLE_ONLY;
62
0
    } else if (FLAGS_file_expiration_value_ttl_overrides_table_ttl) {
63
0
      return EXP_TRUST_VALUE;
64
0
    }
65
0
    return EXP_NORMAL;
66
0
  }
67
}
68
69
0
// Derives the expiration inputs for a file from the user frontier stored on its largest boundary.
//
// file: metadata of the file to inspect; may be null.
//
// Returns a default-constructed ExpirationTime when there is no file or no largest user frontier
// (intended as an expiration time that will not expire). Otherwise ttl_expiration_ht is the
// frontier's max value-level TTL expiration time, falling back to kNoExpiration through
// GetValueOr(), and created_ht is the frontier's hybrid time.
ExpirationTime ExtractExpirationTime(const FileMetaData* file) {
  if (file == nullptr || !file->largest.user_frontier) {
    return ExpirationTime{};
  }

  // The generic user frontier is downcast to ConsensusFrontier to reach the TTL and hybrid time
  // accessors.
  auto& frontier = down_cast<ConsensusFrontier&>(*file->largest.user_frontier);
  return ExpirationTime{
    .ttl_expiration_ht =
        frontier.max_value_level_ttl_expiration_time().GetValueOr(kNoExpiration),
    .created_ht = frontier.hybrid_time()
  };
}
85
86
bool TtlIsExpired(const ExpirationTime expiry,
87
    const MonoDelta table_ttl,
88
    const HybridTime now,
89
0
    const ExpiryMode mode) {
90
  // If FLAGS_file_expiration_ignore_value_ttl is set, ignore the value level TTL
91
  // entirely and use only the default table TTL.
92
0
  const auto ttl_expiry_ht =
93
0
      mode == EXP_TABLE_ONLY ? kUseDefaultTTL : expiry.ttl_expiration_ht;
94
95
0
  if (mode == EXP_TRUST_VALUE && ttl_expiry_ht.is_valid() && ttl_expiry_ht != kUseDefaultTTL) {
96
0
    return HasExpiredTTL(ttl_expiry_ht, now);
97
0
  }
98
99
0
  auto file_expiry_ht = MaxExpirationFromValueAndTableTTL(
100
0
      expiry.created_ht, table_ttl, ttl_expiry_ht);
101
0
  return HasExpiredTTL(file_expiry_ht, now);
102
0
}
103
104
0
// Reports whether expiry.created_ht is strictly earlier than history_cutoff.
bool IsLastKeyCreatedBeforeHistoryCutoff(ExpirationTime expiry, HybridTime history_cutoff) {
  const bool created_before_cutoff = expiry.created_ht < history_cutoff;
  return created_before_cutoff;
}
107
108
0
// Decides whether `file` is discarded (FilterDecision::kDiscard) or kept (FilterDecision::kKeep).
//
// file: metadata of the candidate file. ExtractExpirationTime() accepts a null pointer, so the
//       log statements below describe the file through a null-safe helper instead of
//       dereferencing it directly.
//
// A file is discarded only when its creation hybrid time is below max_ht_to_expire_ and both
// sanity checks (history cutoff and TTL expiration) agree; otherwise it is kept.
FilterDecision DocDBCompactionFileFilter::Filter(const FileMetaData* file) {
  // Filtering a file based on TTL expiration needs to be done from the oldest files to
  // the newest in order to prevent conflicts with tombstoned values that have expired,
  // but are referenced in later files or later versions. If any file is "kept" by
  // the file_filter, then we need to stop filtering files at that point.
  //
  // max_ht_to_expire_ indicates the expiration cutoff as determined when the filter was created.
  // history_cutoff_ indicates the timestamp after which it is unsafe to delete data.
  // table_ttl_ indicates the current default_time_to_live for the table.
  // filter_ht_ indicates the timestamp at which the filter was created.

  // Invoked only from inside log statements, so the string is built only when the statement is
  // actually emitted.
  const auto describe_file = [file]() -> std::string {
    if (file == nullptr) {
      return "<null>";
    }
    return file->ToString();
  };

  const auto expiry = ExtractExpirationTime(file);

  // A created HT at or above the max to expire means the file has to stay.
  if (!(expiry.created_ht < max_ht_to_expire_)) {
    VLOG(3) << "Keeping file, has a key HybridTime greater than the max to expire ("
        << max_ht_to_expire_ << "): "
        << " filter: " << ToString()
        << " file: " << describe_file();
    return FilterDecision::kKeep;
  }

  // Sanity checks to ensure that we don't accidentally expire a file that should be kept.
  // These paths should never be taken.
  if (!IsLastKeyCreatedBeforeHistoryCutoff(expiry, history_cutoff_)) {
    LOG(DFATAL) << "Attempted to discard a file that has not exceeded its "
        << "history cutoff: "
        << " filter: " << ToString()
        << " file: " << describe_file();
    return FilterDecision::kKeep;
  }
  if (!TtlIsExpired(expiry, table_ttl_, filter_ht_, mode_)) {
    LOG(DFATAL) << "Attempted to discard a file that has not expired: "
        << " filter: " << ToString()
        << " file: " << describe_file();
    return FilterDecision::kKeep;
  }

  VLOG(2) << "Filtering file, TTL expired: "
      << " filter: " << ToString()
      << " file: " << describe_file();
  return FilterDecision::kDiscard;
}
148
149
0
std::string DocDBCompactionFileFilter::ToString() const {
150
0
  return YB_CLASS_TO_STRING(table_ttl, history_cutoff, max_ht_to_expire, filter_ht);
151
0
}
152
153
0
// Returns the fixed name of this filter, which is its class name.
const char* DocDBCompactionFileFilter::Name() const {
  static const char kFilterName[] = "DocDBCompactionFileFilter";
  return kFilterName;
}
156
157
// Builds the file filter for one compaction over `input_files`.
//
// Reads the current time from clock_, the table TTL and history cutoff from the retention
// policy, and the expiry mode from the gflags. It then scans the input files for the smallest
// creation hybrid time among the files that must be kept. That minimum (HybridTime::kMax when
// every file is expired) is passed to the filter together with the other values.
unique_ptr<CompactionFileFilter> DocDBCompactionFileFilterFactory::CreateCompactionFileFilter(
    const vector<FileMetaData*>& input_files) {
  const HybridTime filter_ht = clock_->Now();
  auto directive = retention_policy_->GetRetentionDirective();
  MonoDelta table_ttl = directive.table_ttl;
  HybridTime history_cutoff = directive.history_cutoff;
  const ExpiryMode mode = CurrentExpiryMode();

  // Need to iterate through all files and determine the minimum HybridTime of a file that
  // will *not* be expired. This will prevent us from expiring a file prematurely and accidentally
  // exposing old data.
  HybridTime min_kept_ht = HybridTime::kMax;
  for (auto* input : input_files) {
    auto expiry = ExtractExpirationTime(input);

    // Called only inside VLOG statements, so the text is formatted only when one is emitted.
    auto describe = [expiry, table_ttl, mode, history_cutoff, input]() {
      return Format("file expiration info: $0, table ttl: $1,"
          " mode: $2, history_cutoff: $3, file: $4",
          expiry, table_ttl, mode, history_cutoff, input);
    };

    // A file is expired only if both A) its latest table TTL/value TTL time has expired, and
    // B) its latest key was created before the history cutoff. Anything else must be kept.
    const bool expired = TtlIsExpired(expiry, table_ttl, filter_ht, mode) &&
                         IsLastKeyCreatedBeforeHistoryCutoff(expiry, history_cutoff);
    if (expired) {
      VLOG(4) << "File is expired (may or may not be filtered during compaction): "
          << describe();
      continue;
    }

    VLOG(4) << "File is not expired or contains data created after history cutoff time, "
        << "updating minimum HybridTime for filter: " << describe();
    min_kept_ht = std::min(min_kept_ht, expiry.created_ht);
  }

  return std::make_unique<DocDBCompactionFileFilter>(
      table_ttl, history_cutoff, min_kept_ht, filter_ht, mode);
}
192
193
0
// Returns the fixed name of this factory, which is its class name.
const char* DocDBCompactionFileFilterFactory::Name() const {
  static const char kFactoryName[] = "DocDBCompactionFileFilterFactory";
  return kFactoryName;
}
196
197
0
std::string ExpirationTime::ToString() const {
198
0
  return YB_STRUCT_TO_STRING(ttl_expiration_ht, created_ht);
199
0
}
200
201
0
// Compares two ExpirationTime values through YB_STRUCT_EQUALS over ttl_expiration_ht and
// created_ht.
bool operator==(const ExpirationTime& lhs, const ExpirationTime& rhs) {
  // NOTE(review): YB_STRUCT_EQUALS presumably refers to the operands by the names `lhs` and
  // `rhs`, so the parameter names must stay as they are - confirm against the macro definition.
  const bool fields_match = YB_STRUCT_EQUALS(ttl_expiration_ht, created_ht);
  return fields_match;
}
204
205
}  // namespace docdb
206
}  // namespace yb