YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/util/spinlock_profiling.cc
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
//
// The following only applies to changes made to this file as part of YugaByte development.
//
// Portions Copyright (c) YugaByte, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// in compliance with the License.  You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied.  See the License for the specific language governing permissions and limitations
// under the License.
//

#include "yb/util/spinlock_profiling.h"

#include <functional>
#include <string>
#include <vector>

#include <gflags/gflags.h>
#include <glog/logging.h>

#include "yb/gutil/atomicops.h"
#include "yb/gutil/bind.h"
#include "yb/gutil/macros.h"
#include "yb/gutil/once.h"
#include "yb/gutil/spinlock.h"
#include "yb/gutil/strings/fastmem.h"
#include "yb/gutil/strings/human_readable.h"
#include "yb/gutil/sysinfo.h"

#include "yb/util/enums.h"
#include "yb/util/flag_tags.h"
#include "yb/util/metrics.h"
#include "yb/util/slice.h"
#include "yb/util/stack_trace.h"
#include "yb/util/striped64.h"
#include "yb/util/trace.h"

DEFINE_int32(lock_contention_trace_threshold_cycles,
             2000000, // 2M cycles should be about 1ms
             "If acquiring a spinlock takes more than this number of "
             "cycles, and a Trace is currently active, then the current "
             "stack trace is logged to the trace buffer.");
TAG_FLAG(lock_contention_trace_threshold_cycles, hidden);

METRIC_DEFINE_gauge_uint64(server, spinlock_contention_time,
    "Spinlock Contention Time", yb::MetricUnit::kMicroseconds,
    "Amount of time consumed by contention on internal spinlocks since the server "
    "started. If this increases rapidly, it may indicate a performance issue in YB "
    "internals triggered by a particular workload and warrant investigation.",
    yb::EXPOSE_AS_COUNTER);

using base::SpinLockHolder;

namespace yb {

static const double kMicrosPerSecond = 1000000.0;

static LongAdder* g_contended_cycles = nullptr;

namespace {

// Implements a very simple linear-probing hashtable of stack traces with
// a fixed number of entries.
//
// Threads experiencing contention record their stacks into this hashtable,
// or increment an already-existing entry. Each entry has its own lock,
// but we can "skip" an entry under contention, and spread out a single stack
// into multiple buckets if necessary.
//
// A thread collecting a profile collects stack traces out of the hash table
// and resets the counts to 0 as they are collected.
class ContentionStacks {
 public:
  ContentionStacks()
    : dropped_samples_(0) {
  }

  // Add a stack trace to the table.
  void AddStack(const StackTrace& s, int64_t cycles);

  // Flush stacks from the buffer to 'out'. See the docs for FlushSynchronizationProfile()
  // in spinlock_profiling.h for details on format.
  //
  // On return, guarantees that any stack traces that were present at the beginning of
  // the call have been flushed. However, new stacks can be added concurrently with this call.
  void Flush(std::stringstream* out, int64_t* dropped);

 private:

  // Collect the next sample from the underlying buffer, and set it back to 0 count
  // (thus marking it as "empty").
  //
  // 'iterator' serves as a way to keep track of the current position in the buffer.
  // Callers should initially set it to 0, and then pass the same pointer to each
  // call to CollectSample. This serves to loop through the collected samples.
  bool CollectSample(uint64_t* iterator, StackTrace* s, int64_t* trip_count, int64_t* cycles);

  // Hashtable entry.
  struct Entry {
    Entry() : trip_count(0),
              cycle_count(0) {
    }

    // Protects all other entries.
    base::SpinLock lock;

    // The number of times we've experienced contention with a stack trace equal
    // to 'trace'.
    //
    // If this is 0, then the entry is "unclaimed" and the other fields are not
    // considered valid.
    int64_t trip_count;

    // The total number of cycles spent waiting at this stack trace.
    int64_t cycle_count;

    // A cached hashcode of the trace.
    uint64_t hash;

    // The actual stack trace.
    StackTrace trace;
  };

  enum {
    kNumEntries = 1024,
    kNumLinearProbeAttempts = 4
  };
  Entry entries_[kNumEntries];

  // The number of samples which were dropped due to contention on this structure or
  // due to the hashtable being too full.
  AtomicInt<int64_t> dropped_samples_;
};

Atomic32 g_profiling_enabled = 0;
ContentionStacks* g_contention_stacks = nullptr;

void ContentionStacks::AddStack(const StackTrace& s, int64_t cycles) {
  uint64_t hash = s.HashCode();

  // Linear probe up to 4 attempts before giving up.
  for (int i = 0; i < kNumLinearProbeAttempts; i++) {
    Entry* e = &entries_[(hash + i) % kNumEntries];
    if (!e->lock.TryLock()) {
      // If we fail to lock it, we can safely just use a different slot.
      // It's OK if a single stack shows up multiple times, because pprof
      // aggregates them in the end anyway.
      continue;
    }

    if (e->trip_count == 0) {
      // It's an un-claimed slot. Claim it.
      e->hash = hash;
      e->trace = s;
    } else if (e->hash != hash || e->trace != s) {
      // It's claimed by a different stack trace.
      e->lock.Unlock();
      continue;
    }

    // Contribute to the stats for this stack.
    e->cycle_count += cycles;
    e->trip_count++;
    e->lock.Unlock();
    return;
  }

  // If we failed to find a matching hashtable slot, or we hit lock contention
  // trying to record our sample, add it to the dropped sample count.
  dropped_samples_.Increment();
}

void ContentionStacks::Flush(std::stringstream* out, int64_t* dropped) {
  uint64_t iterator = 0;
  StackTrace t;
  int64_t cycles;
  int64_t count;
  *out << "Format: Cycles\tCount @ Call Stack" << std::endl;
  while (g_contention_stacks->CollectSample(&iterator, &t, &count, &cycles)) {
    *out << cycles << "\t" << count
         << " @ " << t.ToHexString(StackTrace::NO_FIX_CALLER_ADDRESSES)
         << "\n" << t.Symbolize()
         << "\n-----------"
         << std::endl;
  }

  *dropped += dropped_samples_.Exchange(0);
}

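// Illustrative note (assumption, not part of the original file): given the format
// string above, a single recorded stack hit twice for a total of 4,000,000 wait
// cycles would produce output roughly like the following; the addresses are
// hypothetical and stand in for StackTrace::ToHexString()/Symbolize() output.
//
//   Format: Cycles\tCount @ Call Stack
//   4000000\t2 @ 0x7f12a3b4 0x7f12a1c0 0x401a2f
//   <symbolized frames>
//   -----------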

bool ContentionStacks::CollectSample(uint64_t* iterator, StackTrace* s, int64_t* trip_count,
                                     int64_t* cycles) {
  while (*iterator < kNumEntries) {
    Entry* e = &entries_[(*iterator)++];
    SpinLockHolder l(&e->lock);
    if (e->trip_count == 0) continue;

    *trip_count = e->trip_count;
    *cycles = e->cycle_count;
    *s = e->trace;

    e->trip_count = 0;
    e->cycle_count = 0;
    return true;
  }

  // Looped through the whole array and found nothing.
  return false;
}

// Disable TSAN on this function.
// https://yugabyte.atlassian.net/browse/ENG-354
ATTRIBUTE_NO_SANITIZE_THREAD
void SubmitSpinLockProfileData(const void *contendedlock, int64 wait_cycles) {
  bool profiling_enabled = base::subtle::Acquire_Load(&g_profiling_enabled);
  bool long_wait_time = wait_cycles > FLAGS_lock_contention_trace_threshold_cycles;
  // Short circuit this function quickly in the common case.
  if (PREDICT_TRUE(!profiling_enabled && !long_wait_time)) {
    return;
  }

  static __thread bool in_func = false;
  if (in_func) return; // non-re-entrant
  in_func = true;

  StackTrace stack;
  stack.Collect();

  if (profiling_enabled) {
    DCHECK_NOTNULL(g_contention_stacks)->AddStack(stack, wait_cycles);
  }

  if (PREDICT_FALSE(long_wait_time)) {
    Trace* t = Trace::CurrentTrace();
    if (t) {
      double seconds = static_cast<double>(wait_cycles) / base::CyclesPerSecond();
      char backtrace_buffer[1024];
      stack.StringifyToHex(backtrace_buffer, arraysize(backtrace_buffer));
      TRACE_TO(t, "Waited $0 on lock $1. stack: $2",
               HumanReadableElapsedTime::ToShortString(seconds), contendedlock,
               backtrace_buffer);
    }
  }

  LongAdder* la = reinterpret_cast<LongAdder*>(
      base::subtle::Acquire_Load(reinterpret_cast<AtomicWord*>(&g_contended_cycles)));
  if (la) {
    la->IncrementBy(wait_cycles);
  }

  in_func = false;
}

void DoInit() {
  base::subtle::Release_Store(reinterpret_cast<AtomicWord*>(&g_contention_stacks),
                              reinterpret_cast<uintptr_t>(new ContentionStacks()));
  base::subtle::Release_Store(reinterpret_cast<AtomicWord*>(&g_contended_cycles),
                              reinterpret_cast<uintptr_t>(new LongAdder()));
}

} // anonymous namespace

void InitSpinLockContentionProfiling() {
  static GoogleOnceType once = GOOGLE_ONCE_INIT;
  GoogleOnceInit(&once, DoInit);
}

void RegisterSpinLockContentionMetrics(const scoped_refptr<MetricEntity>& entity) {
  InitSpinLockContentionProfiling();
  entity->NeverRetire(
      METRIC_spinlock_contention_time.InstantiateFunctionGauge(
          entity, Bind(&GetSpinLockContentionMicros)));
}

uint64_t GetSpinLockContentionMicros() {
  int64_t wait_cycles = DCHECK_NOTNULL(g_contended_cycles)->Value();
  double micros = static_cast<double>(wait_cycles) / base::CyclesPerSecond()
    * kMicrosPerSecond;
  return implicit_cast<int64_t>(micros);
}

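// Illustrative arithmetic (assumption, not part of the original file): on a machine
// where base::CyclesPerSecond() reports 2,000,000,000 (a 2 GHz clock), 2,000,000
// contended cycles convert to 2,000,000 / 2,000,000,000 * 1,000,000 = 1,000 microseconds.
// That is the "2M cycles should be about 1ms" estimate behind the default value of
// --lock_contention_trace_threshold_cycles above.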

void StartSynchronizationProfiling() {
  InitSpinLockContentionProfiling();
  base::subtle::Barrier_AtomicIncrement(&g_profiling_enabled, 1);
}

void FlushSynchronizationProfile(std::stringstream* out,
                                 int64_t* drop_count) {
  CHECK_NOTNULL(g_contention_stacks)->Flush(out, drop_count);
}

void StopSynchronizationProfiling() {
  InitSpinLockContentionProfiling();
  CHECK_GE(base::subtle::Barrier_AtomicIncrement(&g_profiling_enabled, -1), 0);
}

} // namespace yb

// The hook expected by gutil is in the gutil namespace. Simply forward into the
// yb namespace so we don't need to qualify everything.
namespace gutil {
void SubmitSpinLockProfileData(const void *contendedlock, int64 wait_cycles) {
  yb::SubmitSpinLockProfileData(contendedlock, wait_cycles);
}
} // namespace gutil
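
For context, here is a minimal sketch of how a caller might drive the public profiling API declared in spinlock_profiling.h and defined above. The surrounding function, the placeholder workload, and the logging are assumptions for illustration; only the yb:: calls come from this file.

// Hypothetical caller (illustration only, not part of spinlock_profiling.cc).
#include <sstream>

#include <glog/logging.h>

#include "yb/util/spinlock_profiling.h"

void ProfileContendedSection() {
  yb::StartSynchronizationProfiling();                   // enable stack recording
  // ... run the spinlock-contended workload of interest here ...
  std::stringstream profile;
  int64_t dropped = 0;
  yb::FlushSynchronizationProfile(&profile, &dropped);   // drain recorded stacks
  yb::StopSynchronizationProfiling();                    // disable stack recording
  LOG(INFO) << "Contention profile (" << dropped << " samples dropped):\n"
            << profile.str();
  LOG(INFO) << "Cumulative spinlock contention: "
            << yb::GetSpinLockContentionMicros() << " us";
}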