/Users/deen/code/yugabyte-db/src/yb/rocksdb/util/thread_local.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under the BSD-style license found in the |
3 | | // LICENSE file in the root directory of this source tree. An additional grant |
4 | | // of patent rights can be found in the PATENTS file in the same directory. |
5 | | // |
6 | | // The following only applies to changes made to this file as part of YugaByte development. |
7 | | // |
8 | | // Portions Copyright (c) YugaByte, Inc. |
9 | | // |
10 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
11 | | // in compliance with the License. You may obtain a copy of the License at |
12 | | // |
13 | | // http://www.apache.org/licenses/LICENSE-2.0 |
14 | | // |
15 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
16 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
17 | | // or implied. See the License for the specific language governing permissions and limitations |
18 | | // under the License. |
19 | | // |
20 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
21 | | // Use of this source code is governed by a BSD-style license that can be |
22 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
23 | | |
24 | | #include "yb/rocksdb/util/thread_local.h" |
25 | | |
26 | | #include <algorithm> |
27 | | |
28 | | #include "yb/rocksdb/port/likely.h" |
29 | | #include "yb/rocksdb/util/mutexlock.h" |
30 | | |
31 | | namespace rocksdb { |
32 | | |
33 | | #if ROCKSDB_SUPPORT_THREAD_LOCAL |
34 | | __thread ThreadLocalPtr::ThreadData* ThreadLocalPtr::StaticMeta::tls_ = nullptr; |
35 | | #endif |
36 | | |
37 | | // Windows doesn't support a per-thread destructor with its |
38 | | // TLS primitives. So, we build it manually by inserting a |
39 | | // function to be called on each thread's exit. |
40 | | // See http://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way |
41 | | // and http://www.nynaeve.net/?p=183 |
42 | | // |
// Really we do this to have a clear conscience, since using TLS together with
// thread pools is questionable: a pooled thread may outlive any single request,
// so per-thread state must still be cleaned up when the thread itself exits.
// It is fine within a single request, but otherwise pooled threads have no
// stable identity in modern usage.
47 | | |
// This runs on Windows only, invoked by the system loader.
#ifdef OS_WIN

// The Windows cleanup routine is invoked by the system loader with a different
// signature, so we cannot directly hook up the original OnThreadExit (which is
// a private member). Instead, StaticMeta shares the address of that function
// with us so we can invoke it from here.
namespace wintlscleanup {

// This is set to OnThreadExit in the StaticMeta singleton constructor.
UnrefHandler thread_local_inclass_routine = nullptr;
pthread_key_t thread_local_key = -1;

// Static callback function invoked on each thread termination.
void NTAPI WinOnThreadExit(PVOID module, DWORD reason, PVOID reserved) {
  // We decided to punt on DLL_PROCESS_DETACH: only per-thread detach is
  // handled; process-exit cleanup is intentionally skipped.
  if (DLL_THREAD_DETACH == reason) {
    if (thread_local_key != -1 && thread_local_inclass_routine != nullptr) {
      void* tls = pthread_getspecific(thread_local_key);
      if (tls != nullptr) {
        thread_local_inclass_routine(tls);
      }
    }
  }
}

}  // namespace wintlscleanup

// Force the linker to keep the TLS directory and our callback pointer alive;
// without these the callback below would be discarded and never run.
#ifdef _WIN64

#pragma comment(linker, "/include:_tls_used")
#pragma comment(linker, "/include:p_thread_callback_on_exit")

#else  // _WIN64

#pragma comment(linker, "/INCLUDE:__tls_used")
#pragma comment(linker, "/INCLUDE:_p_thread_callback_on_exit")

#endif  // _WIN64

// extern "C" suppresses C++ name mangling so we know the symbol name for the
// linker /INCLUDE:symbol pragma above.
extern "C" {

// The linker must not discard p_thread_callback_on_exit. (We force a reference
// to this variable with a linker /include:symbol pragma to ensure that.) If
// this variable is discarded, the OnThreadExit function will never be called.
#ifdef _WIN64

// The .CRT section is merged with .rdata on x64, so it must be constant data.
#pragma const_seg(".CRT$XLB")
// When defining a const variable, it must have external linkage to be sure the
// linker doesn't discard it.
extern const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit;
const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit =
    wintlscleanup::WinOnThreadExit;
// Reset the default section.
#pragma const_seg()

#else  // _WIN64

#pragma data_seg(".CRT$XLB")
PIMAGE_TLS_CALLBACK p_thread_callback_on_exit = wintlscleanup::WinOnThreadExit;
// Reset the default section.
#pragma data_seg()

#endif  // _WIN64

}  // extern "C"

#endif  // OS_WIN
120 | | |
121 | 9.20M | void ThreadLocalPtr::InitSingletons() { |
122 | 9.20M | ThreadLocalPtr::StaticMeta::InitSingletons(); |
123 | 9.20M | } |
124 | | |
// Returns the process-wide StaticMeta singleton, constructing it on first use.
ThreadLocalPtr::StaticMeta* ThreadLocalPtr::Instance() {
  // Here we prefer a function-static variable over a global static variable,
  // because a function-static variable is initialized when the function is
  // first called. As a result, we can properly control the construction order
  // by properly arranging the first call.
  //
  // Note that here we decided to make "inst" a static pointer w/o deleting
  // it at the end, instead of a static object. This is to avoid the following
  // destruction-order disaster when a child thread using ThreadLocalPtr
  // dies AFTER the main thread dies: When a child thread happens to use
  // ThreadLocalPtr, it will try to delete its thread-local data in its
  // OnThreadExit when the child thread dies. However, OnThreadExit depends
  // on the following variable. As a result, if the main thread dies before any
  // child thread that happens to use ThreadLocalPtr, then the destruction of
  // the following variable would go first, then OnThreadExit, therefore
  // causing an invalid access.
  //
  // The above problem can be solved by using thread_local to store tls_
  // instead of __thread. The major difference between thread_local and
  // __thread is that thread_local supports dynamic construction and
  // destruction of non-primitive typed variables. As a result, we can
  // guarantee the destruction order even when the main thread dies before any
  // child threads. However, thread_local requires gcc 4.8 and is not supported
  // by all the compilers that accept -std=c++11 (e.g., the default clang on
  // Mac), while the current RocksDB still accepts gcc 4.7.
  static ThreadLocalPtr::StaticMeta* inst = new ThreadLocalPtr::StaticMeta();
  return inst;
}
154 | | |
155 | 9.20M | void ThreadLocalPtr::StaticMeta::InitSingletons() { Mutex(); } |
156 | | |
157 | 13.1M | port::Mutex* ThreadLocalPtr::StaticMeta::Mutex() { return &Instance()->mutex_; } |
158 | | |
// Thread-exit hook registered via pthread_key_create: unregisters the dying
// thread's ThreadData, runs each slot's unref handler on any stored pointer,
// and frees the ThreadData itself.
void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr) {
  auto* tls = static_cast<ThreadData*>(ptr);
  assert(tls != nullptr);

  // Use the StaticMeta pointer cached in the ThreadData instead of calling
  // StaticMeta::Instance() directly: the static inside Instance() might
  // already be out of scope here, in case this OnThreadExit is called after
  // the main thread dies.
  auto* inst = tls->inst;
  // Clear the key first so the destructor chain is not re-entered for this
  // thread.
  pthread_setspecific(inst->pthread_key_, nullptr);

  MutexLock l(inst->MemberMutex());
  inst->RemoveThreadData(tls);
  // Unref stored pointers of the current thread for all ThreadLocalPtr
  // instances (entry index == instance id).
  uint32_t id = 0;
  for (auto& e : tls->entries) {
    void* raw = e.ptr.load();
    if (raw != nullptr) {
      auto unref = inst->GetHandler(id);
      if (unref != nullptr) {
        unref(raw);
      }
    }
    ++id;
  }
  // Delete the thread-local structure no matter whether it is Mac platform.
  delete tls;
}
187 | | |
// Constructs the singleton: creates the pthread key whose destructor drives
// per-thread cleanup, arranges main-thread cleanup via a static destructor,
// initializes the circular ThreadData list, and (on Windows) publishes the
// cleanup routine to the loader callback.
ThreadLocalPtr::StaticMeta::StaticMeta() : next_instance_id_(0), head_(this) {
  if (pthread_key_create(&pthread_key_, &OnThreadExit) != 0) {
    // Without a working TLS key the whole mechanism is unusable; fail hard.
    abort();
  }

  // OnThreadExit is not getting called on the main thread.
  // Call through the static destructor mechanism to avoid memory leak.
  //
  // Caveats: ~A() will be invoked _after_ ~StaticMeta for the global
  // singleton (destructors are invoked in reverse order of constructor
  // _completion_); the latter must not mutate internal members. This
  // cleanup mechanism inherently relies on use-after-release of the
  // StaticMeta, and is brittle with respect to compiler-specific handling
  // of memory backing destructed statically-scoped objects. Perhaps
  // registering with atexit(3) would be more robust.
  //
  // This is not required on Windows.
#if !defined(OS_WIN)
  static struct A {
    ~A() {
#if !(ROCKSDB_SUPPORT_THREAD_LOCAL)
      // Without __thread support, fetch the main thread's ThreadData from the
      // pthread key; the local name tls_ mirrors the member used otherwise.
      ThreadData* tls_ =
        static_cast<ThreadData*>(pthread_getspecific(Instance()->pthread_key_));
#endif
      if (tls_) {
        OnThreadExit(tls_);
      }
    }
  } a;
#endif  // !defined(OS_WIN)

  // Empty circular doubly-linked list: head points at itself.
  head_.next = &head_;
  head_.prev = &head_;

#ifdef OS_WIN
  // Share the cleanup routine and the key with the Windows loader callback.
  wintlscleanup::thread_local_inclass_routine = OnThreadExit;
  wintlscleanup::thread_local_key = pthread_key_;
#endif
}
228 | | |
229 | 60.4k | void ThreadLocalPtr::StaticMeta::AddThreadData(ThreadLocalPtr::ThreadData* d) { |
230 | 60.4k | Mutex()->AssertHeld(); |
231 | 60.4k | d->next = &head_; |
232 | 60.4k | d->prev = head_.prev; |
233 | 60.4k | head_.prev->next = d; |
234 | 60.4k | head_.prev = d; |
235 | 60.4k | } |
236 | | |
237 | | void ThreadLocalPtr::StaticMeta::RemoveThreadData( |
238 | 25.2k | ThreadLocalPtr::ThreadData* d) { |
239 | 25.2k | Mutex()->AssertHeld(); |
240 | 25.2k | d->next->prev = d->prev; |
241 | 25.2k | d->prev->next = d->next; |
242 | 25.2k | d->next = d->prev = d; |
243 | 25.2k | } |
244 | | |
// Returns the calling thread's ThreadData, lazily creating and registering it
// (in the global list and with the pthread key) on first use.
ThreadLocalPtr::ThreadData* ThreadLocalPtr::StaticMeta::GetThreadLocal() {
#if !(ROCKSDB_SUPPORT_THREAD_LOCAL)
  // Make this local variable name look like a member variable so that we
  // can share all the code below (when __thread IS supported, tls_ is the
  // static __thread member instead).
  ThreadData* tls_ =
      static_cast<ThreadData*>(pthread_getspecific(Instance()->pthread_key_));
#endif

  if (UNLIKELY(tls_ == nullptr)) {
    auto* inst = Instance();
    tls_ = new ThreadData(inst);
    {
      // Register it in the global chain; needs to be done before thread-exit
      // handler registration.
      MutexLock l(Mutex());
      inst->AddThreadData(tls_);
    }
    // Even if it is not OS_MACOSX, we need to register a value for
    // pthread_key_ so that the exit handler will be triggered.
    if (pthread_setspecific(inst->pthread_key_, tls_) != 0) {
      {
        // Roll back the list registration before giving up.
        MutexLock l(Mutex());
        inst->RemoveThreadData(tls_);
      }
      delete tls_;
      abort();
    }
  }
  return tls_;
}
275 | | |
276 | 2.64M | void* ThreadLocalPtr::StaticMeta::Get(uint32_t id) const { |
277 | 2.64M | auto* tls = GetThreadLocal(); |
278 | 2.64M | if (UNLIKELY(id >= tls->entries.size())) { |
279 | 3.67k | return nullptr; |
280 | 3.67k | } |
281 | 2.64M | return tls->entries[id].ptr.load(std::memory_order_acquire); |
282 | 2.64M | } |
283 | | |
284 | 2.30M | void ThreadLocalPtr::StaticMeta::Reset(uint32_t id, void* ptr) { |
285 | 2.30M | auto* tls = GetThreadLocal(); |
286 | 2.30M | if (UNLIKELY(id >= tls->entries.size())) { |
287 | | // Need mutex to protect entries access within ReclaimId |
288 | 3.38k | MutexLock l(Mutex()); |
289 | 3.38k | tls->entries.resize(id + 1); |
290 | 3.38k | } |
291 | 2.30M | tls->entries[id].ptr.store(ptr, std::memory_order_release); |
292 | 2.30M | } |
293 | | |
294 | 21.8M | void* ThreadLocalPtr::StaticMeta::Swap(uint32_t id, void* ptr) { |
295 | 21.8M | auto* tls = GetThreadLocal(); |
296 | 21.8M | if (UNLIKELY(id >= tls->entries.size())) { |
297 | | // Need mutex to protect entries access within ReclaimId |
298 | 117k | MutexLock l(Mutex()); |
299 | 117k | tls->entries.resize(id + 1); |
300 | 117k | } |
301 | 21.8M | return tls->entries[id].ptr.exchange(ptr, std::memory_order_acquire); |
302 | 21.8M | } |
303 | | |
304 | | bool ThreadLocalPtr::StaticMeta::CompareAndSwap(uint32_t id, void* ptr, |
305 | 21.8M | void*& expected) { |
306 | 21.8M | auto* tls = GetThreadLocal(); |
307 | 21.8M | if (UNLIKELY(id >= tls->entries.size())) { |
308 | | // Need mutex to protect entries access within ReclaimId |
309 | 0 | MutexLock l(Mutex()); |
310 | 0 | tls->entries.resize(id + 1); |
311 | 0 | } |
312 | 21.8M | return tls->entries[id].ptr.compare_exchange_strong( |
313 | 21.8M | expected, ptr, std::memory_order_release, std::memory_order_relaxed); |
314 | 21.8M | } |
315 | | |
316 | | void ThreadLocalPtr::StaticMeta::Scrape(uint32_t id, autovector<void*>* ptrs, |
317 | 1.00M | void* const replacement) { |
318 | 1.00M | MutexLock l(Mutex()); |
319 | 39.3M | for (ThreadData* t = head_.next; t != &head_; t = t->next) { |
320 | 38.3M | if (id < t->entries.size()) { |
321 | 31.6M | void* ptr = |
322 | 31.6M | t->entries[id].ptr.exchange(replacement, std::memory_order_acquire); |
323 | 31.6M | if (ptr != nullptr) { |
324 | 71.2k | ptrs->push_back(ptr); |
325 | 71.2k | } |
326 | 31.6M | } |
327 | 38.3M | } |
328 | 1.00M | } |
329 | | |
330 | 690k | void ThreadLocalPtr::StaticMeta::SetHandler(uint32_t id, UnrefHandler handler) { |
331 | 690k | MutexLock l(Mutex()); |
332 | 690k | handler_map_[id] = handler; |
333 | 690k | } |
334 | | |
335 | 661k | UnrefHandler ThreadLocalPtr::StaticMeta::GetHandler(uint32_t id) { |
336 | 661k | Mutex()->AssertHeld(); |
337 | 661k | auto iter = handler_map_.find(id); |
338 | 661k | if (iter == handler_map_.end()) { |
339 | 4 | return nullptr; |
340 | 4 | } |
341 | 661k | return iter->second; |
342 | 661k | } |
343 | | |
344 | 690k | uint32_t ThreadLocalPtr::StaticMeta::GetId() { |
345 | 690k | MutexLock l(Mutex()); |
346 | 690k | if (free_instance_ids_.empty()) { |
347 | 134k | return next_instance_id_++; |
348 | 134k | } |
349 | | |
350 | 555k | uint32_t id = free_instance_ids_.back(); |
351 | 555k | free_instance_ids_.pop_back(); |
352 | 555k | return id; |
353 | 555k | } |
354 | | |
355 | 2.06k | uint32_t ThreadLocalPtr::StaticMeta::PeekId() const { |
356 | 2.06k | MutexLock l(Mutex()); |
357 | 2.06k | if (!free_instance_ids_.empty()) { |
358 | 2.05k | return free_instance_ids_.back(); |
359 | 2.05k | } |
360 | 6 | return next_instance_id_; |
361 | 6 | } |
362 | | |
363 | 3 | std::vector<uint32_t> ThreadLocalPtr::StaticMeta::PeekIds() const { |
364 | 3 | MutexLock l(Mutex()); |
365 | 3 | return std::vector<uint32_t>(free_instance_ids_.begin(), free_instance_ids_.end()); |
366 | 3 | } |
367 | | |
368 | 652k | void ThreadLocalPtr::StaticMeta::ReclaimId(uint32_t id) { |
369 | | // This id is not used, go through all thread local data and release |
370 | | // corresponding value |
371 | 652k | MutexLock l(Mutex()); |
372 | 652k | auto unref = GetHandler(id); |
373 | 27.5M | for (ThreadData* t = head_.next; t != &head_; t = t->next) { |
374 | 26.8M | if (id < t->entries.size()) { |
375 | 22.2M | void* ptr = t->entries[id].ptr.exchange(nullptr); |
376 | 22.2M | if (ptr != nullptr && unref != nullptr) { |
377 | 155k | unref(ptr); |
378 | 155k | } |
379 | 22.2M | } |
380 | 26.8M | } |
381 | 652k | handler_map_[id] = nullptr; |
382 | 652k | free_instance_ids_.push_back(id); |
383 | 652k | } |
384 | | |
385 | | ThreadLocalPtr::ThreadLocalPtr(UnrefHandler handler) |
386 | 690k | : id_(Instance()->GetId()) { |
387 | 690k | if (handler != nullptr) { |
388 | 690k | Instance()->SetHandler(id_, handler); |
389 | 690k | } |
390 | 690k | } |
391 | | |
392 | 653k | ThreadLocalPtr::~ThreadLocalPtr() { |
393 | 653k | Instance()->ReclaimId(id_); |
394 | 653k | } |
395 | | |
396 | 2.84M | void* ThreadLocalPtr::Get() const { |
397 | 2.84M | return Instance()->Get(id_); |
398 | 2.84M | } |
399 | | |
400 | 2.09M | void ThreadLocalPtr::Reset(void* ptr) { |
401 | 2.09M | Instance()->Reset(id_, ptr); |
402 | 2.09M | } |
403 | | |
404 | 21.9M | void* ThreadLocalPtr::Swap(void* ptr) { |
405 | 21.9M | return Instance()->Swap(id_, ptr); |
406 | 21.9M | } |
407 | | |
408 | 21.9M | bool ThreadLocalPtr::CompareAndSwap(void* ptr, void*& expected) { |
409 | 21.9M | return Instance()->CompareAndSwap(id_, ptr, expected); |
410 | 21.9M | } |
411 | | |
412 | 1.00M | void ThreadLocalPtr::Scrape(autovector<void*>* ptrs, void* const replacement) { |
413 | 1.00M | Instance()->Scrape(id_, ptrs, replacement); |
414 | 1.00M | } |
415 | | |
416 | | } // namespace rocksdb |