/Users/deen/code/yugabyte-db/src/yb/util/striped64.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // The following only applies to changes made to this file as part of YugaByte development. |
19 | | // |
20 | | // Portions Copyright (c) YugaByte, Inc. |
21 | | // |
22 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
23 | | // in compliance with the License. You may obtain a copy of the License at |
24 | | // |
25 | | // http://www.apache.org/licenses/LICENSE-2.0 |
26 | | // |
27 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
28 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
29 | | // or implied. See the License for the specific language governing permissions and limitations |
30 | | // under the License. |
31 | | // |
32 | | |
33 | | #ifndef YB_UTIL_STRIPED64_H_ |
34 | | #define YB_UTIL_STRIPED64_H_ |
35 | | |
36 | | #include "yb/gutil/port.h" |
37 | | #include "yb/util/atomic.h" |
38 | | #include "yb/util/threadlocal.h" |
39 | | |
40 | | namespace yb { |
41 | | |
42 | | class Striped64; |
43 | | |
44 | | namespace striped64 { |
45 | | namespace internal { |
46 | | |
47 | | struct HashCode { |
48 | | public: |
49 | | HashCode(); |
50 | | uint64_t code_; |
51 | | }; |
52 | | |
53 | | #define ATOMIC_INT_SIZE sizeof(AtomicInt<int64_t>) |
54 | | // Padded POD container for AtomicInt. This prevents false sharing of cache lines. |
55 | | class Cell { |
56 | | public: |
57 | | Cell(); |
58 | | |
59 | | Cell(const Cell&) = delete; |
60 | | void operator=(const Cell&) = delete; |
61 | | |
62 | 999M | inline bool CompareAndSet(int64_t cmp, int64_t value) { |
63 | 999M | return value_.CompareAndSet(cmp, value); |
64 | 999M | } |
65 | | |
66 | | // Padding advice from Herb Sutter: |
67 | | // http://www.drdobbs.com/parallel/eliminate-false-sharing/217500206?pgno=4 |
68 | | AtomicInt<int64_t> value_; |
69 | | char pad[CACHELINE_SIZE > ATOMIC_INT_SIZE ? |
70 | | CACHELINE_SIZE - ATOMIC_INT_SIZE : 1]; |
71 | | } CACHELINE_ALIGNED; |
72 | | #undef ATOMIC_INT_SIZE |
73 | | |
74 | | } // namespace internal |
75 | | } // namespace striped64 |
76 | | |
77 | | // This set of classes is heavily derived from JSR166e, released into the public domain |
78 | | // by Doug Lea and the other authors. |
79 | | // |
80 | | // See: http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/src/jsr166e/Striped64.java?view=co |
81 | | // See: http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/src/jsr166e/LongAdder.java?view=co |
82 | | // |
83 | | // The Striped64 and LongAdder implementations here are simplified versions of what's present in |
84 | | // JSR166e. However, the core ideas remain the same. |
85 | | // |
86 | | // Updating a single AtomicInteger in a multi-threaded environment can be quite slow: |
87 | | // |
88 | | // 1. False sharing of cache lines with other counters. |
89 | | // 2. Cache line bouncing from high update rates, especially with many cores. |
90 | | // |
91 | | // These two problems are addressed by Striped64. When there is no contention, it uses CAS on a |
92 | | // single base counter to store updates. However, when Striped64 detects contention |
93 | | // (via a failed CAS operation), it will allocate a small, fixed size hashtable of Cells. |
94 | | // A Cell is a simple POD that pads out an AtomicInt to 64 bytes to prevent |
95 | | // sharing a cache line. |
96 | | // |
97 | | // Reading the value of a Striped64 requires traversing the hashtable to calculate the true sum. |
98 | | // |
99 | | // Each updating thread uses a thread-local hashcode to determine its Cell in the hashtable. |
100 | | // If a thread fails to CAS its hashed Cell, it will do a lightweight rehash operation to try |
101 | | // and find an uncontended bucket. Because the hashcode is thread-local, this rehash affects all |
102 | | // Striped64's accessed by the thread. This is good, since contention on one Striped64 is |
103 | | // indicative of contention elsewhere too. |
104 | | // |
105 | | // The hashtable is statically sized to the nearest power of 2 greater than or equal to the |
106 | | // number of CPUs. This is sufficient, since this guarantees the existence of a perfect hash |
107 | | // function. Due to the random rehashing, the threads should eventually converge to this function. |
108 | | // In practice, this scheme has shown to be sufficient. |
109 | | // |
110 | | // The biggest simplification of this implementation compared to JSR166e is that we do not |
111 | | // dynamically grow the table, instead immediately allocating it to the full size. |
112 | | // We also do not lazily allocate each Cell, instead allocating the entire array at once. |
113 | | // This means we waste some additional memory in low contention scenarios, and initial allocation |
114 | | // will also be slower. Some of the micro-optimizations were also elided for readability. |
115 | | class Striped64 { |
116 | | public: |
117 | | Striped64(); |
118 | | virtual ~Striped64(); |
119 | | |
120 | | protected: |
121 | | |
122 | | enum Rehash { |
123 | | kRehash, |
124 | | kNoRehash |
125 | | }; |
126 | | |
127 | | // CAS the base field. |
128 | 3.18k | bool CasBase(int64_t cmp, int64_t val) { return base_.CompareAndSet(cmp, val); } |
129 | | |
130 | | // CAS the busy field from 0 to 1 to acquire the lock. |
131 | 119k | bool CasBusy() { return busy_.CompareAndSet(0, 1); } |
132 | | |
133 | | // Computes the function of the current and new value. Used in RetryUpdate. |
134 | | virtual int64_t Fn(int64_t current_value, int64_t new_value) = 0; |
135 | | |
136 | | // Handles cases of updates involving initialization, resizing, creating new Cells, and/or |
137 | | // contention. See above for further explanation. |
138 | | void RetryUpdate(int64_t x, Rehash to_rehash); |
139 | | |
140 | | // Sets base and all cells to the given value. |
141 | | void InternalReset(int64_t initialValue); |
142 | | |
143 | | // Base value, used mainly when there is no contention, but also as a fallback during |
144 | | // table initialization races. Updated via CAS. |
145 | | striped64::internal::Cell base_; |
146 | | |
147 | | // CAS lock used when resizing and/or creating cells. |
148 | | AtomicBool busy_; |
149 | | |
150 | | // Backing buffer for cells_, used for alignment. |
151 | | void* cell_buffer_; |
152 | | |
153 | | // Table of cells. When non-null, size is the nearest power of 2 >= NCPU. |
154 | | striped64::internal::Cell* cells_; |
155 | | int32_t num_cells_; |
156 | | }; |
157 | | |
158 | | // A 64-bit number optimized for high-volume concurrent updates. |
159 | | // See Striped64 for a longer explanation of the inner workings. |
160 | | class LongAdder : Striped64 { |
161 | | public: |
162 | 11.9M | LongAdder() {} |
163 | | void IncrementBy(int64_t x); |
164 | | void Increment() { IncrementBy(1); } |
165 | | void Decrement() { IncrementBy(-1); } |
166 | | |
167 | | // Returns the current value. |
168 | | // Note this is not an atomic snapshot in the presence of concurrent updates. |
169 | | int64_t Value() const; |
170 | | |
171 | | // Resets the counter state to zero. |
172 | | void Reset() { InternalReset(0); } |
173 | | |
174 | | private: |
175 | 152k | int64_t Fn(int64_t current_value, int64_t new_value) override { |
176 | 152k | return current_value + new_value; |
177 | 152k | } |
178 | | |
179 | | DISALLOW_COPY_AND_ASSIGN(LongAdder); |
180 | | }; |
181 | | |
182 | | } // namespace yb |
183 | | |
184 | | #endif // YB_UTIL_STRIPED64_H_ |