/Users/deen/code/yugabyte-db/src/yb/util/striped64.h
Line  | Count  | Source  | 
1  |  | // Licensed to the Apache Software Foundation (ASF) under one  | 
2  |  | // or more contributor license agreements.  See the NOTICE file  | 
3  |  | // distributed with this work for additional information  | 
4  |  | // regarding copyright ownership.  The ASF licenses this file  | 
5  |  | // to you under the Apache License, Version 2.0 (the  | 
6  |  | // "License"); you may not use this file except in compliance  | 
7  |  | // with the License.  You may obtain a copy of the License at  | 
8  |  | //  | 
9  |  | //   http://www.apache.org/licenses/LICENSE-2.0  | 
10  |  | //  | 
11  |  | // Unless required by applicable law or agreed to in writing,  | 
12  |  | // software distributed under the License is distributed on an  | 
13  |  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY  | 
14  |  | // KIND, either express or implied.  See the License for the  | 
15  |  | // specific language governing permissions and limitations  | 
16  |  | // under the License.  | 
17  |  | //  | 
18  |  | // The following only applies to changes made to this file as part of YugaByte development.  | 
19  |  | //  | 
20  |  | // Portions Copyright (c) YugaByte, Inc.  | 
21  |  | //  | 
22  |  | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except  | 
23  |  | // in compliance with the License.  You may obtain a copy of the License at  | 
24  |  | //  | 
25  |  | // http://www.apache.org/licenses/LICENSE-2.0  | 
26  |  | //  | 
27  |  | // Unless required by applicable law or agreed to in writing, software distributed under the License  | 
28  |  | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express  | 
29  |  | // or implied.  See the License for the specific language governing permissions and limitations  | 
30  |  | // under the License.  | 
31  |  | //  | 
32  |  |  | 
33  |  | #ifndef YB_UTIL_STRIPED64_H_  | 
34  |  | #define YB_UTIL_STRIPED64_H_  | 
35  |  |  | 
36  |  | #include "yb/gutil/port.h"  | 
37  |  | #include "yb/util/atomic.h"  | 
38  |  | #include "yb/util/threadlocal.h"  | 
39  |  |  | 
40  |  | namespace yb { | 
41  |  |  | 
42  |  | class Striped64;  | 
43  |  |  | 
44  |  | namespace striped64 { | 
45  |  | namespace internal { | 
46  |  |  | 
47  |  | struct HashCode { | 
48  |  |  public:  | 
49  |  |   HashCode();  | 
50  |  |   uint64_t code_;  | 
51  |  | };  | 
52  |  |  | 
53  |  | #define ATOMIC_INT_SIZE sizeof(AtomicInt<int64_t>)  | 
54  |  | // Padded POD container for AtomicInt. This prevents false sharing of cache lines.  | 
55  |  | class Cell { | 
56  |  |  public:  | 
57  |  |   Cell();  | 
58  |  |  | 
59  |  |   Cell(const Cell&) = delete;  | 
60  |  |   void operator=(const Cell&) = delete;  | 
61  |  |  | 
62  | 999M  |   inline bool CompareAndSet(int64_t cmp, int64_t value) { | 
63  | 999M  |     return value_.CompareAndSet(cmp, value);  | 
64  | 999M  |   }  | 
65  |  |  | 
66  |  |   // Padding advice from Herb Sutter:  | 
67  |  |   // http://www.drdobbs.com/parallel/eliminate-false-sharing/217500206?pgno=4  | 
68  |  |   AtomicInt<int64_t> value_;  | 
69  |  |   char pad[CACHELINE_SIZE > ATOMIC_INT_SIZE ?  | 
70  |  |            CACHELINE_SIZE - ATOMIC_INT_SIZE : 1];  | 
71  |  | } CACHELINE_ALIGNED;  | 
72  |  | #undef ATOMIC_INT_SIZE  | 
73  |  |  | 
74  |  | } // namespace internal  | 
75  |  | } // namespace striped64  | 
76  |  |  | 
77  |  | // This set of classes is heavily derived from JSR166e, released into the public domain  | 
78  |  | // by Doug Lea and the other authors.  | 
79  |  | //  | 
80  |  | // See: http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/src/jsr166e/Striped64.java?view=co  | 
81  |  | // See: http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/src/jsr166e/LongAdder.java?view=co  | 
82  |  | //  | 
83  |  | // The Striped64 and LongAdder implementations here are simplified versions of what's present in  | 
84  |  | // JSR166e. However, the core ideas remain the same.  | 
85  |  | //  | 
86  |  | // Updating a single AtomicInteger in a multi-threaded environment can be quite slow:  | 
87  |  | //  | 
88  |  | //   1. False sharing of cache lines with other counters.  | 
89  |  | //   2. Cache line bouncing from high update rates, especially with many cores.  | 
90  |  | //  | 
91  |  | // These two problems are addressed by Striped64. When there is no contention, it uses CAS on a  | 
92  |  | // single base counter to store updates. However, when Striped64 detects contention  | 
93  |  | // (via a failed CAS operation), it will allocate a small, fixed size hashtable of Cells.  | 
94  |  | // A Cell is a simple POD that pads out an AtomicInt to 64 bytes to prevent  | 
95  |  | // sharing a cache line.  | 
96  |  | //  | 
97  |  | // Reading the value of a Striped64 requires traversing the hashtable to calculate the true sum.  | 
98  |  | //  | 
99  |  | // Each updating thread uses a thread-local hashcode to determine its Cell in the hashtable.  | 
100  |  | // If a thread fails to CAS its hashed Cell, it will do a lightweight rehash operation to try  | 
101  |  | // and find an uncontended bucket. Because the hashcode is thread-local, this rehash affects all  | 
102  |  | // Striped64's accessed by the thread. This is good, since contention on one Striped64 is  | 
103  |  | // indicative of contention elsewhere too.  | 
104  |  | //  | 
105  |  | // The hashtable is statically sized to the nearest power of 2 greater than or equal to the  | 
106  |  | // number of CPUs. This is sufficient, since this guarantees the existence of a perfect hash  | 
107  |  | // function. Due to the random rehashing, the threads should eventually converge to this function.  | 
108  |  | // In practice, this scheme has shown to be sufficient.  | 
109  |  | //  | 
110  |  | // The biggest simplification of this implementation compared to JSR166e is that we do not  | 
111  |  | // dynamically grow the table, instead immediately allocating it to the full size.  | 
112  |  | // We also do not lazily allocate each Cell, instead allocating the entire array at once.  | 
113  |  | // This means we waste some additional memory in low contention scenarios, and initial allocation  | 
114  |  | // will also be slower. Some of the micro-optimizations were also elided for readability.  | 
115  |  | class Striped64 { | 
116  |  |  public:  | 
117  |  |   Striped64();  | 
118  |  |   virtual ~Striped64();  | 
119  |  |  | 
120  |  |  protected:  | 
121  |  |  | 
122  |  |   enum Rehash { | 
123  |  |     kRehash,  | 
124  |  |     kNoRehash  | 
125  |  |   };  | 
126  |  |  | 
127  |  |   // CAS the base field.  | 
128  | 3.18k  |   bool CasBase(int64_t cmp, int64_t val) { return base_.CompareAndSet(cmp, val); } | 
129  |  |  | 
130  |  |   // CAS the busy field from 0 to 1 to acquire the lock.  | 
131  | 119k  |   bool CasBusy() { return busy_.CompareAndSet(0, 1); } | 
132  |  |  | 
133  |  |   // Computes the function of the current and new value. Used in RetryUpdate.  | 
134  |  |   virtual int64_t Fn(int64_t current_value, int64_t new_value) = 0;  | 
135  |  |  | 
136  |  |   // Handles cases of updates involving initialization, resizing, creating new Cells, and/or  | 
137  |  |   // contention. See above for further explanation.  | 
138  |  |   void RetryUpdate(int64_t x, Rehash to_rehash);  | 
139  |  |  | 
140  |  |   // Sets base and all cells to the given value.  | 
141  |  |   void InternalReset(int64_t initialValue);  | 
142  |  |  | 
143  |  |   // Base value, used mainly when there is no contention, but also as a fallback during  | 
144  |  |   // table initialization races. Updated via CAS.  | 
145  |  |   striped64::internal::Cell base_;  | 
146  |  |  | 
147  |  |   // CAS lock used when resizing and/or creating cells.  | 
148  |  |   AtomicBool busy_;  | 
149  |  |  | 
150  |  |   // Backing buffer for cells_, used for alignment.  | 
151  |  |   void* cell_buffer_;  | 
152  |  |  | 
153  |  |   // Table of cells. When non-null, size is the nearest power of 2 >= NCPU.  | 
154  |  |   striped64::internal::Cell* cells_;  | 
155  |  |   int32_t num_cells_;  | 
156  |  | };  | 
157  |  |  | 
158  |  | // A 64-bit number optimized for high-volume concurrent updates.  | 
159  |  | // See Striped64 for a longer explanation of the inner workings.  | 
160  |  | class LongAdder : Striped64 { | 
161  |  |  public:  | 
162  | 11.9M  |   LongAdder() {} | 
163  |  |   void IncrementBy(int64_t x);  | 
164  |  |   void Increment() { IncrementBy(1); } | 
165  |  |   void Decrement() { IncrementBy(-1); } | 
166  |  |  | 
167  |  |   // Returns the current value.  | 
168  |  |   // Note this is not an atomic snapshot in the presence of concurrent updates.  | 
169  |  |   int64_t Value() const;  | 
170  |  |  | 
171  |  |   // Resets the counter state to zero.  | 
172  |  |   void Reset() { InternalReset(0); } | 
173  |  |  | 
174  |  |  private:  | 
175  | 152k  |   int64_t Fn(int64_t current_value, int64_t new_value) override { | 
176  | 152k  |     return current_value + new_value;  | 
177  | 152k  |   }  | 
178  |  |  | 
179  |  |   DISALLOW_COPY_AND_ASSIGN(LongAdder);  | 
180  |  | };  | 
181  |  |  | 
182  |  | } // namespace yb  | 
183  |  |  | 
184  |  | #endif // YB_UTIL_STRIPED64_H_  |