/Users/deen/code/yugabyte-db/src/yb/util/random.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
2 | | // Use of this source code is governed by a BSD-style license that can be |
3 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
4 | | // |
5 | | // The following only applies to changes made to this file as part of YugaByte development. |
6 | | // |
7 | | // Portions Copyright (c) YugaByte, Inc. |
8 | | // |
9 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
10 | | // in compliance with the License. You may obtain a copy of the License at |
11 | | // |
12 | | // http://www.apache.org/licenses/LICENSE-2.0 |
13 | | // |
14 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
15 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
16 | | // or implied. See the License for the specific language governing permissions and limitations |
17 | | // under the License. |
18 | | // |
19 | | |
20 | | #ifndef YB_UTIL_RANDOM_H_ |
21 | | #define YB_UTIL_RANDOM_H_ |
22 | | |
23 | | #include <stdint.h> |
24 | | |
25 | | #include <cmath> |
26 | | #include <mutex> |
27 | | #include <vector> |
28 | | |
29 | | #include "yb/gutil/map-util.h" // For ContainsKey |
30 | | #include "yb/util/locks.h" |
31 | | |
32 | | namespace yb { |
33 | | |
34 | | namespace random_internal { |
35 | | |
36 | | static const uint32_t M = 2147483647L; // 2^31-1 |
37 | | const double kTwoPi = 6.283185307179586476925286; |
38 | | |
39 | | } // namespace random_internal |
40 | | |
41 | | // A very simple random number generator. Not especially good at |
42 | | // generating truly random bits, but good enough for our needs in this |
43 | | // package. This implementation is not thread-safe. |
44 | | class Random { |
45 | | private: |
46 | | uint32_t seed_; |
47 | | public: |
48 | 3.13M | explicit Random(uint32_t s) { |
49 | 3.13M | Reset(s); |
50 | 3.13M | } |
51 | | |
52 | | // Reset the RNG to the given seed value. |
53 | 3.13M | void Reset(uint32_t s) { |
54 | 3.13M | seed_ = s & 0x7fffffffu; |
55 | | // Avoid bad seeds. |
56 | 3.13M | if (seed_ == 0 || seed_ == random_internal::M3.13M ) { |
57 | 14 | seed_ = 1; |
58 | 14 | } |
59 | 3.13M | } |
60 | | |
61 | | // Next pseudo-random 32-bit unsigned integer. |
62 | | // FIXME: This currently only generates 31 bits of randomness. |
63 | | // The MSB will always be zero. |
64 | 225M | uint32_t Next() { |
65 | 225M | static const uint64_t A = 16807; // bits 14, 8, 7, 5, 2, 1, 0 |
66 | | // We are computing |
67 | | // seed_ = (seed_ * A) % M, where M = 2^31-1 |
68 | | // |
69 | | // seed_ must not be zero or M, or else all subsequent computed values |
70 | | // will be zero or M respectively. For all other values, seed_ will end |
71 | | // up cycling through every number in [1,M-1] |
72 | 225M | uint64_t product = seed_ * A; |
73 | | |
74 | | // Compute (product % M) using the fact that ((x << 31) % M) == x. |
75 | 225M | seed_ = static_cast<uint32_t>((product >> 31) + (product & random_internal::M)); |
76 | | // The first reduction may overflow by 1 bit, so we may need to |
77 | | // repeat. mod == M is not possible; using > allows the faster |
78 | | // sign-bit-based test. |
79 | 225M | if (seed_ > random_internal::M) { |
80 | 841 | seed_ -= random_internal::M; |
81 | 841 | } |
82 | 225M | return seed_; |
83 | 225M | } |
84 | | |
85 | | // Alias for consistency with Next64 |
86 | | uint32_t Next32() { return Next(); } |
87 | | |
88 | | // Next pseudo-random 64-bit unsigned integer. |
89 | | // FIXME: This currently only generates 62 bits of randomness due to Next() |
90 | | // only giving 31 bits of randomness. The 2 most significant bits will always |
91 | | // be zero. |
92 | 10.1M | uint64_t Next64() { |
93 | 10.1M | uint64_t large = Next(); |
94 | | // Only shift by 31 bits so we end up with zeros in MSB and not scattered |
95 | | // throughout the 64-bit word. This is due to the weakness in Next() noted |
96 | | // above. |
97 | 10.1M | large <<= 31; |
98 | 10.1M | large |= Next(); |
99 | 10.1M | return large; |
100 | 10.1M | } |
101 | | |
102 | | // Returns a uniformly distributed value in the range [0..n-1] |
103 | | // REQUIRES: n > 0 |
104 | 6.67M | uint32_t Uniform(uint32_t n) { return Next() % n; } |
105 | | |
106 | | // Alias for consistency with Uniform64 |
107 | 0 | uint32_t Uniform32(uint32_t n) { return Uniform(n); } |
108 | | |
109 | | // Returns a uniformly distributed 64-bit value in the range [0..n-1] |
110 | | // REQUIRES: n > 0 |
111 | | uint64_t Uniform64(uint64_t n) { return Next64() % n; } |
112 | | |
113 | | // Randomly returns true ~"1/n" of the time, and false otherwise. |
114 | | // REQUIRES: n > 0 |
115 | 0 | bool OneIn(int n) { return (Next() % n) == 0; } |
116 | | |
117 | | // Skewed: pick "base" uniformly from range [0,max_log] and then |
118 | | // return "base" random bits. The effect is to pick a number in the |
119 | | // range [0,2^max_log-1] with exponential bias towards smaller numbers. |
120 | 0 | uint32_t Skewed(int max_log) { |
121 | 0 | return Uniform(1 << Uniform(max_log + 1)); |
122 | 0 | } |
123 | | |
124 | | // Creates a normal distribution variable using the |
125 | | // Box-Muller transform. See: |
126 | | // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform |
127 | | // Adapted from WebRTC source code at: |
128 | | // webrtc/trunk/modules/video_coding/main/test/test_util.cc |
129 | 204k | double Normal(double mean, double std_dev) { |
130 | 204k | double uniform1 = (Next() + 1.0) / (random_internal::M + 1.0); |
131 | 204k | double uniform2 = (Next() + 1.0) / (random_internal::M + 1.0); |
132 | 204k | return (mean + std_dev * sqrt(-2 * ::log(uniform1)) * cos(random_internal::kTwoPi * uniform2)); |
133 | 204k | } |
134 | | |
135 | | // Return a random number between 0.0 and 1.0 inclusive. |
136 | 29.3M | double NextDoubleFraction() { |
137 | 29.3M | return Next() / static_cast<double>(random_internal::M + 1.0); |
138 | 29.3M | } |
139 | | |
140 | | // Sample 'k' random elements from the collection 'c' into 'result', taking care not to sample any |
141 | | // elements that are already present in 'avoid'. |
142 | | // |
143 | | // In the case that 'c' has fewer than 'k' elements then all elements in 'c' will be selected. |
144 | | // |
145 | | // 'c' should be an iterable STL collection such as a vector, set, or list. |
146 | | // 'avoid' should be an STL-compatible set. |
147 | | // |
148 | | // The results are not stored in a randomized order: the order of results will |
149 | | // match their order in the input collection. |
150 | | template<class Collection, class Set, class T> |
151 | | void ReservoirSample(const Collection& c, size_t k, const Set& avoid, |
152 | | std::vector<T>* result) { |
153 | | result->clear(); |
154 | | result->reserve(k); |
155 | | int i = 0; |
156 | | for (const T& elem : c) { |
157 | | if (ContainsKey(avoid, elem)) { |
158 | | continue; |
159 | | } |
160 | | i++; |
161 | | // Fill the reservoir if there is available space. |
162 | | if (result->size() < k) { |
163 | | result->push_back(elem); |
164 | | continue; |
165 | | } |
166 | | // Otherwise replace existing elements with decreasing probability. |
167 | | auto j = Uniform(i); |
168 | | if (j < k) { |
169 | | (*result)[j] = elem; |
170 | | } |
171 | | } |
172 | | } |
173 | | }; |
174 | | |
175 | | // Thread-safe wrapper around Random. |
176 | | class ThreadSafeRandom { |
177 | | public: |
178 | | explicit ThreadSafeRandom(uint32_t s) |
179 | 2.60M | : random_(s) { |
180 | 2.60M | } |
181 | | |
182 | 0 | void Reset(uint32_t s) { |
183 | 0 | std::lock_guard<simple_spinlock> l(lock_); |
184 | 0 | random_.Reset(s); |
185 | 0 | } |
186 | | |
187 | 0 | uint32_t Next() { |
188 | 0 | std::lock_guard<simple_spinlock> l(lock_); |
189 | 0 | return random_.Next(); |
190 | 0 | } |
191 | | |
192 | 0 | uint32_t Next32() { |
193 | 0 | std::lock_guard<simple_spinlock> l(lock_); |
194 | 0 | return random_.Next32(); |
195 | 0 | } |
196 | | |
197 | 0 | uint64_t Next64() { |
198 | 0 | std::lock_guard<simple_spinlock> l(lock_); |
199 | 0 | return random_.Next64(); |
200 | 0 | } |
201 | | |
202 | 0 | uint32_t Uniform(uint32_t n) { |
203 | 0 | std::lock_guard<simple_spinlock> l(lock_); |
204 | 0 | return random_.Uniform(n); |
205 | 0 | } |
206 | | |
207 | 0 | uint32_t Uniform32(uint32_t n) { |
208 | 0 | std::lock_guard<simple_spinlock> l(lock_); |
209 | 0 | return random_.Uniform32(n); |
210 | 0 | } |
211 | | |
212 | 0 | uint64_t Uniform64(uint64_t n) { |
213 | 0 | std::lock_guard<simple_spinlock> l(lock_); |
214 | 0 | return random_.Uniform64(n); |
215 | 0 | } |
216 | | |
217 | 0 | bool OneIn(int n) { |
218 | 0 | std::lock_guard<simple_spinlock> l(lock_); |
219 | 0 | return random_.OneIn(n); |
220 | 0 | } |
221 | | |
222 | 0 | uint32_t Skewed(int max_log) { |
223 | 0 | std::lock_guard<simple_spinlock> l(lock_); |
224 | 0 | return random_.Skewed(max_log); |
225 | 0 | } |
226 | | |
227 | 2.01k | double Normal(double mean, double std_dev) { |
228 | 2.01k | std::lock_guard<simple_spinlock> l(lock_); |
229 | 2.01k | return random_.Normal(mean, std_dev); |
230 | 2.01k | } |
231 | | |
232 | | template<class Collection, class Set, class T> |
233 | | void ReservoirSample(const Collection& c, int k, const Set& avoid, |
234 | | std::vector<T>* result) { |
235 | | std::lock_guard<simple_spinlock> l(lock_); |
236 | | random_.ReservoirSample(c, k, avoid, result); |
237 | | } |
238 | | |
239 | | private: |
240 | | simple_spinlock lock_; |
241 | | Random random_; |
242 | | }; |
243 | | |
244 | | |
245 | | |
246 | | } // namespace yb |
247 | | |
248 | | #endif // YB_UTIL_RANDOM_H_ |