/Users/deen/code/yugabyte-db/src/yb/util/random.h

Source (jump to first uncovered line)
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// The following only applies to changes made to this file as part of YugaByte development.
//
// Portions Copyright (c) YugaByte, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// in compliance with the License.  You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied.  See the License for the specific language governing permissions and limitations
// under the License.
//

#ifndef YB_UTIL_RANDOM_H_
#define YB_UTIL_RANDOM_H_

#include <stdint.h>

#include <cmath>
#include <mutex>
#include <vector>

#include "yb/gutil/map-util.h" // For ContainsKey
#include "yb/util/locks.h"

namespace yb {

namespace random_internal {

static const uint32_t M = 2147483647L;   // 2^31-1
const double kTwoPi = 6.283185307179586476925286;

} // namespace random_internal

// A very simple random number generator.  Not especially good at
// generating truly random bits, but good enough for our needs in this
// package. This implementation is not thread-safe.
class Random {
 private:
  uint32_t seed_;
 public:
  explicit Random(uint32_t s) {
    Reset(s);
  }

  // Reset the RNG to the given seed value.
  void Reset(uint32_t s) {
    seed_ = s & 0x7fffffffu;
    // Avoid bad seeds.
    if (seed_ == 0 || seed_ == random_internal::M3.13M) {
      seed_ = 1;
    }
  }

  // Next pseudo-random 32-bit unsigned integer.
  // FIXME: This currently only generates 31 bits of randomness.
  // The MSB will always be zero.
  uint32_t Next() {
    static const uint64_t A = 16807;  // bits 14, 8, 7, 5, 2, 1, 0
    // We are computing
    //       seed_ = (seed_ * A) % M,    where M = 2^31-1
    //
    // seed_ must not be zero or M, or else all subsequent computed values
    // will be zero or M respectively.  For all other values, seed_ will end
    // up cycling through every number in [1,M-1]
    uint64_t product = seed_ * A;

    // Compute (product % M) using the fact that ((x << 31) % M) == x.
    seed_ = static_cast<uint32_t>((product >> 31) + (product & random_internal::M));
    // The first reduction may overflow by 1 bit, so we may need to
    // repeat.  mod == M is not possible; using > allows the faster
    // sign-bit-based test.
    if (seed_ > random_internal::M) {
      seed_ -= random_internal::M;
    }
    return seed_;
  }

  // Alias for consistency with Next64
  uint32_t Next32() { return Next(); }

  // Next pseudo-random 64-bit unsigned integer.
  // FIXME: This currently only generates 62 bits of randomness due to Next()
  // only giving 31 bits of randomness. The 2 most significant bits will always
  // be zero.
  uint64_t Next64() {
    uint64_t large = Next();
    // Only shift by 31 bits so we end up with zeros in MSB and not scattered
    // throughout the 64-bit word. This is due to the weakness in Next() noted
    // above.
    large <<= 31;
    large |= Next();
    return large;
  }

  // Returns a uniformly distributed value in the range [0..n-1]
  // REQUIRES: n > 0
  uint32_t Uniform(uint32_t n) { return Next() % n; }

  // Alias for consistency with Uniform64
  uint32_t Uniform32(uint32_t n) { return Uniform(n); }

  // Returns a uniformly distributed 64-bit value in the range [0..n-1]
  // REQUIRES: n > 0
  uint64_t Uniform64(uint64_t n) { return Next64() % n; }

  // Randomly returns true ~"1/n" of the time, and false otherwise.
  // REQUIRES: n > 0
  bool OneIn(int n) { return (Next() % n) == 0; }

  // Skewed: pick "base" uniformly from range [0,max_log] and then
  // return "base" random bits.  The effect is to pick a number in the
  // range [0,2^max_log-1] with exponential bias towards smaller numbers.
  uint32_t Skewed(int max_log) {
    return Uniform(1 << Uniform(max_log + 1));
  }

  // Creates a normal distribution variable using the
  // Box-Muller transform. See:
  // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
  // Adapted from WebRTC source code at:
  // webrtc/trunk/modules/video_coding/main/test/test_util.cc
  double Normal(double mean, double std_dev) {
    double uniform1 = (Next() + 1.0) / (random_internal::M + 1.0);
    double uniform2 = (Next() + 1.0) / (random_internal::M + 1.0);
    return (mean + std_dev * sqrt(-2 * ::log(uniform1)) * cos(random_internal::kTwoPi * uniform2));
  }

  // Return a random number between 0.0 and 1.0 inclusive.
  double NextDoubleFraction() {
    return Next() / static_cast<double>(random_internal::M + 1.0);
  }

  // Sample 'k' random elements from the collection 'c' into 'result', taking care not to sample any
  // elements that are already present in 'avoid'.
  //
  // In the case that 'c' has fewer than 'k' elements then all elements in 'c' will be selected.
  //
  // 'c' should be an iterable STL collection such as a vector, set, or list.
  // 'avoid' should be an STL-compatible set.
  //
  // The results are not stored in a randomized order: the order of results will
  // match their order in the input collection.
  template<class Collection, class Set, class T>
  void ReservoirSample(const Collection& c, size_t k, const Set& avoid,
                       std::vector<T>* result) {
    result->clear();
    result->reserve(k);
    int i = 0;
    for (const T& elem : c) {
      if (ContainsKey(avoid, elem)) {
        continue;
      }
      i++;
      // Fill the reservoir if there is available space.
      if (result->size() < k) {
        result->push_back(elem);
        continue;
      }
      // Otherwise replace existing elements with decreasing probability.
      auto j = Uniform(i);
      if (j < k) {
        (*result)[j] = elem;
      }
    }
  }
};

// Thread-safe wrapper around Random.
class ThreadSafeRandom {
 public:
  explicit ThreadSafeRandom(uint32_t s)
      : random_(s) {
  }

  void Reset(uint32_t s) {
    std::lock_guard<simple_spinlock> l(lock_);
    random_.Reset(s);
  }

  uint32_t Next() {
    std::lock_guard<simple_spinlock> l(lock_);
    return random_.Next();
  }

  uint32_t Next32() {
    std::lock_guard<simple_spinlock> l(lock_);
    return random_.Next32();
  }

  uint64_t Next64() {
    std::lock_guard<simple_spinlock> l(lock_);
    return random_.Next64();
  }

  uint32_t Uniform(uint32_t n) {
    std::lock_guard<simple_spinlock> l(lock_);
    return random_.Uniform(n);
  }

  uint32_t Uniform32(uint32_t n) {
    std::lock_guard<simple_spinlock> l(lock_);
    return random_.Uniform32(n);
  }

  uint64_t Uniform64(uint64_t n) {
    std::lock_guard<simple_spinlock> l(lock_);
    return random_.Uniform64(n);
  }

  bool OneIn(int n) {
    std::lock_guard<simple_spinlock> l(lock_);
    return random_.OneIn(n);
  }

  uint32_t Skewed(int max_log) {
    std::lock_guard<simple_spinlock> l(lock_);
    return random_.Skewed(max_log);
  }

  double Normal(double mean, double std_dev) {
    std::lock_guard<simple_spinlock> l(lock_);
    return random_.Normal(mean, std_dev);
  }

  template<class Collection, class Set, class T>
  void ReservoirSample(const Collection& c, int k, const Set& avoid,
                       std::vector<T>* result) {
    std::lock_guard<simple_spinlock> l(lock_);
    random_.ReservoirSample(c, k, avoid, result);
  }

 private:
  simple_spinlock lock_;
  Random random_;
};



}  // namespace yb

#endif // YB_UTIL_RANDOM_H_

YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

Line	Count	Source (jump to first uncovered line)
1		// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2		// Use of this source code is governed by a BSD-style license that can be
3		// found in the LICENSE file. See the AUTHORS file for names of contributors.
4		//
5		// The following only applies to changes made to this file as part of YugaByte development.
6		//
7		// Portions Copyright (c) YugaByte, Inc.
8		//
9		// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
10		// in compliance with the License. You may obtain a copy of the License at
11		//
12		// http://www.apache.org/licenses/LICENSE-2.0
13		//
14		// Unless required by applicable law or agreed to in writing, software distributed under the License
15		// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
16		// or implied. See the License for the specific language governing permissions and limitations
17		// under the License.
18		//
19
20		#ifndef YB_UTIL_RANDOM_H_
21		#define YB_UTIL_RANDOM_H_
22
23		#include <stdint.h>
24
25		#include <cmath>
26		#include <mutex>
27		#include <vector>
28
29		#include "yb/gutil/map-util.h" // For ContainsKey
30		#include "yb/util/locks.h"
31
32		namespace yb {
33
34		namespace random_internal {
35
36		static const uint32_t M = 2147483647L; // 2^31-1
37		const double kTwoPi = 6.283185307179586476925286;
38
39		} // namespace random_internal
40
41		// A very simple random number generator. Not especially good at
42		// generating truly random bits, but good enough for our needs in this
43		// package. This implementation is not thread-safe.
44		class Random {
45		private:
46		uint32_t seed_;
47		public:
48	3.13M	explicit Random(uint32_t s) {
49	3.13M	Reset(s);
50	3.13M	}
51
52		// Reset the RNG to the given seed value.
53	3.13M	void Reset(uint32_t s) {
54	3.13M	seed_ = s & 0x7fffffffu;
55		// Avoid bad seeds.
56	3.13M	if (seed_ == 0 \|\| seed_ == random_internal::M3.13M ) {
57	14	seed_ = 1;
58	14	}
59	3.13M	}
60
61		// Next pseudo-random 32-bit unsigned integer.
62		// FIXME: This currently only generates 31 bits of randomness.
63		// The MSB will always be zero.
64	225M	uint32_t Next() {
65	225M	static const uint64_t A = 16807; // bits 14, 8, 7, 5, 2, 1, 0
66		// We are computing
67		// seed_ = (seed_ * A) % M, where M = 2^31-1
68		//
69		// seed_ must not be zero or M, or else all subsequent computed values
70		// will be zero or M respectively. For all other values, seed_ will end
71		// up cycling through every number in [1,M-1]
72	225M	uint64_t product = seed_ * A;
73
74		// Compute (product % M) using the fact that ((x << 31) % M) == x.
75	225M	seed_ = static_cast<uint32_t>((product >> 31) + (product & random_internal::M));
76		// The first reduction may overflow by 1 bit, so we may need to
77		// repeat. mod == M is not possible; using > allows the faster
78		// sign-bit-based test.
79	225M	if (seed_ > random_internal::M) {
80	841	seed_ -= random_internal::M;
81	841	}
82	225M	return seed_;
83	225M	}
84
85		// Alias for consistency with Next64
86		uint32_t Next32() { return Next(); }
87
88		// Next pseudo-random 64-bit unsigned integer.
89		// FIXME: This currently only generates 62 bits of randomness due to Next()
90		// only giving 31 bits of randomness. The 2 most significant bits will always
91		// be zero.
92	10.1M	uint64_t Next64() {
93	10.1M	uint64_t large = Next();
94		// Only shift by 31 bits so we end up with zeros in MSB and not scattered
95		// throughout the 64-bit word. This is due to the weakness in Next() noted
96		// above.
97	10.1M	large <<= 31;
98	10.1M	large \|= Next();
99	10.1M	return large;
100	10.1M	}
101
102		// Returns a uniformly distributed value in the range [0..n-1]
103		// REQUIRES: n > 0
104	6.67M	uint32_t Uniform(uint32_t n) { return Next() % n; }
105
106		// Alias for consistency with Uniform64
107	0	uint32_t Uniform32(uint32_t n) { return Uniform(n); }
108
109		// Returns a uniformly distributed 64-bit value in the range [0..n-1]
110		// REQUIRES: n > 0
111		uint64_t Uniform64(uint64_t n) { return Next64() % n; }
112
113		// Randomly returns true ~"1/n" of the time, and false otherwise.
114		// REQUIRES: n > 0
115	0	bool OneIn(int n) { return (Next() % n) == 0; }
116
117		// Skewed: pick "base" uniformly from range [0,max_log] and then
118		// return "base" random bits. The effect is to pick a number in the
119		// range [0,2^max_log-1] with exponential bias towards smaller numbers.
120	0	uint32_t Skewed(int max_log) {
121	0	return Uniform(1 << Uniform(max_log + 1));
122	0	}
123
124		// Creates a normal distribution variable using the
125		// Box-Muller transform. See:
126		// http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
127		// Adapted from WebRTC source code at:
128		// webrtc/trunk/modules/video_coding/main/test/test_util.cc
129	204k	double Normal(double mean, double std_dev) {
130	204k	double uniform1 = (Next() + 1.0) / (random_internal::M + 1.0);
131	204k	double uniform2 = (Next() + 1.0) / (random_internal::M + 1.0);
132	204k	return (mean + std_dev * sqrt(-2 * ::log(uniform1)) * cos(random_internal::kTwoPi * uniform2));
133	204k	}
134
135		// Return a random number between 0.0 and 1.0 inclusive.
136	29.3M	double NextDoubleFraction() {
137	29.3M	return Next() / static_cast<double>(random_internal::M + 1.0);
138	29.3M	}
139
140		// Sample 'k' random elements from the collection 'c' into 'result', taking care not to sample any
141		// elements that are already present in 'avoid'.
142		//
143		// In the case that 'c' has fewer than 'k' elements then all elements in 'c' will be selected.
144		//
145		// 'c' should be an iterable STL collection such as a vector, set, or list.
146		// 'avoid' should be an STL-compatible set.
147		//
148		// The results are not stored in a randomized order: the order of results will
149		// match their order in the input collection.
150		template<class Collection, class Set, class T>
151		void ReservoirSample(const Collection& c, size_t k, const Set& avoid,
152		std::vector<T>* result) {
153		result->clear();
154		result->reserve(k);
155		int i = 0;
156		for (const T& elem : c) {
157		if (ContainsKey(avoid, elem)) {
158		continue;
159		}
160		i++;
161		// Fill the reservoir if there is available space.
162		if (result->size() < k) {
163		result->push_back(elem);
164		continue;
165		}
166		// Otherwise replace existing elements with decreasing probability.
167		auto j = Uniform(i);
168		if (j < k) {
169		(*result)[j] = elem;
170		}
171		}
172		}
173		};
174
175		// Thread-safe wrapper around Random.
176		class ThreadSafeRandom {
177		public:
178		explicit ThreadSafeRandom(uint32_t s)
179	2.60M	: random_(s) {
180	2.60M	}
181
182	0	void Reset(uint32_t s) {
183	0	std::lock_guard<simple_spinlock> l(lock_);
184	0	random_.Reset(s);
185	0	}
186
187	0	uint32_t Next() {
188	0	std::lock_guard<simple_spinlock> l(lock_);
189	0	return random_.Next();
190	0	}
191
192	0	uint32_t Next32() {
193	0	std::lock_guard<simple_spinlock> l(lock_);
194	0	return random_.Next32();
195	0	}
196
197	0	uint64_t Next64() {
198	0	std::lock_guard<simple_spinlock> l(lock_);
199	0	return random_.Next64();
200	0	}
201
202	0	uint32_t Uniform(uint32_t n) {
203	0	std::lock_guard<simple_spinlock> l(lock_);
204	0	return random_.Uniform(n);
205	0	}
206
207	0	uint32_t Uniform32(uint32_t n) {
208	0	std::lock_guard<simple_spinlock> l(lock_);
209	0	return random_.Uniform32(n);
210	0	}
211
212	0	uint64_t Uniform64(uint64_t n) {
213	0	std::lock_guard<simple_spinlock> l(lock_);
214	0	return random_.Uniform64(n);
215	0	}
216
217	0	bool OneIn(int n) {
218	0	std::lock_guard<simple_spinlock> l(lock_);
219	0	return random_.OneIn(n);
220	0	}
221
222	0	uint32_t Skewed(int max_log) {
223	0	std::lock_guard<simple_spinlock> l(lock_);
224	0	return random_.Skewed(max_log);
225	0	}
226
227	2.01k	double Normal(double mean, double std_dev) {
228	2.01k	std::lock_guard<simple_spinlock> l(lock_);
229	2.01k	return random_.Normal(mean, std_dev);
230	2.01k	}
231
232		template<class Collection, class Set, class T>
233		void ReservoirSample(const Collection& c, int k, const Set& avoid,
234		std::vector<T>* result) {
235		std::lock_guard<simple_spinlock> l(lock_);
236		random_.ReservoirSample(c, k, avoid, result);
237		}
238
239		private:
240		simple_spinlock lock_;
241		Random random_;
242		};
243
244
245
246		} // namespace yb
247
248		#endif // YB_UTIL_RANDOM_H_