/Users/deen/code/yugabyte-db/src/yb/util/random-test.cc

Source
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
//
// The following only applies to changes made to this file as part of YugaByte development.
//
// Portions Copyright (c) YugaByte, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// in compliance with the License.  You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied.  See the License for the specific language governing permissions and limitations
// under the License.
//

#include <limits>
#include <unordered_set>

#include "yb/util/random.h"
#include "yb/util/test_util.h"

namespace yb {

class RandomTest : public YBTest {
 public:
  RandomTest()
      : rng_(SeedRandom()) {
  }

 protected:
  Random rng_;
};

// Tests that after a certain number of invocations of Normal(), the
// actual mean of all samples is within the specified standard
// deviation of the target mean.
TEST_F(RandomTest, TestNormalDist) {
  const double kMean = 5.0;
  const double kStdDev = 0.01;
  const int kNumIters = 100000;

  double sum = 0.0;
  for (int i = 0; i < kNumIters; ++i) {
    sum += rng_.Normal(kMean, kStdDev);
  }

  ASSERT_LE(fabs((sum / static_cast<double>(kNumIters)) - kMean), kStdDev);
}

// Tests that after a large number of invocations of Next32() and Next64(), we
// have flipped all the bits we claim we should have.
//
// This is a regression test for a bug where we were incorrectly bit-shifting
// in Next64().
//
// Note: Our RNG actually only generates 31 bits of randomness for 32 bit
// integers and 62 bits for 64 bit integers. So this test reflects that, and if
// we change the RNG algo this test should also change.
TEST_F(RandomTest, TestUseOfBits) {
  uint32_t ones32 = std::numeric_limits<uint32_t>::max();
  uint32_t zeroes32 = 0;
  uint64_t ones64 = std::numeric_limits<uint64_t>::max();
  uint64_t zeroes64 = 0;

  for (int i = 0; i < 10000000; i++) {
    uint32_t r32 = rng_.Next32();
    ones32 &= r32;
    zeroes32 |= r32;

    uint64_t r64 = rng_.Next64();
    ones64 &= r64;
    zeroes64 |= r64;
  }

  // At the end, we should have flipped 31 and 62 bits, respectively. One
  // detail of the current RNG impl is that Next32() always returns a number
  // with MSB set to 0, and Next64() always returns a number with the first
  // two bits set to zero.
  uint32_t expected_bits_31 = std::numeric_limits<uint32_t>::max() >> 1;
  uint64_t expected_bits_62 = std::numeric_limits<uint64_t>::max() >> 2;

  ASSERT_EQ(0, ones32);
  ASSERT_EQ(expected_bits_31, zeroes32);
  ASSERT_EQ(0, ones64);
  ASSERT_EQ(expected_bits_62, zeroes64);
}

TEST_F(RandomTest, TestResetSeed) {
  rng_.Reset(1);
  uint64_t first = rng_.Next64();
  rng_.Reset(1);
  uint64_t second = rng_.Next64();
  ASSERT_EQ(first, second);
}

TEST_F(RandomTest, TestReservoirSample) {
  // Use a constant seed to avoid flakiness.
  rng_.Reset(12345);

  vector<int> population;
  for (int i = 0; i < 100; i++) {
    population.push_back(i);
  }

  // Run 1000 trials selecting 5 elements.
  vector<int> results;
  vector<int> counts(population.size());
  std::unordered_set<int> avoid;
  for (int trial = 0; trial < 1000; trial++) {
    rng_.ReservoirSample(population, 5, avoid, &results);
    for (int result : results) {
      counts[result]++;
    }
  }

  // We expect each element to be selected
  // 50 times on average, but since it's random, it won't be exact.
  // However, since we use a constant seed, this test won't be flaky.
  for (int count : counts) {
    ASSERT_GE(count, 25);
    ASSERT_LE(count, 75);
  }

  // Run again, but avoid some particular entries.
  avoid.insert(3);
  avoid.insert(10);
  avoid.insert(20);
  counts.assign(100, 0);
  for (int trial = 0; trial < 1000; trial++) {
    rng_.ReservoirSample(population, 5, avoid, &results);
    for (int result : results) {
      counts[result]++;
    }
  }

  // Ensure that we didn't ever pick the avoided elements.
  ASSERT_EQ(0, counts[3]);
  ASSERT_EQ(0, counts[10]);
  ASSERT_EQ(0, counts[20]);
}

TEST_F(RandomTest, TestReservoirSamplePopulationTooSmall) {
  vector<int> population;
  for (int i = 0; i < 10; i++) {
    population.push_back(i);
  }

  vector<int> results;
  std::unordered_set<int> avoid;
  rng_.ReservoirSample(population, 20, avoid, &results);
  ASSERT_EQ(population.size(), results.size());
  ASSERT_EQ(population, results);

  rng_.ReservoirSample(population, 10, avoid, &results);
  ASSERT_EQ(population.size(), results.size());
  ASSERT_EQ(population, results);
}

} // namespace yb

Line	Count	Source
1		// Licensed to the Apache Software Foundation (ASF) under one
2		// or more contributor license agreements. See the NOTICE file
3		// distributed with this work for additional information
4		// regarding copyright ownership. The ASF licenses this file
5		// to you under the Apache License, Version 2.0 (the
6		// "License"); you may not use this file except in compliance
7		// with the License. You may obtain a copy of the License at
8		//
9		// http://www.apache.org/licenses/LICENSE-2.0
10		//
11		// Unless required by applicable law or agreed to in writing,
12		// software distributed under the License is distributed on an
13		// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14		// KIND, either express or implied. See the License for the
15		// specific language governing permissions and limitations
16		// under the License.
17		//
18		// The following only applies to changes made to this file as part of YugaByte development.
19		//
20		// Portions Copyright (c) YugaByte, Inc.
21		//
22		// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
23		// in compliance with the License. You may obtain a copy of the License at
24		//
25		// http://www.apache.org/licenses/LICENSE-2.0
26		//
27		// Unless required by applicable law or agreed to in writing, software distributed under the License
28		// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
29		// or implied. See the License for the specific language governing permissions and limitations
30		// under the License.
31		//
32
33		#include <limits>
34		#include <unordered_set>
35
36		#include "yb/util/random.h"
37		#include "yb/util/test_util.h"
38
39		namespace yb {
40
41		class RandomTest : public YBTest {
42		public:
43		RandomTest()
44	5	: rng_(SeedRandom()) {
45	5	}
46
47		protected:
48		Random rng_;
49		};
50
51		// Tests that after a certain number of invocations of Normal(), the
52		// actual mean of all samples is within the specified standard
53		// deviation of the target mean.
54	1	TEST_F(RandomTest, TestNormalDist) {
55	1	const double kMean = 5.0;
56	1	const double kStdDev = 0.01;
57	1	const int kNumIters = 100000;
58
59	1	double sum = 0.0;
60	100k	for (int i = 0; i < kNumIters; ++i) {
61	100k	sum += rng_.Normal(kMean, kStdDev);
62	100k	}
63
64	1	ASSERT_LE(fabs((sum / static_cast<double>(kNumIters)) - kMean), kStdDev);
65	1	}
66
67		// Tests that after a large number of invocations of Next32() and Next64(), we
68		// have flipped all the bits we claim we should have.
69		//
70		// This is a regression test for a bug where we were incorrectly bit-shifting
71		// in Next64().
72		//
73		// Note: Our RNG actually only generates 31 bits of randomness for 32 bit
74		// integers and 62 bits for 64 bit integers. So this test reflects that, and if
75		// we change the RNG algo this test should also change.
76	1	TEST_F(RandomTest, TestUseOfBits) {
77	1	uint32_t ones32 = std::numeric_limits<uint32_t>::max();
78	1	uint32_t zeroes32 = 0;
79	1	uint64_t ones64 = std::numeric_limits<uint64_t>::max();
80	1	uint64_t zeroes64 = 0;
81
82	10.0M	for (int i = 0; i < 10000000; i++) {
83	10.0M	uint32_t r32 = rng_.Next32();
84	10.0M	ones32 &= r32;
85	10.0M	zeroes32 \|= r32;
86
87	10.0M	uint64_t r64 = rng_.Next64();
88	10.0M	ones64 &= r64;
89	10.0M	zeroes64 \|= r64;
90	10.0M	}
91
92		// At the end, we should have flipped 31 and 62 bits, respectively. One
93		// detail of the current RNG impl is that Next32() always returns a number
94		// with MSB set to 0, and Next64() always returns a number with the first
95		// two bits set to zero.
96	1	uint32_t expected_bits_31 = std::numeric_limits<uint32_t>::max() >> 1;
97	1	uint64_t expected_bits_62 = std::numeric_limits<uint64_t>::max() >> 2;
98
99	1	ASSERT_EQ(0, ones32);
100	1	ASSERT_EQ(expected_bits_31, zeroes32);
101	1	ASSERT_EQ(0, ones64);
102	1	ASSERT_EQ(expected_bits_62, zeroes64);
103	1	}
104
105	1	TEST_F(RandomTest, TestResetSeed) {
106	1	rng_.Reset(1);
107	1	uint64_t first = rng_.Next64();
108	1	rng_.Reset(1);
109	1	uint64_t second = rng_.Next64();
110	1	ASSERT_EQ(first, second);
111	1	}
112
113	1	TEST_F(RandomTest, TestReservoirSample) {
114		// Use a constant seed to avoid flakiness.
115	1	rng_.Reset(12345);
116
117	1	vector<int> population;
118	101	for (int i = 0; i < 100; i++) {
119	100	population.push_back(i);
120	100	}
121
122		// Run 1000 trials selecting 5 elements.
123	1	vector<int> results;
124	1	vector<int> counts(population.size());
125	1	std::unordered_set<int> avoid;
126	1.00k	for (int trial = 0; trial < 1000; trial++) {
127	1.00k	rng_.ReservoirSample(population, 5, avoid, &results);
128	5.00k	for (int result : results) {
129	5.00k	counts[result]++;
130	5.00k	}
131	1.00k	}
132
133		// We expect each element to be selected
134		// 50 times on average, but since it's random, it won't be exact.
135		// However, since we use a constant seed, this test won't be flaky.
136	100	for (int count : counts) {
137	100	ASSERT_GE(count, 25);
138	100	ASSERT_LE(count, 75);
139	100	}
140
141		// Run again, but avoid some particular entries.
142	1	avoid.insert(3);
143	1	avoid.insert(10);
144	1	avoid.insert(20);
145	1	counts.assign(100, 0);
146	1.00k	for (int trial = 0; trial < 1000; trial++) {
147	1.00k	rng_.ReservoirSample(population, 5, avoid, &results);
148	5.00k	for (int result : results) {
149	5.00k	counts[result]++;
150	5.00k	}
151	1.00k	}
152
153		// Ensure that we didn't ever pick the avoided elements.
154	1	ASSERT_EQ(0, counts[3]);
155	1	ASSERT_EQ(0, counts[10]);
156	1	ASSERT_EQ(0, counts[20]);
157	1	}
158
159	1	TEST_F(RandomTest, TestReservoirSamplePopulationTooSmall) {
160	1	vector<int> population;
161	11	for (int i = 0; i < 10; i++) {
162	10	population.push_back(i);
163	10	}
164
165	1	vector<int> results;
166	1	std::unordered_set<int> avoid;
167	1	rng_.ReservoirSample(population, 20, avoid, &results);
168	1	ASSERT_EQ(population.size(), results.size());
169	1	ASSERT_EQ(population, results);
170
171	1	rng_.ReservoirSample(population, 10, avoid, &results);
172	1	ASSERT_EQ(population.size(), results.size());
173	1	ASSERT_EQ(population, results);
174	1	}
175
176		} // namespace yb

YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30