YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/gutil/strings/charset.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2008 Google Inc. All Rights Reserved.
2
//
3
// The following only applies to changes made to this file as part of YugaByte development.
4
//
5
// Portions Copyright (c) YugaByte, Inc.
6
//
7
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
8
// in compliance with the License.  You may obtain a copy of the License at
9
//
10
// http://www.apache.org/licenses/LICENSE-2.0
11
//
12
// Unless required by applicable law or agreed to in writing, software distributed under the License
13
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
14
// or implied.  See the License for the specific language governing permissions and limitations
15
// under the License.
16
//
17
18
#ifndef STRINGS_CHARSET_H_
19
#define STRINGS_CHARSET_H_
20
21
#include "yb/gutil/integral_types.h"
22
23
namespace strings {
24
25
// A CharSet is a simple map from (1-byte) characters to Booleans. It simply
26
// exposes the mechanism of checking if a given character is in the set, fairly
27
// efficiently. Useful for string tokenizing routines.
28
//
29
// Run on asherah (2 X 2400 MHz CPUs); 2008/11/10-13:18:03
30
// CPU: Intel Core2 (2 cores) dL1:32KB dL2:4096KB
31
// ***WARNING*** CPU scaling is enabled, the benchmark timings may be noisy,
32
// Benchmark                Time(ns)    CPU(ns) Iterations
33
// -------------------------------------------------------
34
// BM_CharSetTesting/1K           21         21   32563138
35
// BM_CharSetTesting/4K           21         21   31968433
36
// BM_CharSetTesting/32K          21         21   32114953
37
// BM_CharSetTesting/256K         22         22   31679082
38
// BM_CharSetTesting/1M           21         21   32563138
39
//
40
// This class is thread-compatible.
41
//
42
// This class has an implicit constructor.
43
// Style guide exception granted:
44
// http://goto/style-guide-exception-20978288
45
46
class CharSet {
47
 public:
48
  // Initialize a CharSet containing no characters or the given set of
49
  // characters, respectively.
50
  CharSet();
51
  // Deliberately an implicit constructor, so anything that takes a CharSet
52
  // can also take an explicit list of characters.
53
  CharSet(const char* characters);  // NOLINT(runtime/explicit)
54
  explicit CharSet(const CharSet& other);
55
56
  // Add or remove a character from the set.
57
0
  void Add(unsigned char c) { bits_[Word(c)] |= BitMask(c); }
58
0
  void Remove(unsigned char c) { bits_[Word(c)] &= ~BitMask(c); }
59
60
  // Return true if this character is in the set
61
0
  bool Test(unsigned char c) const { return bits_[Word(c)] & BitMask(c); }
62
63
 private:
64
  // The numbers below are optimized for 64-bit hardware. TODO(user): In the
65
  // future, we should change this to use uword_t and do various bits of magic
66
  // to calculate the numbers at compile time.
67
68
  // In general,
69
  // static const int kNumWords = max(32 / sizeof(uword_t), 1);
70
  uint64 bits_[4];
71
72
  // 4 words => the high 2 bits of c are the word number. In general,
73
  // kShiftValue = 8 - log2(kNumWords)
74
0
  static int Word(unsigned char c) { return c >> 6; }
75
76
  // And the value we AND with c is ((1 << shift value) - 1)
77
  // static const int kLowBitsMask = (256 / kNumWords) - 1;
78
0
  static uint64 BitMask(unsigned char c) {
79
0
    uint64 mask = 1;
80
0
    return mask << (c & 0x3f);
81
0
  }
82
};
83
84
}  // namespace strings
85
86
#endif  // STRINGS_CHARSET_H_