/Users/deen/code/yugabyte-db/src/yb/gutil/strings/charset.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2008 Google Inc. All Rights Reserved. |
2 | | // |
3 | | // The following only applies to changes made to this file as part of YugaByte development. |
4 | | // |
5 | | // Portions Copyright (c) YugaByte, Inc. |
6 | | // |
7 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
8 | | // in compliance with the License. You may obtain a copy of the License at |
9 | | // |
10 | | // http://www.apache.org/licenses/LICENSE-2.0 |
11 | | // |
12 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
13 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
14 | | // or implied. See the License for the specific language governing permissions and limitations |
15 | | // under the License. |
16 | | // |
17 | | |
18 | | #ifndef STRINGS_CHARSET_H_ |
19 | | #define STRINGS_CHARSET_H_ |
20 | | |
21 | | #include "yb/gutil/integral_types.h" |
22 | | |
23 | | namespace strings { |
24 | | |
25 | | // A CharSet is a simple map from (1-byte) characters to Booleans. It simply |
26 | | // exposes the mechanism of checking if a given character is in the set, fairly |
27 | | // efficiently. Useful for string tokenizing routines. |
28 | | // |
29 | | // Run on asherah (2 X 2400 MHz CPUs); 2008/11/10-13:18:03 |
30 | | // CPU: Intel Core2 (2 cores) dL1:32KB dL2:4096KB |
31 | | // ***WARNING*** CPU scaling is enabled, the benchmark timings may be noisy, |
32 | | // Benchmark Time(ns) CPU(ns) Iterations |
33 | | // ------------------------------------------------------- |
34 | | // BM_CharSetTesting/1K 21 21 32563138 |
35 | | // BM_CharSetTesting/4K 21 21 31968433 |
36 | | // BM_CharSetTesting/32K 21 21 32114953 |
37 | | // BM_CharSetTesting/256K 22 22 31679082 |
38 | | // BM_CharSetTesting/1M 21 21 32563138 |
39 | | // |
40 | | // This class is thread-compatible. |
41 | | // |
42 | | // This class has an implicit constructor. |
43 | | // Style guide exception granted: |
44 | | // http://goto/style-guide-exception-20978288 |
45 | | |
46 | | class CharSet { |
47 | | public: |
48 | | // Initialize a CharSet containing no characters or the given set of |
49 | | // characters, respectively. |
50 | | CharSet(); |
51 | | // Deliberately an implicit constructor, so anything that takes a CharSet |
52 | | // can also take an explicit list of characters. |
53 | | CharSet(const char* characters); // NOLINT(runtime/explicit) |
54 | | explicit CharSet(const CharSet& other); |
55 | | |
56 | | // Add or remove a character from the set. |
57 | 0 | void Add(unsigned char c) { bits_[Word(c)] |= BitMask(c); } |
58 | 0 | void Remove(unsigned char c) { bits_[Word(c)] &= ~BitMask(c); } |
59 | | |
60 | | // Return true if this character is in the set |
61 | 0 | bool Test(unsigned char c) const { return bits_[Word(c)] & BitMask(c); } |
62 | | |
63 | | private: |
64 | | // The numbers below are optimized for 64-bit hardware. TODO(user): In the |
65 | | // future, we should change this to use uword_t and do various bits of magic |
66 | | // to calculate the numbers at compile time. |
67 | | |
68 | | // In general, |
69 | | // static const int kNumWords = max(32 / sizeof(uword_t), 1); |
70 | | uint64 bits_[4]; |
71 | | |
72 | | // 4 words => the high 2 bits of c are the word number. In general, |
73 | | // kShiftValue = 8 - log2(kNumWords) |
74 | 0 | static int Word(unsigned char c) { return c >> 6; } |
75 | | |
76 | | // And the value we AND with c is ((1 << shift value) - 1) |
77 | | // static const int kLowBitsMask = (256 / kNumWords) - 1; |
78 | 0 | static uint64 BitMask(unsigned char c) { |
79 | 0 | uint64 mask = 1; |
80 | 0 | return mask << (c & 0x3f); |
81 | 0 | } |
82 | | }; |
83 | | |
84 | | } // namespace strings |
85 | | |
86 | | #endif // STRINGS_CHARSET_H_ |