/Users/deen/code/yugabyte-db/src/yb/common/id_mapping.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // The following only applies to changes made to this file as part of YugaByte development. |
19 | | // |
20 | | // Portions Copyright (c) YugaByte, Inc. |
21 | | // |
22 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
23 | | // in compliance with the License. You may obtain a copy of the License at |
24 | | // |
25 | | // http://www.apache.org/licenses/LICENSE-2.0 |
26 | | // |
27 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
28 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
29 | | // or implied. See the License for the specific language governing permissions and limitations |
30 | | // under the License. |
31 | | // |
32 | | #ifndef YB_COMMON_ID_MAPPING_H |
33 | | #define YB_COMMON_ID_MAPPING_H |
34 | | |
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

#include <glog/logging.h>
38 | | |
39 | | namespace yb { |
40 | | |
41 | | // Light-weight hashtable implementation for mapping a small number of |
42 | | // integers to other integers. |
43 | | // This is used by Schema to map from Column ID to column index. |
44 | | // |
45 | | // The implementation is an open-addressed hash table with linear probing. |
46 | | // The probing is limited to look only in the initial position and a single |
47 | | // position following. If neither position is free, the hashtable is doubled. |
48 | | // Therefore, the fill rate of the hashtable could be fairly bad, in the worst |
49 | | // case. However, in practice, we expect that most tables will have nearly |
50 | | // sequential column IDs (with only the occasional gap if a column has been removed). |
51 | | // Therefore, we expect to have very few collisions. |
52 | | // |
53 | | // The implementation takes care to only use power-of-2 sized bucket arrays so that |
54 | | // modulo can be calculated using bit masking. This improves performance substantially |
55 | | // since the 'div' instruction can take many cycles. |
56 | | // |
57 | | // NOTE: this map requires keys and values to be non-negative. '-1' is used |
58 | | // as a special identifier indicating that the slot is unused or that the key |
59 | | // was not found. |
60 | | class IdMapping { |
61 | | private: |
62 | | enum { |
63 | | kInitialCapacity = 64, |
64 | | kNumProbes = 2 |
65 | | }; |
66 | | typedef std::pair<int, int> value_type; |
67 | | |
68 | | public: |
69 | | static const int kNoEntry; |
70 | | |
71 | | IdMapping() : |
72 | | mask_(kInitialCapacity - 1), |
73 | 20.5M | entries_(kInitialCapacity) { |
74 | 20.5M | clear(); |
75 | 20.5M | } |
76 | | |
77 | | explicit IdMapping(const IdMapping& other) |
78 | | : mask_(other.mask_), |
79 | 0 | entries_(other.entries_) { |
80 | 0 | } |
81 | | |
82 | 19.7M | ~IdMapping() {} |
83 | | |
84 | 37.9M | void clear() { |
85 | 37.9M | ClearMap(&entries_); |
86 | 37.9M | } |
87 | | |
88 | | // NOLINT on this function definition because it thinks we're calling |
89 | | // std::swap instead of defining it. |
90 | 2 | void swap(IdMapping& other) { // NOLINT(*) |
91 | 2 | uint64_t tmp = other.mask_; |
92 | 2 | other.mask_ = mask_; |
93 | 2 | mask_ = tmp; |
94 | 2 | other.entries_.swap(entries_); |
95 | 2 | } |
96 | | |
97 | 1.77M | IdMapping& operator=(const IdMapping& other) { |
98 | 1.77M | mask_ = other.mask_; |
99 | 1.77M | entries_ = other.entries_; |
100 | 1.77M | return *this; |
101 | 1.77M | } |
102 | | |
103 | 72.6M | int operator[](int key) const { |
104 | 72.6M | return get(key); |
105 | 72.6M | } |
106 | | |
107 | 72.6M | int get(int key) const { |
108 | 72.6M | DCHECK_GE(key, 0); |
109 | 72.6M | for (int i = 0; i < kNumProbes; i++) { |
110 | 72.5M | int s = slot(key + i); |
111 | 72.5M | if (entries_[s].first == key || entries_[s].first == kNoEntry) { |
112 | 72.5M | return entries_[s].second; |
113 | 72.5M | } |
114 | 72.5M | } |
115 | 44.1k | return kNoEntry; |
116 | 72.6M | } |
117 | | |
118 | 37.1M | void set(int key, int val) { |
119 | 37.1M | DCHECK_GE(key, 0); |
120 | 37.1M | DCHECK_GE(val, 0); |
121 | 37.1M | while (true) { |
122 | 37.1M | for (int i = 0; i < kNumProbes; i++) { |
123 | 37.1M | int s = slot(key + i); |
124 | 1 | CHECK_NE(entries_[s].first, key) << "Cannot insert duplicate keys"; |
125 | 37.1M | if (entries_[s].first == kNoEntry) { |
126 | 37.1M | entries_[s].first = key; |
127 | 37.1M | entries_[s].second = val; |
128 | 37.1M | return; |
129 | 37.1M | } |
130 | 37.1M | } |
131 | | // Didn't find a spot. |
132 | 27.3k | DoubleCapacity(); |
133 | 27.3k | } |
134 | 37.1M | } |
135 | | |
136 | 15 | size_t capacity() const { |
137 | 15 | return mask_ + 1; |
138 | 15 | } |
139 | | |
140 | | // Returns the memory usage of this object without the object itself. Should |
141 | | // be used when embedded inside another object. |
142 | | size_t memory_footprint_excluding_this() const; |
143 | | |
144 | | // Returns the memory usage of this object including the object itself. |
145 | | // Should be used when allocated on the heap. |
146 | | size_t memory_footprint_including_this() const; |
147 | | |
148 | | private: |
149 | 109M | int slot(int key) const { |
150 | 109M | return key & mask_; |
151 | 109M | } |
152 | | |
153 | 15 | void DoubleCapacity() { |
154 | 15 | auto new_capacity = capacity() * 2; |
155 | 15 | std::vector<value_type> entries(new_capacity); |
156 | 15 | ClearMap(&entries); |
157 | 15 | mask_ = new_capacity - 1; |
158 | 15 | entries.swap(entries_); |
159 | | |
160 | 33.8k | for (const auto& entry : entries) { |
161 | 33.8k | if (entry.first != kNoEntry) { |
162 | 2.99k | set(entry.first, entry.second); |
163 | 2.99k | } |
164 | 33.8k | } |
165 | 15 | } |
166 | | |
167 | 37.9M | static void ClearMap(std::vector<value_type>* v) { |
168 | 2.39G | for (auto& entry : *v) { |
169 | 2.39G | entry = std::make_pair(kNoEntry, kNoEntry); |
170 | 2.39G | } |
171 | 37.9M | } |
172 | | |
173 | | uint64_t mask_; |
174 | | std::vector<value_type> entries_; |
175 | | }; |
176 | | |
177 | | } // namespace yb |
178 | | #endif /* YB_COMMON_ID_MAPPING_H */ |