/Users/deen/code/yugabyte-db/src/yb/gutil/cpu.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 | | // Use of this source code is governed by a BSD-style license that can be |
3 | | // found in the LICENSE file. |
4 | | // |
5 | | // The following only applies to changes made to this file as part of YugaByte development. |
6 | | // |
7 | | // Portions Copyright (c) YugaByte, Inc. |
8 | | // |
9 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
10 | | // in compliance with the License. You may obtain a copy of the License at |
11 | | // |
12 | | // http://www.apache.org/licenses/LICENSE-2.0 |
13 | | // |
14 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
15 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
16 | | // or implied. See the License for the specific language governing permissions and limitations |
17 | | // under the License. |
18 | | // |
19 | | |
20 | | #include "yb/gutil/cpu.h" |
21 | | |
22 | | #include <string.h> |
23 | | |
24 | | |
25 | | #include "yb/gutil/integral_types.h" |
26 | | |
27 | | #if defined(__x86_64__) |
28 | | #if defined(_MSC_VER) |
29 | | #include <intrin.h> |
30 | | #include <immintrin.h> // For _xgetbv() |
31 | | #endif |
32 | | #endif |
33 | | |
34 | | namespace base { |
35 | | |
36 | | CPU::CPU() |
37 | | : signature_(0), |
38 | | type_(0), |
39 | | family_(0), |
40 | | model_(0), |
41 | | stepping_(0), |
42 | | ext_model_(0), |
43 | | ext_family_(0), |
44 | | has_mmx_(false), |
45 | | has_sse_(false), |
46 | | has_sse2_(false), |
47 | | has_sse3_(false), |
48 | | has_ssse3_(false), |
49 | | has_sse41_(false), |
50 | | has_sse42_(false), |
51 | | has_avx_(false), |
52 | | has_avx2_(false), |
53 | | has_aesni_(false), |
54 | | has_non_stop_time_stamp_counter_(false), |
55 | | has_broken_neon_(false), |
56 | 45.8k | cpu_vendor_("unknown") { |
57 | 45.8k | Initialize(); |
58 | 45.8k | } |
59 | | |
60 | | namespace { |
61 | | |
62 | | #if defined(__x86_64__) |
63 | | #ifndef _MSC_VER |
64 | | |
65 | | #if defined(__pic__) && defined(__i386__) |
66 | | |
// Variant of __cpuid selected when building position-independent i386 code.
// Executes CPUID for leaf |info_type| and stores EAX, EBX, ECX and EDX into
// cpu_info[0..3].
//
// EBX is not named as an output here: its value is parked in EDI before the
// instruction and swapped back afterwards, so the EBX result is delivered
// through EDI while the register itself is left as it was (presumably because
// EBX is reserved in i386 PIC code -- confirm against the target ABI).
//
// ECX is explicitly zeroed on entry. Leaves such as 7 select a sub-leaf
// through ECX, so leaving it uninitialized would make the reported feature
// bits depend on whatever the register happened to contain.
void __cpuid(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "mov %%ebx, %%edi\n"
    "cpuid\n"
    "xchg %%edi, %%ebx\n"
    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type), "c"(0)
  );
}
76 | | |
77 | | #else |
78 | | |
// Executes the CPUID instruction for leaf |info_type| and stores the
// resulting EAX, EBX, ECX and EDX values into cpu_info[0..3].
//
// ECX is explicitly zeroed on entry. Leaves such as 7 select a sub-leaf
// through ECX, so leaving it uninitialized would make the reported feature
// bits depend on whatever the register happened to contain.
void __cpuid(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "cpuid\n"
    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type), "c"(0)
  );
}
86 | | |
87 | | #endif |
88 | | |
89 | | // _xgetbv returns the value of an Intel Extended Control Register (XCR). |
90 | | // Currently only XCR0 is defined by Intel so |xcr| should always be zero. |
91 | | uint64 _xgetbv(uint32 xcr) { |
92 | | uint32 eax, edx; |
93 | | |
94 | | __asm__ volatile ( |
95 | | "xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr)); |
96 | | return (static_cast<uint64>(edx) << 32) | eax; |
97 | | } |
98 | | |
99 | | #endif // !_MSC_VER |
100 | | #endif // __x86_64__ |
101 | | |
102 | | #if defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(__linux__)) |
103 | | class LazyCpuInfoValue { |
104 | | public: |
105 | | LazyCpuInfoValue() : has_broken_neon_(false) { |
106 | | // This function finds the value from /proc/cpuinfo under the key "model |
107 | | // name" or "Processor". "model name" is used in Linux 3.8 and later (3.7 |
108 | | // and later for arm64) and is shown once per CPU. "Processor" is used in |
109 | | // earler versions and is shown only once at the top of /proc/cpuinfo |
110 | | // regardless of the number CPUs. |
111 | | const char kModelNamePrefix[] = "model name\t: "; |
112 | | const char kProcessorPrefix[] = "Processor\t: "; |
113 | | |
114 | | // This function also calculates whether we believe that this CPU has a |
115 | | // broken NEON unit based on these fields from cpuinfo: |
116 | | unsigned implementer = 0, architecture = 0, variant = 0, part = 0, |
117 | | revision = 0; |
118 | | const struct { |
119 | | const char key[17]; |
120 | | unsigned int* result; |
121 | | } kUnsignedValues[] = { |
122 | | {"CPU implementer", &implementer}, |
123 | | {"CPU architecture", &architecture}, |
124 | | {"CPU variant", &variant}, |
125 | | {"CPU part", &part}, |
126 | | {"CPU revision", &revision}, |
127 | | }; |
128 | | |
129 | | std::string contents; |
130 | | ReadFileToString(FilePath("/proc/cpuinfo"), &contents); |
131 | | DCHECK(!contents.empty()); |
132 | | if (contents.empty()) { |
133 | | return; |
134 | | } |
135 | | |
136 | | std::istringstream iss(contents); |
137 | | std::string line; |
138 | | while (std::getline(iss, line)) { |
139 | | if (brand_.empty() && |
140 | | (line.compare(0, strlen(kModelNamePrefix), kModelNamePrefix) == 0 || |
141 | | line.compare(0, strlen(kProcessorPrefix), kProcessorPrefix) == 0)) { |
142 | | brand_.assign(line.substr(strlen(kModelNamePrefix))); |
143 | | } |
144 | | |
145 | | for (size_t i = 0; i < arraysize(kUnsignedValues); i++) { |
146 | | const char *key = kUnsignedValues[i].key; |
147 | | const size_t len = strlen(key); |
148 | | |
149 | | if (line.compare(0, len, key) == 0 && |
150 | | line.size() >= len + 1 && |
151 | | (line[len] == '\t' || line[len] == ' ' || line[len] == ':')) { |
152 | | size_t colon_pos = line.find(':', len); |
153 | | if (colon_pos == std::string::npos) { |
154 | | continue; |
155 | | } |
156 | | |
157 | | const GStringPiece line_sp(line); |
158 | | GStringPiece value_sp = line_sp.substr(colon_pos + 1); |
159 | | while (!value_sp.empty() && |
160 | | (value_sp[0] == ' ' || value_sp[0] == '\t')) { |
161 | | value_sp = value_sp.substr(1); |
162 | | } |
163 | | |
164 | | // The string may have leading "0x" or not, so we use strtoul to |
165 | | // handle that. |
166 | | char* endptr; |
167 | | std::string value(value_sp.as_string()); |
168 | | unsigned long int result = strtoul(value.c_str(), &endptr, 0); // NOLINT |
169 | | if (*endptr == 0 && result <= UINT_MAX) { |
170 | | *kUnsignedValues[i].result = result; |
171 | | } |
172 | | } |
173 | | } |
174 | | } |
175 | | |
176 | | has_broken_neon_ = |
177 | | implementer == 0x51 && |
178 | | architecture == 7 && |
179 | | variant == 1 && |
180 | | part == 0x4d && |
181 | | revision == 0; |
182 | | } |
183 | | |
184 | | const std::string& brand() const { return brand_; } |
185 | | bool has_broken_neon() const { return has_broken_neon_; } |
186 | | |
187 | | private: |
188 | | std::string brand_; |
189 | | bool has_broken_neon_; |
190 | | DISALLOW_COPY_AND_ASSIGN(LazyCpuInfoValue); |
191 | | }; |
192 | | |
193 | | base::LazyInstance<LazyCpuInfoValue>::Leaky g_lazy_cpuinfo = |
194 | | LAZY_INSTANCE_INITIALIZER; |
195 | | |
196 | | #endif // defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || |
197 | | // defined(__linux__)) |
198 | | |
199 | | } // anonymous namespace |
200 | | |
201 | 45.8k | void CPU::Initialize() { |
202 | | #if defined(__x86_64__) |
203 | | int cpu_info[4] = {-1}; |
204 | | char cpu_string[48]; |
205 | | |
206 | | // __cpuid with an InfoType argument of 0 returns the number of |
207 | | // valid Ids in CPUInfo[0] and the CPU identification string in |
208 | | // the other three array elements. The CPU identification string is |
209 | | // not in linear order. The code below arranges the information |
210 | | // in a human readable form. The human readable order is CPUInfo[1] | |
211 | | // CPUInfo[3] | CPUInfo[2]. CPUInfo[2] and CPUInfo[3] are swapped |
212 | | // before using memcpy to copy these three array elements to cpu_string. |
213 | | __cpuid(cpu_info, 0); |
214 | | int num_ids = cpu_info[0]; |
215 | | std::swap(cpu_info[2], cpu_info[3]); |
216 | | memcpy(cpu_string, &cpu_info[1], 3 * sizeof(cpu_info[1])); |
217 | | cpu_vendor_.assign(cpu_string, 3 * sizeof(cpu_info[1])); |
218 | | |
219 | | // Interpret CPU feature information. |
220 | | if (num_ids > 0) { |
221 | | int cpu_info7[4] = {0}; |
222 | | __cpuid(cpu_info, 1); |
223 | | if (num_ids >= 7) { |
224 | | __cpuid(cpu_info7, 7); |
225 | | } |
226 | | signature_ = cpu_info[0]; |
227 | | stepping_ = cpu_info[0] & 0xf; |
228 | | model_ = ((cpu_info[0] >> 4) & 0xf) + ((cpu_info[0] >> 12) & 0xf0); |
229 | | family_ = (cpu_info[0] >> 8) & 0xf; |
230 | | type_ = (cpu_info[0] >> 12) & 0x3; |
231 | | ext_model_ = (cpu_info[0] >> 16) & 0xf; |
232 | | ext_family_ = (cpu_info[0] >> 20) & 0xff; |
233 | | has_mmx_ = (cpu_info[3] & 0x00800000) != 0; |
234 | | has_sse_ = (cpu_info[3] & 0x02000000) != 0; |
235 | | has_sse2_ = (cpu_info[3] & 0x04000000) != 0; |
236 | | has_sse3_ = (cpu_info[2] & 0x00000001) != 0; |
237 | | has_ssse3_ = (cpu_info[2] & 0x00000200) != 0; |
238 | | has_sse41_ = (cpu_info[2] & 0x00080000) != 0; |
239 | | has_sse42_ = (cpu_info[2] & 0x00100000) != 0; |
240 | | // AVX instructions will generate an illegal instruction exception unless |
241 | | // a) they are supported by the CPU, |
242 | | // b) XSAVE is supported by the CPU and |
243 | | // c) XSAVE is enabled by the kernel. |
244 | | // See http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled |
245 | | // |
246 | | // In addition, we have observed some crashes with the xgetbv instruction |
247 | | // even after following Intel's example code. (See crbug.com/375968.) |
248 | | // Because of that, we also test the XSAVE bit because its description in |
249 | | // the CPUID documentation suggests that it signals xgetbv support. |
250 | | has_avx_ = |
251 | | (cpu_info[2] & 0x10000000) != 0 && |
252 | | (cpu_info[2] & 0x04000000) != 0 /* XSAVE */ && |
253 | | (cpu_info[2] & 0x08000000) != 0 /* OSXSAVE */ && |
254 | | (_xgetbv(0) & 6) == 6 /* XSAVE enabled by kernel */; |
255 | | has_aesni_ = (cpu_info[2] & 0x02000000) != 0; |
256 | | has_avx2_ = has_avx_ && (cpu_info7[1] & 0x00000020) != 0; |
257 | | } |
258 | | |
259 | | // Get the brand string of the cpu. |
260 | | __cpuid(cpu_info, 0x80000000); |
261 | | const int parameter_end = 0x80000004; |
262 | | int max_parameter = cpu_info[0]; |
263 | | |
264 | | if (cpu_info[0] >= parameter_end) { |
265 | | char* cpu_string_ptr = cpu_string; |
266 | | |
267 | | for (int parameter = 0x80000002; parameter <= parameter_end && |
268 | | cpu_string_ptr < &cpu_string[sizeof(cpu_string)]; parameter++) { |
269 | | __cpuid(cpu_info, parameter); |
270 | | memcpy(cpu_string_ptr, cpu_info, sizeof(cpu_info)); |
271 | | cpu_string_ptr += sizeof(cpu_info); |
272 | | } |
273 | | cpu_brand_.assign(cpu_string, cpu_string_ptr - cpu_string); |
274 | | } |
275 | | |
276 | | const int parameter_containing_non_stop_time_stamp_counter = 0x80000007; |
277 | | if (max_parameter >= parameter_containing_non_stop_time_stamp_counter) { |
278 | | __cpuid(cpu_info, parameter_containing_non_stop_time_stamp_counter); |
279 | | has_non_stop_time_stamp_counter_ = (cpu_info[3] & (1 << 8)) != 0; |
280 | | } |
281 | | #elif defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(__linux__)) |
282 | | cpu_brand_.assign(g_lazy_cpuinfo.Get().brand()); |
283 | | has_broken_neon_ = g_lazy_cpuinfo.Get().has_broken_neon(); |
284 | | #elif defined(__aarch64__) |
285 | 45.8k | cpu_brand_.assign("ARM64"); |
286 | 45.8k | has_broken_neon_ = false; |
287 | | #else |
288 | | #error unknown architecture |
289 | | #endif |
290 | 45.8k | } |
291 | | |
292 | 0 | CPU::IntelMicroArchitecture CPU::GetIntelMicroArchitecture() const { |
293 | 0 | if (has_avx2()) return AVX2; |
294 | 0 | if (has_avx()) return AVX; |
295 | 0 | if (has_sse42()) return SSE42; |
296 | 0 | if (has_sse41()) return SSE41; |
297 | 0 | if (has_ssse3()) return SSSE3; |
298 | 0 | if (has_sse3()) return SSE3; |
299 | 0 | if (has_sse2()) return SSE2; |
300 | 0 | if (has_sse()) return SSE; |
301 | 0 | return PENTIUM; |
302 | 0 | } |
303 | | |
304 | | } // namespace base |