YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/gutil/cpu.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file.
4
//
5
// The following only applies to changes made to this file as part of YugaByte development.
6
//
7
// Portions Copyright (c) YugaByte, Inc.
8
//
9
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
10
// in compliance with the License.  You may obtain a copy of the License at
11
//
12
// http://www.apache.org/licenses/LICENSE-2.0
13
//
14
// Unless required by applicable law or agreed to in writing, software distributed under the License
15
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
16
// or implied.  See the License for the specific language governing permissions and limitations
17
// under the License.
18
//
19
20
#include "yb/gutil/cpu.h"
21
22
#include <string.h>
23
24
25
#include "yb/gutil/integral_types.h"
26
27
#if defined(__x86_64__)
28
#if defined(_MSC_VER)
29
#include <intrin.h>
30
#include <immintrin.h>  // For _xgetbv()
31
#endif
32
#endif
33
34
namespace base {
35
36
CPU::CPU()
37
  : signature_(0),
38
    type_(0),
39
    family_(0),
40
    model_(0),
41
    stepping_(0),
42
    ext_model_(0),
43
    ext_family_(0),
44
    has_mmx_(false),
45
    has_sse_(false),
46
    has_sse2_(false),
47
    has_sse3_(false),
48
    has_ssse3_(false),
49
    has_sse41_(false),
50
    has_sse42_(false),
51
    has_avx_(false),
52
    has_avx2_(false),
53
    has_aesni_(false),
54
    has_non_stop_time_stamp_counter_(false),
55
    has_broken_neon_(false),
56
71.6k
    cpu_vendor_("unknown") {
57
71.6k
  Initialize();
58
71.6k
}
59
60
namespace {
61
62
#if defined(__x86_64__)
63
#ifndef _MSC_VER
64
65
#if defined(__pic__) && defined(__i386__)

// Executes CPUID for leaf |info_type|, sub-leaf 0, and stores the resulting
// EAX, EBX, ECX and EDX values in cpu_info[0], [1], [2] and [3] respectively.
//
// This variant saves EBX in EDI before the instruction and swaps it back
// afterwards, so EBX holds its original value on exit and the CPUID result
// for EBX is delivered through EDI (presumably because EBX is reserved for
// position-independent code on 32-bit x86 -- confirm before relying on it).
void __cpuid(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "mov %%ebx, %%edi\n"
    "cpuid\n"
    "xchg %%edi, %%ebx\n"
    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    // Some leaves (e.g. leaf 7, used for the AVX2 check) take a sub-leaf
    // index in ECX, so ECX is pinned to 0 rather than left with stale data.
    : "a"(info_type), "c"(0)
  );
}

#else

// Executes CPUID for leaf |info_type|, sub-leaf 0, and stores the resulting
// EAX, EBX, ECX and EDX values in cpu_info[0], [1], [2] and [3] respectively.
void __cpuid(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "cpuid\n"
    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    // Some leaves (e.g. leaf 7, used for the AVX2 check) take a sub-leaf
    // index in ECX, so ECX is pinned to 0 rather than left with stale data.
    : "a"(info_type), "c"(0)
  );
}

#endif
88
89
// _xgetbv returns the value of an Intel Extended Control Register (XCR).
90
// Currently only XCR0 is defined by Intel so |xcr| should always be zero.
91
uint64 _xgetbv(uint32 xcr) {
92
  uint32 eax, edx;
93
94
  __asm__ volatile (
95
    "xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
96
  return (static_cast<uint64>(edx) << 32) | eax;
97
}
98
99
#endif  // !_MSC_VER
100
#endif  // __x86_64__
101
102
#if defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(__linux__))
103
class LazyCpuInfoValue {
104
 public:
105
  LazyCpuInfoValue() : has_broken_neon_(false) {
106
    // This function finds the value from /proc/cpuinfo under the key "model
107
    // name" or "Processor". "model name" is used in Linux 3.8 and later (3.7
108
    // and later for arm64) and is shown once per CPU. "Processor" is used in
109
    // earler versions and is shown only once at the top of /proc/cpuinfo
110
    // regardless of the number CPUs.
111
    const char kModelNamePrefix[] = "model name\t: ";
112
    const char kProcessorPrefix[] = "Processor\t: ";
113
114
    // This function also calculates whether we believe that this CPU has a
115
    // broken NEON unit based on these fields from cpuinfo:
116
    unsigned implementer = 0, architecture = 0, variant = 0, part = 0,
117
             revision = 0;
118
    const struct {
119
      const char key[17];
120
      unsigned int* result;
121
    } kUnsignedValues[] = {
122
      {"CPU implementer", &implementer},
123
      {"CPU architecture", &architecture},
124
      {"CPU variant", &variant},
125
      {"CPU part", &part},
126
      {"CPU revision", &revision},
127
    };
128
129
    std::string contents;
130
    ReadFileToString(FilePath("/proc/cpuinfo"), &contents);
131
    DCHECK(!contents.empty());
132
    if (contents.empty()) {
133
      return;
134
    }
135
136
    std::istringstream iss(contents);
137
    std::string line;
138
    while (std::getline(iss, line)) {
139
      if (brand_.empty() &&
140
          (line.compare(0, strlen(kModelNamePrefix), kModelNamePrefix) == 0 ||
141
           line.compare(0, strlen(kProcessorPrefix), kProcessorPrefix) == 0)) {
142
        brand_.assign(line.substr(strlen(kModelNamePrefix)));
143
      }
144
145
      for (size_t i = 0; i < arraysize(kUnsignedValues); i++) {
146
        const char *key = kUnsignedValues[i].key;
147
        const size_t len = strlen(key);
148
149
        if (line.compare(0, len, key) == 0 &&
150
            line.size() >= len + 1 &&
151
            (line[len] == '\t' || line[len] == ' ' || line[len] == ':')) {
152
          size_t colon_pos = line.find(':', len);
153
          if (colon_pos == std::string::npos) {
154
            continue;
155
          }
156
157
          const GStringPiece line_sp(line);
158
          GStringPiece value_sp = line_sp.substr(colon_pos + 1);
159
          while (!value_sp.empty() &&
160
                 (value_sp[0] == ' ' || value_sp[0] == '\t')) {
161
            value_sp = value_sp.substr(1);
162
          }
163
164
          // The string may have leading "0x" or not, so we use strtoul to
165
          // handle that.
166
          char* endptr;
167
          std::string value(value_sp.as_string());
168
          unsigned long int result = strtoul(value.c_str(), &endptr, 0);  // NOLINT
169
          if (*endptr == 0 && result <= UINT_MAX) {
170
            *kUnsignedValues[i].result = result;
171
          }
172
        }
173
      }
174
    }
175
176
    has_broken_neon_ =
177
      implementer == 0x51 &&
178
      architecture == 7 &&
179
      variant == 1 &&
180
      part == 0x4d &&
181
      revision == 0;
182
  }
183
184
  const std::string& brand() const { return brand_; }
185
  bool has_broken_neon() const { return has_broken_neon_; }
186
187
 private:
188
  std::string brand_;
189
  bool has_broken_neon_;
190
  DISALLOW_COPY_AND_ASSIGN(LazyCpuInfoValue);
191
};
192
193
// Shared LazyCpuInfoValue holder; CPU::Initialize() reads it through
// g_lazy_cpuinfo.Get(). NOTE(review): construction is presumably deferred to
// the first Get() call and the instance never destroyed ("Leaky") -- confirm
// against base::LazyInstance.
base::LazyInstance<LazyCpuInfoValue>::Leaky g_lazy_cpuinfo = LAZY_INSTANCE_INITIALIZER;
195
196
#endif  // defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) ||
197
        // defined(__linux__))
198
199
}  // anonymous namespace
200
201
71.6k
void CPU::Initialize() {
202
#if defined(__x86_64__)
203
  int cpu_info[4] = {-1};
204
  char cpu_string[48];
205
206
  // __cpuid with an InfoType argument of 0 returns the number of
207
  // valid Ids in CPUInfo[0] and the CPU identification string in
208
  // the other three array elements. The CPU identification string is
209
  // not in linear order. The code below arranges the information
210
  // in a human readable form. The human readable order is CPUInfo[1] |
211
  // CPUInfo[3] | CPUInfo[2]. CPUInfo[2] and CPUInfo[3] are swapped
212
  // before using memcpy to copy these three array elements to cpu_string.
213
  __cpuid(cpu_info, 0);
214
  int num_ids = cpu_info[0];
215
  std::swap(cpu_info[2], cpu_info[3]);
216
  memcpy(cpu_string, &cpu_info[1], 3 * sizeof(cpu_info[1]));
217
  cpu_vendor_.assign(cpu_string, 3 * sizeof(cpu_info[1]));
218
219
  // Interpret CPU feature information.
220
  if (num_ids > 0) {
221
    int cpu_info7[4] = {0};
222
    __cpuid(cpu_info, 1);
223
    if (num_ids >= 7) {
224
      __cpuid(cpu_info7, 7);
225
    }
226
    signature_ = cpu_info[0];
227
    stepping_ = cpu_info[0] & 0xf;
228
    model_ = ((cpu_info[0] >> 4) & 0xf) + ((cpu_info[0] >> 12) & 0xf0);
229
    family_ = (cpu_info[0] >> 8) & 0xf;
230
    type_ = (cpu_info[0] >> 12) & 0x3;
231
    ext_model_ = (cpu_info[0] >> 16) & 0xf;
232
    ext_family_ = (cpu_info[0] >> 20) & 0xff;
233
    has_mmx_ =   (cpu_info[3] & 0x00800000) != 0;
234
    has_sse_ =   (cpu_info[3] & 0x02000000) != 0;
235
    has_sse2_ =  (cpu_info[3] & 0x04000000) != 0;
236
    has_sse3_ =  (cpu_info[2] & 0x00000001) != 0;
237
    has_ssse3_ = (cpu_info[2] & 0x00000200) != 0;
238
    has_sse41_ = (cpu_info[2] & 0x00080000) != 0;
239
    has_sse42_ = (cpu_info[2] & 0x00100000) != 0;
240
    // AVX instructions will generate an illegal instruction exception unless
241
    //   a) they are supported by the CPU,
242
    //   b) XSAVE is supported by the CPU and
243
    //   c) XSAVE is enabled by the kernel.
244
    // See http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
245
    //
246
    // In addition, we have observed some crashes with the xgetbv instruction
247
    // even after following Intel's example code. (See crbug.com/375968.)
248
    // Because of that, we also test the XSAVE bit because its description in
249
    // the CPUID documentation suggests that it signals xgetbv support.
250
    has_avx_ =
251
        (cpu_info[2] & 0x10000000) != 0 &&
252
        (cpu_info[2] & 0x04000000) != 0 /* XSAVE */ &&
253
        (cpu_info[2] & 0x08000000) != 0 /* OSXSAVE */ &&
254
        (_xgetbv(0) & 6) == 6 /* XSAVE enabled by kernel */;
255
    has_aesni_ = (cpu_info[2] & 0x02000000) != 0;
256
    has_avx2_ = has_avx_ && (cpu_info7[1] & 0x00000020) != 0;
257
  }
258
259
  // Get the brand string of the cpu.
260
  __cpuid(cpu_info, 0x80000000);
261
  const int parameter_end = 0x80000004;
262
  int max_parameter = cpu_info[0];
263
264
  if (cpu_info[0] >= parameter_end) {
265
    char* cpu_string_ptr = cpu_string;
266
267
    for (int parameter = 0x80000002; parameter <= parameter_end &&
268
         cpu_string_ptr < &cpu_string[sizeof(cpu_string)]; parameter++) {
269
      __cpuid(cpu_info, parameter);
270
      memcpy(cpu_string_ptr, cpu_info, sizeof(cpu_info));
271
      cpu_string_ptr += sizeof(cpu_info);
272
    }
273
    cpu_brand_.assign(cpu_string, cpu_string_ptr - cpu_string);
274
  }
275
276
  const int parameter_containing_non_stop_time_stamp_counter = 0x80000007;
277
  if (max_parameter >= parameter_containing_non_stop_time_stamp_counter) {
278
    __cpuid(cpu_info, parameter_containing_non_stop_time_stamp_counter);
279
    has_non_stop_time_stamp_counter_ = (cpu_info[3] & (1 << 8)) != 0;
280
  }
281
#elif defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(__linux__))
282
  cpu_brand_.assign(g_lazy_cpuinfo.Get().brand());
283
  has_broken_neon_ = g_lazy_cpuinfo.Get().has_broken_neon();
284
#elif defined(__aarch64__)
285
71.6k
  cpu_brand_.assign("ARM64");
286
71.6k
  has_broken_neon_ = false;
287
#else
288
  #error unknown architecture
289
#endif
290
71.6k
}
291
292
0
// Maps the detected feature flags to a single IntelMicroArchitecture value.
// Features are probed from AVX2 down to SSE and the first one present wins;
// PENTIUM is the result when none of them is reported.
CPU::IntelMicroArchitecture CPU::GetIntelMicroArchitecture() const {
  IntelMicroArchitecture detected = PENTIUM;
  if (has_avx2()) {
    detected = AVX2;
  } else if (has_avx()) {
    detected = AVX;
  } else if (has_sse42()) {
    detected = SSE42;
  } else if (has_sse41()) {
    detected = SSE41;
  } else if (has_ssse3()) {
    detected = SSSE3;
  } else if (has_sse3()) {
    detected = SSE3;
  } else if (has_sse2()) {
    detected = SSE2;
  } else if (has_sse()) {
    detected = SSE;
  }
  return detected;
}
303
304
}  // namespace base