YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/util/file_system_posix.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#include "yb/util/file_system_posix.h"
15
16
#include <fcntl.h>
17
#include <stdio.h>
18
#include <sys/ioctl.h>
19
#include <sys/stat.h>
20
#include <sys/types.h>
21
22
#ifdef __linux__
23
#include <linux/fs.h>
24
#include <sys/statfs.h>
25
#include <sys/syscall.h>
26
#endif // __linux__
27
28
#include "yb/util/coding.h"
29
#include "yb/util/debug/trace_event.h"
30
#include "yb/util/errno.h"
31
#include "yb/util/malloc.h"
32
#include "yb/util/result.h"
33
#include "yb/util/thread_restrictions.h"
34
35
// For platforms without fdatasync (like OS X): substitute fsync for it.
36
#ifndef fdatasync  // true whenever fdatasync is not a preprocessor macro
37
#define fdatasync fsync
38
#endif
39
40
// For platforms without unlocked_stdio (like OS X): use plain fread in place of fread_unlocked.
41
#ifndef fread_unlocked
42
3.09M
#define fread_unlocked fread
43
#endif
44
45
// On platforms other than Linux and Cygwin, the following macros are defined only as
46
// placeholders, so that the code below can name the POSIX_FADV_* constants unconditionally.
47
#if !(defined __linux__) && !(defined CYGWIN)
48
79.7k
#define POSIX_FADV_NORMAL 0     /* [MC1] no further special treatment */
49
125k
#define POSIX_FADV_RANDOM 1     /* [MC1] expect random page refs */
50
0
#define POSIX_FADV_SEQUENTIAL 2 /* [MC1] expect sequential page refs */
51
0
#define POSIX_FADV_WILLNEED 3   /* [MC1] will need these pages */
52
0
#define POSIX_FADV_DONTNEED 4   /* [MC1] don't need these pages */
53
#endif
54
55
namespace yb {
56
57
namespace {
58
59
// A wrapper for posix_fadvise. On Linux it forwards all four arguments and returns the call's
60
// result unchanged; on every other platform it does nothing and returns 0.
61
205k
int Fadvise(int fd, off_t offset, size_t len, int advice) {
62
#ifdef __linux__
63
  return posix_fadvise(fd, offset, len, advice);
64
#else
65
205k
  return 0;  // simply do nothing.
66
205k
#endif
67
205k
}
68
69
#define STATUS_IO_ERROR(context, err_number) \
70
211
    STATUS_FROM_ERRNO_SPECIAL_EIO_HANDLING(context, err_number)  // forwards both arguments as-is
71
72
} // namespace
73
74
#if defined(__linux__)
75
size_t GetUniqueIdFromFile(int fd, uint8_t* id) {  // Encodes an id for fd into id; returns its size.
76
  struct stat buf;
77
  int result = fstat(fd, &buf);
78
  if (result == -1) {
79
    return 0;  // fstat failed: report "no id" as length 0
80
  }
81
82
  int version = 0;
83
  result = ioctl(fd, FS_IOC_GETVERSION, &version);
84
  if (result == -1) {
85
    return 0;  // ioctl failed: report "no id" as length 0
86
  }
87
88
  uint8_t* rid = id;  // write cursor; assumes id has room for three varint64 values - TODO confirm
89
  rid = EncodeVarint64(rid, buf.st_dev);
90
  rid = EncodeVarint64(rid, buf.st_ino);
91
  rid = EncodeVarint64(rid, version);
92
  DCHECK_GE(rid, id);
93
  return rid - id;  // distance the cursor advanced, i.e. bytes written
94
}
95
#endif // __linux__
96
97
PosixSequentialFile::PosixSequentialFile(const std::string& fname, FILE* f,
98
                                         const FileSystemOptions& options)
99
    : filename_(fname),  // kept for error messages and trace events
100
      file_(f),  // closed by the destructor
101
      fd_(fileno(f)),  // descriptor behind f, used for the Fadvise calls; f must be non-null
102
1.72M
      use_os_buffer_(options.use_os_buffer) {}
103
104
1.72M
PosixSequentialFile::~PosixSequentialFile() { fclose(file_); }  // fclose result is not checked
105
106
3.09M
Status PosixSequentialFile::Read(size_t n, Slice* result, uint8_t* scratch) {  // Reads up to n bytes.
107
3.09M
  ThreadRestrictions::AssertIOAllowed();
108
3.09M
  Status s;
109
3.09M
  size_t r = 0;  // bytes delivered into scratch by the last fread_unlocked call
110
3.09M
  do {
111
3.09M
    r = fread_unlocked(scratch, 1, n, file_);
112
3.09M
  } while (r == 0 && ferror(file_) && errno == EINTR);  // retry only an empty, interrupted read
113
3.09M
  *result = Slice(scratch, r);  // built from scratch and the byte count, even on a short read
114
3.09M
  if (r < n) {
115
2.04M
    if (feof(file_)) {
116
      // We leave the status as OK if we hit the end of the file.
117
      // We also clear the end-of-file/error indicators so that reads can continue
118
      // if new data is written to the file.
119
2.04M
      clearerr(file_);
120
211
    } else {
121
      // A partial read with an error: return a non-ok status
122
211
      s = STATUS_IO_ERROR(filename_, errno);
123
211
    }
124
2.04M
  }
125
3.09M
  if (!use_os_buffer_) {
126
    // We need to fadvise away the entire range of pages because we do not want readahead pages to
127
    // be cached.
128
0
    Fadvise(fd_, 0, 0, POSIX_FADV_DONTNEED);  // free OS pages
129
0
  }
130
3.09M
  return s;
131
3.09M
}
132
133
18
Status PosixSequentialFile::Skip(uint64_t n) {  // Moves the stream position forward by n bytes.
134
18
  TRACE_EVENT1("io", "PosixSequentialFile::Skip", "path", filename_);
135
18
  ThreadRestrictions::AssertIOAllowed();
136
18
  if (fseek(file_, static_cast<long>(n), SEEK_CUR)) { // NOLINT
137
0
    return STATUS_IO_ERROR(filename_, errno);  // NOTE(review): n is narrowed to long above
138
0
  }
139
18
  return Status::OK();
140
18
}
141
142
0
Status PosixSequentialFile::InvalidateCache(size_t offset, size_t length) {
143
0
#ifndef __linux__
144
0
  return Status::OK();
145
#else
146
  // free OS pages
147
  int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED);
148
  if (ret == 0) {
149
    return Status::OK();
150
  }
151
  return STATUS_IO_ERROR(filename_, errno);
152
#endif
153
0
}
154
155
PosixRandomAccessFile::PosixRandomAccessFile(const std::string& fname, int fd,
156
                                             const FileSystemOptions& options)
157
2.25M
    : filename_(fname), fd_(fd), use_os_buffer_(options.use_os_buffer) {  // fd_ is closed in the dtor
158
2.25M
  assert(!options.use_mmap_reads || sizeof(void*) < 8);  // mmap reads only with pointers < 8 bytes
159
2.25M
}
160
161
2.16M
PosixRandomAccessFile::~PosixRandomAccessFile() { close(fd_); }  // close result is not checked
162
163
Status PosixRandomAccessFile::Read(uint64_t offset, size_t n, Slice* result,
164
12.3M
                                   uint8_t* scratch) const {  // Reads up to n bytes at offset.
165
12.3M
  ThreadRestrictions::AssertIOAllowed();
166
12.3M
  Status s;
167
12.3M
  ssize_t r = -1;  // NOTE(review): stays -1 when n == 0, so a zero-length read takes the error path
168
12.3M
  size_t left = n;  // bytes still wanted
169
12.3M
  uint8_t* ptr = scratch;  // next write position inside scratch
170
24.6M
  while (left > 0) {
171
12.3M
    r = pread(fd_, ptr, left, static_cast<off_t>(offset));
172
173
12.3M
    if (r <= 0) {
174
4.53k
      if (r < 0 && errno == EINTR) {  // errno is meaningful only on failure; r == 0 is end of file
175
0
        continue;
176
0
      }
177
4.53k
      break;
178
4.53k
    }
179
12.3M
    ptr += r;
180
12.3M
    offset += r;
181
12.3M
    left -= r;
182
12.3M
  }
183
184
12.3M
  *result = Slice(scratch, (r < 0) ? 0 : n - left);  // empty on error, else the bytes read so far
185
12.3M
  if (r < 0) {
186
    // An error: return a non-ok status
187
0
    s = STATUS_IO_ERROR(filename_, errno);
188
0
  }
189
12.3M
  if (!use_os_buffer_) {
190
    // we need to fadvise away the entire range of pages because
191
    // we do not want readahead pages to be cached.
192
0
    Fadvise(fd_, 0, 0, POSIX_FADV_DONTNEED);  // free OS pages
193
0
  }
194
12.3M
  return s;
195
12.3M
}
196
197
1.55M
Result<uint64_t> PosixRandomAccessFile::Size() const {  // File size as reported by fstat.
198
1.55M
  TRACE_EVENT1("io", __PRETTY_FUNCTION__, "path", filename_);
199
1.55M
  ThreadRestrictions::AssertIOAllowed();
200
1.55M
  struct stat st;
201
1.55M
  if (fstat(fd_, &st) == -1) {
202
0
    return STATUS_IO_ERROR(filename_, errno);  // fstat failed; errno carries the reason
203
0
  }
204
1.55M
  return st.st_size;  // converted to uint64_t by the Result return type
205
1.55M
}
206
207
3.51k
Result<uint64_t> PosixRandomAccessFile::INode() const {  // Inode number as reported by fstat.
208
3.51k
  TRACE_EVENT1("io", __PRETTY_FUNCTION__, "path", filename_);
209
3.51k
  ThreadRestrictions::AssertIOAllowed();
210
3.51k
  struct stat st;
211
3.51k
  if (fstat(fd_, &st) == -1) {
212
0
    return STATUS_IO_ERROR(filename_, errno);  // fstat failed; errno carries the reason
213
0
  }
214
3.51k
  return st.st_ino;
215
3.51k
}
216
217
0
size_t PosixRandomAccessFile::memory_footprint() const {  // Object block plus filename_ capacity.
218
0
  return malloc_usable_size(this) + filename_.capacity();  // presumes this is a malloc'ed block
219
0
}
220
221
#ifdef __linux__
222
size_t PosixRandomAccessFile::GetUniqueId(char* id) const {  // Linux only; returns the id length.
223
  return GetUniqueIdFromFile(fd_, pointer_cast<uint8_t*>(id));  // 0 means no id was produced
224
}
225
#endif
226
227
205k
void PosixRandomAccessFile::Hint(AccessPattern pattern) {  // Maps pattern to a POSIX_FADV_* advice.
228
205k
  switch (pattern) {  // every Fadvise call below passes offset 0 and len 0 and ignores the result
229
79.7k
    case NORMAL:
230
79.7k
      Fadvise(fd_, 0, 0, POSIX_FADV_NORMAL);
231
79.7k
      break;
232
125k
    case RANDOM:
233
125k
      Fadvise(fd_, 0, 0, POSIX_FADV_RANDOM);
234
125k
      break;
235
0
    case SEQUENTIAL:
236
0
      Fadvise(fd_, 0, 0, POSIX_FADV_SEQUENTIAL);
237
0
      break;
238
0
    case WILLNEED:
239
0
      Fadvise(fd_, 0, 0, POSIX_FADV_WILLNEED);
240
0
      break;
241
0
    case DONTNEED:
242
0
      Fadvise(fd_, 0, 0, POSIX_FADV_DONTNEED);
243
0
      break;
244
0
    default:
245
0
      assert(false);  // unknown pattern: fails only where assert is enabled, otherwise a no-op
246
0
      break;
247
205k
  }
248
205k
}
249
250
0
Status PosixRandomAccessFile::InvalidateCache(size_t offset, size_t length) {
251
0
#ifndef __linux__
252
0
  return Status::OK();
253
#else
254
  // free OS pages
255
  int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED);
256
  if (ret == 0) {
257
    return Status::OK();
258
  }
259
  return STATUS_IO_ERROR(filename_, errno);
260
#endif
261
0
}
262
263
} // namespace yb