/Users/deen/code/yugabyte-db/src/yb/util/file_system_posix.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #include "yb/util/file_system_posix.h" |
15 | | |
16 | | #include <fcntl.h> |
17 | | #include <stdio.h> |
18 | | #include <sys/ioctl.h> |
19 | | #include <sys/stat.h> |
20 | | #include <sys/types.h> |
21 | | |
22 | | #ifdef __linux__ |
23 | | #include <linux/fs.h> |
24 | | #include <sys/statfs.h> |
25 | | #include <sys/syscall.h> |
26 | | #endif // __linux__ |
27 | | |
28 | | #include "yb/util/coding.h" |
29 | | #include "yb/util/debug/trace_event.h" |
30 | | #include "yb/util/errno.h" |
31 | | #include "yb/util/malloc.h" |
32 | | #include "yb/util/result.h" |
33 | | #include "yb/util/thread_restrictions.h" |
34 | | |
35 | | // For platforms without fdatasync (like OS X) |
36 | | #ifndef fdatasync |
37 | | #define fdatasync fsync |
38 | | #endif |
39 | | |
40 | | // For platforms without unlocked_stdio (like OS X) |
41 | | #ifndef fread_unlocked |
42 | 3.09M | #define fread_unlocked fread |
43 | | #endif |
44 | | |
45 | | // For non linux platform, the following macros are used only as place |
46 | | // holder. |
47 | | #if !(defined __linux__) && !(defined CYGWIN) |
48 | 79.7k | #define POSIX_FADV_NORMAL 0 /* [MC1] no further special treatment */ |
49 | 125k | #define POSIX_FADV_RANDOM 1 /* [MC1] expect random page refs */ |
50 | 0 | #define POSIX_FADV_SEQUENTIAL 2 /* [MC1] expect sequential page refs */ |
51 | 0 | #define POSIX_FADV_WILLNEED 3 /* [MC1] will need these pages */ |
52 | 0 | #define POSIX_FADV_DONTNEED 4 /* [MC1] dont need these pages */ |
53 | | #endif |
54 | | |
55 | | namespace yb { |
56 | | |
57 | | namespace { |
58 | | |
// Thin wrapper around posix_fadvise().
//
// On Linux the arguments are forwarded unchanged and the posix_fadvise() result is returned as
// is: 0 on success, otherwise an error number. On every other platform the advice is ignored and
// 0 is returned, so the hint behaves as a successful no-op there.
int Fadvise(int fd, off_t offset, size_t len, int advice) {
#ifndef __linux__
  return 0;  // No fadvise support: silently accept the hint.
#else
  return posix_fadvise(fd, offset, len, advice);
#endif
}
68 | | |
// Shorthand used throughout this file to build the Status returned for a failed I/O call.
// `ctx` is the text describing what failed (the callers here pass the file name) and `err` is
// the error number; both are forwarded to STATUS_FROM_ERRNO_SPECIAL_EIO_HANDLING unchanged.
#define STATUS_IO_ERROR(ctx, err) \
    STATUS_FROM_ERRNO_SPECIAL_EIO_HANDLING(ctx, err)
71 | | |
72 | | } // namespace |
73 | | |
74 | | #if defined(__linux__) |
75 | | size_t GetUniqueIdFromFile(int fd, uint8_t* id) { |
76 | | struct stat buf; |
77 | | int result = fstat(fd, &buf); |
78 | | if (result == -1) { |
79 | | return 0; |
80 | | } |
81 | | |
82 | | int version = 0; |
83 | | result = ioctl(fd, FS_IOC_GETVERSION, &version); |
84 | | if (result == -1) { |
85 | | return 0; |
86 | | } |
87 | | |
88 | | uint8_t* rid = id; |
89 | | rid = EncodeVarint64(rid, buf.st_dev); |
90 | | rid = EncodeVarint64(rid, buf.st_ino); |
91 | | rid = EncodeVarint64(rid, version); |
92 | | DCHECK_GE(rid, id); |
93 | | return rid - id; |
94 | | } |
95 | | #endif // __linux__ |
96 | | |
97 | | PosixSequentialFile::PosixSequentialFile(const std::string& fname, FILE* f, |
98 | | const FileSystemOptions& options) |
99 | | : filename_(fname), |
100 | | file_(f), |
101 | | fd_(fileno(f)), |
102 | 1.72M | use_os_buffer_(options.use_os_buffer) {} |
103 | | |
104 | 1.72M | PosixSequentialFile::~PosixSequentialFile() { fclose(file_); } |
105 | | |
106 | 3.09M | Status PosixSequentialFile::Read(size_t n, Slice* result, uint8_t* scratch) { |
107 | 3.09M | ThreadRestrictions::AssertIOAllowed(); |
108 | 3.09M | Status s; |
109 | 3.09M | size_t r = 0; |
110 | 3.09M | do { |
111 | 3.09M | r = fread_unlocked(scratch, 1, n, file_); |
112 | 3.09M | } while (r == 0 && ferror(file_) && errno == EINTR); |
113 | 3.09M | *result = Slice(scratch, r); |
114 | 3.09M | if (r < n) { |
115 | 2.04M | if (feof(file_)) { |
116 | | // We leave status as ok if we hit the end of the file |
117 | | // We also clear the error so that the reads can continue |
118 | | // if a new data is written to the file |
119 | 2.04M | clearerr(file_); |
120 | 211 | } else { |
121 | | // A partial read with an error: return a non-ok status |
122 | 211 | s = STATUS_IO_ERROR(filename_, errno); |
123 | 211 | } |
124 | 2.04M | } |
125 | 3.09M | if (!use_os_buffer_) { |
126 | | // We need to fadvise away the entire range of pages because we do not want readahead pages to |
127 | | // be cached. |
128 | 0 | Fadvise(fd_, 0, 0, POSIX_FADV_DONTNEED); // free OS pages |
129 | 0 | } |
130 | 3.09M | return s; |
131 | 3.09M | } |
132 | | |
133 | 18 | Status PosixSequentialFile::Skip(uint64_t n) { |
134 | 18 | TRACE_EVENT1("io", "PosixSequentialFile::Skip", "path", filename_); |
135 | 18 | ThreadRestrictions::AssertIOAllowed(); |
136 | 18 | if (fseek(file_, static_cast<long>(n), SEEK_CUR)) { // NOLINT |
137 | 0 | return STATUS_IO_ERROR(filename_, errno); |
138 | 0 | } |
139 | 18 | return Status::OK(); |
140 | 18 | } |
141 | | |
142 | 0 | Status PosixSequentialFile::InvalidateCache(size_t offset, size_t length) { |
143 | 0 | #ifndef __linux__ |
144 | 0 | return Status::OK(); |
145 | | #else |
146 | | // free OS pages |
147 | | int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED); |
148 | | if (ret == 0) { |
149 | | return Status::OK(); |
150 | | } |
151 | | return STATUS_IO_ERROR(filename_, errno); |
152 | | #endif |
153 | 0 | } |
154 | | |
155 | | PosixRandomAccessFile::PosixRandomAccessFile(const std::string& fname, int fd, |
156 | | const FileSystemOptions& options) |
157 | 2.25M | : filename_(fname), fd_(fd), use_os_buffer_(options.use_os_buffer) { |
158 | 2.25M | assert(!options.use_mmap_reads || sizeof(void*) < 8); |
159 | 2.25M | } |
160 | | |
161 | 2.16M | PosixRandomAccessFile::~PosixRandomAccessFile() { close(fd_); } |
162 | | |
163 | | Status PosixRandomAccessFile::Read(uint64_t offset, size_t n, Slice* result, |
164 | 12.3M | uint8_t* scratch) const { |
165 | 12.3M | ThreadRestrictions::AssertIOAllowed(); |
166 | 12.3M | Status s; |
167 | 12.3M | ssize_t r = -1; |
168 | 12.3M | size_t left = n; |
169 | 12.3M | uint8_t* ptr = scratch; |
170 | 24.6M | while (left > 0) { |
171 | 12.3M | r = pread(fd_, ptr, left, static_cast<off_t>(offset)); |
172 | | |
173 | 12.3M | if (r <= 0) { |
174 | 4.53k | if (errno == EINTR) { |
175 | 0 | continue; |
176 | 0 | } |
177 | 4.53k | break; |
178 | 4.53k | } |
179 | 12.3M | ptr += r; |
180 | 12.3M | offset += r; |
181 | 12.3M | left -= r; |
182 | 12.3M | } |
183 | | |
184 | 12.3M | *result = Slice(scratch, (r < 0) ? 0 : n - left); |
185 | 12.3M | if (r < 0) { |
186 | | // An error: return a non-ok status |
187 | 0 | s = STATUS_IO_ERROR(filename_, errno); |
188 | 0 | } |
189 | 12.3M | if (!use_os_buffer_) { |
190 | | // we need to fadvise away the entire range of pages because |
191 | | // we do not want readahead pages to be cached. |
192 | 0 | Fadvise(fd_, 0, 0, POSIX_FADV_DONTNEED); // free OS pages |
193 | 0 | } |
194 | 12.3M | return s; |
195 | 12.3M | } |
196 | | |
197 | 1.55M | Result<uint64_t> PosixRandomAccessFile::Size() const { |
198 | 1.55M | TRACE_EVENT1("io", __PRETTY_FUNCTION__, "path", filename_); |
199 | 1.55M | ThreadRestrictions::AssertIOAllowed(); |
200 | 1.55M | struct stat st; |
201 | 1.55M | if (fstat(fd_, &st) == -1) { |
202 | 0 | return STATUS_IO_ERROR(filename_, errno); |
203 | 0 | } |
204 | 1.55M | return st.st_size; |
205 | 1.55M | } |
206 | | |
207 | 3.51k | Result<uint64_t> PosixRandomAccessFile::INode() const { |
208 | 3.51k | TRACE_EVENT1("io", __PRETTY_FUNCTION__, "path", filename_); |
209 | 3.51k | ThreadRestrictions::AssertIOAllowed(); |
210 | 3.51k | struct stat st; |
211 | 3.51k | if (fstat(fd_, &st) == -1) { |
212 | 0 | return STATUS_IO_ERROR(filename_, errno); |
213 | 0 | } |
214 | 3.51k | return st.st_ino; |
215 | 3.51k | } |
216 | | |
217 | 0 | size_t PosixRandomAccessFile::memory_footprint() const { |
218 | 0 | return malloc_usable_size(this) + filename_.capacity(); |
219 | 0 | } |
220 | | |
221 | | #ifdef __linux__ |
222 | | size_t PosixRandomAccessFile::GetUniqueId(char* id) const { |
223 | | return GetUniqueIdFromFile(fd_, pointer_cast<uint8_t*>(id)); |
224 | | } |
225 | | #endif |
226 | | |
227 | 205k | void PosixRandomAccessFile::Hint(AccessPattern pattern) { |
228 | 205k | switch (pattern) { |
229 | 79.7k | case NORMAL: |
230 | 79.7k | Fadvise(fd_, 0, 0, POSIX_FADV_NORMAL); |
231 | 79.7k | break; |
232 | 125k | case RANDOM: |
233 | 125k | Fadvise(fd_, 0, 0, POSIX_FADV_RANDOM); |
234 | 125k | break; |
235 | 0 | case SEQUENTIAL: |
236 | 0 | Fadvise(fd_, 0, 0, POSIX_FADV_SEQUENTIAL); |
237 | 0 | break; |
238 | 0 | case WILLNEED: |
239 | 0 | Fadvise(fd_, 0, 0, POSIX_FADV_WILLNEED); |
240 | 0 | break; |
241 | 0 | case DONTNEED: |
242 | 0 | Fadvise(fd_, 0, 0, POSIX_FADV_DONTNEED); |
243 | 0 | break; |
244 | 0 | default: |
245 | 0 | assert(false); |
246 | 0 | break; |
247 | 205k | } |
248 | 205k | } |
249 | | |
250 | 0 | Status PosixRandomAccessFile::InvalidateCache(size_t offset, size_t length) { |
251 | 0 | #ifndef __linux__ |
252 | 0 | return Status::OK(); |
253 | | #else |
254 | | // free OS pages |
255 | | int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED); |
256 | | if (ret == 0) { |
257 | | return Status::OK(); |
258 | | } |
259 | | return STATUS_IO_ERROR(filename_, errno); |
260 | | #endif |
261 | 0 | } |
262 | | |
263 | | } // namespace yb |