/Users/deen/code/yugabyte-db/src/yb/rocksdb/util/compression.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under the BSD-style license found in the |
3 | | // LICENSE file in the root directory of this source tree. An additional grant |
4 | | // of patent rights can be found in the PATENTS file in the same directory. |
5 | | // |
6 | | // The following only applies to changes made to this file as part of YugaByte development. |
7 | | // |
8 | | // Portions Copyright (c) YugaByte, Inc. |
9 | | // |
10 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
11 | | // in compliance with the License. You may obtain a copy of the License at |
12 | | // |
13 | | // http://www.apache.org/licenses/LICENSE-2.0 |
14 | | // |
15 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
16 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
17 | | // or implied. See the License for the specific language governing permissions and limitations |
18 | | // under the License. |
19 | | // |
20 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
21 | | // Use of this source code is governed by a BSD-style license that can be |
22 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
23 | | // |
24 | | #pragma once |
25 | | |
26 | | #include <algorithm> |
27 | | #include <limits> |
28 | | #include <string> |
29 | | |
30 | | #include "yb/rocksdb/options.h" |
31 | | #include "yb/rocksdb/util/coding.h" |
32 | | |
33 | | #ifdef SNAPPY |
34 | | #include <snappy.h> |
35 | | #endif |
36 | | |
37 | | #ifdef ZLIB |
38 | | #include <zlib.h> |
39 | | #endif |
40 | | |
41 | | #ifdef BZIP2 |
42 | | #include <bzlib.h> |
43 | | #endif |
44 | | |
45 | | #if defined(LZ4) |
46 | | #include <lz4.h> |
47 | | #include <lz4hc.h> |
48 | | #endif |
49 | | |
50 | | #if defined(ZSTD) |
51 | | #include <zstd.h> |
52 | | #endif |
53 | | |
54 | | namespace rocksdb { |
55 | | |
// Tells callers whether this build includes Snappy: true when the SNAPPY
// macro was defined at compile time, false otherwise.
inline bool Snappy_Supported() {
#if defined(SNAPPY)
  return true;
#else
  return false;
#endif
}
62 | | |
// Tells callers whether this build includes zlib: true when the ZLIB macro
// was defined at compile time, false otherwise.
inline bool Zlib_Supported() {
#if defined(ZLIB)
  return true;
#else
  return false;
#endif
}
69 | | |
// Tells callers whether this build includes bzip2: true when the BZIP2 macro
// was defined at compile time, false otherwise.
inline bool BZip2_Supported() {
#if defined(BZIP2)
  return true;
#else
  return false;
#endif
}
76 | | |
// Tells callers whether this build includes LZ4: true when the LZ4 macro was
// defined at compile time, false otherwise.
inline bool LZ4_Supported() {
#if defined(LZ4)
  return true;
#else
  return false;
#endif
}
83 | | |
// Tells callers whether this build includes ZSTD: true when the ZSTD macro
// was defined at compile time, false otherwise.
inline bool ZSTD_Supported() {
#if defined(ZSTD)
  return true;
#else
  return false;
#endif
}
90 | | |
91 | 984k | inline bool CompressionTypeSupported(CompressionType compression_type) { |
92 | 984k | switch (compression_type) { |
93 | 1.04k | case kNoCompression: |
94 | 1.04k | return true; |
95 | 983k | case kSnappyCompression: |
96 | 983k | return Snappy_Supported(); |
97 | 15 | case kZlibCompression: |
98 | 15 | return Zlib_Supported(); |
99 | 4 | case kBZip2Compression: |
100 | 4 | return BZip2_Supported(); |
101 | 15 | case kLZ4Compression: |
102 | 15 | return LZ4_Supported(); |
103 | 6 | case kLZ4HCCompression: |
104 | 6 | return LZ4_Supported(); |
105 | 0 | case kZSTDNotFinalCompression: |
106 | 0 | return ZSTD_Supported(); |
107 | 0 | default: |
108 | 0 | assert(false); |
109 | 0 | return false; |
110 | 984k | } |
111 | 984k | } |
112 | | |
113 | 1.10M | inline std::string CompressionTypeToString(CompressionType compression_type) { |
114 | 1.10M | switch (compression_type) { |
115 | 545k | case kNoCompression: |
116 | 545k | return "NoCompression"; |
117 | 561k | case kSnappyCompression: |
118 | 561k | return "Snappy"; |
119 | 13 | case kZlibCompression: |
120 | 13 | return "Zlib"; |
121 | 2 | case kBZip2Compression: |
122 | 2 | return "BZip2"; |
123 | 13 | case kLZ4Compression: |
124 | 13 | return "LZ4"; |
125 | 4 | case kLZ4HCCompression: |
126 | 4 | return "LZ4HC"; |
127 | 0 | case kZSTDNotFinalCompression: |
128 | 0 | return "ZSTD"; |
129 | 0 | default: |
130 | 0 | assert(false); |
131 | 0 | return ""; |
132 | 1.10M | } |
133 | 1.10M | } |
134 | | |
135 | | // compress_format_version can have two values: |
136 | | // 1 -- decompressed sizes for BZip2 and Zlib are not included in the compressed |
137 | | // block. Also, decompressed sizes for LZ4 are encoded in platform-dependent |
138 | | // way. |
139 | | // 2 -- Zlib, BZip2 and LZ4 encode decompressed size as Varint32 just before the |
140 | | // start of compressed block. Snappy format is the same as version 1. |
141 | | |
142 | | inline bool Snappy_Compress(const CompressionOptions& opts, const char* input, |
143 | 1.98M | size_t length, ::std::string* output) { |
144 | 1.98M | #ifdef SNAPPY |
145 | 1.98M | output->resize(snappy::MaxCompressedLength(length)); |
146 | 1.98M | size_t outlen; |
147 | 1.98M | snappy::RawCompress(input, length, &(*output)[0], &outlen); |
148 | 1.98M | output->resize(outlen); |
149 | 1.98M | return true; |
150 | 0 | #endif |
151 | | |
152 | 0 | return false; |
153 | 1.98M | } |
154 | | |
// Forwards to snappy::GetUncompressedLength(input, length, result) and
// returns its result. Always returns false when built without SNAPPY.
inline bool Snappy_GetUncompressedLength(const char* input, size_t length,
                                         size_t* result) {
#ifndef SNAPPY
  return false;
#else
  const bool length_known = snappy::GetUncompressedLength(input, length, result);
  return length_known;
#endif
}
163 | | |
// Forwards to snappy::RawUncompress(input, length, output) and returns its
// result. Always returns false when built without SNAPPY.
inline bool Snappy_Uncompress(const char* input, size_t length,
                              char* output) {
#ifndef SNAPPY
  return false;
#else
  const bool decoded = snappy::RawUncompress(input, length, output);
  return decoded;
#endif
}
172 | | |
173 | | namespace compression { |
174 | | // returns size |
175 | 29.3k | inline size_t PutDecompressedSizeInfo(std::string* output, uint32_t length) { |
176 | 29.3k | PutVarint32(output, length); |
177 | 29.3k | return output->size(); |
178 | 29.3k | } |
179 | | |
180 | | inline bool GetDecompressedSizeInfo(const char** input_data, |
181 | | size_t* input_length, |
182 | 22.5k | uint32_t* output_len) { |
183 | 22.5k | auto new_input_data = |
184 | 22.5k | GetVarint32Ptr(*input_data, *input_data + *input_length, output_len); |
185 | 22.5k | if (new_input_data == nullptr) { |
186 | 0 | return false; |
187 | 0 | } |
188 | 22.5k | *input_length -= (new_input_data - *input_data); |
189 | 22.5k | *input_data = new_input_data; |
190 | 22.5k | return true; |
191 | 22.5k | } |
192 | | } // namespace compression |
193 | | |
194 | | // compress_format_version == 1 -- decompressed size is not included in the |
195 | | // block header |
196 | | // compress_format_version == 2 -- decompressed size is included in the block |
197 | | // header in varint32 format |
198 | | inline bool Zlib_Compress(const CompressionOptions& opts, |
199 | | uint32_t compress_format_version, |
200 | | const char* input, size_t length, |
201 | 18.7k | ::std::string* output) { |
202 | 18.7k | #ifdef ZLIB |
203 | 18.7k | if (length > std::numeric_limits<uint32_t>::max()) { |
204 | | // Can't compress more than 4GB |
205 | 0 | return false; |
206 | 0 | } |
207 | | |
208 | 18.7k | size_t output_header_len = 0; |
209 | 18.7k | if (compress_format_version == 2) { |
210 | 9.85k | output_header_len = compression::PutDecompressedSizeInfo( |
211 | 9.85k | output, static_cast<uint32_t>(length)); |
212 | 9.85k | } |
213 | | // Resize output to be the plain data length. |
214 | | // This may not be big enough if the compression actually expands data. |
215 | 18.7k | output->resize(output_header_len + length); |
216 | | |
217 | | // The memLevel parameter specifies how much memory should be allocated for |
218 | | // the internal compression state. |
219 | | // memLevel=1 uses minimum memory but is slow and reduces compression ratio. |
220 | | // memLevel=9 uses maximum memory for optimal speed. |
221 | | // The default value is 8. See zconf.h for more details. |
222 | 18.7k | static const int memLevel = 8; |
223 | 18.7k | z_stream _stream; |
224 | 18.7k | memset(&_stream, 0, sizeof(z_stream)); |
225 | 18.7k | int st = deflateInit2(&_stream, opts.level, Z_DEFLATED, opts.window_bits, |
226 | 18.7k | memLevel, opts.strategy); |
227 | 18.7k | if (st != Z_OK) { |
228 | 0 | return false; |
229 | 0 | } |
230 | | |
231 | | // Compress the input, and put compressed data in output. |
232 | 18.7k | _stream.next_in = (Bytef *)input; |
233 | 18.7k | _stream.avail_in = static_cast<unsigned int>(length); |
234 | | |
235 | | // Initialize the output size. |
236 | 18.7k | _stream.avail_out = static_cast<unsigned int>(length); |
237 | 18.7k | _stream.next_out = reinterpret_cast<Bytef*>(&(*output)[output_header_len]); |
238 | | |
239 | 18.7k | bool done = false; |
240 | 37.4k | while (!done) { |
241 | 18.7k | st = deflate(&_stream, Z_FINISH); |
242 | 18.7k | switch (st) { |
243 | 18.6k | case Z_STREAM_END: |
244 | 18.6k | done = true; |
245 | 18.6k | break; |
246 | 68 | case Z_OK: |
247 | | // No output space. This means the compression is bigger than |
248 | | // decompressed size. Just fail the compression in that case. |
249 | | // Intentional fallback (to failure case) |
250 | 68 | case Z_BUF_ERROR: |
251 | 68 | default: |
252 | 68 | deflateEnd(&_stream); |
253 | 68 | return false; |
254 | 18.7k | } |
255 | 18.7k | } |
256 | | |
257 | 18.6k | output->resize(output->size() - _stream.avail_out + output_header_len); |
258 | 18.6k | deflateEnd(&_stream); |
259 | 18.6k | return true; |
260 | 0 | #endif |
261 | 0 | return false; |
262 | 18.7k | } |
263 | | |
// Inflates the zlib data in (input_data, input_length) into a buffer
// allocated with new[] and returns it; the number of decompressed bytes is
// stored in *decompress_size. The buffer is not freed here on success, so
// the caller is presumably responsible for delete[]-ing it.
//
// compress_format_version == 1 -- decompressed size is not included in the
// block header
// compress_format_version == 2 -- decompressed size is included in the block
// header in varint32 format
//
// Returns nullptr when built without ZLIB, when the version-2 size header
// cannot be decoded, when inflateInit2 fails, when inflate reports an error,
// or when the output buffer would have to grow beyond what a uint32_t can
// describe.
inline char* Zlib_Uncompress(const char* input_data, size_t input_length,
                             int* decompress_size,
                             uint32_t compress_format_version,
                             int windowBits = -14) {
#ifdef ZLIB
  uint32_t output_len = 0;
  if (compress_format_version == 2) {
    if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
                                              &output_len)) {
      return nullptr;
    }
  } else {
    // Assume the decompressed data size will be 5x the compressed size, but
    // round to the page size.
    size_t proposed_output_len = ((input_length * 5) & (~(4096 - 1))) + 4096;
    output_len = static_cast<uint32_t>(
        std::min(proposed_output_len,
                 static_cast<size_t>(std::numeric_limits<uint32_t>::max())));
  }

  z_stream _stream;
  memset(&_stream, 0, sizeof(z_stream));

  // For raw inflate, the windowBits should be -8..-15.
  // If windowBits is bigger than zero, it will use either zlib
  // header or gzip header. Adding 32 to it will do automatic detection.
  int st = inflateInit2(&_stream,
                        windowBits > 0 ? windowBits + 32 : windowBits);
  if (st != Z_OK) {
    return nullptr;
  }

  // zlib's next_in is not const-qualified here, hence the const_cast.
  _stream.next_in = reinterpret_cast<Bytef*>(const_cast<char*>(input_data));
  _stream.avail_in = static_cast<unsigned int>(input_length);

  char* output = new char[output_len];

  _stream.next_out = reinterpret_cast<Bytef*>(output);
  _stream.avail_out = static_cast<unsigned int>(output_len);

  bool done = false;
  while (!done) {
    st = inflate(&_stream, Z_SYNC_FLUSH);
    switch (st) {
      case Z_STREAM_END:
        done = true;
        break;
      case Z_OK: {
        // No output space. Increase the output space by 20% (at least 10
        // bytes). We should never run out of output space if
        // compress_format_version == 2
        assert(compress_format_version != 2);
        const uint32_t old_sz = output_len;
        const uint32_t grow_by = std::max<uint32_t>(old_sz / 5, 10);
        if (grow_by > std::numeric_limits<uint32_t>::max() - old_sz) {
          // Growing would wrap the 32-bit size, making the new buffer
          // smaller than the data copied into it. Give up instead.
          delete[] output;
          inflateEnd(&_stream);
          return nullptr;
        }
        output_len = old_sz + grow_by;
        char* tmp = new char[output_len];
        memcpy(tmp, output, old_sz);
        delete[] output;
        output = tmp;

        // Set more output.
        _stream.next_out = reinterpret_cast<Bytef*>(output + old_sz);
        _stream.avail_out = static_cast<unsigned int>(output_len - old_sz);
        break;
      }
      case Z_BUF_ERROR:
      default:
        delete[] output;
        inflateEnd(&_stream);
        return nullptr;
    }
  }

  // If we encoded decompressed block size, we should have no bytes left
  assert(compress_format_version != 2 || _stream.avail_out == 0);
  *decompress_size = static_cast<int>(output_len - _stream.avail_out);
  inflateEnd(&_stream);
  return output;
#else
  return nullptr;
#endif
}
350 | | |
351 | | // compress_format_version == 1 -- decompressed size is not included in the |
352 | | // block header |
353 | | // compress_format_version == 2 -- decompressed size is included in the block |
354 | | // header in varint32 format |
355 | | inline bool BZip2_Compress(const CompressionOptions& opts, |
356 | | uint32_t compress_format_version, |
357 | | const char* input, size_t length, |
358 | 3 | ::std::string* output) { |
359 | | #ifdef BZIP2 |
360 | | if (length > std::numeric_limits<uint32_t>::max()) { |
361 | | // Can't compress more than 4GB |
362 | | return false; |
363 | | } |
364 | | size_t output_header_len = 0; |
365 | | if (compress_format_version == 2) { |
366 | | output_header_len = compression::PutDecompressedSizeInfo( |
367 | | output, static_cast<uint32_t>(length)); |
368 | | } |
369 | | // Resize output to be the plain data length. |
370 | | // This may not be big enough if the compression actually expands data. |
371 | | output->resize(output_header_len + length); |
372 | | |
373 | | |
374 | | bz_stream _stream; |
375 | | memset(&_stream, 0, sizeof(bz_stream)); |
376 | | |
377 | | // Block size 1 is 100K. |
378 | | // 0 is for silent. |
379 | | // 30 is the default workFactor |
380 | | int st = BZ2_bzCompressInit(&_stream, 1, 0, 30); |
381 | | if (st != BZ_OK) { |
382 | | return false; |
383 | | } |
384 | | |
385 | | // Compress the input, and put compressed data in output. |
386 | | _stream.next_in = (char *)input; |
387 | | _stream.avail_in = static_cast<unsigned int>(length); |
388 | | |
389 | | // Initialize the output size. |
390 | | _stream.avail_out = static_cast<unsigned int>(length); |
391 | | _stream.next_out = reinterpret_cast<char*>(&(*output)[output_header_len]); |
392 | | |
393 | | while (_stream.next_in != nullptr && _stream.avail_in != 0) { |
394 | | st = BZ2_bzCompress(&_stream, BZ_FINISH); |
395 | | switch (st) { |
396 | | case BZ_STREAM_END: |
397 | | break; |
398 | | case BZ_FINISH_OK: |
399 | | // No output space. This means the compression is bigger than |
400 | | // decompressed size. Just fail the compression in that case |
401 | | // Intentional fallback (to failure case) |
402 | | case BZ_SEQUENCE_ERROR: |
403 | | default: |
404 | | BZ2_bzCompressEnd(&_stream); |
405 | | return false; |
406 | | } |
407 | | } |
408 | | |
409 | | output->resize(output->size() - _stream.avail_out + output_header_len); |
410 | | BZ2_bzCompressEnd(&_stream); |
411 | | return true; |
412 | | #endif |
413 | 3 | return false; |
414 | 3 | } |
415 | | |
// Decompresses the bzip2 data in (input_data, input_length) into a buffer
// allocated with new[] and returns it; the number of decompressed bytes is
// stored in *decompress_size. The buffer is not freed here on success, so
// the caller is presumably responsible for delete[]-ing it.
//
// compress_format_version == 1 -- decompressed size is not included in the
// block header
// compress_format_version == 2 -- decompressed size is included in the block
// header in varint32 format
//
// Returns nullptr when built without BZIP2, when the version-2 size header
// cannot be decoded, when BZ2_bzDecompressInit fails, when BZ2_bzDecompress
// reports an error, when the input ends before the stream does, or when the
// output buffer would have to grow beyond what a uint32_t can describe.
inline char* BZip2_Uncompress(const char* input_data, size_t input_length,
                              int* decompress_size,
                              uint32_t compress_format_version) {
#ifdef BZIP2
  uint32_t output_len = 0;
  if (compress_format_version == 2) {
    if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
                                              &output_len)) {
      return nullptr;
    }
  } else {
    // Assume the decompressed data size will be 5x the compressed size, but
    // round to the next page size.
    size_t proposed_output_len = ((input_length * 5) & (~(4096 - 1))) + 4096;
    output_len = static_cast<uint32_t>(
        std::min(proposed_output_len,
                 static_cast<size_t>(std::numeric_limits<uint32_t>::max())));
  }

  bz_stream _stream;
  memset(&_stream, 0, sizeof(bz_stream));

  int st = BZ2_bzDecompressInit(&_stream, 0, 0);
  if (st != BZ_OK) {
    return nullptr;
  }

  // bz_stream::next_in is a non-const char*, hence the const_cast.
  _stream.next_in = const_cast<char*>(input_data);
  _stream.avail_in = static_cast<unsigned int>(input_length);

  char* output = new char[output_len];

  _stream.next_out = output;
  _stream.avail_out = static_cast<unsigned int>(output_len);

  bool done = false;
  while (!done) {
    st = BZ2_bzDecompress(&_stream);
    switch (st) {
      case BZ_STREAM_END:
        done = true;
        break;
      case BZ_OK: {
        // We should never run out of output space if
        // compress_format_version == 2
        assert(compress_format_version != 2);
        if (_stream.avail_in == 0 && _stream.avail_out != 0) {
          // All input is consumed, output space remains, yet the stream has
          // not ended: the input is truncated and no further call can make
          // progress. Fail instead of growing the buffer forever.
          delete[] output;
          BZ2_bzDecompressEnd(&_stream);
          return nullptr;
        }
        // No output space. Increase the output space by 20% (at least 10
        // bytes, so that a zero-length buffer still grows), using the same
        // integer arithmetic as Zlib_Uncompress.
        const uint32_t old_sz = output_len;
        const uint32_t grow_by = std::max<uint32_t>(old_sz / 5, 10);
        if (grow_by > std::numeric_limits<uint32_t>::max() - old_sz) {
          // Growing would wrap the 32-bit size. Give up instead.
          delete[] output;
          BZ2_bzDecompressEnd(&_stream);
          return nullptr;
        }
        output_len = old_sz + grow_by;
        char* tmp = new char[output_len];
        memcpy(tmp, output, old_sz);
        delete[] output;
        output = tmp;

        // Set more output.
        _stream.next_out = output + old_sz;
        _stream.avail_out = static_cast<unsigned int>(output_len - old_sz);
        break;
      }
      default:
        delete[] output;
        BZ2_bzDecompressEnd(&_stream);
        return nullptr;
    }
  }

  // If we encoded decompressed block size, we should have no bytes left
  assert(compress_format_version != 2 || _stream.avail_out == 0);
  *decompress_size = static_cast<int>(output_len - _stream.avail_out);
  BZ2_bzDecompressEnd(&_stream);
  return output;
#else
  return nullptr;
#endif
}
494 | | |
495 | | // compress_format_version == 1 -- decompressed size is included in the |
496 | | // block header using memcpy, which makes database non-portable) |
497 | | // compress_format_version == 2 -- decompressed size is included in the block |
498 | | // header in varint32 format |
499 | | inline bool LZ4_Compress(const CompressionOptions& opts, |
500 | | uint32_t compress_format_version, const char* input, |
501 | 19.5k | size_t length, ::std::string* output) { |
502 | 19.5k | #ifdef LZ4 |
503 | 19.5k | if (length > std::numeric_limits<uint32_t>::max()) { |
504 | | // Can't compress more than 4GB |
505 | 0 | return false; |
506 | 0 | } |
507 | | |
508 | 19.5k | size_t output_header_len = 0; |
509 | 19.5k | if (compress_format_version == 2) { |
510 | | // new encoding, using varint32 to store size information |
511 | 10.6k | output_header_len = compression::PutDecompressedSizeInfo( |
512 | 10.6k | output, static_cast<uint32_t>(length)); |
513 | 10.6k | } else { |
514 | | // legacy encoding, which is not really portable (depends on big/little |
515 | | // endianness) |
516 | 8.90k | output_header_len = 8; |
517 | 8.90k | output->resize(output_header_len); |
518 | 8.90k | char* p = const_cast<char*>(output->c_str()); |
519 | 8.90k | memcpy(p, &length, sizeof(length)); |
520 | 8.90k | } |
521 | | |
522 | 19.5k | int compressBound = LZ4_compressBound(static_cast<int>(length)); |
523 | 19.5k | output->resize(static_cast<size_t>(output_header_len + compressBound)); |
524 | 19.5k | int outlen = |
525 | 19.5k | LZ4_compress_limitedOutput(input, &(*output)[output_header_len], |
526 | 19.5k | static_cast<int>(length), compressBound); |
527 | 19.5k | if (outlen == 0) { |
528 | 0 | return false; |
529 | 0 | } |
530 | 19.5k | output->resize(static_cast<size_t>(output_header_len + outlen)); |
531 | 19.5k | return true; |
532 | 0 | #endif |
533 | 0 | return false; |
534 | 19.5k | } |
535 | | |
// Decompresses the LZ4 data in (input_data, input_length) into a buffer
// allocated with new[] and returns it; the number of decompressed bytes is
// stored in *decompress_size. The buffer is not freed here on success, so
// the caller is presumably responsible for delete[]-ing it.
//
// compress_format_version == 1 -- decompressed size is included in the
// block header using memcpy, which makes database non-portable)
// compress_format_version == 2 -- decompressed size is included in the block
// header in varint32 format
//
// Returns nullptr when built without LZ4, when the size header is missing or
// cannot be decoded, when either the input length or the recorded output
// length does not fit in an int (the type the LZ4 API takes), or when
// LZ4_decompress_safe reports an error (in which case *decompress_size holds
// its negative return value).
inline char* LZ4_Uncompress(const char* input_data, size_t input_length,
                            int* decompress_size,
                            uint32_t compress_format_version) {
#ifdef LZ4
  uint32_t output_len = 0;
  if (compress_format_version == 2) {
    // new encoding, using varint32 to store size information
    if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
                                              &output_len)) {
      return nullptr;
    }
  } else {
    // legacy encoding, which is not really portable (depends on big/little
    // endianness)
    if (input_length < 8) {
      return nullptr;
    }
    memcpy(&output_len, input_data, sizeof(output_len));
    input_length -= 8;
    input_data += 8;
  }

  // LZ4_decompress_safe takes both sizes as int. Reject anything larger
  // up front rather than narrowing to a negative value after allocating a
  // multi-gigabyte buffer from an untrusted header.
  const size_t max_int = static_cast<size_t>(std::numeric_limits<int>::max());
  if (input_length > max_int || output_len > max_int) {
    return nullptr;
  }

  char* output = new char[output_len];
  *decompress_size =
      LZ4_decompress_safe(input_data, output, static_cast<int>(input_length),
                          static_cast<int>(output_len));
  if (*decompress_size < 0) {
    delete[] output;
    return nullptr;
  }
  assert(*decompress_size == static_cast<int>(output_len));
  return output;
#else
  return nullptr;
#endif
}
574 | | |
575 | | // compress_format_version == 1 -- decompressed size is included in the |
576 | | // block header using memcpy, which makes database non-portable) |
577 | | // compress_format_version == 2 -- decompressed size is included in the block |
578 | | // header in varint32 format |
579 | | inline bool LZ4HC_Compress(const CompressionOptions& opts, |
580 | | uint32_t compress_format_version, const char* input, |
581 | 17.8k | size_t length, ::std::string* output) { |
582 | 17.8k | #ifdef LZ4 |
583 | 17.8k | if (length > std::numeric_limits<uint32_t>::max()) { |
584 | | // Can't compress more than 4GB |
585 | 0 | return false; |
586 | 0 | } |
587 | | |
588 | 17.8k | size_t output_header_len = 0; |
589 | 17.8k | if (compress_format_version == 2) { |
590 | | // new encoding, using varint32 to store size information |
591 | 8.91k | output_header_len = compression::PutDecompressedSizeInfo( |
592 | 8.91k | output, static_cast<uint32_t>(length)); |
593 | 8.91k | } else { |
594 | | // legacy encoding, which is not really portable (depends on big/little |
595 | | // endianness) |
596 | 8.90k | output_header_len = 8; |
597 | 8.90k | output->resize(output_header_len); |
598 | 8.90k | char* p = const_cast<char*>(output->c_str()); |
599 | 8.90k | memcpy(p, &length, sizeof(length)); |
600 | 8.90k | } |
601 | | |
602 | 17.8k | int compressBound = LZ4_compressBound(static_cast<int>(length)); |
603 | 17.8k | output->resize(static_cast<size_t>(output_header_len + compressBound)); |
604 | 17.8k | int outlen; |
605 | 17.8k | #ifdef LZ4_VERSION_MAJOR // they only started defining this since r113 |
606 | 17.8k | outlen = LZ4_compressHC2_limitedOutput(input, &(*output)[output_header_len], |
607 | 17.8k | static_cast<int>(length), |
608 | 17.8k | compressBound, opts.level); |
609 | | #else |
610 | | outlen = |
611 | | LZ4_compressHC_limitedOutput(input, &(*output)[output_header_len], |
612 | | static_cast<int>(length), compressBound); |
613 | | #endif |
614 | 17.8k | if (outlen == 0) { |
615 | 0 | return false; |
616 | 0 | } |
617 | 17.8k | output->resize(static_cast<size_t>(output_header_len + outlen)); |
618 | 17.8k | return true; |
619 | 0 | #endif |
620 | 0 | return false; |
621 | 17.8k | } |
622 | | |
623 | | inline bool ZSTD_Compress(const CompressionOptions& opts, const char* input, |
624 | 3 | size_t length, ::std::string* output) { |
625 | | #ifdef ZSTD |
626 | | if (length > std::numeric_limits<uint32_t>::max()) { |
627 | | // Can't compress more than 4GB |
628 | | return false; |
629 | | } |
630 | | |
631 | | size_t output_header_len = compression::PutDecompressedSizeInfo( |
632 | | output, static_cast<uint32_t>(length)); |
633 | | |
634 | | size_t compressBound = ZSTD_compressBound(length); |
635 | | output->resize(static_cast<size_t>(output_header_len + compressBound)); |
636 | | size_t outlen = ZSTD_compress(&(*output)[output_header_len], compressBound, |
637 | | input, length, opts.level); |
638 | | if (outlen == 0) { |
639 | | return false; |
640 | | } |
641 | | output->resize(output_header_len + outlen); |
642 | | return true; |
643 | | #endif |
644 | 3 | return false; |
645 | 3 | } |
646 | | |
// Decompresses the ZSTD data in (input_data, input_length), which must start
// with a varint32 decompressed-size header, into a buffer allocated with
// new[] and returns it; the number of decompressed bytes is stored in
// *decompress_size. The buffer is not freed here on success, so the caller
// is presumably responsible for delete[]-ing it.
//
// Returns nullptr when built without ZSTD, when the size header cannot be
// decoded, or when ZSTD_decompress reports an error.
inline char* ZSTD_Uncompress(const char* input_data, size_t input_length,
                             int* decompress_size) {
#ifdef ZSTD
  uint32_t output_len = 0;
  if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
                                            &output_len)) {
    return nullptr;
  }

  char* output = new char[output_len];
  size_t actual_output_length =
      ZSTD_decompress(output, output_len, input_data, input_length);
  if (ZSTD_isError(actual_output_length)) {
    // Corrupt or truncated input. Without this check a build with asserts
    // disabled would hand back the buffer together with an error code
    // narrowed to int as its size.
    delete[] output;
    return nullptr;
  }
  assert(actual_output_length == output_len);
  *decompress_size = static_cast<int>(actual_output_length);
  return output;
#else
  return nullptr;
#endif
}
665 | | |
666 | | } // namespace rocksdb |