/Users/deen/code/yugabyte-db/src/yb/util/decimal.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #ifndef YB_UTIL_DECIMAL_H |
15 | | #define YB_UTIL_DECIMAL_H |
16 | | |
#include <cstdint>
#include <cstring>
#include <limits>
#include <type_traits>
#include <vector>
19 | | |
20 | | #include "yb/util/slice.h" |
21 | | #include "yb/util/varint.h" |
22 | | |
23 | | namespace yb { |
24 | | namespace util { |
25 | | |
26 | | // The Decimal class can represent decimal numbers (including fractions) of arbitrary length. |
27 | | // |
28 | | // The API |
29 | | // ------- |
30 | | // Typically a Decimal is used to parse from a string or double, to convert to a string or double, |
31 | | // or to serialize and deserialize. The (digits, exponent, sign) constructor need not be used |
32 | | // directly. It is not necessary to keep a Decimal object in memory for a long time: the encoded |
33 | | // string serves the same purpose, and it is easy to get the Decimal back from the serialized string. |
34 | | // |
35 | | // The Serialization format specifications |
36 | | // ------- |
37 | | // Each serialization format produces a unique encoding for a given decimal. |
38 | | // 1) Comparable Serialization: |
39 | | // This is used by our storage layer. The lexicographical byte comparison of this encoding is the |
40 | | // same as numerical comparison of numbers. Also it is possible to find the end of the encoding |
41 | | // by looking at it. So Decode(slice) gives back the length of the encoding. |
42 | | // |
43 | | // 2) BigDecimal Serialization: |
44 | | // This matches the way Cassandra serializes a Java BigDecimal. The scale |
45 | | // component is coded with 4 byte two's complement representation. Then it is followed by a byte |
46 | | // array for the corresponding BigInt's serialization. |
47 | | // See: |
48 | | // https://github.com/apache/cassandra/blob/trunk/doc/native_protocol_v4.spec |
49 | | // https://github.com/apache/cassandra/blob/81f6c784ce967fadb6ed7f58de1328e713eaf53c/ |
50 | | // src/java/org/apache/cassandra/serializers/DecimalSerializer.java |
51 | | // Note that the byte buffer for Java BigInt doesn't have a size prefix or something similar. So |
52 | | // the length is not known. The Decode(slice) function expects the whole slice to encode the |
53 | | // decimal. |
54 | | // |
55 | | // The internal representation in the Decimal class |
56 | | // ------- |
57 | | // A decimal contains a sign bit (bool is_positive_), the exponent part (VarInt exponent_), and |
58 | | // a digit (0-9) array (vector<int8_t> digits_). The corresponding number is |
59 | | // +- 10^exp * 0.d1d2...dk . |
60 | | // A representation is called canonical if and only if d1 != 0 and dk != 0. (If number is zero, |
61 | | // then sign must be positive, and digit array empty). Note that every number can have only one |
62 | | // canonical representation. |
63 | | // Examples: 23.4 = ( is_positive_ = true, exponent_ = 2, digits_ = {2, 3, 4} ) |
64 | | // -0.0004372 is ( is_positive = false, exponent = -3, digits_ = {4, 3, 7, 2} ) |
65 | | // 2378000 is ( is_positive = true, exponent = 7, digits_ = {2, 3, 7, 8} ). |
66 | | // We ensure the state is always canonical, enforced by the make_canonical() function: after |
67 | | // converting from a string or double, constructing, or decoding, the resulting representation |
68 | | // must be canonical. |
69 | | // |
70 | | // Converting to string |
71 | | // ------- |
72 | | // There are two ways to convert to string, |
73 | | // - PointString Format (-0.0004372 or 2378000) |
74 | | // - Scientific Notation (-4.372e-4 or 2.378e+6) |
75 | | // We have implemented both. Note that the pointstring format is infeasible if the exponent is too |
76 | | // large or too small, so scientific notation is used. |
77 | | // - The default ToString() function uses PointString format if the output has at most |
78 | | // kDefaultMaxLength (20) bytes, and Scientific notation otherwise. |
79 | | |
80 | | class Decimal { |
81 | | public: |
82 | | static constexpr int kDefaultMaxLength = 20; // Enough for MIN_BIGINT=-9223372036854775808. |
83 | | static constexpr int kUnlimitedMaxLength = std::numeric_limits<int>::max(); |
84 | | |
85 | 232k | Decimal() {} |
86 | | Decimal(const std::vector<uint8_t>& digits, |
87 | | const VarInt& exponent = VarInt(0), |
88 | | bool is_positive = true) |
89 | 307 | : digits_(digits), exponent_(exponent), is_positive_(is_positive) { make_canonical(); } |
90 | 0 | Decimal(const Decimal& other) : Decimal(other.digits_, other.exponent_, other.is_positive_) {} |
91 | 115 | Decimal& operator=(const Decimal& other) { |
92 | 115 | digits_ = other.digits_; |
93 | 115 | exponent_ = other.exponent_; |
94 | 115 | is_positive_ = other.is_positive_; |
95 | 115 | make_canonical(); |
96 | 115 | return *this; |
97 | 115 | } |
98 | | |
99 | | // Ensure the type conversion is possible if you use these constructors. Use FromX() otherwise. |
100 | | explicit Decimal(const std::string& string_val); |
101 | | explicit Decimal(double double_val); |
102 | | explicit Decimal(const VarInt& varint_val); |
103 | | |
104 | | void clear(); |
105 | | |
106 | | std::string ToDebugString() const; |
107 | | CHECKED_STATUS ToPointString(std::string* string_val, int max_length = kDefaultMaxLength) const; |
108 | | std::string ToScientificString() const; |
109 | | std::string ToString() const; |
110 | | // Note: We are using decimal -> string -> double using std::stod() function. |
111 | | // In future, it may be better to write a direct conversion function. |
112 | | Result<long double> ToDouble() const; |
113 | | |
114 | | Result<VarInt> ToVarInt() const; |
115 | | |
116 | | // The FromX() functions always create a canonical Decimal, |
117 | | // but the (digits, varint, sign) constructor doesn't. |
118 | | |
119 | | // The input is expected to be of the form [+-]?[0-9]*('.'[0-9]*)?([eE][+-]?[0-9]+)?, |
120 | | // whitespace is not allowed. Use this after removing whitespace. |
121 | | CHECKED_STATUS FromString(const Slice &slice); |
122 | | |
123 | | // Note: We are using double -> string -> decimal using std::to_string() function. |
124 | | // In future, it may be better to write a direct conversion function. |
125 | | CHECKED_STATUS FromDouble(double double_val); |
126 | | CHECKED_STATUS FromVarInt(const VarInt& varint_val); |
127 | | |
128 | | // Checks if this is a whole number. Assumes canonical. |
129 | | bool is_integer() const; |
130 | | |
131 | | // <0, =0, >0 if this <,=,> other numerically. Assumes canonical. |
132 | | int CompareTo(const Decimal& other) const; |
133 | | |
134 | | bool operator==(const Decimal& other) const { return CompareTo(other) == 0; } |
135 | 0 | bool operator!=(const Decimal& other) const { return CompareTo(other) != 0; } |
136 | 0 | bool operator<(const Decimal& other) const { return CompareTo(other) < 0; } |
137 | 0 | bool operator<=(const Decimal& other) const { return CompareTo(other) <= 0; } |
138 | | bool operator>(const Decimal& other) const { return CompareTo(other) > 0; } |
139 | 0 | bool operator>=(const Decimal& other) const { return CompareTo(other) >= 0; } |
140 | 0 | Decimal operator-() const { return Decimal(digits_, exponent_, !is_positive_); } |
141 | 0 | Decimal operator+() const { return Decimal(digits_, exponent_, is_positive_); } |
142 | | Decimal operator+(const Decimal& other) const; |
143 | | |
144 | | // Encodes the decimal by using comparable encoding, as described above. |
145 | | std::string EncodeToComparable() const; |
146 | | |
147 | | // Decodes a Decimal from a given Slice. Sets num_decoded_bytes = number of bytes decoded. |
148 | | CHECKED_STATUS DecodeFromComparable(const Slice& slice, size_t *num_decoded_bytes); |
149 | | |
150 | | CHECKED_STATUS DecodeFromComparable(const Slice& string); |
151 | | |
152 | | // Encode the decimal by using to Cassandra serialization format, as described above. |
153 | | std::string EncodeToSerializedBigDecimal(bool* is_out_of_range) const; |
154 | | |
155 | | CHECKED_STATUS DecodeFromSerializedBigDecimal(Slice slice); |
156 | | |
157 | 1.36k | const Decimal& Negate() { is_positive_ = !is_positive_; return *this; } |
158 | | |
159 | | private: |
160 | | friend class DecimalTest; |
161 | | |
162 | | // Checks the representation by components, For testing purposes. For Decimal, == is the same as |
163 | | // IsIdenticalTo, because we guarantee canonical-ness at all times, but the checking method is |
164 | | // different. |
165 | | bool IsIdenticalTo(const Decimal &other) const; |
166 | | |
167 | | bool is_canonical() const; |
168 | | void make_canonical(); |
169 | | |
170 | | std::vector<uint8_t> digits_; |
171 | | VarInt exponent_; |
172 | | bool is_positive_ = false; |
173 | | }; |
174 | | |
// Free functions returning a Decimal by value from a Slice or a std::string.
// NOTE(review): the definitions are not in this header. Presumably they decode the comparable
// encoding (cf. Decimal::DecodeFromComparable); since no Status is returned, confirm how a
// malformed input is reported.
175 | | Decimal DecimalFromComparable(const Slice& slice); |
176 | | Decimal DecimalFromComparable(const std::string& string); |
177 | | |
// Stream insertion for Decimal. The output format is chosen by the definition, which is not
// visible in this header.
178 | | std::ostream& operator<<(std::ostream& os, const Decimal& d); |
179 | | |
// Returns a value of integral type T in which exactly the bits with indices in [a, b) are set
// (bit 0 is the least significant). An empty range (a >= b) yields 0.
//
// The mask is accumulated in the unsigned counterpart of T, so the shift is performed at T's
// width or wider (never on `long`, which is only 32 bits on LLP64 targets) and never shifts a
// signed value into or past the sign bit. Callers must keep 0 <= a and b <= the bit width of T.
template <typename T>
constexpr T BitMask(int32_t a, int32_t b) {
  static_assert(std::is_integral<T>::value, "BitMask requires an integral result type");
  using Unsigned = typename std::make_unsigned<T>::type;
  Unsigned mask = 0;
  for (int32_t bit = a; bit < b; ++bit) {
    mask = static_cast<Unsigned>(mask | (Unsigned{1} << bit));
  }
  return static_cast<T>(mask);
}
188 | | |
// Returns the low 23 bits of the object representation of `f`, i.e. the fraction (mantissa) field
// under the usual IEEE-754 single-precision layout.
//
// The bits are copied out with std::memcpy instead of being read through a reinterpret_cast'ed
// int32_t pointer: accessing a float object through an integer lvalue violates the strict-aliasing
// rule and is undefined behaviour.
inline int GetFloatFraction(float f) {
  static_assert(sizeof(float) == sizeof(uint32_t), "float is expected to be 32 bits wide");
  constexpr uint32_t kFractionMask = (uint32_t{1} << 23) - 1;  // Bits [0, 23).
  uint32_t bits = 0;
  std::memcpy(&bits, &f, sizeof(bits));
  return static_cast<int>(bits & kFractionMask);
}
192 | | |
// Returns bits [23, 31) of the object representation of `f`, shifted down to [0, 8): the biased
// exponent field under the usual IEEE-754 single-precision layout. The sign bit is not included.
//
// The bits are copied out with std::memcpy instead of being read through a reinterpret_cast'ed
// int32_t pointer, which would violate the strict-aliasing rule.
inline int GetFloatExp(float f) {
  static_assert(sizeof(float) == sizeof(uint32_t), "float is expected to be 32 bits wide");
  constexpr int kFractionBits = 23;
  constexpr uint32_t kExponentFieldMask = 0xFFu;  // 8 exponent bits.
  uint32_t bits = 0;
  std::memcpy(&bits, &f, sizeof(bits));
  return static_cast<int>((bits >> kFractionBits) & kExponentFieldMask);
}
196 | | |
// Returns the low 52 bits of the object representation of `d`, i.e. the fraction (mantissa) field
// under the usual IEEE-754 double-precision layout.
//
// The bits are copied out with std::memcpy instead of being read through a reinterpret_cast'ed
// int64_t pointer: accessing a double object through an integer lvalue violates the
// strict-aliasing rule and is undefined behaviour.
inline int64_t GetDoubleFraction(double d) {
  static_assert(sizeof(double) == sizeof(uint64_t), "double is expected to be 64 bits wide");
  constexpr uint64_t kFractionMask = (uint64_t{1} << 52) - 1;  // Bits [0, 52).
  uint64_t bits = 0;
  std::memcpy(&bits, &d, sizeof(bits));
  return static_cast<int64_t>(bits & kFractionMask);
}
200 | | |
// Returns bits [52, 63) of the object representation of `d`, shifted down to [0, 11): the biased
// exponent field under the usual IEEE-754 double-precision layout. The sign bit is not included.
//
// The bits are copied out with std::memcpy instead of being read through a reinterpret_cast'ed
// int64_t pointer, which would violate the strict-aliasing rule.
inline int64_t GetDoubleExp(double d) {
  static_assert(sizeof(double) == sizeof(uint64_t), "double is expected to be 64 bits wide");
  constexpr int kFractionBits = 52;
  constexpr uint64_t kExponentFieldMask = 0x7FFu;  // 11 exponent bits.
  uint64_t bits = 0;
  std::memcpy(&bits, &d, sizeof(bits));
  return static_cast<int64_t>((bits >> kFractionBits) & kExponentFieldMask);
}
204 | | |
// Assembles a float from raw fields: `sign` goes to bit 31, `exponent` to bits [23, 31) and
// `fraction` to bits [0, 23). The fields are OR-ed together without masking, so callers must pass
// values that fit their field.
//
// The word is built with unsigned arithmetic (shifting a signed 1 into the sign bit is not
// portable) and copied into the float with std::memcpy, because reading an int32_t object through
// a float lvalue violates the strict-aliasing rule.
inline float CreateFloat(int32_t sign, int32_t exponent, int32_t fraction) {
  static_assert(sizeof(float) == sizeof(uint32_t), "float is expected to be 32 bits wide");
  const uint32_t bits = (static_cast<uint32_t>(sign) << 31) |
                        (static_cast<uint32_t>(exponent) << 23) |
                        static_cast<uint32_t>(fraction);
  float result = 0.0f;
  std::memcpy(&result, &bits, sizeof(result));
  return result;
}
209 | | |
// Assembles a double from raw fields: `sign` goes to bit 63, `exp` to bits [52, 63) and
// `fraction` to bits [0, 52). The fields are OR-ed together without masking, so callers must pass
// values that fit their field.
//
// The word is built with unsigned arithmetic (shifting a signed 1 into the sign bit is not
// portable) and copied into the double with std::memcpy, because reading an int64_t object
// through a double lvalue violates the strict-aliasing rule.
inline double CreateDouble(int64_t sign, int64_t exp, int64_t fraction) {
  static_assert(sizeof(double) == sizeof(uint64_t), "double is expected to be 64 bits wide");
  const uint64_t bits = (static_cast<uint64_t>(sign) << 63) |
                        (static_cast<uint64_t>(exp) << 52) |
                        static_cast<uint64_t>(fraction);
  double result = 0.0;
  std::memcpy(&result, &bits, sizeof(result));
  return result;
}
214 | | |
215 | 778k | inline bool IsNanFloat(float f) { |
216 | 778k | return (GetFloatExp(f) == 0b11111111) && GetFloatFraction(f)97 ; |
217 | 778k | } |
218 | | |
219 | 1.14M | inline bool IsNanDouble(double d) { |
220 | 1.14M | return (GetDoubleExp(d) == 0b11111111111) && GetDoubleFraction(d)97 ; |
221 | 1.14M | } |
222 | | |
223 | 777k | inline float CanonicalizeFloat(float f) { |
224 | 777k | if (IsNanFloat(f)) { |
225 | 43 | return CreateFloat(0, 0b11111111, (1 << 22)); |
226 | 43 | } |
227 | 777k | return f; |
228 | 777k | } |
229 | | |
230 | 463k | inline double CanonicalizeDouble(double d) { |
231 | 463k | if (IsNanDouble(d)) { |
232 | 43 | return CreateDouble(0, 0b11111111111, (1l << 51)); |
233 | 43 | } |
234 | 462k | return d; |
235 | 463k | } |
236 | | |
237 | | } // namespace util |
238 | | } // namespace yb |
239 | | |
240 | | #endif // YB_UTIL_DECIMAL_H |