PostgreSQL Source Code git master
d2s.c
Go to the documentation of this file.
1/*---------------------------------------------------------------------------
2 *
3 * Ryu floating-point output for double precision.
4 *
5 * Portions Copyright (c) 2018-2025, PostgreSQL Global Development Group
6 *
7 * IDENTIFICATION
8 * src/common/d2s.c
9 *
10 * This is a modification of code taken from github.com/ulfjack/ryu under the
11 * terms of the Boost license (not the Apache license). The original copyright
12 * notice follows:
13 *
14 * Copyright 2018 Ulf Adams
15 *
16 * The contents of this file may be used under the terms of the Apache
17 * License, Version 2.0.
18 *
19 * (See accompanying file LICENSE-Apache or copy at
20 * http://www.apache.org/licenses/LICENSE-2.0)
21 *
22 * Alternatively, the contents of this file may be used under the terms of the
23 * Boost Software License, Version 1.0.
24 *
25 * (See accompanying file LICENSE-Boost or copy at
26 * https://www.boost.org/LICENSE_1_0.txt)
27 *
28 * Unless required by applicable law or agreed to in writing, this software is
29 * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
30 * KIND, either express or implied.
31 *
32 *---------------------------------------------------------------------------
33 */
34
35/*
36 * Runtime compiler options:
37 *
38 * -DRYU_ONLY_64_BIT_OPS Avoid using uint128 or 64-bit intrinsics. Slower,
39 * depending on your compiler.
40 */
41
42#ifndef FRONTEND
43#include "postgres.h"
44#else
45#include "postgres_fe.h"
46#endif
47
48#include "common/shortest_dec.h"
49
50/*
51 * For consistency, we use 128-bit types if and only if the rest of PG also
52 * does, even though we could use them here without worrying about the
53 * alignment concerns that apply elsewhere.
54 */
55#if !defined(HAVE_INT128) && defined(_MSC_VER) \
56 && !defined(RYU_ONLY_64_BIT_OPS) && defined(_M_X64)
57#define HAS_64_BIT_INTRINSICS
58#endif
59
60#include "ryu_common.h"
61#include "digit_table.h"
62#include "d2s_full_table.h"
63#include "d2s_intrinsics.h"
64
65#define DOUBLE_MANTISSA_BITS 52
66#define DOUBLE_EXPONENT_BITS 11
67#define DOUBLE_BIAS 1023
68
69#define DOUBLE_POW5_INV_BITCOUNT 122
70#define DOUBLE_POW5_BITCOUNT 121
71
72
73static inline uint32
75{
76 uint32 count = 0;
77
78 for (;;)
79 {
80 Assert(value != 0);
81 const uint64 q = div5(value);
82 const uint32 r = (uint32) (value - 5 * q);
83
84 if (r != 0)
85 break;
86
87 value = q;
88 ++count;
89 }
90 return count;
91}
92
93/* Returns true if value is divisible by 5^p. */
94static inline bool
96{
97 /*
98 * I tried a case distinction on p, but there was no performance
99 * difference.
100 */
101 return pow5Factor(value) >= p;
102}
103
104/* Returns true if value is divisible by 2^p. */
105static inline bool
107{
108 /* return __builtin_ctzll(value) >= p; */
109 return (value & ((UINT64CONST(1) << p) - 1)) == 0;
110}
111
112/*
113 * We need a 64x128-bit multiplication and a subsequent 128-bit shift.
114 *
115 * Multiplication:
116 *
117 * The 64-bit factor is variable and passed in, the 128-bit factor comes
118 * from a lookup table. We know that the 64-bit factor only has 55
119 * significant bits (i.e., the 9 topmost bits are zeros). The 128-bit
120 * factor only has 124 significant bits (i.e., the 4 topmost bits are
121 * zeros).
122 *
123 * Shift:
124 *
125 * In principle, the multiplication result requires 55 + 124 = 179 bits to
126 * represent. However, we then shift this value to the right by j, which is
127 * at least j >= 115, so the result is guaranteed to fit into 179 - 115 =
128 * 64 bits. This means that we only need the topmost 64 significant bits of
129 * the 64x128-bit multiplication.
130 *
131 * There are several ways to do this:
132 *
133 * 1. Best case: the compiler exposes a 128-bit type.
134 * We perform two 64x64-bit multiplications, add the higher 64 bits of the
135 * lower result to the higher result, and shift by j - 64 bits.
136 *
137 * We explicitly cast from 64-bit to 128-bit, so the compiler can tell
138 * that these are only 64-bit inputs, and can map these to the best
139 * possible sequence of assembly instructions. x86-64 machines happen to
140 * have matching assembly instructions for 64x64-bit multiplications and
141 * 128-bit shifts.
142 *
143 * 2. Second best case: the compiler exposes intrinsics for the x86-64
144 * assembly instructions mentioned in 1.
145 *
146 * 3. We only have 64x64 bit instructions that return the lower 64 bits of
147 * the result, i.e., we have to use plain C.
148 *
149 * Our inputs are less than the full width, so we have three options:
150 * a. Ignore this fact and just implement the intrinsics manually.
151 * b. Split both into 31-bit pieces, which guarantees no internal
152 * overflow, but requires extra work upfront (unless we change the
153 * lookup table).
154 * c. Split only the first factor into 31-bit pieces, which also
155 * guarantees no internal overflow, but requires extra work since the
156 * intermediate results are not perfectly aligned.
157 */
158#if defined(HAVE_INT128)
159
160/* Best case: use 128-bit type. */
161static inline uint64
162mulShift(const uint64 m, const uint64 *const mul, const int32 j)
163{
164 const uint128 b0 = ((uint128) m) * mul[0];
165 const uint128 b2 = ((uint128) m) * mul[1];
166
167 return (uint64) (((b0 >> 64) + b2) >> (j - 64));
168}
169
170static inline uint64
171mulShiftAll(const uint64 m, const uint64 *const mul, const int32 j,
172 uint64 *const vp, uint64 *const vm, const uint32 mmShift)
173{
174 *vp = mulShift(4 * m + 2, mul, j);
175 *vm = mulShift(4 * m - 1 - mmShift, mul, j);
176 return mulShift(4 * m, mul, j);
177}
178
179#elif defined(HAS_64_BIT_INTRINSICS)
180
181static inline uint64
182mulShift(const uint64 m, const uint64 *const mul, const int32 j)
183{
184 /* m is maximum 55 bits */
185 uint64 high1;
186
187 /* 128 */
188 const uint64 low1 = umul128(m, mul[1], &high1);
189
190 /* 64 */
191 uint64 high0;
192 uint64 sum;
193
194 /* 64 */
195 umul128(m, mul[0], &high0);
196 /* 0 */
197 sum = high0 + low1;
198
199 if (sum < high0)
200 {
201 ++high1;
202 /* overflow into high1 */
203 }
204 return shiftright128(sum, high1, j - 64);
205}
206
207static inline uint64
208mulShiftAll(const uint64 m, const uint64 *const mul, const int32 j,
209 uint64 *const vp, uint64 *const vm, const uint32 mmShift)
210{
211 *vp = mulShift(4 * m + 2, mul, j);
212 *vm = mulShift(4 * m - 1 - mmShift, mul, j);
213 return mulShift(4 * m, mul, j);
214}
215
216#else /* // !defined(HAVE_INT128) &&
217 * !defined(HAS_64_BIT_INTRINSICS) */
218
219static inline uint64
220mulShiftAll(uint64 m, const uint64 *const mul, const int32 j,
221 uint64 *const vp, uint64 *const vm, const uint32 mmShift)
222{
223 m <<= 1; /* m is maximum 55 bits */
224
225 uint64 tmp;
226 const uint64 lo = umul128(m, mul[0], &tmp);
227 uint64 hi;
228 const uint64 mid = tmp + umul128(m, mul[1], &hi);
229
230 hi += mid < tmp; /* overflow into hi */
231
232 const uint64 lo2 = lo + mul[0];
233 const uint64 mid2 = mid + mul[1] + (lo2 < lo);
234 const uint64 hi2 = hi + (mid2 < mid);
235
236 *vp = shiftright128(mid2, hi2, j - 64 - 1);
237
238 if (mmShift == 1)
239 {
240 const uint64 lo3 = lo - mul[0];
241 const uint64 mid3 = mid - mul[1] - (lo3 > lo);
242 const uint64 hi3 = hi - (mid3 > mid);
243
244 *vm = shiftright128(mid3, hi3, j - 64 - 1);
245 }
246 else
247 {
248 const uint64 lo3 = lo + lo;
249 const uint64 mid3 = mid + mid + (lo3 < lo);
250 const uint64 hi3 = hi + hi + (mid3 < mid);
251 const uint64 lo4 = lo3 - mul[0];
252 const uint64 mid4 = mid3 - mul[1] - (lo4 > lo3);
253 const uint64 hi4 = hi3 - (mid4 > mid3);
254
255 *vm = shiftright128(mid4, hi4, j - 64);
256 }
257
258 return shiftright128(mid, hi, j - 64 - 1);
259}
260
261#endif /* // HAS_64_BIT_INTRINSICS */
262
263static inline uint32
265{
266 /* This is slightly faster than a loop. */
267 /* The average output length is 16.38 digits, so we check high-to-low. */
268 /* Function precondition: v is not an 18, 19, or 20-digit number. */
269 /* (17 digits are sufficient for round-tripping.) */
270 Assert(v < 100000000000000000L);
271 if (v >= 10000000000000000L)
272 {
273 return 17;
274 }
275 if (v >= 1000000000000000L)
276 {
277 return 16;
278 }
279 if (v >= 100000000000000L)
280 {
281 return 15;
282 }
283 if (v >= 10000000000000L)
284 {
285 return 14;
286 }
287 if (v >= 1000000000000L)
288 {
289 return 13;
290 }
291 if (v >= 100000000000L)
292 {
293 return 12;
294 }
295 if (v >= 10000000000L)
296 {
297 return 11;
298 }
299 if (v >= 1000000000L)
300 {
301 return 10;
302 }
303 if (v >= 100000000L)
304 {
305 return 9;
306 }
307 if (v >= 10000000L)
308 {
309 return 8;
310 }
311 if (v >= 1000000L)
312 {
313 return 7;
314 }
315 if (v >= 100000L)
316 {
317 return 6;
318 }
319 if (v >= 10000L)
320 {
321 return 5;
322 }
323 if (v >= 1000L)
324 {
325 return 4;
326 }
327 if (v >= 100L)
328 {
329 return 3;
330 }
331 if (v >= 10L)
332 {
333 return 2;
334 }
335 return 1;
336}
337
338/* A floating decimal representing m * 10^e. */
340{
344
345static inline floating_decimal_64
346d2d(const uint64 ieeeMantissa, const uint32 ieeeExponent)
347{
348 int32 e2;
349 uint64 m2;
350
351 if (ieeeExponent == 0)
352 {
353 /* We subtract 2 so that the bounds computation has 2 additional bits. */
354 e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
355 m2 = ieeeMantissa;
356 }
357 else
358 {
359 e2 = ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
360 m2 = (UINT64CONST(1) << DOUBLE_MANTISSA_BITS) | ieeeMantissa;
361 }
362
363#if STRICTLY_SHORTEST
364 const bool even = (m2 & 1) == 0;
365 const bool acceptBounds = even;
366#else
367 const bool acceptBounds = false;
368#endif
369
370 /* Step 2: Determine the interval of legal decimal representations. */
371 const uint64 mv = 4 * m2;
372
373 /* Implicit bool -> int conversion. True is 1, false is 0. */
374 const uint32 mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;
375
376 /* We would compute mp and mm like this: */
377 /* uint64 mp = 4 * m2 + 2; */
378 /* uint64 mm = mv - 1 - mmShift; */
379
380 /* Step 3: Convert to a decimal power base using 128-bit arithmetic. */
381 uint64 vr,
382 vp,
383 vm;
384 int32 e10;
385 bool vmIsTrailingZeros = false;
386 bool vrIsTrailingZeros = false;
387
388 if (e2 >= 0)
389 {
390 /*
391 * I tried special-casing q == 0, but there was no effect on
392 * performance.
393 *
394 * This expr is slightly faster than max(0, log10Pow2(e2) - 1).
395 */
396 const uint32 q = log10Pow2(e2) - (e2 > 3);
397 const int32 k = DOUBLE_POW5_INV_BITCOUNT + pow5bits(q) - 1;
398 const int32 i = -e2 + q + k;
399
400 e10 = q;
401
402 vr = mulShiftAll(m2, DOUBLE_POW5_INV_SPLIT[q], i, &vp, &vm, mmShift);
403
404 if (q <= 21)
405 {
406 /*
407 * This should use q <= 22, but I think 21 is also safe. Smaller
408 * values may still be safe, but it's more difficult to reason
409 * about them.
410 *
411 * Only one of mp, mv, and mm can be a multiple of 5, if any.
412 */
413 const uint32 mvMod5 = (uint32) (mv - 5 * div5(mv));
414
415 if (mvMod5 == 0)
416 {
417 vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
418 }
419 else if (acceptBounds)
420 {
421 /*----
422 * Same as min(e2 + (~mm & 1), pow5Factor(mm)) >= q
423 * <=> e2 + (~mm & 1) >= q && pow5Factor(mm) >= q
424 * <=> true && pow5Factor(mm) >= q, since e2 >= q.
425 *----
426 */
427 vmIsTrailingZeros = multipleOfPowerOf5(mv - 1 - mmShift, q);
428 }
429 else
430 {
431 /* Same as min(e2 + 1, pow5Factor(mp)) >= q. */
432 vp -= multipleOfPowerOf5(mv + 2, q);
433 }
434 }
435 }
436 else
437 {
438 /*
439 * This expression is slightly faster than max(0, log10Pow5(-e2) - 1).
440 */
441 const uint32 q = log10Pow5(-e2) - (-e2 > 1);
442 const int32 i = -e2 - q;
444 const int32 j = q - k;
445
446 e10 = q + e2;
447
448 vr = mulShiftAll(m2, DOUBLE_POW5_SPLIT[i], j, &vp, &vm, mmShift);
449
450 if (q <= 1)
451 {
452 /*
453 * {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q
454 * trailing 0 bits.
455 */
456 /* mv = 4 * m2, so it always has at least two trailing 0 bits. */
457 vrIsTrailingZeros = true;
458 if (acceptBounds)
459 {
460 /*
461 * mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff
462 * mmShift == 1.
463 */
464 vmIsTrailingZeros = mmShift == 1;
465 }
466 else
467 {
468 /*
469 * mp = mv + 2, so it always has at least one trailing 0 bit.
470 */
471 --vp;
472 }
473 }
474 else if (q < 63)
475 {
476 /* TODO(ulfjack):Use a tighter bound here. */
477 /*
478 * We need to compute min(ntz(mv), pow5Factor(mv) - e2) >= q - 1
479 */
480 /* <=> ntz(mv) >= q - 1 && pow5Factor(mv) - e2 >= q - 1 */
481 /* <=> ntz(mv) >= q - 1 (e2 is negative and -e2 >= q) */
482 /* <=> (mv & ((1 << (q - 1)) - 1)) == 0 */
483
484 /*
485 * We also need to make sure that the left shift does not
486 * overflow.
487 */
488 vrIsTrailingZeros = multipleOfPowerOf2(mv, q - 1);
489 }
490 }
491
492 /*
493 * Step 4: Find the shortest decimal representation in the interval of
494 * legal representations.
495 */
496 uint32 removed = 0;
497 uint8 lastRemovedDigit = 0;
499
500 /* On average, we remove ~2 digits. */
501 if (vmIsTrailingZeros || vrIsTrailingZeros)
502 {
503 /* General case, which happens rarely (~0.7%). */
504 for (;;)
505 {
506 const uint64 vpDiv10 = div10(vp);
507 const uint64 vmDiv10 = div10(vm);
508
509 if (vpDiv10 <= vmDiv10)
510 break;
511
512 const uint32 vmMod10 = (uint32) (vm - 10 * vmDiv10);
513 const uint64 vrDiv10 = div10(vr);
514 const uint32 vrMod10 = (uint32) (vr - 10 * vrDiv10);
515
516 vmIsTrailingZeros &= vmMod10 == 0;
517 vrIsTrailingZeros &= lastRemovedDigit == 0;
518 lastRemovedDigit = (uint8) vrMod10;
519 vr = vrDiv10;
520 vp = vpDiv10;
521 vm = vmDiv10;
522 ++removed;
523 }
524
525 if (vmIsTrailingZeros)
526 {
527 for (;;)
528 {
529 const uint64 vmDiv10 = div10(vm);
530 const uint32 vmMod10 = (uint32) (vm - 10 * vmDiv10);
531
532 if (vmMod10 != 0)
533 break;
534
535 const uint64 vpDiv10 = div10(vp);
536 const uint64 vrDiv10 = div10(vr);
537 const uint32 vrMod10 = (uint32) (vr - 10 * vrDiv10);
538
539 vrIsTrailingZeros &= lastRemovedDigit == 0;
540 lastRemovedDigit = (uint8) vrMod10;
541 vr = vrDiv10;
542 vp = vpDiv10;
543 vm = vmDiv10;
544 ++removed;
545 }
546 }
547
548 if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0)
549 {
550 /* Round even if the exact number is .....50..0. */
551 lastRemovedDigit = 4;
552 }
553
554 /*
555 * We need to take vr + 1 if vr is outside bounds or we need to round
556 * up.
557 */
558 output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);
559 }
560 else
561 {
562 /*
563 * Specialized for the common case (~99.3%). Percentages below are
564 * relative to this.
565 */
566 bool roundUp = false;
567 const uint64 vpDiv100 = div100(vp);
568 const uint64 vmDiv100 = div100(vm);
569
570 if (vpDiv100 > vmDiv100)
571 {
572 /* Optimization:remove two digits at a time(~86.2 %). */
573 const uint64 vrDiv100 = div100(vr);
574 const uint32 vrMod100 = (uint32) (vr - 100 * vrDiv100);
575
576 roundUp = vrMod100 >= 50;
577 vr = vrDiv100;
578 vp = vpDiv100;
579 vm = vmDiv100;
580 removed += 2;
581 }
582
583 /*----
584 * Loop iterations below (approximately), without optimization
585 * above:
586 *
587 * 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%,
588 * 6+: 0.02%
589 *
590 * Loop iterations below (approximately), with optimization
591 * above:
592 *
593 * 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
594 *----
595 */
596 for (;;)
597 {
598 const uint64 vpDiv10 = div10(vp);
599 const uint64 vmDiv10 = div10(vm);
600
601 if (vpDiv10 <= vmDiv10)
602 break;
603
604 const uint64 vrDiv10 = div10(vr);
605 const uint32 vrMod10 = (uint32) (vr - 10 * vrDiv10);
606
607 roundUp = vrMod10 >= 5;
608 vr = vrDiv10;
609 vp = vpDiv10;
610 vm = vmDiv10;
611 ++removed;
612 }
613
614 /*
615 * We need to take vr + 1 if vr is outside bounds or we need to round
616 * up.
617 */
618 output = vr + (vr == vm || roundUp);
619 }
620
621 const int32 exp = e10 + removed;
622
624
625 fd.exponent = exp;
626 fd.mantissa = output;
627 return fd;
628}
629
630static inline int
631to_chars_df(const floating_decimal_64 v, const uint32 olength, char *const result)
632{
633 /* Step 5: Print the decimal representation. */
634 int index = 0;
635
637 int32 exp = v.exponent;
638
639 /*----
640 * On entry, mantissa * 10^exp is the result to be output.
641 * Caller has already done the - sign if needed.
642 *
643 * We want to insert the point somewhere depending on the output length
644 * and exponent, which might mean adding zeros:
645 *
646 * exp | format
647 * 1+ | ddddddddd000000
648 * 0 | ddddddddd
649 * -1 .. -len+1 | dddddddd.d to d.ddddddddd
650 * -len ... | 0.ddddddddd to 0.000dddddd
651 */
652 uint32 i = 0;
653 int32 nexp = exp + olength;
654
655 if (nexp <= 0)
656 {
657 /* -nexp is number of 0s to add after '.' */
658 Assert(nexp >= -3);
659 /* 0.000ddddd */
660 index = 2 - nexp;
661 /* won't need more than this many 0s */
662 memcpy(result, "0.000000", 8);
663 }
664 else if (exp < 0)
665 {
666 /*
667 * dddd.dddd; leave space at the start and move the '.' in after
668 */
669 index = 1;
670 }
671 else
672 {
673 /*
674 * We can save some code later by pre-filling with zeros. We know that
675 * there can be no more than 16 output digits in this form, otherwise
676 * we would not choose fixed-point output.
677 */
678 Assert(exp < 16 && exp + olength <= 16);
679 memset(result, '0', 16);
680 }
681
682 /*
683 * We prefer 32-bit operations, even on 64-bit platforms. We have at most
684 * 17 digits, and uint32 can store 9 digits. If output doesn't fit into
685 * uint32, we cut off 8 digits, so the rest will fit into uint32.
686 */
687 if ((output >> 32) != 0)
688 {
689 /* Expensive 64-bit division. */
690 const uint64 q = div1e8(output);
691 uint32 output2 = (uint32) (output - 100000000 * q);
692 const uint32 c = output2 % 10000;
693
694 output = q;
695 output2 /= 10000;
696
697 const uint32 d = output2 % 10000;
698 const uint32 c0 = (c % 100) << 1;
699 const uint32 c1 = (c / 100) << 1;
700 const uint32 d0 = (d % 100) << 1;
701 const uint32 d1 = (d / 100) << 1;
702
703 memcpy(result + index + olength - i - 2, DIGIT_TABLE + c0, 2);
704 memcpy(result + index + olength - i - 4, DIGIT_TABLE + c1, 2);
705 memcpy(result + index + olength - i - 6, DIGIT_TABLE + d0, 2);
706 memcpy(result + index + olength - i - 8, DIGIT_TABLE + d1, 2);
707 i += 8;
708 }
709
710 uint32 output2 = (uint32) output;
711
712 while (output2 >= 10000)
713 {
714 const uint32 c = output2 - 10000 * (output2 / 10000);
715 const uint32 c0 = (c % 100) << 1;
716 const uint32 c1 = (c / 100) << 1;
717
718 output2 /= 10000;
719 memcpy(result + index + olength - i - 2, DIGIT_TABLE + c0, 2);
720 memcpy(result + index + olength - i - 4, DIGIT_TABLE + c1, 2);
721 i += 4;
722 }
723 if (output2 >= 100)
724 {
725 const uint32 c = (output2 % 100) << 1;
726
727 output2 /= 100;
728 memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2);
729 i += 2;
730 }
731 if (output2 >= 10)
732 {
733 const uint32 c = output2 << 1;
734
735 memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2);
736 }
737 else
738 {
739 result[index] = (char) ('0' + output2);
740 }
741
742 if (index == 1)
743 {
744 /*
745 * nexp is 1..15 here, representing the number of digits before the
746 * point. A value of 16 is not possible because we switch to
747 * scientific notation when the display exponent reaches 15.
748 */
749 Assert(nexp < 16);
750 /* gcc only seems to want to optimize memmove for small 2^n */
751 if (nexp & 8)
752 {
753 memmove(result + index - 1, result + index, 8);
754 index += 8;
755 }
756 if (nexp & 4)
757 {
758 memmove(result + index - 1, result + index, 4);
759 index += 4;
760 }
761 if (nexp & 2)
762 {
763 memmove(result + index - 1, result + index, 2);
764 index += 2;
765 }
766 if (nexp & 1)
767 {
768 result[index - 1] = result[index];
769 }
770 result[nexp] = '.';
771 index = olength + 1;
772 }
773 else if (exp >= 0)
774 {
775 /* we supplied the trailing zeros earlier, now just set the length. */
776 index = olength + exp;
777 }
778 else
779 {
780 index = olength + (2 - nexp);
781 }
782
783 return index;
784}
785
786static inline int
787to_chars(floating_decimal_64 v, const bool sign, char *const result)
788{
789 /* Step 5: Print the decimal representation. */
790 int index = 0;
791
793 uint32 olength = decimalLength(output);
794 int32 exp = v.exponent + olength - 1;
795
796 if (sign)
797 {
798 result[index++] = '-';
799 }
800
801 /*
802 * The thresholds for fixed-point output are chosen to match printf
803 * defaults. Beware that both the code of to_chars_df and the value of
804 * DOUBLE_SHORTEST_DECIMAL_LEN are sensitive to these thresholds.
805 */
806 if (exp >= -4 && exp < 15)
807 return to_chars_df(v, olength, result + index) + sign;
808
809 /*
810 * If v.exponent is exactly 0, we might have reached here via the small
811 * integer fast path, in which case v.mantissa might contain trailing
812 * (decimal) zeros. For scientific notation we need to move these zeros
813 * into the exponent. (For fixed point this doesn't matter, which is why
814 * we do this here rather than above.)
815 *
816 * Since we already calculated the display exponent (exp) above based on
817 * the old decimal length, that value does not change here. Instead, we
818 * just reduce the display length for each digit removed.
819 *
820 * If we didn't get here via the fast path, the raw exponent will not
821 * usually be 0, and there will be no trailing zeros, so we pay no more
822 * than one div10/multiply extra cost. We claw back half of that by
823 * checking for divisibility by 2 before dividing by 10.
824 */
825 if (v.exponent == 0)
826 {
827 while ((output & 1) == 0)
828 {
829 const uint64 q = div10(output);
830 const uint32 r = (uint32) (output - 10 * q);
831
832 if (r != 0)
833 break;
834 output = q;
835 --olength;
836 }
837 }
838
839 /*----
840 * Print the decimal digits.
841 *
842 * The following code is equivalent to:
843 *
844 * for (uint32 i = 0; i < olength - 1; ++i) {
845 * const uint32 c = output % 10; output /= 10;
846 * result[index + olength - i] = (char) ('0' + c);
847 * }
848 * result[index] = '0' + output % 10;
849 *----
850 */
851
852 uint32 i = 0;
853
854 /*
855 * We prefer 32-bit operations, even on 64-bit platforms. We have at most
856 * 17 digits, and uint32 can store 9 digits. If output doesn't fit into
857 * uint32, we cut off 8 digits, so the rest will fit into uint32.
858 */
859 if ((output >> 32) != 0)
860 {
861 /* Expensive 64-bit division. */
862 const uint64 q = div1e8(output);
863 uint32 output2 = (uint32) (output - 100000000 * q);
864
865 output = q;
866
867 const uint32 c = output2 % 10000;
868
869 output2 /= 10000;
870
871 const uint32 d = output2 % 10000;
872 const uint32 c0 = (c % 100) << 1;
873 const uint32 c1 = (c / 100) << 1;
874 const uint32 d0 = (d % 100) << 1;
875 const uint32 d1 = (d / 100) << 1;
876
877 memcpy(result + index + olength - i - 1, DIGIT_TABLE + c0, 2);
878 memcpy(result + index + olength - i - 3, DIGIT_TABLE + c1, 2);
879 memcpy(result + index + olength - i - 5, DIGIT_TABLE + d0, 2);
880 memcpy(result + index + olength - i - 7, DIGIT_TABLE + d1, 2);
881 i += 8;
882 }
883
884 uint32 output2 = (uint32) output;
885
886 while (output2 >= 10000)
887 {
888 const uint32 c = output2 - 10000 * (output2 / 10000);
889
890 output2 /= 10000;
891
892 const uint32 c0 = (c % 100) << 1;
893 const uint32 c1 = (c / 100) << 1;
894
895 memcpy(result + index + olength - i - 1, DIGIT_TABLE + c0, 2);
896 memcpy(result + index + olength - i - 3, DIGIT_TABLE + c1, 2);
897 i += 4;
898 }
899 if (output2 >= 100)
900 {
901 const uint32 c = (output2 % 100) << 1;
902
903 output2 /= 100;
904 memcpy(result + index + olength - i - 1, DIGIT_TABLE + c, 2);
905 i += 2;
906 }
907 if (output2 >= 10)
908 {
909 const uint32 c = output2 << 1;
910
911 /*
912 * We can't use memcpy here: the decimal dot goes between these two
913 * digits.
914 */
915 result[index + olength - i] = DIGIT_TABLE[c + 1];
916 result[index] = DIGIT_TABLE[c];
917 }
918 else
919 {
920 result[index] = (char) ('0' + output2);
921 }
922
923 /* Print decimal point if needed. */
924 if (olength > 1)
925 {
926 result[index + 1] = '.';
927 index += olength + 1;
928 }
929 else
930 {
931 ++index;
932 }
933
934 /* Print the exponent. */
935 result[index++] = 'e';
936 if (exp < 0)
937 {
938 result[index++] = '-';
939 exp = -exp;
940 }
941 else
942 result[index++] = '+';
943
944 if (exp >= 100)
945 {
946 const int32 c = exp % 10;
947
948 memcpy(result + index, DIGIT_TABLE + 2 * (exp / 10), 2);
949 result[index + 2] = (char) ('0' + c);
950 index += 3;
951 }
952 else
953 {
954 memcpy(result + index, DIGIT_TABLE + 2 * exp, 2);
955 index += 2;
956 }
957
958 return index;
959}
960
961static inline bool
962d2d_small_int(const uint64 ieeeMantissa,
963 const uint32 ieeeExponent,
965{
966 const int32 e2 = (int32) ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS;
967
968 /*
969 * Avoid using multiple "return false;" here since it tends to provoke the
970 * compiler into inlining multiple copies of d2d, which is undesirable.
971 */
972
973 if (e2 >= -DOUBLE_MANTISSA_BITS && e2 <= 0)
974 {
975 /*----
976 * Since 2^52 <= m2 < 2^53 and 0 <= -e2 <= 52:
977 * 1 <= f = m2 / 2^-e2 < 2^53.
978 *
979 * Test if the lower -e2 bits of the significand are 0, i.e. whether
980 * the fraction is 0. We can use ieeeMantissa here, since the implied
981 * 1 bit can never be tested by this; the implied 1 can only be part
982 * of a fraction if e2 < -DOUBLE_MANTISSA_BITS which we already
983 * checked. (e.g. 0.5 gives ieeeMantissa == 0 and e2 == -53)
984 */
985 const uint64 mask = (UINT64CONST(1) << -e2) - 1;
986 const uint64 fraction = ieeeMantissa & mask;
987
988 if (fraction == 0)
989 {
990 /*----
991 * f is an integer in the range [1, 2^53).
992 * Note: mantissa might contain trailing (decimal) 0's.
993 * Note: since 2^53 < 10^16, there is no need to adjust
994 * decimalLength().
995 */
996 const uint64 m2 = (UINT64CONST(1) << DOUBLE_MANTISSA_BITS) | ieeeMantissa;
997
998 v->mantissa = m2 >> -e2;
999 v->exponent = 0;
1000 return true;
1001 }
1002 }
1003
1004 return false;
1005}
1006
1007/*
1008 * Store the shortest decimal representation of the given double as an
1009 * UNTERMINATED string in the caller's supplied buffer (which must be at least
1010 * DOUBLE_SHORTEST_DECIMAL_LEN-1 bytes long).
1011 *
1012 * Returns the number of bytes stored.
1013 */
1014int
1015double_to_shortest_decimal_bufn(double f, char *result)
1016{
1017 /*
1018 * Step 1: Decode the floating-point number, and unify normalized and
1019 * subnormal cases.
1020 */
1021 const uint64 bits = double_to_bits(f);
1022
1023 /* Decode bits into sign, mantissa, and exponent. */
1024 const bool ieeeSign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0;
1025 const uint64 ieeeMantissa = bits & ((UINT64CONST(1) << DOUBLE_MANTISSA_BITS) - 1);
1026 const uint32 ieeeExponent = (uint32) ((bits >> DOUBLE_MANTISSA_BITS) & ((1u << DOUBLE_EXPONENT_BITS) - 1));
1027
1028 /* Case distinction; exit early for the easy cases. */
1029 if (ieeeExponent == ((1u << DOUBLE_EXPONENT_BITS) - 1u) || (ieeeExponent == 0 && ieeeMantissa == 0))
1030 {
1031 return copy_special_str(result, ieeeSign, (ieeeExponent != 0), (ieeeMantissa != 0));
1032 }
1033
1035 const bool isSmallInt = d2d_small_int(ieeeMantissa, ieeeExponent, &v);
1036
1037 if (!isSmallInt)
1038 {
1039 v = d2d(ieeeMantissa, ieeeExponent);
1040 }
1041
1042 return to_chars(v, ieeeSign, result);
1043}
1044
1045/*
1046 * Store the shortest decimal representation of the given double as a
1047 * null-terminated string in the caller's supplied buffer (which must be at
1048 * least DOUBLE_SHORTEST_DECIMAL_LEN bytes long).
1049 *
1050 * Returns the string length.
1051 */
1052int
1053double_to_shortest_decimal_buf(double f, char *result)
1054{
1055 const int index = double_to_shortest_decimal_bufn(f, result);
1056
1057 /* Terminate the string. */
1059 result[index] = '\0';
1060 return index;
1061}
1062
1063/*
1064 * Return the shortest decimal representation as a null-terminated palloc'd
1065 * string (outside the backend, uses malloc() instead).
1066 *
1067 * Caller is responsible for freeing the result.
1068 */
1069char *
1071{
1072 char *const result = (char *) palloc(DOUBLE_SHORTEST_DECIMAL_LEN);
1073
1075 return result;
1076}
uint8_t uint8
Definition: c.h:500
int32_t int32
Definition: c.h:498
uint64_t uint64
Definition: c.h:503
uint32_t uint32
Definition: c.h:502
#define UINT64CONST(x)
Definition: c.h:517
int double_to_shortest_decimal_bufn(double f, char *result)
Definition: d2s.c:1015
#define DOUBLE_POW5_BITCOUNT
Definition: d2s.c:70
static int to_chars_df(const floating_decimal_64 v, const uint32 olength, char *const result)
Definition: d2s.c:631
#define DOUBLE_BIAS
Definition: d2s.c:67
#define DOUBLE_POW5_INV_BITCOUNT
Definition: d2s.c:69
#define DOUBLE_EXPONENT_BITS
Definition: d2s.c:66
static floating_decimal_64 d2d(const uint64 ieeeMantissa, const uint32 ieeeExponent)
Definition: d2s.c:346
static uint64 mulShiftAll(uint64 m, const uint64 *const mul, const int32 j, uint64 *const vp, uint64 *const vm, const uint32 mmShift)
Definition: d2s.c:220
static bool multipleOfPowerOf5(const uint64 value, const uint32 p)
Definition: d2s.c:95
struct floating_decimal_64 floating_decimal_64
#define DOUBLE_MANTISSA_BITS
Definition: d2s.c:65
static bool multipleOfPowerOf2(const uint64 value, const uint32 p)
Definition: d2s.c:106
static uint32 pow5Factor(uint64 value)
Definition: d2s.c:74
static uint32 decimalLength(const uint64 v)
Definition: d2s.c:264
static bool d2d_small_int(const uint64 ieeeMantissa, const uint32 ieeeExponent, floating_decimal_64 *v)
Definition: d2s.c:962
int double_to_shortest_decimal_buf(double f, char *result)
Definition: d2s.c:1053
static int to_chars(floating_decimal_64 v, const bool sign, char *const result)
Definition: d2s.c:787
char * double_to_shortest_decimal(double f)
Definition: d2s.c:1070
static const uint64 DOUBLE_POW5_SPLIT[326][2]
static const uint64 DOUBLE_POW5_INV_SPLIT[292][2]
static uint64 div1e8(const uint64 x)
static uint64 div10(const uint64 x)
static uint64 umul128(const uint64 a, const uint64 b, uint64 *const productHi)
static uint64 div5(const uint64 x)
static uint64 shiftright128(const uint64 lo, const uint64 hi, const uint32 dist)
static uint64 div100(const uint64 x)
static uint32 mulShift(const uint32 m, const uint64 factor, const int32 shift)
Definition: f2s.c:120
Assert(PointerIsAligned(start, uint64))
FILE * output
static struct @165 value
char sign
Definition: informix.c:693
int j
Definition: isn.c:75
int i
Definition: isn.c:74
void * palloc(Size size)
Definition: mcxt.c:1317
static const char DIGIT_TABLE[200]
Definition: numutils.c:29
char * c
static int fd(const char *x, int i)
Definition: preproc-init.c:105
static uint64 double_to_bits(const double d)
Definition: ryu_common.h:125
static uint32 pow5bits(const int32 e)
Definition: ryu_common.h:54
static int32 log10Pow5(const int32 e)
Definition: ryu_common.h:83
static int copy_special_str(char *const result, const bool sign, const bool exponent, const bool mantissa)
Definition: ryu_common.h:95
static int32 log10Pow2(const int32 e)
Definition: ryu_common.h:70
#define DOUBLE_SHORTEST_DECIMAL_LEN
Definition: shortest_dec.h:44
int32 exponent
Definition: d2s.c:342
uint64 mantissa
Definition: d2s.c:341
Definition: type.h:96