d2s.c
1 /*---------------------------------------------------------------------------
2  *
3  * Ryu floating-point output for double precision.
4  *
5  * Portions Copyright (c) 2018-2024, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  * src/common/d2s.c
9  *
10  * This is a modification of code taken from github.com/ulfjack/ryu under the
11  * terms of the Boost license (not the Apache license). The original copyright
12  * notice follows:
13  *
14  * Copyright 2018 Ulf Adams
15  *
16  * The contents of this file may be used under the terms of the Apache
17  * License, Version 2.0.
18  *
19  * (See accompanying file LICENSE-Apache or copy at
20  * http://www.apache.org/licenses/LICENSE-2.0)
21  *
22  * Alternatively, the contents of this file may be used under the terms of the
23  * Boost Software License, Version 1.0.
24  *
25  * (See accompanying file LICENSE-Boost or copy at
26  * https://www.boost.org/LICENSE_1_0.txt)
27  *
28  * Unless required by applicable law or agreed to in writing, this software is
29  * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
30  * KIND, either express or implied.
31  *
32  *---------------------------------------------------------------------------
33  */
34 
35 /*
36  * Runtime compiler options:
37  *
38  * -DRYU_ONLY_64_BIT_OPS Avoid using uint128 or 64-bit intrinsics. Slower,
39  * depending on your compiler.
40  */
41 
42 #ifndef FRONTEND
43 #include "postgres.h"
44 #else
45 #include "postgres_fe.h"
46 #endif
47 
48 #include "common/shortest_dec.h"
49 
50 /*
51  * For consistency, we use 128-bit types if and only if the rest of PG also
52  * does, even though we could use them here without worrying about the
53  * alignment concerns that apply elsewhere.
54  */
55 #if !defined(HAVE_INT128) && defined(_MSC_VER) \
56  && !defined(RYU_ONLY_64_BIT_OPS) && defined(_M_X64)
57 #define HAS_64_BIT_INTRINSICS
58 #endif
59 
60 #include "ryu_common.h"
61 #include "digit_table.h"
62 #include "d2s_full_table.h"
63 #include "d2s_intrinsics.h"
64 
65 #define DOUBLE_MANTISSA_BITS 52
66 #define DOUBLE_EXPONENT_BITS 11
67 #define DOUBLE_BIAS 1023
68 
69 #define DOUBLE_POW5_INV_BITCOUNT 122
70 #define DOUBLE_POW5_BITCOUNT 121
71 
72 
73 static inline uint32
74 pow5Factor(uint64 value)
75 {
76  uint32 count = 0;
77 
78  for (;;)
79  {
80  Assert(value != 0);
81  const uint64 q = div5(value);
82  const uint32 r = (uint32) (value - 5 * q);
83 
84  if (r != 0)
85  break;
86 
87  value = q;
88  ++count;
89  }
90  return count;
91 }
92 
93 /* Returns true if value is divisible by 5^p. */
94 static inline bool
95 multipleOfPowerOf5(const uint64 value, const uint32 p)
96 {
97  /*
98  * I tried a case distinction on p, but there was no performance
99  * difference.
100  */
101  return pow5Factor(value) >= p;
102 }
103 
104 /* Returns true if value is divisible by 2^p. */
105 static inline bool
106 multipleOfPowerOf2(const uint64 value, const uint32 p)
107 {
108  /* return __builtin_ctzll(value) >= p; */
109  return (value & ((UINT64CONST(1) << p) - 1)) == 0;
110 }
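/*
 * Editorial worked example (not in the original file): pow5Factor(625) == 4
 * because 625 == 5^4, so multipleOfPowerOf5(625, 4) is true and
 * multipleOfPowerOf5(625, 5) is false.  Likewise, multipleOfPowerOf2(40, 3)
 * is true because the low three bits of 40 (binary 101000) are zero, while
 * multipleOfPowerOf2(40, 4) is false.
 */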
111 
112 /*
113  * We need a 64x128-bit multiplication and a subsequent 128-bit shift.
114  *
115  * Multiplication:
116  *
117  * The 64-bit factor is variable and passed in, the 128-bit factor comes
118  * from a lookup table. We know that the 64-bit factor only has 55
119  * significant bits (i.e., the 9 topmost bits are zeros). The 128-bit
120  * factor only has 124 significant bits (i.e., the 4 topmost bits are
121  * zeros).
122  *
123  * Shift:
124  *
125  * In principle, the multiplication result requires 55 + 124 = 179 bits to
126  * represent. However, we then shift this value to the right by j, which is
127  * at least j >= 115, so the result is guaranteed to fit into 179 - 115 =
128  * 64 bits. This means that we only need the topmost 64 significant bits of
129  * the 64x128-bit multiplication.
130  *
131  * There are several ways to do this:
132  *
133  * 1. Best case: the compiler exposes a 128-bit type.
134  * We perform two 64x64-bit multiplications, add the higher 64 bits of the
135  * lower result to the higher result, and shift by j - 64 bits.
136  *
137  * We explicitly cast from 64-bit to 128-bit, so the compiler can tell
138  * that these are only 64-bit inputs, and can map these to the best
139  * possible sequence of assembly instructions. x86-64 machines happen to
140  * have matching assembly instructions for 64x64-bit multiplications and
141  * 128-bit shifts.
142  *
143  * 2. Second best case: the compiler exposes intrinsics for the x86-64
144  * assembly instructions mentioned in 1.
145  *
146  * 3. We only have 64x64 bit instructions that return the lower 64 bits of
147  * the result, i.e., we have to use plain C.
148  *
149  * Our inputs are less than the full width, so we have three options:
150  * a. Ignore this fact and just implement the intrinsics manually.
151  * b. Split both into 31-bit pieces, which guarantees no internal
152  * overflow, but requires extra work upfront (unless we change the
153  * lookup table).
154  * c. Split only the first factor into 31-bit pieces, which also
155  * guarantees no internal overflow, but requires extra work since the
156  * intermediate results are not perfectly aligned.
157  */
158 #if defined(HAVE_INT128)
159 
160 /* Best case: use 128-bit type. */
161 static inline uint64
162 mulShift(const uint64 m, const uint64 *const mul, const int32 j)
163 {
164  const uint128 b0 = ((uint128) m) * mul[0];
165  const uint128 b2 = ((uint128) m) * mul[1];
166 
167  return (uint64) (((b0 >> 64) + b2) >> (j - 64));
168 }
169 
170 static inline uint64
171 mulShiftAll(const uint64 m, const uint64 *const mul, const int32 j,
172  uint64 *const vp, uint64 *const vm, const uint32 mmShift)
173 {
174  *vp = mulShift(4 * m + 2, mul, j);
175  *vm = mulShift(4 * m - 1 - mmShift, mul, j);
176  return mulShift(4 * m, mul, j);
177 }
178 
179 #elif defined(HAS_64_BIT_INTRINSICS)
180 
181 static inline uint64
182 mulShift(const uint64 m, const uint64 *const mul, const int32 j)
183 {
184  /* m is maximum 55 bits */
185  uint64 high1;
186 
187  /* 128 */
188  const uint64 low1 = umul128(m, mul[1], &high1);
189 
190  /* 64 */
191  uint64 high0;
192  uint64 sum;
193 
194  /* 64 */
195  umul128(m, mul[0], &high0);
196  /* 0 */
197  sum = high0 + low1;
198 
199  if (sum < high0)
200  {
201  ++high1;
202  /* overflow into high1 */
203  }
204  return shiftright128(sum, high1, j - 64);
205 }
206 
207 static inline uint64
208 mulShiftAll(const uint64 m, const uint64 *const mul, const int32 j,
209  uint64 *const vp, uint64 *const vm, const uint32 mmShift)
210 {
211  *vp = mulShift(4 * m + 2, mul, j);
212  *vm = mulShift(4 * m - 1 - mmShift, mul, j);
213  return mulShift(4 * m, mul, j);
214 }
215 
216 #else /* // !defined(HAVE_INT128) &&
217  * !defined(HAS_64_BIT_INTRINSICS) */
218 
219 static inline uint64
220 mulShiftAll(uint64 m, const uint64 *const mul, const int32 j,
221  uint64 *const vp, uint64 *const vm, const uint32 mmShift)
222 {
223  m <<= 1; /* m is maximum 55 bits */
224 
225  uint64 tmp;
226  const uint64 lo = umul128(m, mul[0], &tmp);
227  uint64 hi;
228  const uint64 mid = tmp + umul128(m, mul[1], &hi);
229 
230  hi += mid < tmp; /* overflow into hi */
231 
232  const uint64 lo2 = lo + mul[0];
233  const uint64 mid2 = mid + mul[1] + (lo2 < lo);
234  const uint64 hi2 = hi + (mid2 < mid);
235 
236  *vp = shiftright128(mid2, hi2, j - 64 - 1);
237 
238  if (mmShift == 1)
239  {
240  const uint64 lo3 = lo - mul[0];
241  const uint64 mid3 = mid - mul[1] - (lo3 > lo);
242  const uint64 hi3 = hi - (mid3 > mid);
243 
244  *vm = shiftright128(mid3, hi3, j - 64 - 1);
245  }
246  else
247  {
248  const uint64 lo3 = lo + lo;
249  const uint64 mid3 = mid + mid + (lo3 < lo);
250  const uint64 hi3 = hi + hi + (mid3 < mid);
251  const uint64 lo4 = lo3 - mul[0];
252  const uint64 mid4 = mid3 - mul[1] - (lo4 > lo3);
253  const uint64 hi4 = hi3 - (mid4 > mid3);
254 
255  *vm = shiftright128(mid4, hi4, j - 64);
256  }
257 
258  return shiftright128(mid, hi, j - 64 - 1);
259 }
260 
261 #endif /* // HAS_64_BIT_INTRINSICS */
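An editorial aside, not part of d2s.c: the plain-C route sketched as option 3a above can be implemented by splitting each 64-bit factor into 32-bit halves. The function below is only an illustration under that assumption (hypothetical name umul128_portable); the fallback PostgreSQL actually uses comes from d2s_intrinsics.h and may be organized differently.

#include <stdint.h>

/* 64x64 -> 128-bit multiply using only 64-bit arithmetic (illustrative). */
static uint64_t
umul128_portable(uint64_t a, uint64_t b, uint64_t *productHi)
{
	const uint64_t aLo = a & 0xFFFFFFFF;
	const uint64_t aHi = a >> 32;
	const uint64_t bLo = b & 0xFFFFFFFF;
	const uint64_t bHi = b >> 32;

	const uint64_t ll = aLo * bLo;
	const uint64_t lh = aLo * bHi;
	const uint64_t hl = aHi * bLo;
	const uint64_t hh = aHi * bHi;

	/* sum the three terms contributing to bits 32..63, keeping the carry */
	const uint64_t mid = (ll >> 32) + (lh & 0xFFFFFFFF) + (hl & 0xFFFFFFFF);

	*productHi = hh + (lh >> 32) + (hl >> 32) + (mid >> 32);
	return (mid << 32) | (ll & 0xFFFFFFFF);
}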
262 
263 static inline uint32
264 decimalLength(const uint64 v)
265 {
266  /* This is slightly faster than a loop. */
267  /* The average output length is 16.38 digits, so we check high-to-low. */
268  /* Function precondition: v is not an 18, 19, or 20-digit number. */
269  /* (17 digits are sufficient for round-tripping.) */
270  Assert(v < 100000000000000000L);
271  if (v >= 10000000000000000L)
272  {
273  return 17;
274  }
275  if (v >= 1000000000000000L)
276  {
277  return 16;
278  }
279  if (v >= 100000000000000L)
280  {
281  return 15;
282  }
283  if (v >= 10000000000000L)
284  {
285  return 14;
286  }
287  if (v >= 1000000000000L)
288  {
289  return 13;
290  }
291  if (v >= 100000000000L)
292  {
293  return 12;
294  }
295  if (v >= 10000000000L)
296  {
297  return 11;
298  }
299  if (v >= 1000000000L)
300  {
301  return 10;
302  }
303  if (v >= 100000000L)
304  {
305  return 9;
306  }
307  if (v >= 10000000L)
308  {
309  return 8;
310  }
311  if (v >= 1000000L)
312  {
313  return 7;
314  }
315  if (v >= 100000L)
316  {
317  return 6;
318  }
319  if (v >= 10000L)
320  {
321  return 5;
322  }
323  if (v >= 1000L)
324  {
325  return 4;
326  }
327  if (v >= 100L)
328  {
329  return 3;
330  }
331  if (v >= 10L)
332  {
333  return 2;
334  }
335  return 1;
336 }
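Editorial aside, not part of the file: for comparison with the "slightly faster than a loop" remark above, a loop-based equivalent of decimalLength() would look like this sketch (hypothetical name, plain <stdint.h> types):

#include <stdint.h>

/* Count decimal digits by repeated division; decimalLength() unrolls this. */
static inline uint32_t
decimal_length_loop(uint64_t v)
{
	uint32_t	len = 1;

	while (v >= 10)
	{
		v /= 10;
		len++;
	}
	return len;
}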
337 
338 /* A floating decimal representing m * 10^e. */
339 typedef struct floating_decimal_64
340 {
341  uint64 mantissa;
342  int32 exponent;
343 } floating_decimal_64;
344 
345 static inline floating_decimal_64
346 d2d(const uint64 ieeeMantissa, const uint32 ieeeExponent)
347 {
348  int32 e2;
349  uint64 m2;
350 
351  if (ieeeExponent == 0)
352  {
353  /* We subtract 2 so that the bounds computation has 2 additional bits. */
354  e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
355  m2 = ieeeMantissa;
356  }
357  else
358  {
359  e2 = ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
360  m2 = (UINT64CONST(1) << DOUBLE_MANTISSA_BITS) | ieeeMantissa;
361  }
362 
363 #if STRICTLY_SHORTEST
364  const bool even = (m2 & 1) == 0;
365  const bool acceptBounds = even;
366 #else
367  const bool acceptBounds = false;
368 #endif
369 
370  /* Step 2: Determine the interval of legal decimal representations. */
371  const uint64 mv = 4 * m2;
372 
373  /* Implicit bool -> int conversion. True is 1, false is 0. */
374  const uint32 mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;
375 
376  /* We would compute mp and mm like this: */
377  /* uint64 mp = 4 * m2 + 2; */
378  /* uint64 mm = mv - 1 - mmShift; */
379 
380  /* Step 3: Convert to a decimal power base using 128-bit arithmetic. */
381  uint64 vr,
382  vp,
383  vm;
384  int32 e10;
385  bool vmIsTrailingZeros = false;
386  bool vrIsTrailingZeros = false;
387 
388  if (e2 >= 0)
389  {
390  /*
391  * I tried special-casing q == 0, but there was no effect on
392  * performance.
393  *
394  * This expr is slightly faster than max(0, log10Pow2(e2) - 1).
395  */
396  const uint32 q = log10Pow2(e2) - (e2 > 3);
397  const int32 k = DOUBLE_POW5_INV_BITCOUNT + pow5bits(q) - 1;
398  const int32 i = -e2 + q + k;
399 
400  e10 = q;
401 
402  vr = mulShiftAll(m2, DOUBLE_POW5_INV_SPLIT[q], i, &vp, &vm, mmShift);
403 
404  if (q <= 21)
405  {
406  /*
407  * This should use q <= 22, but I think 21 is also safe. Smaller
408  * values may still be safe, but it's more difficult to reason
409  * about them.
410  *
411  * Only one of mp, mv, and mm can be a multiple of 5, if any.
412  */
413  const uint32 mvMod5 = (uint32) (mv - 5 * div5(mv));
414 
415  if (mvMod5 == 0)
416  {
417  vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
418  }
419  else if (acceptBounds)
420  {
421  /*----
422  * Same as min(e2 + (~mm & 1), pow5Factor(mm)) >= q
423  * <=> e2 + (~mm & 1) >= q && pow5Factor(mm) >= q
424  * <=> true && pow5Factor(mm) >= q, since e2 >= q.
425  *----
426  */
427  vmIsTrailingZeros = multipleOfPowerOf5(mv - 1 - mmShift, q);
428  }
429  else
430  {
431  /* Same as min(e2 + 1, pow5Factor(mp)) >= q. */
432  vp -= multipleOfPowerOf5(mv + 2, q);
433  }
434  }
435  }
436  else
437  {
438  /*
439  * This expression is slightly faster than max(0, log10Pow5(-e2) - 1).
440  */
441  const uint32 q = log10Pow5(-e2) - (-e2 > 1);
442  const int32 i = -e2 - q;
443  const int32 k = pow5bits(i) - DOUBLE_POW5_BITCOUNT;
444  const int32 j = q - k;
445 
446  e10 = q + e2;
447 
448  vr = mulShiftAll(m2, DOUBLE_POW5_SPLIT[i], j, &vp, &vm, mmShift);
449 
450  if (q <= 1)
451  {
452  /*
453  * {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q
454  * trailing 0 bits.
455  */
456  /* mv = 4 * m2, so it always has at least two trailing 0 bits. */
457  vrIsTrailingZeros = true;
458  if (acceptBounds)
459  {
460  /*
461  * mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff
462  * mmShift == 1.
463  */
464  vmIsTrailingZeros = mmShift == 1;
465  }
466  else
467  {
468  /*
469  * mp = mv + 2, so it always has at least one trailing 0 bit.
470  */
471  --vp;
472  }
473  }
474  else if (q < 63)
475  {
476  /* TODO(ulfjack): Use a tighter bound here. */
477  /*
478  * We need to compute min(ntz(mv), pow5Factor(mv) - e2) >= q - 1
479  */
480  /* <=> ntz(mv) >= q - 1 && pow5Factor(mv) - e2 >= q - 1 */
481  /* <=> ntz(mv) >= q - 1 (e2 is negative and -e2 >= q) */
482  /* <=> (mv & ((1 << (q - 1)) - 1)) == 0 */
483 
484  /*
485  * We also need to make sure that the left shift does not
486  * overflow.
487  */
488  vrIsTrailingZeros = multipleOfPowerOf2(mv, q - 1);
489  }
490  }
491 
492  /*
493  * Step 4: Find the shortest decimal representation in the interval of
494  * legal representations.
495  */
496  uint32 removed = 0;
497  uint8 lastRemovedDigit = 0;
498  uint64 output;
499 
500  /* On average, we remove ~2 digits. */
501  if (vmIsTrailingZeros || vrIsTrailingZeros)
502  {
503  /* General case, which happens rarely (~0.7%). */
504  for (;;)
505  {
506  const uint64 vpDiv10 = div10(vp);
507  const uint64 vmDiv10 = div10(vm);
508 
509  if (vpDiv10 <= vmDiv10)
510  break;
511 
512  const uint32 vmMod10 = (uint32) (vm - 10 * vmDiv10);
513  const uint64 vrDiv10 = div10(vr);
514  const uint32 vrMod10 = (uint32) (vr - 10 * vrDiv10);
515 
516  vmIsTrailingZeros &= vmMod10 == 0;
517  vrIsTrailingZeros &= lastRemovedDigit == 0;
518  lastRemovedDigit = (uint8) vrMod10;
519  vr = vrDiv10;
520  vp = vpDiv10;
521  vm = vmDiv10;
522  ++removed;
523  }
524 
525  if (vmIsTrailingZeros)
526  {
527  for (;;)
528  {
529  const uint64 vmDiv10 = div10(vm);
530  const uint32 vmMod10 = (uint32) (vm - 10 * vmDiv10);
531 
532  if (vmMod10 != 0)
533  break;
534 
535  const uint64 vpDiv10 = div10(vp);
536  const uint64 vrDiv10 = div10(vr);
537  const uint32 vrMod10 = (uint32) (vr - 10 * vrDiv10);
538 
539  vrIsTrailingZeros &= lastRemovedDigit == 0;
540  lastRemovedDigit = (uint8) vrMod10;
541  vr = vrDiv10;
542  vp = vpDiv10;
543  vm = vmDiv10;
544  ++removed;
545  }
546  }
547 
548  if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0)
549  {
550  /* Round even if the exact number is .....50..0. */
551  lastRemovedDigit = 4;
552  }
553 
554  /*
555  * We need to take vr + 1 if vr is outside bounds or we need to round
556  * up.
557  */
558  output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);
559  }
560  else
561  {
562  /*
563  * Specialized for the common case (~99.3%). Percentages below are
564  * relative to this.
565  */
566  bool roundUp = false;
567  const uint64 vpDiv100 = div100(vp);
568  const uint64 vmDiv100 = div100(vm);
569 
570  if (vpDiv100 > vmDiv100)
571  {
572  /* Optimization: remove two digits at a time (~86.2%). */
573  const uint64 vrDiv100 = div100(vr);
574  const uint32 vrMod100 = (uint32) (vr - 100 * vrDiv100);
575 
576  roundUp = vrMod100 >= 50;
577  vr = vrDiv100;
578  vp = vpDiv100;
579  vm = vmDiv100;
580  removed += 2;
581  }
582 
583  /*----
584  * Loop iterations below (approximately), without optimization
585  * above:
586  *
587  * 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%,
588  * 6+: 0.02%
589  *
590  * Loop iterations below (approximately), with optimization
591  * above:
592  *
593  * 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
594  *----
595  */
596  for (;;)
597  {
598  const uint64 vpDiv10 = div10(vp);
599  const uint64 vmDiv10 = div10(vm);
600 
601  if (vpDiv10 <= vmDiv10)
602  break;
603 
604  const uint64 vrDiv10 = div10(vr);
605  const uint32 vrMod10 = (uint32) (vr - 10 * vrDiv10);
606 
607  roundUp = vrMod10 >= 5;
608  vr = vrDiv10;
609  vp = vpDiv10;
610  vm = vmDiv10;
611  ++removed;
612  }
613 
614  /*
615  * We need to take vr + 1 if vr is outside bounds or we need to round
616  * up.
617  */
618  output = vr + (vr == vm || roundUp);
619  }
620 
621  const int32 exp = e10 + removed;
622 
623  floating_decimal_64 fd;
624 
625  fd.exponent = exp;
626  fd.mantissa = output;
627  return fd;
628 }
629 
630 static inline int
631 to_chars_df(const floating_decimal_64 v, const uint32 olength, char *const result)
632 {
633  /* Step 5: Print the decimal representation. */
634  int index = 0;
635 
636  uint64 output = v.mantissa;
637  int32 exp = v.exponent;
638 
639  /*----
640  * On entry, mantissa * 10^exp is the result to be output.
641  * Caller has already done the - sign if needed.
642  *
643  * We want to insert the point somewhere depending on the output length
644  * and exponent, which might mean adding zeros:
645  *
646  * exp | format
647  * 1+ | ddddddddd000000
648  * 0 | ddddddddd
649  * -1 .. -len+1 | dddddddd.d to d.ddddddddd
650  * -len ... | 0.ddddddddd to 0.000dddddd
651  * -len ... | 0.ddddddddd to 0.000dddddd
 */
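 /*
  * Editorial examples (not in the original source): with mantissa 12345,
  * so olength = 5, the table above gives
  *   exp =  2  ->  "1234500"
  *   exp =  0  ->  "12345"
  *   exp = -2  ->  "123.45"
  *   exp = -7  ->  "0.0012345"   (nexp = -2, so two zeros after the point)
  */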
652  uint32 i = 0;
653  int32 nexp = exp + olength;
654 
655  if (nexp <= 0)
656  {
657  /* -nexp is number of 0s to add after '.' */
658  Assert(nexp >= -3);
659  /* 0.000ddddd */
660  index = 2 - nexp;
661  /* won't need more than this many 0s */
662  memcpy(result, "0.000000", 8);
663  }
664  else if (exp < 0)
665  {
666  /*
667  * dddd.dddd; leave space at the start and move the '.' in after
668  */
669  index = 1;
670  }
671  else
672  {
673  /*
674  * We can save some code later by pre-filling with zeros. We know that
675  * there can be no more than 16 output digits in this form, otherwise
676  * we would not choose fixed-point output.
677  */
678  Assert(exp < 16 && exp + olength <= 16);
679  memset(result, '0', 16);
680  }
681 
682  /*
683  * We prefer 32-bit operations, even on 64-bit platforms. We have at most
684  * 17 digits, and uint32 can store 9 digits. If output doesn't fit into
685  * uint32, we cut off 8 digits, so the rest will fit into uint32.
686  */
687  if ((output >> 32) != 0)
688  {
689  /* Expensive 64-bit division. */
690  const uint64 q = div1e8(output);
691  uint32 output2 = (uint32) (output - 100000000 * q);
692  const uint32 c = output2 % 10000;
693 
694  output = q;
695  output2 /= 10000;
696 
697  const uint32 d = output2 % 10000;
698  const uint32 c0 = (c % 100) << 1;
699  const uint32 c1 = (c / 100) << 1;
700  const uint32 d0 = (d % 100) << 1;
701  const uint32 d1 = (d / 100) << 1;
702 
703  memcpy(result + index + olength - i - 2, DIGIT_TABLE + c0, 2);
704  memcpy(result + index + olength - i - 4, DIGIT_TABLE + c1, 2);
705  memcpy(result + index + olength - i - 6, DIGIT_TABLE + d0, 2);
706  memcpy(result + index + olength - i - 8, DIGIT_TABLE + d1, 2);
707  i += 8;
708  }
709 
710  uint32 output2 = (uint32) output;
711 
712  while (output2 >= 10000)
713  {
714  const uint32 c = output2 - 10000 * (output2 / 10000);
715  const uint32 c0 = (c % 100) << 1;
716  const uint32 c1 = (c / 100) << 1;
717 
718  output2 /= 10000;
719  memcpy(result + index + olength - i - 2, DIGIT_TABLE + c0, 2);
720  memcpy(result + index + olength - i - 4, DIGIT_TABLE + c1, 2);
721  i += 4;
722  }
723  if (output2 >= 100)
724  {
725  const uint32 c = (output2 % 100) << 1;
726 
727  output2 /= 100;
728  memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2);
729  i += 2;
730  }
731  if (output2 >= 10)
732  {
733  const uint32 c = output2 << 1;
734 
735  memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2);
736  }
737  else
738  {
739  result[index] = (char) ('0' + output2);
740  }
741 
742  if (index == 1)
743  {
744  /*
745  * nexp is 1..15 here, representing the number of digits before the
746  * point. A value of 16 is not possible because we switch to
747  * scientific notation when the display exponent reaches 15.
748  */
749  Assert(nexp < 16);
750  /* gcc only seems to want to optimize memmove for small 2^n */
751  if (nexp & 8)
752  {
753  memmove(result + index - 1, result + index, 8);
754  index += 8;
755  }
756  if (nexp & 4)
757  {
758  memmove(result + index - 1, result + index, 4);
759  index += 4;
760  }
761  if (nexp & 2)
762  {
763  memmove(result + index - 1, result + index, 2);
764  index += 2;
765  }
766  if (nexp & 1)
767  {
768  result[index - 1] = result[index];
769  }
770  result[nexp] = '.';
771  index = olength + 1;
772  }
773  else if (exp >= 0)
774  {
775  /* we supplied the trailing zeros earlier, now just set the length. */
776  index = olength + exp;
777  }
778  else
779  {
780  index = olength + (2 - nexp);
781  }
782 
783  return index;
784 }
785 
786 static inline int
787 to_chars(floating_decimal_64 v, const bool sign, char *const result)
788 {
789  /* Step 5: Print the decimal representation. */
790  int index = 0;
791 
792  uint64 output = v.mantissa;
793  uint32 olength = decimalLength(output);
794  int32 exp = v.exponent + olength - 1;
795 
796  if (sign)
797  {
798  result[index++] = '-';
799  }
800 
801  /*
802  * The thresholds for fixed-point output are chosen to match printf
803  * defaults. Beware that both the code of to_chars_df and the value of
804  * DOUBLE_SHORTEST_DECIMAL_LEN are sensitive to these thresholds.
805  */
806  if (exp >= -4 && exp < 15)
807  return to_chars_df(v, olength, result + index) + sign;
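 /*
  * Editorial note (not in the original source): with these thresholds,
  * 0.0001 (display exponent -4) is printed as "0.0001", whereas 0.00001
  * (display exponent -5) becomes "1e-05", and 1e15 (display exponent 15)
  * becomes "1e+15".
  */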
808 
809  /*
810  * If v.exponent is exactly 0, we might have reached here via the small
811  * integer fast path, in which case v.mantissa might contain trailing
812  * (decimal) zeros. For scientific notation we need to move these zeros
813  * into the exponent. (For fixed point this doesn't matter, which is why
814  * we do this here rather than above.)
815  *
816  * Since we already calculated the display exponent (exp) above based on
817  * the old decimal length, that value does not change here. Instead, we
818  * just reduce the display length for each digit removed.
819  *
820  * If we didn't get here via the fast path, the raw exponent will not
821  * usually be 0, and there will be no trailing zeros, so we pay no more
822  * than one div10/multiply extra cost. We claw back half of that by
823  * checking for divisibility by 2 before dividing by 10.
824  */
825  if (v.exponent == 0)
826  {
827  while ((output & 1) == 0)
828  {
829  const uint64 q = div10(output);
830  const uint32 r = (uint32) (output - 10 * q);
831 
832  if (r != 0)
833  break;
834  output = q;
835  --olength;
836  }
837  }
838 
839  /*----
840  * Print the decimal digits.
841  *
842  * The following code is equivalent to:
843  *
844  * for (uint32 i = 0; i < olength - 1; ++i) {
845  * const uint32 c = output % 10; output /= 10;
846  * result[index + olength - i] = (char) ('0' + c);
847  * }
848  * result[index] = '0' + output % 10;
849  *----
850  */
851 
852  uint32 i = 0;
853 
854  /*
855  * We prefer 32-bit operations, even on 64-bit platforms. We have at most
856  * 17 digits, and uint32 can store 9 digits. If output doesn't fit into
857  * uint32, we cut off 8 digits, so the rest will fit into uint32.
858  */
859  if ((output >> 32) != 0)
860  {
861  /* Expensive 64-bit division. */
862  const uint64 q = div1e8(output);
863  uint32 output2 = (uint32) (output - 100000000 * q);
864 
865  output = q;
866 
867  const uint32 c = output2 % 10000;
868 
869  output2 /= 10000;
870 
871  const uint32 d = output2 % 10000;
872  const uint32 c0 = (c % 100) << 1;
873  const uint32 c1 = (c / 100) << 1;
874  const uint32 d0 = (d % 100) << 1;
875  const uint32 d1 = (d / 100) << 1;
876 
877  memcpy(result + index + olength - i - 1, DIGIT_TABLE + c0, 2);
878  memcpy(result + index + olength - i - 3, DIGIT_TABLE + c1, 2);
879  memcpy(result + index + olength - i - 5, DIGIT_TABLE + d0, 2);
880  memcpy(result + index + olength - i - 7, DIGIT_TABLE + d1, 2);
881  i += 8;
882  }
883 
884  uint32 output2 = (uint32) output;
885 
886  while (output2 >= 10000)
887  {
888  const uint32 c = output2 - 10000 * (output2 / 10000);
889 
890  output2 /= 10000;
891 
892  const uint32 c0 = (c % 100) << 1;
893  const uint32 c1 = (c / 100) << 1;
894 
895  memcpy(result + index + olength - i - 1, DIGIT_TABLE + c0, 2);
896  memcpy(result + index + olength - i - 3, DIGIT_TABLE + c1, 2);
897  i += 4;
898  }
899  if (output2 >= 100)
900  {
901  const uint32 c = (output2 % 100) << 1;
902 
903  output2 /= 100;
904  memcpy(result + index + olength - i - 1, DIGIT_TABLE + c, 2);
905  i += 2;
906  }
907  if (output2 >= 10)
908  {
909  const uint32 c = output2 << 1;
910 
911  /*
912  * We can't use memcpy here: the decimal dot goes between these two
913  * digits.
914  */
915  result[index + olength - i] = DIGIT_TABLE[c + 1];
916  result[index] = DIGIT_TABLE[c];
917  }
918  else
919  {
920  result[index] = (char) ('0' + output2);
921  }
922 
923  /* Print decimal point if needed. */
924  if (olength > 1)
925  {
926  result[index + 1] = '.';
927  index += olength + 1;
928  }
929  else
930  {
931  ++index;
932  }
933 
934  /* Print the exponent. */
935  result[index++] = 'e';
936  if (exp < 0)
937  {
938  result[index++] = '-';
939  exp = -exp;
940  }
941  else
942  result[index++] = '+';
943 
944  if (exp >= 100)
945  {
946  const int32 c = exp % 10;
947 
948  memcpy(result + index, DIGIT_TABLE + 2 * (exp / 10), 2);
949  result[index + 2] = (char) ('0' + c);
950  index += 3;
951  }
952  else
953  {
954  memcpy(result + index, DIGIT_TABLE + 2 * exp, 2);
955  index += 2;
956  }
957 
958  return index;
959 }
960 
961 static inline bool
962 d2d_small_int(const uint64 ieeeMantissa,
963  const uint32 ieeeExponent,
964  floating_decimal_64 *v)
965 {
966  const int32 e2 = (int32) ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS;
967 
968  /*
969  * Avoid using multiple "return false;" here since it tends to provoke the
970  * compiler into inlining multiple copies of d2d, which is undesirable.
971  */
972 
973  if (e2 >= -DOUBLE_MANTISSA_BITS && e2 <= 0)
974  {
975  /*----
976  * Since 2^52 <= m2 < 2^53 and 0 <= -e2 <= 52:
977  * 1 <= f = m2 / 2^-e2 < 2^53.
978  *
979  * Test if the lower -e2 bits of the significand are 0, i.e. whether
980  * the fraction is 0. We can use ieeeMantissa here, since the implied
981  * 1 bit can never be tested by this; the implied 1 can only be part
982  * of a fraction if e2 < -DOUBLE_MANTISSA_BITS which we already
983  * checked. (e.g. 0.5 gives ieeeMantissa == 0 and e2 == -53)
984  */
985  const uint64 mask = (UINT64CONST(1) << -e2) - 1;
986  const uint64 fraction = ieeeMantissa & mask;
987 
988  if (fraction == 0)
989  {
990  /*----
991  * f is an integer in the range [1, 2^53).
992  * Note: mantissa might contain trailing (decimal) 0's.
993  * Note: since 2^53 < 10^16, there is no need to adjust
994  * decimalLength().
995  */
995  */
996  const uint64 m2 = (UINT64CONST(1) << DOUBLE_MANTISSA_BITS) | ieeeMantissa;
997 
998  v->mantissa = m2 >> -e2;
999  v->exponent = 0;
1000  return true;
1001  }
1002  }
1003 
1004  return false;
1005 }
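/*
 * Editorial worked example (not in the original file): for the double 32.0,
 * ieeeExponent == 1028 and ieeeMantissa == 0, so e2 == 1028 - 1023 - 52 ==
 * -47.  The low 47 mantissa bits are all zero, so the fast path applies and
 * yields mantissa == (UINT64CONST(1) << 52) >> 47 == 32 with exponent == 0.
 */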
1006 
1007 /*
1008  * Store the shortest decimal representation of the given double as an
1009  * UNTERMINATED string in the caller's supplied buffer (which must be at least
1010  * DOUBLE_SHORTEST_DECIMAL_LEN-1 bytes long).
1011  *
1012  * Returns the number of bytes stored.
1013  */
1014 int
1015 double_to_shortest_decimal_bufn(double f, char *result)
1016 {
1017  /*
1018  * Step 1: Decode the floating-point number, and unify normalized and
1019  * subnormal cases.
1020  */
1021  const uint64 bits = double_to_bits(f);
1022 
1023  /* Decode bits into sign, mantissa, and exponent. */
1024  const bool ieeeSign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0;
1025  const uint64 ieeeMantissa = bits & ((UINT64CONST(1) << DOUBLE_MANTISSA_BITS) - 1);
1026  const uint32 ieeeExponent = (uint32) ((bits >> DOUBLE_MANTISSA_BITS) & ((1u << DOUBLE_EXPONENT_BITS) - 1));
1027 
1028  /* Case distinction; exit early for the easy cases. */
1029  if (ieeeExponent == ((1u << DOUBLE_EXPONENT_BITS) - 1u) || (ieeeExponent == 0 && ieeeMantissa == 0))
1030  {
1031  return copy_special_str(result, ieeeSign, (ieeeExponent != 0), (ieeeMantissa != 0));
1032  }
1033 
1034  floating_decimal_64 v;
1035  const bool isSmallInt = d2d_small_int(ieeeMantissa, ieeeExponent, &v);
1036 
1037  if (!isSmallInt)
1038  {
1039  v = d2d(ieeeMantissa, ieeeExponent);
1040  }
1041 
1042  return to_chars(v, ieeeSign, result);
1043 }
1044 
1045 /*
1046  * Store the shortest decimal representation of the given double as a
1047  * null-terminated string in the caller's supplied buffer (which must be at
1048  * least DOUBLE_SHORTEST_DECIMAL_LEN bytes long).
1049  *
1050  * Returns the string length.
1051  */
1052 int
1053 double_to_shortest_decimal_buf(double f, char *result)
1054 {
1055  const int index = double_to_shortest_decimal_bufn(f, result);
1056 
1057  /* Terminate the string. */
1058  Assert(index < DOUBLE_SHORTEST_DECIMAL_LEN);
1059  result[index] = '\0';
1060  return index;
1061 }
1062 
1063 /*
1064  * Return the shortest decimal representation as a null-terminated palloc'd
1065  * string (outside the backend, uses malloc() instead).
1066  *
1067  * Caller is responsible for freeing the result.
1068  */
1069 char *
1070 double_to_shortest_decimal(double f)
1071 {
1072  char *const result = (char *) palloc(DOUBLE_SHORTEST_DECIMAL_LEN);
1073 
1074  double_to_shortest_decimal_buf(f, result);
1075  return result;
1076 }
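A usage sketch, not part of d2s.c: frontend code linked with libpgcommon could call the null-terminated variant along these lines (the helper name and the printf output are illustrative only).

#include "postgres_fe.h"

#include "common/shortest_dec.h"

#include <stdio.h>

/* Print the shortest decimal form that round-trips back to the same double. */
static void
print_shortest(double d)
{
	char		buf[DOUBLE_SHORTEST_DECIMAL_LEN];
	int			len = double_to_shortest_decimal_buf(d, buf);

	printf("%s (%d bytes)\n", buf, len);
}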