PostgreSQL Source Code  git master
formatting.c
Go to the documentation of this file.
1 /* -----------------------------------------------------------------------
2  * formatting.c
3  *
4  * src/backend/utils/adt/formatting.c
5  *
6  *
7  * Portions Copyright (c) 1999-2019, PostgreSQL Global Development Group
8  *
9  *
10  * TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
11  *
12  * The PostgreSQL routines for a timestamp/int/float/numeric formatting,
13  * inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
14  *
15  *
16  * Cache & Memory:
17  * The routines keep their own internal cache of parsed format pictures.
18  *
19  * The cache uses a static buffer and is persistent across transactions. If
20  * the format picture is bigger than the cache buffer, it is not cached and
21  * the parser is called every time.
22  *
23  * NOTE for Number version:
24  * Everything in this version is implemented as keywords (suffixes are
25  * not used), because a format picture describes only *one* item (a
26  * number). This differs from the timestamp version, where each keyword
27  * can have suffixes.
28  *
29  * NOTE for Timestamp routines:
30  * In this module the POSIX 'struct tm' type is *not* used, but rather
31  * PgSQL type, which has tm_mon based on one (*non* zero) and
32  * year *not* based on 1900, but is used full year number.
33  * Module supports AD / BC / AM / PM.
34  *
35  * Supported types for to_char():
36  *
37  * Timestamp, Numeric, int4, int8, float4, float8
38  *
39  * Supported types for reverse conversion:
40  *
41  * Timestamp - to_timestamp()
42  * Date - to_date()
43  * Numeric - to_number()
44  *
45  *
46  * Karel Zak
47  *
48  * TODO
49  * - better number building (formatting) / parsing, now it isn't
50  * ideal code
51  * - use Assert()
52  * - add support for roman number to standard number conversion
53  * - add support for number spelling
54  * - add support for string to string formatting (we must be better
55  * than Oracle :-),
56  * to_char('Hello', 'X X X X X') -> 'H e l l o'
57  *
58  * -----------------------------------------------------------------------
59  */
60 
61 #ifdef DEBUG_TO_FROM_CHAR
62 #define DEBUG_elog_output DEBUG3
63 #endif
64 
65 #include "postgres.h"
66 
67 #include <ctype.h>
68 #include <unistd.h>
69 #include <math.h>
70 #include <float.h>
71 #include <limits.h>
72 
73 /*
74  * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
75  * declare them in <wchar.h>.
76  */
77 #ifdef HAVE_WCHAR_H
78 #include <wchar.h>
79 #endif
80 #ifdef HAVE_WCTYPE_H
81 #include <wctype.h>
82 #endif
83 
84 #ifdef USE_ICU
85 #include <unicode/ustring.h>
86 #endif
87 
88 #include "catalog/pg_collation.h"
89 #include "mb/pg_wchar.h"
90 #include "utils/builtins.h"
91 #include "utils/date.h"
92 #include "utils/datetime.h"
93 #include "utils/float.h"
94 #include "utils/formatting.h"
95 #include "utils/int8.h"
96 #include "utils/memutils.h"
97 #include "utils/numeric.h"
98 #include "utils/pg_locale.h"
99 
100 /* ----------
101  * Routines type
102  * ----------
103  */
104 #define DCH_TYPE 1 /* DATE-TIME version */
105 #define NUM_TYPE 2 /* NUMBER version */
106 
107 /* ----------
108  * KeyWord Index (ascii from position 32 (' ') to 126 (~))
109  * ----------
110  */
111 #define KeyWord_INDEX_SIZE ('~' - ' ')
112 #define KeyWord_INDEX_FILTER(_c) ((_c) <= ' ' || (_c) >= '~' ? 0 : 1)
113 
114 /* ----------
115  * Maximal length of one node
116  * ----------
117  */
118 #define DCH_MAX_ITEM_SIZ 12 /* max localized day name */
119 #define NUM_MAX_ITEM_SIZ 8 /* roman number (RN has 15 chars) */
120 
121 
122 /* ----------
123  * Format parser structs
124  * ----------
125  */
/*
 * One entry of a suffix table (see DCH_suff): a short modifier such as
 * "FM" or "TH" that may be attached to a keyword in a format picture.
 */
typedef struct
{
	const char *name;			/* text of the suffix */
	int			len;			/* length of name */
	int			id;				/* code stored in node->suffix */
	int			type;			/* SUFFTYPE_PREFIX or SUFFTYPE_POSTFIX */
} KeySuffix;
133 
134 /* ----------
135  * FromCharDateMode
136  * ----------
137  *
138  * This value is used to nominate one of several distinct (and mutually
139  * exclusive) date conventions that a keyword can belong to.
140  */
typedef enum
{
	FROM_CHAR_DATE_NONE = 0,	/* Value does not affect date mode. */
	FROM_CHAR_DATE_GREGORIAN,	/* Gregorian (day, month, year) style date */
	FROM_CHAR_DATE_ISOWEEK		/* ISO 8601 week date */
} FromCharDateMode;

/*
 * One entry of a keyword table (DCH_keywords / NUM_keywords).
 *
 * The NUMBER table initializes only name, len and id; the remaining members
 * are then zero, i.e. is_digit = false and date_mode = FROM_CHAR_DATE_NONE.
 */
typedef struct
{
	const char *name;			/* keyword text as written in a format picture */
	int			len;			/* length of name */
	int			id;				/* DCH_xxx / NUM_xxx code of the keyword */
	bool		is_digit;		/* true in DCH_keywords for numeric fields
								 * such as DD or HH24, false for textual
								 * ones such as MONTH or AD */
	FromCharDateMode date_mode; /* date convention the keyword belongs to */
} KeyWord;
156 
157 typedef struct
158 {
159  uint8 type; /* NODE_TYPE_XXX, see below */
160  char character[MAX_MULTIBYTE_CHAR_LEN + 1]; /* if type is CHAR */
161  uint8 suffix; /* keyword prefix/suffix code, if any */
162  const KeyWord *key; /* if type is ACTION */
163 } FormatNode;
164 
165 #define NODE_TYPE_END 1
166 #define NODE_TYPE_ACTION 2
167 #define NODE_TYPE_CHAR 3
168 #define NODE_TYPE_SEPARATOR 4
169 #define NODE_TYPE_SPACE 5
170 
171 #define SUFFTYPE_PREFIX 1
172 #define SUFFTYPE_POSTFIX 2
173 
174 #define CLOCK_24_HOUR 0
175 #define CLOCK_12_HOUR 1
176 
177 
178 /* ----------
179  * Full months
180  * ----------
181  */
182 static const char *const months_full[] = {
183  "January", "February", "March", "April", "May", "June", "July",
184  "August", "September", "October", "November", "December", NULL
185 };
186 
187 static const char *const days_short[] = {
188  "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
189 };
190 
191 /* ----------
192  * AD / BC
193  * ----------
194  * There is no 0 AD. Years go from 1 BC to 1 AD, so a non-positive year
195  * is turned into a positive one as 1 - year (0 becomes 1, -1 becomes 2,
196  * and so on). For interval years, we just return the year.
197  */
198 #define ADJUST_YEAR(year, is_interval) ((is_interval) ? (year) : ((year) <= 0 ? -((year) - 1) : (year)))
199 
200 #define A_D_STR "A.D."
201 #define a_d_STR "a.d."
202 #define AD_STR "AD"
203 #define ad_STR "ad"
204 
205 #define B_C_STR "B.C."
206 #define b_c_STR "b.c."
207 #define BC_STR "BC"
208 #define bc_STR "bc"
209 
210 /*
211  * AD / BC strings for seq_search.
212  *
213  * These are given in two variants, a long form with periods and a standard
214  * form without.
215  *
216  * The array is laid out such that matches for AD have an even index, and
217  * matches for BC have an odd index. So the boolean value for BC is given by
218  * taking the array index of the match, modulo 2.
219  */
220 static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR, NULL};
221 static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR, NULL};
222 
223 /* ----------
224  * AM / PM
225  * ----------
226  */
227 #define A_M_STR "A.M."
228 #define a_m_STR "a.m."
229 #define AM_STR "AM"
230 #define am_STR "am"
231 
232 #define P_M_STR "P.M."
233 #define p_m_STR "p.m."
234 #define PM_STR "PM"
235 #define pm_STR "pm"
236 
237 /*
238  * AM / PM strings for seq_search.
239  *
240  * These are given in two variants, a long form with periods and a standard
241  * form without.
242  *
243  * The array is laid out such that matches for AM have an even index, and
244  * matches for PM have an odd index. So the boolean value for PM is given by
245  * taking the array index of the match, modulo 2.
246  */
247 static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR, NULL};
248 static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR, NULL};
249 
250 /* ----------
251  * Months in roman-numeral
252  * (Must be in reverse order for seq_search (in FROM_CHAR), because
253  * 'VIII' must have higher precedence than 'V')
254  * ----------
255  */
256 static const char *const rm_months_upper[] =
257 {"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I", NULL};
258 
259 static const char *const rm_months_lower[] =
260 {"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL};
261 
262 /* ----------
263  * Roman numbers
264  * ----------
265  */
266 static const char *const rm1[] = {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", NULL};
267 static const char *const rm10[] = {"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC", NULL};
268 static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", NULL};
269 
270 /* ----------
271  * Ordinal postfixes
272  * ----------
273  */
274 static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL};
275 static const char *const numth[] = {"st", "nd", "rd", "th", NULL};
276 
277 /* ----------
278  * Flags & Options:
279  * ----------
280  */
281 #define ONE_UPPER 1 /* Name */
282 #define ALL_UPPER 2 /* NAME */
283 #define ALL_LOWER 3 /* name */
284 
285 #define MAX_MONTH_LEN 9
286 #define MAX_MON_LEN 3
287 #define MAX_DAY_LEN 9
288 #define MAX_DY_LEN 3
289 #define MAX_RM_LEN 4
290 
291 #define TH_UPPER 1
292 #define TH_LOWER 2
293 
294 /* ----------
295  * Number description struct
296  * ----------
297  */
/*
 * Summary of a number format picture, filled in one keyword at a time by
 * NUMDesc_prepare().
 */
typedef struct
{
	int			pre;			/* count of digit positions before the decimal
								 * point */
	int			post;			/* count of digit positions after the decimal
								 * point */
	int			lsign;			/* locale sign wanted: one of NUM_LSIGN_xxx */
	int			flag;			/* OR of NUM_F_xxx bits */
	int			pre_lsign_num;	/* tmp value for lsign */
	int			multi;			/* multiplier for 'V' */
	int			zero_start;		/* position of first zero */
	int			zero_end;		/* position of last zero */
	int			need_locale;	/* locale information is required */
} NUMDesc;
310 
311 /* ----------
312  * Flags for NUMBER version
313  * ----------
314  */
315 #define NUM_F_DECIMAL (1 << 1)
316 #define NUM_F_LDECIMAL (1 << 2)
317 #define NUM_F_ZERO (1 << 3)
318 #define NUM_F_BLANK (1 << 4)
319 #define NUM_F_FILLMODE (1 << 5)
320 #define NUM_F_LSIGN (1 << 6)
321 #define NUM_F_BRACKET (1 << 7)
322 #define NUM_F_MINUS (1 << 8)
323 #define NUM_F_PLUS (1 << 9)
324 #define NUM_F_ROMAN (1 << 10)
325 #define NUM_F_MULTI (1 << 11)
326 #define NUM_F_PLUS_POST (1 << 12)
327 #define NUM_F_MINUS_POST (1 << 13)
328 #define NUM_F_EEEE (1 << 14)
329 
330 #define NUM_LSIGN_PRE (-1)
331 #define NUM_LSIGN_POST 1
332 #define NUM_LSIGN_NONE 0
333 
334 /* ----------
335  * Tests
336  * ----------
337  */
338 #define IS_DECIMAL(_f) ((_f)->flag & NUM_F_DECIMAL)
339 #define IS_LDECIMAL(_f) ((_f)->flag & NUM_F_LDECIMAL)
340 #define IS_ZERO(_f) ((_f)->flag & NUM_F_ZERO)
341 #define IS_BLANK(_f) ((_f)->flag & NUM_F_BLANK)
342 #define IS_FILLMODE(_f) ((_f)->flag & NUM_F_FILLMODE)
343 #define IS_BRACKET(_f) ((_f)->flag & NUM_F_BRACKET)
344 #define IS_MINUS(_f) ((_f)->flag & NUM_F_MINUS)
345 #define IS_LSIGN(_f) ((_f)->flag & NUM_F_LSIGN)
346 #define IS_PLUS(_f) ((_f)->flag & NUM_F_PLUS)
347 #define IS_ROMAN(_f) ((_f)->flag & NUM_F_ROMAN)
348 #define IS_MULTI(_f) ((_f)->flag & NUM_F_MULTI)
349 #define IS_EEEE(_f) ((_f)->flag & NUM_F_EEEE)
350 
351 /* ----------
352  * Format picture cache
353  *
354  * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long;
355  * likewise number format pictures up to NUM_CACHE_SIZE bytes long.
356  *
357  * For simplicity, the cache entries are fixed-size, so they allow for the
358  * worst case of a FormatNode for each byte in the picture string.
359  *
360  * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and
361  * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that
362  * we don't waste too much space by palloc'ing them individually. Be sure
363  * to adjust those macros if you add fields to those structs.
364  *
365  * The max number of entries in each cache is DCH_CACHE_ENTRIES
366  * resp. NUM_CACHE_ENTRIES.
367  * ----------
368  */
369 #define DCH_CACHE_OVERHEAD \
370  MAXALIGN(sizeof(bool) + sizeof(int))
371 #define NUM_CACHE_OVERHEAD \
372  MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc))
373 
374 #define DCH_CACHE_SIZE \
375  ((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
376 #define NUM_CACHE_SIZE \
377  ((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
378 
379 #define DCH_CACHE_ENTRIES 20
380 #define NUM_CACHE_ENTRIES 20
381 
382 typedef struct
383 {
385  char str[DCH_CACHE_SIZE + 1];
386  bool valid;
387  int age;
388 } DCHCacheEntry;
389 
390 typedef struct
391 {
393  char str[NUM_CACHE_SIZE + 1];
394  bool valid;
395  int age;
397 } NUMCacheEntry;
398 
399 /* global cache for date/time format pictures */
401 static int n_DCHCache = 0; /* current number of entries */
402 static int DCHCounter = 0; /* aging-event counter */
403 
404 /* global cache for number format pictures */
406 static int n_NUMCache = 0; /* current number of entries */
407 static int NUMCounter = 0; /* aging-event counter */
408 
409 /* ----------
410  * For char->date/time conversion
411  * ----------
412  */
413 typedef struct
414 {
416  int hh,
417  pm,
418  mi,
419  ss,
420  ssss,
421  d, /* stored as 1-7, Sunday = 1, 0 means missing */
422  dd,
423  ddd,
424  mm,
425  ms,
426  year,
427  bc,
428  ww,
429  w,
430  cc,
431  j,
432  us,
433  yysz, /* is it YY or YYYY ? */
434  clock, /* 12 or 24 hour clock? */
435  tzsign, /* +1, -1 or 0 if timezone info is absent */
436  tzh,
437  tzm;
438 } TmFromChar;
439 
440 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
441 
442 /* ----------
443  * Debug
444  * ----------
445  */
#ifdef DEBUG_TO_FROM_CHAR
/*
 * Dump a TmFromChar (DEBUG_TMFC) or a struct pg_tm (DEBUG_TM) through elog()
 * at level DEBUG_elog_output.  Both expand to a single expression without a
 * trailing semicolon, so "DEBUG_TMFC(x);" is exactly one statement whether
 * or not debugging is compiled in.
 */
#define DEBUG_TMFC(_X) \
		elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d", \
			(_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
			(_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
			(_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
			(_X)->yysz, (_X)->clock)
#define DEBUG_TM(_X) \
		elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nisdst %d\nmon %d\n",\
			(_X)->tm_sec, (_X)->tm_year,\
			(_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\
			(_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
#else
/* Debugging disabled: both macros expand to nothing. */
#define DEBUG_TMFC(_X)
#define DEBUG_TM(_X)
#endif
462 
463 /* ----------
464  * Datetime to char conversion
465  * ----------
466  */
467 typedef struct TmToChar
468 {
469  struct pg_tm tm; /* classic 'tm' struct */
470  fsec_t fsec; /* fractional seconds */
471  const char *tzn; /* timezone */
472 } TmToChar;
473 
474 #define tmtcTm(_X) (&(_X)->tm)
475 #define tmtcTzn(_X) ((_X)->tzn)
476 #define tmtcFsec(_X) ((_X)->fsec)
477 
478 #define ZERO_tm(_X) \
479 do { \
480  (_X)->tm_sec = (_X)->tm_year = (_X)->tm_min = (_X)->tm_wday = \
481  (_X)->tm_hour = (_X)->tm_yday = (_X)->tm_isdst = 0; \
482  (_X)->tm_mday = (_X)->tm_mon = 1; \
483  (_X)->tm_zone = NULL; \
484 } while(0)
485 
486 #define ZERO_tmtc(_X) \
487 do { \
488  ZERO_tm( tmtcTm(_X) ); \
489  tmtcFsec(_X) = 0; \
490  tmtcTzn(_X) = NULL; \
491 } while(0)
492 
493 /*
494  * to_char(time) appears to to_char() as an interval, so this check
495  * is really for interval and time data types.
496  */
497 #define INVALID_FOR_INTERVAL \
498 do { \
499  if (is_interval) \
500  ereport(ERROR, \
501  (errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
502  errmsg("invalid format specification for an interval value"), \
503  errhint("Intervals are not tied to specific calendar dates."))); \
504 } while(0)
505 
506 /*****************************************************************************
507  * KeyWord definitions
508  *****************************************************************************/
509 
510 /* ----------
511  * Suffixes (FormatNode.suffix is an OR of these codes)
512  * ----------
513  */
514 #define DCH_S_FM 0x01
515 #define DCH_S_TH 0x02
516 #define DCH_S_th 0x04
517 #define DCH_S_SP 0x08
518 #define DCH_S_TM 0x10
519 
520 /* ----------
521  * Suffix tests
522  * ----------
523  */
524 #define S_THth(_s) ((((_s) & DCH_S_TH) || ((_s) & DCH_S_th)) ? 1 : 0)
525 #define S_TH(_s) (((_s) & DCH_S_TH) ? 1 : 0)
526 #define S_th(_s) (((_s) & DCH_S_th) ? 1 : 0)
527 #define S_TH_TYPE(_s) (((_s) & DCH_S_TH) ? TH_UPPER : TH_LOWER)
528 
529 /* Oracle toggles FM behavior, we don't; see docs. */
530 #define S_FM(_s) (((_s) & DCH_S_FM) ? 1 : 0)
531 #define S_SP(_s) (((_s) & DCH_S_SP) ? 1 : 0)
532 #define S_TM(_s) (((_s) & DCH_S_TM) ? 1 : 0)
533 
534 /* ----------
535  * Suffixes definition for DATE-TIME TO/FROM CHAR
536  * ----------
537  */
538 #define TM_SUFFIX_LEN 2
539 
540 static const KeySuffix DCH_suff[] = {
541  {"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
542  {"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
543  {"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX},
544  {"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
545  {"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
546  {"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
547  {"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
548  /* last */
549  {NULL, 0, 0, 0}
550 };
551 
552 
553 /* ----------
554  * Format-pictures (KeyWord).
555  *
556  * The KeyWord arrays are sorted alphabetically, *BUT* keywords that share a
557  * prefix are sorted from the longest (most specific) to the shortest:
558  *
559  * (example: "DDD","DD","Day","D" )
560  *
561  * (This ordering is required by the sequential search, because keywords in
562  * a format string have no explicit terminator. Is the keyword in
563  * "HH12blabla" "HH" or "HH12"? "HH12" must be tried first; "HH" would also
564  * match, but it would be the wrong choice.)
565  *
566  * (!)
567  * - A keyword's array position must equal its position in enum DCH/NUM_poz.
568  * (!)
570  * For fast search is used the 'int index[]', index is ascii table from position
571  * 32 (' ') to 126 (~), in this index is DCH_ / NUM_ enums for each ASCII
572  * position or -1 if char is not used in the KeyWord. Search example for
573  * string "MM":
574  * 1) see in index to index['M' - 32],
575  * 2) take keywords position (enum DCH_MI) from index
576  * 3) run sequential search in keywords[] from this position
577  *
578  * ----------
579  */
580 
581 typedef enum
582 {
597  DCH_FX, /* global suffix */
678 
679  /* last */
681 } DCH_poz;
682 
683 typedef enum
684 {
721 
722  /* last */
724 } NUM_poz;
725 
726 /* ----------
727  * KeyWords for DATE-TIME version
728  * ----------
729  */
730 static const KeyWord DCH_keywords[] = {
731 /* name, len, id, is_digit, date_mode */
732  {"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE}, /* A */
733  {"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
734  {"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
735  {"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
736  {"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE}, /* B */
737  {"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
738  {"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* C */
739  {"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE}, /* D */
740  {"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
741  {"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
742  {"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
743  {"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
744  {"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
745  {"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
746  {"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE}, /* F */
747  {"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* H */
748  {"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
749  {"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
750  {"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* I */
751  {"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
752  {"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
753  {"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
754  {"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
755  {"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
756  {"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
757  {"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
758  {"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* M */
759  {"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
760  {"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
761  {"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
762  {"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
763  {"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
764  {"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
765  {"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* O */
766  {"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE}, /* P */
767  {"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
768  {"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
769  {"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
770  {"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* S */
771  {"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
772  {"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* T */
773  {"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
774  {"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
775  {"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* U */
776  {"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* W */
777  {"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
778  {"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* Y */
779  {"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
780  {"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
781  {"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
782  {"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
783  {"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE}, /* a */
784  {"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
785  {"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
786  {"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
787  {"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE}, /* b */
788  {"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
789  {"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* c */
790  {"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE}, /* d */
791  {"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
792  {"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
793  {"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
794  {"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
795  {"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE}, /* f */
796  {"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* h */
797  {"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
798  {"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
799  {"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* i */
800  {"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
801  {"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
802  {"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
803  {"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
804  {"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
805  {"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
806  {"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
807  {"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* m */
808  {"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
809  {"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
810  {"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
811  {"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
812  {"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE}, /* p */
813  {"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
814  {"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
815  {"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
816  {"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* s */
817  {"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
818  {"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE}, /* t */
819  {"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* u */
820  {"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* w */
821  {"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
822  {"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* y */
823  {"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
824  {"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
825  {"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
826  {"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
827 
828  /* last */
829  {NULL, 0, 0, 0, 0}
830 };
831 
832 /* ----------
833  * KeyWords for NUMBER version
834  *
835  * The is_digit and date_mode fields are not relevant here.
836  * ----------
837  */
838 static const KeyWord NUM_keywords[] = {
839 /* name, len, id is in Index */
840  {",", 1, NUM_COMMA}, /* , */
841  {".", 1, NUM_DEC}, /* . */
842  {"0", 1, NUM_0}, /* 0 */
843  {"9", 1, NUM_9}, /* 9 */
844  {"B", 1, NUM_B}, /* B */
845  {"C", 1, NUM_C}, /* C */
846  {"D", 1, NUM_D}, /* D */
847  {"EEEE", 4, NUM_E}, /* E */
848  {"FM", 2, NUM_FM}, /* F */
849  {"G", 1, NUM_G}, /* G */
850  {"L", 1, NUM_L}, /* L */
851  {"MI", 2, NUM_MI}, /* M */
852  {"PL", 2, NUM_PL}, /* P */
853  {"PR", 2, NUM_PR},
854  {"RN", 2, NUM_RN}, /* R */
855  {"SG", 2, NUM_SG}, /* S */
856  {"SP", 2, NUM_SP},
857  {"S", 1, NUM_S},
858  {"TH", 2, NUM_TH}, /* T */
859  {"V", 1, NUM_V}, /* V */
860  {"b", 1, NUM_B}, /* b */
861  {"c", 1, NUM_C}, /* c */
862  {"d", 1, NUM_D}, /* d */
863  {"eeee", 4, NUM_E}, /* e */
864  {"fm", 2, NUM_FM}, /* f */
865  {"g", 1, NUM_G}, /* g */
866  {"l", 1, NUM_L}, /* l */
867  {"mi", 2, NUM_MI}, /* m */
868  {"pl", 2, NUM_PL}, /* p */
869  {"pr", 2, NUM_PR},
870  {"rn", 2, NUM_rn}, /* r */
871  {"sg", 2, NUM_SG}, /* s */
872  {"sp", 2, NUM_SP},
873  {"s", 1, NUM_S},
874  {"th", 2, NUM_th}, /* t */
875  {"v", 1, NUM_V}, /* v */
876 
877  /* last */
878  {NULL, 0, 0}
879 };
880 
881 
882 /* ----------
883  * KeyWords index for DATE-TIME version
884  * ----------
885  */
886 static const int DCH_index[KeyWord_INDEX_SIZE] = {
887 /*
888 0 1 2 3 4 5 6 7 8 9
889 */
890  /*---- first 0..31 chars are skipped ----*/
891 
892  -1, -1, -1, -1, -1, -1, -1, -1,
893  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
894  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
895  -1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,
896  DCH_FX, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF,
898  -1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,
899  DCH_day, -1, DCH_fx, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,
900  -1, -1, DCH_p_m, DCH_q, DCH_rm, DCH_ssss, DCH_tz, DCH_us, -1, DCH_ww,
901  -1, DCH_y_yyy, -1, -1, -1, -1
902 
903  /*---- chars over 126 are skipped ----*/
904 };
905 
906 /* ----------
907  * KeyWords index for NUMBER version
908  * ----------
909  */
910 static const int NUM_index[KeyWord_INDEX_SIZE] = {
911 /*
912 0 1 2 3 4 5 6 7 8 9
913 */
914  /*---- first 0..31 chars are skipped ----*/
915 
916  -1, -1, -1, -1, -1, -1, -1, -1,
917  -1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,
918  -1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,
919  -1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E,
920  NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,
921  NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,
922  -1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,
923  NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi,
924  -1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,
925  -1, -1, -1, -1, -1, -1
926 
927  /*---- chars over 126 are skipped ----*/
928 };
929 
930 /* ----------
931  * Number processor struct
932  * ----------
933  */
934 typedef struct NUMProc
935 {
937  NUMDesc *Num; /* number description */
938 
939  int sign, /* '-' or '+' */
940  sign_wrote, /* was sign write */
941  num_count, /* number of write digits */
942  num_in, /* is inside number */
943  num_curr, /* current position in number */
944  out_pre_spaces, /* spaces before first digit */
945 
946  read_dec, /* to_number - was read dec. point */
947  read_post, /* to_number - number of dec. digit */
948  read_pre; /* to_number - number non-dec. digit */
949 
950  char *number, /* string with number */
951  *number_p, /* pointer to current number position */
952  *inout, /* in / out buffer */
953  *inout_p, /* pointer to current inout position */
954  *last_relevant, /* last relevant number after decimal point */
955 
956  *L_negative_sign, /* Locale */
957  *L_positive_sign,
958  *decimal,
959  *L_thousands_sep,
960  *L_currency_symbol;
961 } NUMProc;
962 
963 
964 /* ----------
965  * Functions
966  * ----------
967  */
968 static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
969  const int *index);
970 static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
971 static bool is_separator_char(const char *str);
972 static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
973 static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
974  const KeySuffix *suf, const int *index, int ver, NUMDesc *Num);
975 
976 static void DCH_to_char(FormatNode *node, bool is_interval,
977  TmToChar *in, char *out, Oid collid);
978 static void DCH_from_char(FormatNode *node, char *in, TmFromChar *out);
979 
980 #ifdef DEBUG_TO_FROM_CHAR
981 static void dump_index(const KeyWord *k, const int *index);
982 static void dump_node(FormatNode *node, int max);
983 #endif
984 
985 static const char *get_th(char *num, int type);
986 static char *str_numth(char *dest, char *num, int type);
987 static int adjust_partial_year_to_2020(int year);
988 static int strspace_len(char *str);
989 static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode);
990 static void from_char_set_int(int *dest, const int value, const FormatNode *node);
991 static int from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node);
992 static int from_char_parse_int(int *dest, char **src, FormatNode *node);
993 static int seq_search(char *name, const char *const *array, int type, int max, int *len);
994 static int from_char_seq_search(int *dest, char **src, const char *const *array, int type, int max, FormatNode *node);
995 static void do_to_timestamp(text *date_txt, text *fmt,
996  struct pg_tm *tm, fsec_t *fsec);
997 static char *fill_str(char *str, int c, int max);
998 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
999 static char *int_to_roman(int number);
1000 static void NUM_prepare_locale(NUMProc *Np);
1001 static char *get_last_relevant_decnum(char *num);
1002 static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
1003 static void NUM_numpart_to_char(NUMProc *Np, int id);
1004 static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
1005  char *number, int input_len, int to_char_out_pre_spaces,
1006  int sign, bool is_to_char, Oid collid);
1007 static DCHCacheEntry *DCH_cache_getnew(const char *str);
1008 static DCHCacheEntry *DCH_cache_search(const char *str);
1009 static DCHCacheEntry *DCH_cache_fetch(const char *str);
1010 static NUMCacheEntry *NUM_cache_getnew(const char *str);
1011 static NUMCacheEntry *NUM_cache_search(const char *str);
1012 static NUMCacheEntry *NUM_cache_fetch(const char *str);
1013 
1014 
1015 /* ----------
1016  * Fast sequential search, use index for data selection which
1017  * go to seq. cycle (it is very fast for unwanted strings)
1018  * (can't be used binary search in format parsing)
1019  * ----------
1020  */
1021 static const KeyWord *
1022 index_seq_search(const char *str, const KeyWord *kw, const int *index)
1023 {
1024  int poz;
1025 
1026  if (!KeyWord_INDEX_FILTER(*str))
1027  return NULL;
1028 
1029  if ((poz = *(index + (*str - ' '))) > -1)
1030  {
1031  const KeyWord *k = kw + poz;
1032 
1033  do
1034  {
1035  if (strncmp(str, k->name, k->len) == 0)
1036  return k;
1037  k++;
1038  if (!k->name)
1039  return NULL;
1040  } while (*str == *k->name);
1041  }
1042  return NULL;
1043 }
1044 
1045 static const KeySuffix *
1046 suff_search(const char *str, const KeySuffix *suf, int type)
1047 {
1048  const KeySuffix *s;
1049 
1050  for (s = suf; s->name != NULL; s++)
1051  {
1052  if (s->type != type)
1053  continue;
1054 
1055  if (strncmp(str, s->name, s->len) == 0)
1056  return s;
1057  }
1058  return NULL;
1059 }
1060 
/*
 * Report whether the first byte of "str" is a separator: a printable ASCII
 * character (0x21 .. 0x7E) that is neither a letter nor a digit.
 *
 * Only *str is examined; the string terminator, space, control characters
 * and bytes outside the ASCII range all yield false.
 */
static bool
is_separator_char(const char *str)
{
	/* ASCII printable character, but not letter or digit */
	return (*str > 0x20 && *str < 0x7F &&
			!(*str >= 'A' && *str <= 'Z') &&
			!(*str >= 'a' && *str <= 'z') &&
			!(*str >= '0' && *str <= '9'));
}
1070 
1071 /* ----------
1072  * Prepare NUMDesc (number description struct) via FormatNode struct
1073  * ----------
1074  */
1075 static void
1077 {
1078  if (n->type != NODE_TYPE_ACTION)
1079  return;
1080 
1081  if (IS_EEEE(num) && n->key->id != NUM_E)
1082  ereport(ERROR,
1083  (errcode(ERRCODE_SYNTAX_ERROR),
1084  errmsg("\"EEEE\" must be the last pattern used")));
1085 
1086  switch (n->key->id)
1087  {
1088  case NUM_9:
1089  if (IS_BRACKET(num))
1090  ereport(ERROR,
1091  (errcode(ERRCODE_SYNTAX_ERROR),
1092  errmsg("\"9\" must be ahead of \"PR\"")));
1093  if (IS_MULTI(num))
1094  {
1095  ++num->multi;
1096  break;
1097  }
1098  if (IS_DECIMAL(num))
1099  ++num->post;
1100  else
1101  ++num->pre;
1102  break;
1103 
1104  case NUM_0:
1105  if (IS_BRACKET(num))
1106  ereport(ERROR,
1107  (errcode(ERRCODE_SYNTAX_ERROR),
1108  errmsg("\"0\" must be ahead of \"PR\"")));
1109  if (!IS_ZERO(num) && !IS_DECIMAL(num))
1110  {
1111  num->flag |= NUM_F_ZERO;
1112  num->zero_start = num->pre + 1;
1113  }
1114  if (!IS_DECIMAL(num))
1115  ++num->pre;
1116  else
1117  ++num->post;
1118 
1119  num->zero_end = num->pre + num->post;
1120  break;
1121 
1122  case NUM_B:
1123  if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
1124  num->flag |= NUM_F_BLANK;
1125  break;
1126 
1127  case NUM_D:
1128  num->flag |= NUM_F_LDECIMAL;
1129  num->need_locale = true;
1130  /* FALLTHROUGH */
1131  case NUM_DEC:
1132  if (IS_DECIMAL(num))
1133  ereport(ERROR,
1134  (errcode(ERRCODE_SYNTAX_ERROR),
1135  errmsg("multiple decimal points")));
1136  if (IS_MULTI(num))
1137  ereport(ERROR,
1138  (errcode(ERRCODE_SYNTAX_ERROR),
1139  errmsg("cannot use \"V\" and decimal point together")));
1140  num->flag |= NUM_F_DECIMAL;
1141  break;
1142 
1143  case NUM_FM:
1144  num->flag |= NUM_F_FILLMODE;
1145  break;
1146 
1147  case NUM_S:
1148  if (IS_LSIGN(num))
1149  ereport(ERROR,
1150  (errcode(ERRCODE_SYNTAX_ERROR),
1151  errmsg("cannot use \"S\" twice")));
1152  if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
1153  ereport(ERROR,
1154  (errcode(ERRCODE_SYNTAX_ERROR),
1155  errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
1156  if (!IS_DECIMAL(num))
1157  {
1158  num->lsign = NUM_LSIGN_PRE;
1159  num->pre_lsign_num = num->pre;
1160  num->need_locale = true;
1161  num->flag |= NUM_F_LSIGN;
1162  }
1163  else if (num->lsign == NUM_LSIGN_NONE)
1164  {
1165  num->lsign = NUM_LSIGN_POST;
1166  num->need_locale = true;
1167  num->flag |= NUM_F_LSIGN;
1168  }
1169  break;
1170 
1171  case NUM_MI:
1172  if (IS_LSIGN(num))
1173  ereport(ERROR,
1174  (errcode(ERRCODE_SYNTAX_ERROR),
1175  errmsg("cannot use \"S\" and \"MI\" together")));
1176  num->flag |= NUM_F_MINUS;
1177  if (IS_DECIMAL(num))
1178  num->flag |= NUM_F_MINUS_POST;
1179  break;
1180 
1181  case NUM_PL:
1182  if (IS_LSIGN(num))
1183  ereport(ERROR,
1184  (errcode(ERRCODE_SYNTAX_ERROR),
1185  errmsg("cannot use \"S\" and \"PL\" together")));
1186  num->flag |= NUM_F_PLUS;
1187  if (IS_DECIMAL(num))
1188  num->flag |= NUM_F_PLUS_POST;
1189  break;
1190 
1191  case NUM_SG:
1192  if (IS_LSIGN(num))
1193  ereport(ERROR,
1194  (errcode(ERRCODE_SYNTAX_ERROR),
1195  errmsg("cannot use \"S\" and \"SG\" together")));
1196  num->flag |= NUM_F_MINUS;
1197  num->flag |= NUM_F_PLUS;
1198  break;
1199 
1200  case NUM_PR:
1201  if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
1202  ereport(ERROR,
1203  (errcode(ERRCODE_SYNTAX_ERROR),
1204  errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
1205  num->flag |= NUM_F_BRACKET;
1206  break;
1207 
1208  case NUM_rn:
1209  case NUM_RN:
1210  num->flag |= NUM_F_ROMAN;
1211  break;
1212 
1213  case NUM_L:
1214  case NUM_G:
1215  num->need_locale = true;
1216  break;
1217 
1218  case NUM_V:
1219  if (IS_DECIMAL(num))
1220  ereport(ERROR,
1221  (errcode(ERRCODE_SYNTAX_ERROR),
1222  errmsg("cannot use \"V\" and decimal point together")));
1223  num->flag |= NUM_F_MULTI;
1224  break;
1225 
1226  case NUM_E:
1227  if (IS_EEEE(num))
1228  ereport(ERROR,
1229  (errcode(ERRCODE_SYNTAX_ERROR),
1230  errmsg("cannot use \"EEEE\" twice")));
1231  if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
1232  IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
1233  IS_ROMAN(num) || IS_MULTI(num))
1234  ereport(ERROR,
1235  (errcode(ERRCODE_SYNTAX_ERROR),
1236  errmsg("\"EEEE\" is incompatible with other formats"),
1237  errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
1238  num->flag |= NUM_F_EEEE;
1239  break;
1240  }
1241 }
1242 
1243 /* ----------
1244  * Format parser, search small keywords and keyword's suffixes, and make
1245  * format-node tree.
1246  *
1247  * for DATE-TIME & NUMBER version
1248  * ----------
1249  */
1250 static void
1251 parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1252  const KeySuffix *suf, const int *index, int ver, NUMDesc *Num)
1253 {
1254  FormatNode *n;
1255 
1256 #ifdef DEBUG_TO_FROM_CHAR
1257  elog(DEBUG_elog_output, "to_char/number(): run parser");
1258 #endif
1259 
1260  n = node;
1261 
1262  while (*str)
1263  {
1264  int suffix = 0;
1265  const KeySuffix *s;
1266 
1267  /*
1268  * Prefix
1269  */
1270  if (ver == DCH_TYPE &&
1271  (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
1272  {
1273  suffix |= s->id;
1274  if (s->len)
1275  str += s->len;
1276  }
1277 
1278  /*
1279  * Keyword
1280  */
1281  if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
1282  {
1283  n->type = NODE_TYPE_ACTION;
1284  n->suffix = suffix;
1285  if (n->key->len)
1286  str += n->key->len;
1287 
1288  /*
1289  * NUM version: Prepare global NUMDesc struct
1290  */
1291  if (ver == NUM_TYPE)
1292  NUMDesc_prepare(Num, n);
1293 
1294  /*
1295  * Postfix
1296  */
1297  if (ver == DCH_TYPE && *str &&
1298  (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
1299  {
1300  n->suffix |= s->id;
1301  if (s->len)
1302  str += s->len;
1303  }
1304 
1305  n++;
1306  }
1307  else if (*str)
1308  {
1309  int chlen;
1310 
1311  /*
1312  * Process double-quoted literal string, if any
1313  */
1314  if (*str == '"')
1315  {
1316  str++;
1317  while (*str)
1318  {
1319  if (*str == '"')
1320  {
1321  str++;
1322  break;
1323  }
1324  /* backslash quotes the next character, if any */
1325  if (*str == '\\' && *(str + 1))
1326  str++;
1327  chlen = pg_mblen(str);
1328  n->type = NODE_TYPE_CHAR;
1329  memcpy(n->character, str, chlen);
1330  n->character[chlen] = '\0';
1331  n->key = NULL;
1332  n->suffix = 0;
1333  n++;
1334  str += chlen;
1335  }
1336  }
1337  else
1338  {
1339  /*
1340  * Outside double-quoted strings, backslash is only special if
1341  * it immediately precedes a double quote.
1342  */
1343  if (*str == '\\' && *(str + 1) == '"')
1344  str++;
1345  chlen = pg_mblen(str);
1346 
1347  if (ver == DCH_TYPE && is_separator_char(str))
1349  else if (isspace((unsigned char) *str))
1350  n->type = NODE_TYPE_SPACE;
1351  else
1352  n->type = NODE_TYPE_CHAR;
1353 
1354  memcpy(n->character, str, chlen);
1355  n->character[chlen] = '\0';
1356  n->key = NULL;
1357  n->suffix = 0;
1358  n++;
1359  str += chlen;
1360  }
1361  }
1362  }
1363 
1364  n->type = NODE_TYPE_END;
1365  n->suffix = 0;
1366 }
1367 
/* ----------
 * DEBUG: print the FormatNode list, one elog() line per node.
 *
 * Visits nodes at positions 0 .. max and stops early at the first
 * NODE_TYPE_END entry.  Only compiled with DEBUG_TO_FROM_CHAR.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR

#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
#define DUMP_FM(_suf)	(S_FM(_suf) ? "FM" : " ")

static void
dump_node(FormatNode *node, int max)
{
	int			pos = 0;

	elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");

	while (pos <= max)
	{
		FormatNode *cur = node + pos;

		if (cur->type == NODE_TYPE_END)
		{
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", pos);
			return;
		}

		if (cur->type == NODE_TYPE_ACTION)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
				 pos, cur->key->name,
				 DUMP_THth(cur->suffix), DUMP_FM(cur->suffix));
		else if (cur->type == NODE_TYPE_CHAR)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
				 pos, cur->character);
		else
			elog(DEBUG_elog_output, "%d:\t unknown NODE!", pos);

		pos++;
	}
}
#endif							/* DEBUG */
1403 
1404 /*****************************************************************************
1405  * Private utils
1406  *****************************************************************************/
1407 
1408 /* ----------
1409  * Return ST/ND/RD/TH for simple (1..9) numbers
1410  * type --> 0 upper, 1 lower
1411  * ----------
1412  */
1413 static const char *
1414 get_th(char *num, int type)
1415 {
1416  int len = strlen(num),
1417  last,
1418  seclast;
1419 
1420  last = *(num + (len - 1));
1421  if (!isdigit((unsigned char) last))
1422  ereport(ERROR,
1423  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1424  errmsg("\"%s\" is not a number", num)));
1425 
1426  /*
1427  * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
1428  * 'ST/st', 'ND/nd', 'RD/rd', respectively
1429  */
1430  if ((len > 1) && ((seclast = num[len - 2]) == '1'))
1431  last = 0;
1432 
1433  switch (last)
1434  {
1435  case '1':
1436  if (type == TH_UPPER)
1437  return numTH[0];
1438  return numth[0];
1439  case '2':
1440  if (type == TH_UPPER)
1441  return numTH[1];
1442  return numth[1];
1443  case '3':
1444  if (type == TH_UPPER)
1445  return numTH[2];
1446  return numth[2];
1447  default:
1448  if (type == TH_UPPER)
1449  return numTH[3];
1450  return numth[3];
1451  }
1452 }
1453 
/* ----------
 * Turn the number string 'num' into an ordinal by appending the suffix
 * chosen by get_th().
 *
 * The result is built in 'dest', which may be the same buffer as 'num';
 * either way the caller must provide room for the appended suffix.
 * 'type' is passed through to get_th().  Returns 'dest'.
 * ----------
 */
static char *
str_numth(char *dest, char *num, int type)
{
	const char *ordinal;

	if (dest != num)
		strcpy(dest, num);

	ordinal = get_th(num, type);
	strcat(dest, ordinal);

	return dest;
}
1467 
1468 /*****************************************************************************
1469  * upper/lower/initcap functions
1470  *****************************************************************************/
1471 
1472 #ifdef USE_ICU
1473 
1474 typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
1475  const UChar *src, int32_t srcLength,
1476  const char *locale,
1477  UErrorCode *pErrorCode);
1478 
1479 static int32_t
1480 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
1481  UChar **buff_dest, UChar *buff_source, int32_t len_source)
1482 {
1483  UErrorCode status;
1484  int32_t len_dest;
1485 
1486  len_dest = len_source; /* try first with same length */
1487  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1488  status = U_ZERO_ERROR;
1489  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1490  mylocale->info.icu.locale, &status);
1491  if (status == U_BUFFER_OVERFLOW_ERROR)
1492  {
1493  /* try again with adjusted length */
1494  pfree(*buff_dest);
1495  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1496  status = U_ZERO_ERROR;
1497  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1498  mylocale->info.icu.locale, &status);
1499  }
1500  if (U_FAILURE(status))
1501  ereport(ERROR,
1502  (errmsg("case conversion failed: %s", u_errorName(status))));
1503  return len_dest;
1504 }
1505 
1506 static int32_t
1507 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
1508  const UChar *src, int32_t srcLength,
1509  const char *locale,
1510  UErrorCode *pErrorCode)
1511 {
1512  return u_strToTitle(dest, destCapacity, src, srcLength,
1513  NULL, locale, pErrorCode);
1514 }
1515 
1516 #endif /* USE_ICU */
1517 
1518 /*
1519  * If the system provides the needed functions for wide-character manipulation
1520  * (which are all standardized by C99), then we implement upper/lower/initcap
1521  * using wide-character functions, if necessary. Otherwise we use the
1522  * traditional <ctype.h> functions, which of course will not work as desired
1523  * in multibyte character sets. Note that in either case we are effectively
1524  * assuming that the database character encoding matches the encoding implied
1525  * by LC_CTYPE.
1526  *
1527  * If the system provides locale_t and associated functions (which are
1528  * standardized by Open Group's XBD), we can support collations that are
1529  * neither default nor C. The code is written to handle both combinations
1530  * of have-wide-characters and have-locale_t, though it's rather unlikely
1531  * a platform would have the latter without the former.
1532  */
1533 
1534 /*
1535  * collation-aware, wide-character-aware lower function
1536  *
1537  * We pass the number of bytes so we can pass varlena and char*
1538  * to this function. The result is a palloc'd, null-terminated string.
1539  */
1540 char *
1541 str_tolower(const char *buff, size_t nbytes, Oid collid)
1542 {
1543  char *result;
1544 
1545  if (!buff)
1546  return NULL;
1547 
1548  /* C/POSIX collations use this path regardless of database encoding */
1549  if (lc_ctype_is_c(collid))
1550  {
1551  result = asc_tolower(buff, nbytes);
1552  }
1553  else
1554  {
1555  pg_locale_t mylocale = 0;
1556 
1557  if (collid != DEFAULT_COLLATION_OID)
1558  {
1559  if (!OidIsValid(collid))
1560  {
1561  /*
1562  * This typically means that the parser could not resolve a
1563  * conflict of implicit collations, so report it that way.
1564  */
1565  ereport(ERROR,
1566  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1567  errmsg("could not determine which collation to use for %s function",
1568  "lower()"),
1569  errhint("Use the COLLATE clause to set the collation explicitly.")));
1570  }
1571  mylocale = pg_newlocale_from_collation(collid);
1572  }
1573 
1574 #ifdef USE_ICU
1575  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1576  {
1577  int32_t len_uchar;
1578  int32_t len_conv;
1579  UChar *buff_uchar;
1580  UChar *buff_conv;
1581 
1582  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1583  len_conv = icu_convert_case(u_strToLower, mylocale,
1584  &buff_conv, buff_uchar, len_uchar);
1585  icu_from_uchar(&result, buff_conv, len_conv);
1586  pfree(buff_uchar);
1587  pfree(buff_conv);
1588  }
1589  else
1590 #endif
1591  {
1593  {
1594  wchar_t *workspace;
1595  size_t curr_char;
1596  size_t result_size;
1597 
1598  /* Overflow paranoia */
1599  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1600  ereport(ERROR,
1601  (errcode(ERRCODE_OUT_OF_MEMORY),
1602  errmsg("out of memory")));
1603 
1604  /* Output workspace cannot have more codes than input bytes */
1605  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1606 
1607  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1608 
1609  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1610  {
1611 #ifdef HAVE_LOCALE_T
1612  if (mylocale)
1613  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1614  else
1615 #endif
1616  workspace[curr_char] = towlower(workspace[curr_char]);
1617  }
1618 
1619  /*
1620  * Make result large enough; case change might change number
1621  * of bytes
1622  */
1623  result_size = curr_char * pg_database_encoding_max_length() + 1;
1624  result = palloc(result_size);
1625 
1626  wchar2char(result, workspace, result_size, mylocale);
1627  pfree(workspace);
1628  }
1629  else
1630  {
1631  char *p;
1632 
1633  result = pnstrdup(buff, nbytes);
1634 
1635  /*
1636  * Note: we assume that tolower_l() will not be so broken as
1637  * to need an isupper_l() guard test. When using the default
1638  * collation, we apply the traditional Postgres behavior that
1639  * forces ASCII-style treatment of I/i, but in non-default
1640  * collations you get exactly what the collation says.
1641  */
1642  for (p = result; *p; p++)
1643  {
1644 #ifdef HAVE_LOCALE_T
1645  if (mylocale)
1646  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1647  else
1648 #endif
1649  *p = pg_tolower((unsigned char) *p);
1650  }
1651  }
1652  }
1653  }
1654 
1655  return result;
1656 }
1657 
1658 /*
1659  * collation-aware, wide-character-aware upper function
1660  *
1661  * We pass the number of bytes so we can pass varlena and char*
1662  * to this function. The result is a palloc'd, null-terminated string.
1663  */
1664 char *
1665 str_toupper(const char *buff, size_t nbytes, Oid collid)
1666 {
1667  char *result;
1668 
1669  if (!buff)
1670  return NULL;
1671 
1672  /* C/POSIX collations use this path regardless of database encoding */
1673  if (lc_ctype_is_c(collid))
1674  {
1675  result = asc_toupper(buff, nbytes);
1676  }
1677  else
1678  {
1679  pg_locale_t mylocale = 0;
1680 
1681  if (collid != DEFAULT_COLLATION_OID)
1682  {
1683  if (!OidIsValid(collid))
1684  {
1685  /*
1686  * This typically means that the parser could not resolve a
1687  * conflict of implicit collations, so report it that way.
1688  */
1689  ereport(ERROR,
1690  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1691  errmsg("could not determine which collation to use for %s function",
1692  "upper()"),
1693  errhint("Use the COLLATE clause to set the collation explicitly.")));
1694  }
1695  mylocale = pg_newlocale_from_collation(collid);
1696  }
1697 
1698 #ifdef USE_ICU
1699  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1700  {
1701  int32_t len_uchar,
1702  len_conv;
1703  UChar *buff_uchar;
1704  UChar *buff_conv;
1705 
1706  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1707  len_conv = icu_convert_case(u_strToUpper, mylocale,
1708  &buff_conv, buff_uchar, len_uchar);
1709  icu_from_uchar(&result, buff_conv, len_conv);
1710  pfree(buff_uchar);
1711  pfree(buff_conv);
1712  }
1713  else
1714 #endif
1715  {
1717  {
1718  wchar_t *workspace;
1719  size_t curr_char;
1720  size_t result_size;
1721 
1722  /* Overflow paranoia */
1723  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1724  ereport(ERROR,
1725  (errcode(ERRCODE_OUT_OF_MEMORY),
1726  errmsg("out of memory")));
1727 
1728  /* Output workspace cannot have more codes than input bytes */
1729  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1730 
1731  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1732 
1733  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1734  {
1735 #ifdef HAVE_LOCALE_T
1736  if (mylocale)
1737  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1738  else
1739 #endif
1740  workspace[curr_char] = towupper(workspace[curr_char]);
1741  }
1742 
1743  /*
1744  * Make result large enough; case change might change number
1745  * of bytes
1746  */
1747  result_size = curr_char * pg_database_encoding_max_length() + 1;
1748  result = palloc(result_size);
1749 
1750  wchar2char(result, workspace, result_size, mylocale);
1751  pfree(workspace);
1752  }
1753  else
1754  {
1755  char *p;
1756 
1757  result = pnstrdup(buff, nbytes);
1758 
1759  /*
1760  * Note: we assume that toupper_l() will not be so broken as
1761  * to need an islower_l() guard test. When using the default
1762  * collation, we apply the traditional Postgres behavior that
1763  * forces ASCII-style treatment of I/i, but in non-default
1764  * collations you get exactly what the collation says.
1765  */
1766  for (p = result; *p; p++)
1767  {
1768 #ifdef HAVE_LOCALE_T
1769  if (mylocale)
1770  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1771  else
1772 #endif
1773  *p = pg_toupper((unsigned char) *p);
1774  }
1775  }
1776  }
1777  }
1778 
1779  return result;
1780 }
1781 
1782 /*
1783  * collation-aware, wide-character-aware initcap function
1784  *
1785  * We pass the number of bytes so we can pass varlena and char*
1786  * to this function. The result is a palloc'd, null-terminated string.
1787  */
1788 char *
1789 str_initcap(const char *buff, size_t nbytes, Oid collid)
1790 {
1791  char *result;
1792  int wasalnum = false;
1793 
1794  if (!buff)
1795  return NULL;
1796 
1797  /* C/POSIX collations use this path regardless of database encoding */
1798  if (lc_ctype_is_c(collid))
1799  {
1800  result = asc_initcap(buff, nbytes);
1801  }
1802  else
1803  {
1804  pg_locale_t mylocale = 0;
1805 
1806  if (collid != DEFAULT_COLLATION_OID)
1807  {
1808  if (!OidIsValid(collid))
1809  {
1810  /*
1811  * This typically means that the parser could not resolve a
1812  * conflict of implicit collations, so report it that way.
1813  */
1814  ereport(ERROR,
1815  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1816  errmsg("could not determine which collation to use for %s function",
1817  "initcap()"),
1818  errhint("Use the COLLATE clause to set the collation explicitly.")));
1819  }
1820  mylocale = pg_newlocale_from_collation(collid);
1821  }
1822 
1823 #ifdef USE_ICU
1824  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1825  {
1826  int32_t len_uchar,
1827  len_conv;
1828  UChar *buff_uchar;
1829  UChar *buff_conv;
1830 
1831  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1832  len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
1833  &buff_conv, buff_uchar, len_uchar);
1834  icu_from_uchar(&result, buff_conv, len_conv);
1835  pfree(buff_uchar);
1836  pfree(buff_conv);
1837  }
1838  else
1839 #endif
1840  {
1842  {
1843  wchar_t *workspace;
1844  size_t curr_char;
1845  size_t result_size;
1846 
1847  /* Overflow paranoia */
1848  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1849  ereport(ERROR,
1850  (errcode(ERRCODE_OUT_OF_MEMORY),
1851  errmsg("out of memory")));
1852 
1853  /* Output workspace cannot have more codes than input bytes */
1854  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1855 
1856  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1857 
1858  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1859  {
1860 #ifdef HAVE_LOCALE_T
1861  if (mylocale)
1862  {
1863  if (wasalnum)
1864  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1865  else
1866  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1867  wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
1868  }
1869  else
1870 #endif
1871  {
1872  if (wasalnum)
1873  workspace[curr_char] = towlower(workspace[curr_char]);
1874  else
1875  workspace[curr_char] = towupper(workspace[curr_char]);
1876  wasalnum = iswalnum(workspace[curr_char]);
1877  }
1878  }
1879 
1880  /*
1881  * Make result large enough; case change might change number
1882  * of bytes
1883  */
1884  result_size = curr_char * pg_database_encoding_max_length() + 1;
1885  result = palloc(result_size);
1886 
1887  wchar2char(result, workspace, result_size, mylocale);
1888  pfree(workspace);
1889  }
1890  else
1891  {
1892  char *p;
1893 
1894  result = pnstrdup(buff, nbytes);
1895 
1896  /*
1897  * Note: we assume that toupper_l()/tolower_l() will not be so
1898  * broken as to need guard tests. When using the default
1899  * collation, we apply the traditional Postgres behavior that
1900  * forces ASCII-style treatment of I/i, but in non-default
1901  * collations you get exactly what the collation says.
1902  */
1903  for (p = result; *p; p++)
1904  {
1905 #ifdef HAVE_LOCALE_T
1906  if (mylocale)
1907  {
1908  if (wasalnum)
1909  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1910  else
1911  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1912  wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
1913  }
1914  else
1915 #endif
1916  {
1917  if (wasalnum)
1918  *p = pg_tolower((unsigned char) *p);
1919  else
1920  *p = pg_toupper((unsigned char) *p);
1921  wasalnum = isalnum((unsigned char) *p);
1922  }
1923  }
1924  }
1925  }
1926  }
1927 
1928  return result;
1929 }
1930 
/*
 * ASCII-only lower function
 *
 * Takes a byte count rather than relying on a terminator, so that both
 * varlena payloads and C strings can be passed.  The result is a palloc'd,
 * null-terminated copy in which every byte has been run through
 * pg_ascii_tolower().  Returns NULL if buff is NULL.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *result;
	char	   *p;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	p = result;
	while (*p != '\0')
	{
		*p = pg_ascii_tolower((unsigned char) *p);
		p++;
	}

	return result;
}
1953 
/*
 * ASCII-only upper function
 *
 * Takes a byte count rather than relying on a terminator, so that both
 * varlena payloads and C strings can be passed.  The result is a palloc'd,
 * null-terminated copy in which every byte has been run through
 * pg_ascii_toupper().  Returns NULL if buff is NULL.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *result;
	char	   *p;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	p = result;
	while (*p != '\0')
	{
		*p = pg_ascii_toupper((unsigned char) *p);
		p++;
	}

	return result;
}
1976 
/*
 * ASCII-only initcap function
 *
 * Takes a byte count rather than relying on a terminator, so that both
 * varlena payloads and C strings can be passed.  The result is a palloc'd,
 * null-terminated copy: a byte is upper-cased when the byte before it is
 * not an ASCII letter or digit, and lower-cased otherwise.  Returns NULL if
 * buff is NULL.
 */
char *
asc_initcap(const char *buff, size_t nbytes)
{
	char	   *result;
	char	   *p;
	int			in_word = false;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	p = result;
	while (*p != '\0')
	{
		unsigned char in = (unsigned char) *p;
		char		c;

		c = in_word ? pg_ascii_tolower(in) : pg_ascii_toupper(in);
		*p++ = c;

		/* we don't trust isalnum() here */
		in_word = ((c >= 'A' && c <= 'Z') ||
				   (c >= 'a' && c <= 'z') ||
				   (c >= '0' && c <= '9'));
	}

	return result;
}
2011 
2012 /* convenience routines for when the input is null-terminated */
2013 
2014 static char *
2015 str_tolower_z(const char *buff, Oid collid)
2016 {
2017  return str_tolower(buff, strlen(buff), collid);
2018 }
2019 
2020 static char *
2021 str_toupper_z(const char *buff, Oid collid)
2022 {
2023  return str_toupper(buff, strlen(buff), collid);
2024 }
2025 
2026 static char *
2027 str_initcap_z(const char *buff, Oid collid)
2028 {
2029  return str_initcap(buff, strlen(buff), collid);
2030 }
2031 
/* asc_tolower() for a null-terminated input: the length is taken with strlen() */
static char *
asc_tolower_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_tolower(buff, nbytes);
}
2037 
/* asc_toupper() for a null-terminated input: the length is taken with strlen() */
static char *
asc_toupper_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_toupper(buff, nbytes);
}
2043 
2044 /* asc_initcap_z is not currently needed */
2045 
2046 
/* ----------
 * Skip TH / th in FROM_CHAR
 *
 * If S_THth is on for the suffix, step 'ptr' over up to two characters
 * (each measured with pg_mblen), stopping early at the end of the string.
 * Note that 'ptr' is evaluated more than once.
 * ----------
 */
#define SKIP_THth(ptr, _suf) \
	do { \
		if (S_THth(_suf) && *(ptr)) \
		{ \
			(ptr) += pg_mblen(ptr); \
			if (*(ptr)) \
				(ptr) += pg_mblen(ptr); \
		} \
	} while (0)
2061 
2062 
#ifdef DEBUG_TO_FROM_CHAR
/* -----------
 * DEBUG: dump the keyword index for checking.  For every index position the
 * corresponding ASCII character is shown, together with the keyword the
 * position refers to (or the raw index value if the position is unused),
 * followed by a summary of used and free positions.
 * ----------
 */
static void
dump_index(const KeyWord *k, const int *index)
{
	int			used = 0;
	int			unused = 0;
	int			slot;

	elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");

	for (slot = 0; slot < KeyWord_INDEX_SIZE; slot++)
	{
		/* -1 marks a position that no keyword starts with */
		if (index[slot] == -1)
		{
			unused++;
			elog(DEBUG_elog_output, "\t(%d) %c %d", slot, slot + 32, index[slot]);
			continue;
		}

		elog(DEBUG_elog_output, "\t%c: %s, ", slot + 32, k[index[slot]].name);
		used++;
	}

	elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
		 used, unused);
}
#endif							/* DEBUG */
2095 
2096 /* ----------
2097  * Return true if next format picture is not digit value
2098  * ----------
2099  */
2100 static bool
2102 {
2103  if (n->type == NODE_TYPE_END)
2104  return false;
2105 
2106  if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
2107  return true;
2108 
2109  /*
2110  * Next node
2111  */
2112  n++;
2113 
2114  /* end of format string is treated like a non-digit separator */
2115  if (n->type == NODE_TYPE_END)
2116  return true;
2117 
2118  if (n->type == NODE_TYPE_ACTION)
2119  {
2120  if (n->key->is_digit)
2121  return false;
2122 
2123  return true;
2124  }
2125  else if (n->character[1] == '\0' &&
2126  isdigit((unsigned char) n->character[0]))
2127  return false;
2128 
2129  return true; /* some non-digit input (separator) */
2130 }
2131 
2132 
/*
 * Expand a partially specified year to a full year number.
 *
 * Values below 1000 are mapped to the year nearest to 2020 that ends in the
 * same digits; values of 1000 and above are returned unchanged.
 */
static int
adjust_partial_year_to_2020(int year)
{
	/*
	 * Adjust all dates toward 2020; this is effectively what happens when we
	 * assume '70' is 1970 and '69' is 2069.
	 */
	/* Force 0-69 into the 2000's */
	if (year < 70)
		return year + 2000;
	/* Force 70-99 into the 1900's */
	else if (year < 100)
		return year + 1900;
	/* Force 100-519 into the 2000's */
	else if (year < 520)
		return year + 2000;
	/* Force 520-999 into the 1000's */
	else if (year < 1000)
		return year + 1000;
	else
		return year;
}
2155 
2156 
/*
 * Return the number of whitespace characters (as judged by isspace()) at
 * the start of 'str'.  The string itself is not modified.
 */
static int
strspace_len(char *str)
{
	int			len = 0;

	while (*str && isspace((unsigned char) *str))
	{
		str++;
		len++;
	}
	return len;
}
2169 
2170 /*
2171  * Set the date mode of a from-char conversion.
2172  *
2173  * Puke if the date mode has already been set, and the caller attempts to set
2174  * it to a conflicting mode.
2175  */
2176 static void
2178 {
2179  if (mode != FROM_CHAR_DATE_NONE)
2180  {
2181  if (tmfc->mode == FROM_CHAR_DATE_NONE)
2182  tmfc->mode = mode;
2183  else if (tmfc->mode != mode)
2184  ereport(ERROR,
2185  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2186  errmsg("invalid combination of date conventions"),
2187  errhint("Do not mix Gregorian and ISO week date "
2188  "conventions in a formatting template.")));
2189  }
2190 }
2191 
2192 /*
2193  * Set the integer pointed to by 'dest' to the given value.
2194  *
2195  * Puke if the destination integer has previously been set to some other
2196  * non-zero value.
2197  */
2198 static void
2199 from_char_set_int(int *dest, const int value, const FormatNode *node)
2200 {
2201  if (*dest != 0 && *dest != value)
2202  ereport(ERROR,
2203  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2204  errmsg("conflicting values for \"%s\" field in formatting string",
2205  node->key->name),
2206  errdetail("This value contradicts a previous setting for "
2207  "the same field type.")));
2208  *dest = value;
2209 }
2210 
2211 /*
2212  * Read a single integer from the source string, into the int pointed to by
2213  * 'dest'. If 'dest' is NULL, the result is discarded.
2214  *
2215  * In fixed-width mode (the node does not have the FM suffix), consume at most
2216  * 'len' characters. However, any leading whitespace isn't counted in 'len'.
2217  *
2218  * We use strtol() to recover the integer value from the source string, in
2219  * accordance with the given FormatNode.
2220  *
2221  * If the conversion completes successfully, src will have been advanced to
2222  * point at the character immediately following the last character used in the
2223  * conversion.
2224  *
2225  * Return the number of characters consumed.
2226  *
2227  * Note that from_char_parse_int() provides a more convenient wrapper where
2228  * the length of the field is the same as the length of the format keyword (as
2229  * with DD and MI).
2230  */
2231 static int
2232 from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node)
2233 {
2234  long result;
2235  char copy[DCH_MAX_ITEM_SIZ + 1];
2236  char *init = *src;
2237  int used;
2238 
2239  /*
2240  * Skip any whitespace before parsing the integer.
2241  */
2242  *src += strspace_len(*src);
2243 
2244  Assert(len <= DCH_MAX_ITEM_SIZ);
2245  used = (int) strlcpy(copy, *src, len + 1);
2246 
2247  if (S_FM(node->suffix) || is_next_separator(node))
2248  {
2249  /*
2250  * This node is in Fill Mode, or the next node is known to be a
2251  * non-digit value, so we just slurp as many characters as we can get.
2252  */
2253  errno = 0;
2254  result = strtol(init, src, 10);
2255  }
2256  else
2257  {
2258  /*
2259  * We need to pull exactly the number of characters given in 'len' out
2260  * of the string, and convert those.
2261  */
2262  char *last;
2263 
2264  if (used < len)
2265  ereport(ERROR,
2266  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2267  errmsg("source string too short for \"%s\" formatting field",
2268  node->key->name),
2269  errdetail("Field requires %d characters, but only %d "
2270  "remain.",
2271  len, used),
2272  errhint("If your source string is not fixed-width, try "
2273  "using the \"FM\" modifier.")));
2274 
2275  errno = 0;
2276  result = strtol(copy, &last, 10);
2277  used = last - copy;
2278 
2279  if (used > 0 && used < len)
2280  ereport(ERROR,
2281  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2282  errmsg("invalid value \"%s\" for \"%s\"",
2283  copy, node->key->name),
2284  errdetail("Field requires %d characters, but only %d "
2285  "could be parsed.", len, used),
2286  errhint("If your source string is not fixed-width, try "
2287  "using the \"FM\" modifier.")));
2288 
2289  *src += used;
2290  }
2291 
2292  if (*src == init)
2293  ereport(ERROR,
2294  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2295  errmsg("invalid value \"%s\" for \"%s\"",
2296  copy, node->key->name),
2297  errdetail("Value must be an integer.")));
2298 
2299  if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
2300  ereport(ERROR,
2301  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2302  errmsg("value for \"%s\" in source string is out of range",
2303  node->key->name),
2304  errdetail("Value must be in the range %d to %d.",
2305  INT_MIN, INT_MAX)));
2306 
2307  if (dest != NULL)
2308  from_char_set_int(dest, (int) result, node);
2309  return *src - init;
2310 }
2311 
2312 /*
2313  * Call from_char_parse_int_len(), using the length of the format keyword as
2314  * the expected length of the field.
2315  *
2316  * Don't call this function if the field differs in length from the format
2317  * keyword (as with HH24; the keyword length is 4, but the field length is 2).
2318  * In such cases, call from_char_parse_int_len() instead to specify the
2319  * required length explicitly.
2320  */
2321 static int
2322 from_char_parse_int(int *dest, char **src, FormatNode *node)
2323 {
2324  return from_char_parse_int_len(dest, src, node->key->len, node);
2325 }
2326 
2327 /* ----------
2328  * Sequential search with to upper/lower conversion
2329  * ----------
2330  */
2331 static int
2332 seq_search(char *name, const char *const *array, int type, int max, int *len)
2333 {
2334  const char *p;
2335  const char *const *a;
2336  char *n;
2337  int last,
2338  i;
2339 
2340  *len = 0;
2341 
2342  if (!*name)
2343  return -1;
2344 
2345  /* set first char */
2346  if (type == ONE_UPPER || type == ALL_UPPER)
2347  *name = pg_toupper((unsigned char) *name);
2348  else if (type == ALL_LOWER)
2349  *name = pg_tolower((unsigned char) *name);
2350 
2351  for (last = 0, a = array; *a != NULL; a++)
2352  {
2353  /* compare first chars */
2354  if (*name != **a)
2355  continue;
2356 
2357  for (i = 1, p = *a + 1, n = name + 1;; n++, p++, i++)
2358  {
2359  /* search fragment (max) only */
2360  if (max && i == max)
2361  {
2362  *len = i;
2363  return a - array;
2364  }
2365  /* full size */
2366  if (*p == '\0')
2367  {
2368  *len = i;
2369  return a - array;
2370  }
2371  /* Not found in array 'a' */
2372  if (*n == '\0')
2373  break;
2374 
2375  /*
2376  * Convert (but convert new chars only)
2377  */
2378  if (i > last)
2379  {
2380  if (type == ONE_UPPER || type == ALL_LOWER)
2381  *n = pg_tolower((unsigned char) *n);
2382  else if (type == ALL_UPPER)
2383  *n = pg_toupper((unsigned char) *n);
2384  last = i;
2385  }
2386 
2387 #ifdef DEBUG_TO_FROM_CHAR
2388  elog(DEBUG_elog_output, "N: %c, P: %c, A: %s (%s)",
2389  *n, *p, *a, name);
2390 #endif
2391  if (*n != *p)
2392  break;
2393  }
2394  }
2395 
2396  return -1;
2397 }
2398 
2399 /*
2400  * Perform a sequential search in 'array' for text matching the first 'max'
2401  * characters of the source string.
2402  *
2403  * If a match is found, copy the array index of the match into the integer
2404  * pointed to by 'dest', advance 'src' to the end of the part of the string
2405  * which matched, and return the number of characters consumed.
2406  *
2407  * If the string doesn't match, throw an error.
2408  */
2409 static int
2410 from_char_seq_search(int *dest, char **src, const char *const *array, int type, int max,
2411  FormatNode *node)
2412 {
2413  int len;
2414 
2415  *dest = seq_search(*src, array, type, max, &len);
2416  if (len <= 0)
2417  {
2418  char copy[DCH_MAX_ITEM_SIZ + 1];
2419 
2420  Assert(max <= DCH_MAX_ITEM_SIZ);
2421  strlcpy(copy, *src, max + 1);
2422 
2423  ereport(ERROR,
2424  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2425  errmsg("invalid value \"%s\" for \"%s\"",
2426  copy, node->key->name),
2427  errdetail("The given value did not match any of the allowed "
2428  "values for this field.")));
2429  }
2430  *src += len;
2431  return len;
2432 }
2433 
2434 /* ----------
2435  * Process a TmToChar struct as denoted by a list of FormatNodes.
2436  * The formatted data is written to the string pointed to by 'out'.
2437  * ----------
2438  */
/*
 * The node array is walked up to its NODE_TYPE_END marker. Nodes that are
 * not NODE_TYPE_ACTION have their 'character' string copied through as is;
 * action nodes are dispatched on n->key->id. Every case writes its text at
 * 's' and then advances 's' by strlen(s); the output is NUL-terminated once
 * the loop is done.
 *
 * Nothing in this function bounds the writes into 'out' (plain strcpy and
 * sprintf are used), so the buffer is presumably sized by the caller --
 * TODO confirm.
 *
 * NOTE(review): the listing numbers embedded at the start of each line skip
 * values in many places (2448, 2542, 2554, 2928, 2983, ...), and some
 * statements are visibly cut short (for example the sprintf calls under
 * DCH_YYYY and DCH_RM). Lines appear to be missing from this copy of the
 * function; compare with the original file before relying on it.
 */
2439 static void
2440 DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
2441 {
2442  FormatNode *n;
2443  char *s;
2444  struct pg_tm *tm = &in->tm;
2445  int i;
2446 
2447  /* cache localized days and months */
2449 
2450  s = out;
2451  for (n = node; n->type != NODE_TYPE_END; n++)
2452  {
/* anything that is not an action node is copied to the output verbatim */
2453  if (n->type != NODE_TYPE_ACTION)
2454  {
2455  strcpy(s, n->character);
2456  s += strlen(s);
2457  continue;
2458  }
2459 
2460  switch (n->key->id)
2461  {
/*
 * Meridiem indicators: an hour in the second half of the day selects the
 * PM variant, otherwise the AM variant. The four groups differ only in the
 * string constants they emit.
 */
2462  case DCH_A_M:
2463  case DCH_P_M:
2464  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2465  ? P_M_STR : A_M_STR);
2466  s += strlen(s);
2467  break;
2468  case DCH_AM:
2469  case DCH_PM:
2470  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2471  ? PM_STR : AM_STR);
2472  s += strlen(s);
2473  break;
2474  case DCH_a_m:
2475  case DCH_p_m:
2476  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2477  ? p_m_STR : a_m_STR);
2478  s += strlen(s);
2479  break;
2480  case DCH_am:
2481  case DCH_pm:
2482  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2483  ? pm_STR : am_STR);
2484  s += strlen(s);
2485  break;
2486  case DCH_HH:
2487  case DCH_HH12:
2488 
2489  /*
2490  * display time as shown on a 12-hour clock, even for
2491  * intervals
2492  */
/*
 * Width argument of "%0*d", here and in the numeric cases below: 0 when the
 * FM suffix is set (no zero padding), otherwise 2, or 3 for a negative value
 * so that the minus sign does not eat one of the digits. An hour that is a
 * multiple of 12 is shown as 12.
 */
2493  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2494  tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ? HOURS_PER_DAY / 2 :
2495  tm->tm_hour % (HOURS_PER_DAY / 2));
/* TH/th suffix: the number just written is passed through str_numth() in place */
2496  if (S_THth(n->suffix))
2497  str_numth(s, s, S_TH_TYPE(n->suffix));
2498  s += strlen(s);
2499  break;
2500  case DCH_HH24:
2501  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2502  tm->tm_hour);
2503  if (S_THth(n->suffix))
2504  str_numth(s, s, S_TH_TYPE(n->suffix));
2505  s += strlen(s);
2506  break;
2507  case DCH_MI:
2508  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
2509  tm->tm_min);
2510  if (S_THth(n->suffix))
2511  str_numth(s, s, S_TH_TYPE(n->suffix));
2512  s += strlen(s);
2513  break;
2514  case DCH_SS:
2515  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
2516  tm->tm_sec);
2517  if (S_THth(n->suffix))
2518  str_numth(s, s, S_TH_TYPE(n->suffix));
2519  s += strlen(s);
2520  break;
/* MS and US always print a fixed width (3 and 6 digits); FM is not consulted */
2521  case DCH_MS: /* millisecond */
2522  sprintf(s, "%03d", (int) (in->fsec / INT64CONST(1000)));
2523  if (S_THth(n->suffix))
2524  str_numth(s, s, S_TH_TYPE(n->suffix));
2525  s += strlen(s);
2526  break;
2527  case DCH_US: /* microsecond */
2528  sprintf(s, "%06d", (int) in->fsec);
2529  if (S_THth(n->suffix))
2530  str_numth(s, s, S_TH_TYPE(n->suffix));
2531  s += strlen(s);
2532  break;
/* seconds since the start of the day, computed from hour, minute and second */
2533  case DCH_SSSS:
2534  sprintf(s, "%d", tm->tm_hour * SECS_PER_HOUR +
2535  tm->tm_min * SECS_PER_MINUTE +
2536  tm->tm_sec);
2537  if (S_THth(n->suffix))
2538  str_numth(s, s, S_TH_TYPE(n->suffix));
2539  s += strlen(s);
2540  break;
2541  case DCH_tz:
2543  if (tmtcTzn(in))
2544  {
2545  /* We assume here that timezone names aren't localized */
2546  char *p = asc_tolower_z(tmtcTzn(in));
2547 
2548  strcpy(s, p);
2549  pfree(p);
2550  s += strlen(s);
2551  }
2552  break;
2553  case DCH_TZ:
2555  if (tmtcTzn(in))
2556  {
2557  strcpy(s, tmtcTzn(in));
2558  s += strlen(s);
2559  }
2560  break;
/* TZH: explicit sign followed by the whole hours of the absolute GMT offset */
2561  case DCH_TZH:
2563  sprintf(s, "%c%02d",
2564  (tm->tm_gmtoff >= 0) ? '+' : '-',
2565  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2566  s += strlen(s);
2567  break;
/* TZM: minutes left over after removing whole hours from the GMT offset */
2568  case DCH_TZM:
2570  sprintf(s, "%02d",
2571  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2572  s += strlen(s);
2573  break;
/* OF: signed hours, with ":MM" appended only when the offset is not a whole number of hours */
2574  case DCH_OF:
2576  sprintf(s, "%c%0*d",
2577  (tm->tm_gmtoff >= 0) ? '+' : '-',
2578  S_FM(n->suffix) ? 0 : 2,
2579  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2580  s += strlen(s);
2581  if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
2582  {
2583  sprintf(s, ":%02d",
2584  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2585  s += strlen(s);
2586  }
2587  break;
/* Era indicators: a year of zero or less selects the BC variant */
2588  case DCH_A_D:
2589  case DCH_B_C:
2591  strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
2592  s += strlen(s);
2593  break;
2594  case DCH_AD:
2595  case DCH_BC:
2597  strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
2598  s += strlen(s);
2599  break;
2600  case DCH_a_d:
2601  case DCH_b_c:
2603  strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
2604  s += strlen(s);
2605  break;
2606  case DCH_ad:
2607  case DCH_bc:
2609  strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
2610  s += strlen(s);
2611  break;
/*
 * Month and day names follow one pattern. A tm_mon of zero produces no
 * output for the month fields. With the TM suffix the localized name is
 * used, case-converted under 'collid', and rejected with an error when it is
 * longer than (key length + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ. Without TM,
 * the built-in English name is written; full names are left-justified in a
 * 9-character field unless FM is set.
 *
 * NOTE(review): the strings returned by asc_toupper_z()/asc_tolower_z() in
 * these branches are not released here, unlike in the DCH_tz case above,
 * which pfree()s its copy.
 */
2612  case DCH_MONTH:
2614  if (!tm->tm_mon)
2615  break;
2616  if (S_TM(n->suffix))
2617  {
2618  char *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid);
2619 
2620  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2621  strcpy(s, str);
2622  else
2623  ereport(ERROR,
2624  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2625  errmsg("localized string format value too long")));
2626  }
2627  else
2628  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2629  asc_toupper_z(months_full[tm->tm_mon - 1]));
2630  s += strlen(s);
2631  break;
2632  case DCH_Month:
2634  if (!tm->tm_mon)
2635  break;
2636  if (S_TM(n->suffix))
2637  {
2638  char *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid);
2639 
2640  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2641  strcpy(s, str);
2642  else
2643  ereport(ERROR,
2644  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2645  errmsg("localized string format value too long")));
2646  }
2647  else
2648  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2649  months_full[tm->tm_mon - 1]);
2650  s += strlen(s);
2651  break;
2652  case DCH_month:
2654  if (!tm->tm_mon)
2655  break;
2656  if (S_TM(n->suffix))
2657  {
2658  char *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid);
2659 
2660  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2661  strcpy(s, str);
2662  else
2663  ereport(ERROR,
2664  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2665  errmsg("localized string format value too long")));
2666  }
2667  else
2668  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2669  asc_tolower_z(months_full[tm->tm_mon - 1]));
2670  s += strlen(s);
2671  break;
2672  case DCH_MON:
2674  if (!tm->tm_mon)
2675  break;
2676  if (S_TM(n->suffix))
2677  {
2678  char *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2679 
2680  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2681  strcpy(s, str);
2682  else
2683  ereport(ERROR,
2684  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2685  errmsg("localized string format value too long")));
2686  }
2687  else
2688  strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
2689  s += strlen(s);
2690  break;
2691  case DCH_Mon:
2693  if (!tm->tm_mon)
2694  break;
2695  if (S_TM(n->suffix))
2696  {
2697  char *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2698 
2699  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2700  strcpy(s, str);
2701  else
2702  ereport(ERROR,
2703  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2704  errmsg("localized string format value too long")));
2705  }
2706  else
2707  strcpy(s, months[tm->tm_mon - 1]);
2708  s += strlen(s);
2709  break;
2710  case DCH_mon:
2712  if (!tm->tm_mon)
2713  break;
2714  if (S_TM(n->suffix))
2715  {
2716  char *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2717 
2718  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2719  strcpy(s, str);
2720  else
2721  ereport(ERROR,
2722  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2723  errmsg("localized string format value too long")));
2724  }
2725  else
2726  strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
2727  s += strlen(s);
2728  break;
2729  case DCH_MM:
2730  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
2731  tm->tm_mon);
2732  if (S_THth(n->suffix))
2733  str_numth(s, s, S_TH_TYPE(n->suffix));
2734  s += strlen(s);
2735  break;
/* Day names are indexed directly by tm_wday (no "- 1", unlike the months) */
2736  case DCH_DAY:
2738  if (S_TM(n->suffix))
2739  {
2740  char *str = str_toupper_z(localized_full_days[tm->tm_wday], collid);
2741 
2742  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2743  strcpy(s, str);
2744  else
2745  ereport(ERROR,
2746  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2747  errmsg("localized string format value too long")));
2748  }
2749  else
2750  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2751  asc_toupper_z(days[tm->tm_wday]));
2752  s += strlen(s);
2753  break;
2754  case DCH_Day:
2756  if (S_TM(n->suffix))
2757  {
2758  char *str = str_initcap_z(localized_full_days[tm->tm_wday], collid);
2759 
2760  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2761  strcpy(s, str);
2762  else
2763  ereport(ERROR,
2764  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2765  errmsg("localized string format value too long")));
2766  }
2767  else
2768  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2769  days[tm->tm_wday]);
2770  s += strlen(s);
2771  break;
2772  case DCH_day:
2774  if (S_TM(n->suffix))
2775  {
2776  char *str = str_tolower_z(localized_full_days[tm->tm_wday], collid);
2777 
2778  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2779  strcpy(s, str);
2780  else
2781  ereport(ERROR,
2782  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2783  errmsg("localized string format value too long")));
2784  }
2785  else
2786  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2787  asc_tolower_z(days[tm->tm_wday]));
2788  s += strlen(s);
2789  break;
2790  case DCH_DY:
2792  if (S_TM(n->suffix))
2793  {
2794  char *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid);
2795 
2796  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2797  strcpy(s, str);
2798  else
2799  ereport(ERROR,
2800  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2801  errmsg("localized string format value too long")));
2802  }
2803  else
2804  strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
2805  s += strlen(s);
2806  break;
2807  case DCH_Dy:
2809  if (S_TM(n->suffix))
2810  {
2811  char *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid);
2812 
2813  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2814  strcpy(s, str);
2815  else
2816  ereport(ERROR,
2817  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2818  errmsg("localized string format value too long")));
2819  }
2820  else
2821  strcpy(s, days_short[tm->tm_wday]);
2822  s += strlen(s);
2823  break;
2824  case DCH_dy:
2826  if (S_TM(n->suffix))
2827  {
2828  char *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid);
2829 
2830  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2831  strcpy(s, str);
2832  else
2833  ereport(ERROR,
2834  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2835  errmsg("localized string format value too long")));
2836  }
2837  else
2838  strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
2839  s += strlen(s);
2840  break;
/* DDD prints tm_yday; IDDD prints the value computed by date2isoyearday() */
2841  case DCH_DDD:
2842  case DCH_IDDD:
2843  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
2844  (n->key->id == DCH_DDD) ?
2845  tm->tm_yday :
2846  date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday));
2847  if (S_THth(n->suffix))
2848  str_numth(s, s, S_TH_TYPE(n->suffix));
2849  s += strlen(s);
2850  break;
2851  case DCH_DD:
2852  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
2853  if (S_THth(n->suffix))
2854  str_numth(s, s, S_TH_TYPE(n->suffix));
2855  s += strlen(s);
2856  break;
/* D: tm_wday shifted by one, so the printed day number starts at 1 */
2857  case DCH_D:
2859  sprintf(s, "%d", tm->tm_wday + 1);
2860  if (S_THth(n->suffix))
2861  str_numth(s, s, S_TH_TYPE(n->suffix));
2862  s += strlen(s);
2863  break;
/* ID: tm_wday as is, except that 0 is printed as 7 */
2864  case DCH_ID:
2866  sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
2867  if (S_THth(n->suffix))
2868  str_numth(s, s, S_TH_TYPE(n->suffix));
2869  s += strlen(s);
2870  break;
/* WW: week number derived from tm_yday in blocks of seven days, starting at 1 */
2871  case DCH_WW:
2872  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
2873  (tm->tm_yday - 1) / 7 + 1);
2874  if (S_THth(n->suffix))
2875  str_numth(s, s, S_TH_TYPE(n->suffix));
2876  s += strlen(s);
2877  break;
2878  case DCH_IW:
2879  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
2880  date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday));
2881  if (S_THth(n->suffix))
2882  str_numth(s, s, S_TH_TYPE(n->suffix));
2883  s += strlen(s);
2884  break;
/* Q: quarter 1..4 from the month; nothing is written when tm_mon is zero */
2885  case DCH_Q:
2886  if (!tm->tm_mon)
2887  break;
2888  sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
2889  if (S_THth(n->suffix))
2890  str_numth(s, s, S_TH_TYPE(n->suffix));
2891  s += strlen(s);
2892  break;
2893  case DCH_CC:
2894  if (is_interval) /* straight calculation */
2895  i = tm->tm_year / 100;
2896  else
2897  {
2898  if (tm->tm_year > 0)
2899  /* Century 20 == 1901 - 2000 */
2900  i = (tm->tm_year - 1) / 100 + 1;
2901  else
2902  /* Century 6BC == 600BC - 501BC */
2903  i = tm->tm_year / 100 - 1;
2904  }
/* zero padding is applied only while the century fits in two digits */
2905  if (i <= 99 && i >= -99)
2906  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
2907  else
2908  sprintf(s, "%d", i);
2909  if (S_THth(n->suffix))
2910  str_numth(s, s, S_TH_TYPE(n->suffix));
2911  s += strlen(s);
2912  break;
/* Y,YYY: thousands, a comma, then the remaining three digits zero-padded */
2913  case DCH_Y_YYY:
2914  i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
2915  sprintf(s, "%d,%03d", i,
2916  ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
2917  if (S_THth(n->suffix))
2918  str_numth(s, s, S_TH_TYPE(n->suffix));
2919  s += strlen(s);
2920  break;
/*
 * NOTE(review): in the four year cases below, the listing line that starts
 * the alternative (ISO-year) operand of the conditional is absent (2928,
 * 2943, 2958, 2971), so the expressions shown are not complete.
 */
2921  case DCH_YYYY:
2922  case DCH_IYYY:
2923  sprintf(s, "%0*d",
2924  S_FM(n->suffix) ? 0 :
2925  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
2926  (n->key->id == DCH_YYYY ?
2927  ADJUST_YEAR(tm->tm_year, is_interval) :
2929  tm->tm_mon,
2930  tm->tm_mday),
2931  is_interval)));
2932  if (S_THth(n->suffix))
2933  str_numth(s, s, S_TH_TYPE(n->suffix));
2934  s += strlen(s);
2935  break;
2936  case DCH_YYY:
2937  case DCH_IYY:
2938  sprintf(s, "%0*d",
2939  S_FM(n->suffix) ? 0 :
2940  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
2941  (n->key->id == DCH_YYY ?
2942  ADJUST_YEAR(tm->tm_year, is_interval) :
2944  tm->tm_mon,
2945  tm->tm_mday),
2946  is_interval)) % 1000);
2947  if (S_THth(n->suffix))
2948  str_numth(s, s, S_TH_TYPE(n->suffix));
2949  s += strlen(s);
2950  break;
2951  case DCH_YY:
2952  case DCH_IY:
2953  sprintf(s, "%0*d",
2954  S_FM(n->suffix) ? 0 :
2955  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
2956  (n->key->id == DCH_YY ?
2957  ADJUST_YEAR(tm->tm_year, is_interval) :
2959  tm->tm_mon,
2960  tm->tm_mday),
2961  is_interval)) % 100);
2962  if (S_THth(n->suffix))
2963  str_numth(s, s, S_TH_TYPE(n->suffix));
2964  s += strlen(s);
2965  break;
2966  case DCH_Y:
2967  case DCH_I:
2968  sprintf(s, "%1d",
2969  (n->key->id == DCH_Y ?
2970  ADJUST_YEAR(tm->tm_year, is_interval) :
2972  tm->tm_mon,
2973  tm->tm_mday),
2974  is_interval)) % 10);
2975  if (S_THth(n->suffix))
2976  str_numth(s, s, S_TH_TYPE(n->suffix));
2977  s += strlen(s);
2978  break;
/*
 * RM / rm: left-justified in a 4-character field unless FM is set.
 * NOTE(review): the string argument of both sprintf calls (listing lines
 * 2983 and 2990) is absent from this copy.
 */
2979  case DCH_RM:
2980  if (!tm->tm_mon)
2981  break;
2982  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
2984  s += strlen(s);
2985  break;
2986  case DCH_rm:
2987  if (!tm->tm_mon)
2988  break;
2989  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
2991  s += strlen(s);
2992  break;
/* W: week within the month, from tm_mday in blocks of seven days, starting at 1 */
2993  case DCH_W:
2994  sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
2995  if (S_THth(n->suffix))
2996  str_numth(s, s, S_TH_TYPE(n->suffix));
2997  s += strlen(s);
2998  break;
2999  case DCH_J:
3000  sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
3001  if (S_THth(n->suffix))
3002  str_numth(s, s, S_TH_TYPE(n->suffix));
3003  s += strlen(s);
3004  break;
3005  }
3006  }
3007 
/* terminate the output; this also covers a format that produced no text at all */
3008  *s = '\0';
3009 }
3010 
3011 /* ----------
3012  * Process a string as denoted by a list of FormatNodes.
3013  * The TmFromChar struct pointed to by 'out' is populated with the results.
3014  *
3015  * Note: we currently don't have any to_interval() function, so there
3016  * is no need here for INVALID_FOR_INTERVAL checks.
3017  * ----------
3018  */
/*
 * The format nodes and the input string 'in' are walked in parallel; the
 * loop ends at NODE_TYPE_END or at the end of the input, whichever comes
 * first, so trailing format nodes are simply not processed when the input
 * runs out.
 *
 * 'fx_mode' becomes true once a DCH_FX node is seen and stays true for the
 * rest of the format. 'extra_skip' counts input characters skipped beyond
 * what the format asked for; it is consulted for literal text nodes and for
 * the sign handling of TZH.
 *
 * NOTE(review): the listing numbers embedded at the start of each line skip
 * 3114, 3214, 3229, 3354 and 3359. In each of those places the first line of
 * a call is absent, leaving only its trailing arguments, so this copy of the
 * function is not complete.
 */
3019 static void
3020 DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
3021 {
3022  FormatNode *n;
3023  char *s;
3024  int len,
3025  value;
3026  bool fx_mode = false;
3027 
3028  /* number of extra skipped characters (more than given in format string) */
3029  int extra_skip = 0;
3030 
3031  for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
3032  {
3033  /*
3034  * Ignore spaces at the beginning of the string and before fields when
3035  * not in FX (fixed width) mode.
3036  */
/* the FX node itself is excluded, so whitespace right after FX is left in place */
3037  if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) &&
3038  (n->type == NODE_TYPE_ACTION || n == node))
3039  {
3040  while (*s != '\0' && isspace((unsigned char) *s))
3041  {
3042  s++;
3043  extra_skip++;
3044  }
3045  }
3046 
3047  if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
3048  {
3049  if (!fx_mode)
3050  {
3051  /*
3052  * In non FX (fixed format) mode one format string space or
3053  * separator match to one space or separator in input string.
3054  * Or match nothing if there is no space or separator in the
3055  * current position of input string.
3056  */
/* net effect on extra_skip: -1 when nothing is consumed here, unchanged otherwise */
3057  extra_skip--;
3058  if (isspace((unsigned char) *s) || is_separator_char(s))
3059  {
3060  s++;
3061  extra_skip++;
3062  }
3063  }
3064  else
3065  {
3066  /*
3067  * In FX mode, on format string space or separator we consume
3068  * exactly one character from input string. Notice we don't
3069  * insist that the consumed character match the format's
3070  * character.
3071  */
3072  s += pg_mblen(s);
3073  }
3074  continue;
3075  }
3076  else if (n->type != NODE_TYPE_ACTION)
3077  {
3078  /*
3079  * Text character, so consume one character from input string.
3080  * Notice we don't insist that the consumed character match the
3081  * format's character.
3082  */
3083  if (!fx_mode)
3084  {
3085  /*
3086  * In non FX mode we might have skipped some extra characters
3087  * (more than specified in format string) before. In this
3088  * case we don't skip input string character, because it might
3089  * be part of field.
3090  */
3091  if (extra_skip > 0)
3092  extra_skip--;
3093  else
3094  s += pg_mblen(s);
3095  }
3096  else
3097  {
3098  s += pg_mblen(s);
3099  }
3100  continue;
3101  }
3102 
/* only action nodes get this far */
3103  from_char_set_mode(out, n->key->date_mode);
3104 
3105  switch (n->key->id)
3106  {
3107  case DCH_FX:
3108  fx_mode = true;
3109  break;
/*
 * Meridiem indicators: the index found by the search is reduced modulo 2
 * before being stored in out->pm. Presumably the string tables alternate
 * AM and PM spellings -- TODO confirm against the table definitions.
 */
3110  case DCH_A_M:
3111  case DCH_P_M:
3112  case DCH_a_m:
3113  case DCH_p_m:
3115  ALL_UPPER, n->key->len, n);
3116  from_char_set_int(&out->pm, value % 2, n);
3117  out->clock = CLOCK_12_HOUR;
3118  break;
3119  case DCH_AM:
3120  case DCH_PM:
3121  case DCH_am:
3122  case DCH_pm:
3123  from_char_seq_search(&value, &s, ampm_strings,
3124  ALL_UPPER, n->key->len, n);
3125  from_char_set_int(&out->pm, value % 2, n);
3126  out->clock = CLOCK_12_HOUR;
3127  break;
/* HH, HH12 and HH24 are 2-character fields, so the width is given explicitly */
3128  case DCH_HH:
3129  case DCH_HH12:
3130  from_char_parse_int_len(&out->hh, &s, 2, n);
3131  out->clock = CLOCK_12_HOUR;
3132  SKIP_THth(s, n->suffix);
3133  break;
3134  case DCH_HH24:
3135  from_char_parse_int_len(&out->hh, &s, 2, n);
3136  SKIP_THth(s, n->suffix);
3137  break;
3138  case DCH_MI:
3139  from_char_parse_int(&out->mi, &s, n);
3140  SKIP_THth(s, n->suffix);
3141  break;
3142  case DCH_SS:
3143  from_char_parse_int(&out->ss, &s, n);
3144  SKIP_THth(s, n->suffix);
3145  break;
3146  case DCH_MS: /* millisecond */
3147  len = from_char_parse_int_len(&out->ms, &s, 3, n);
3148 
3149  /*
3150  * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
3151  */
3152  out->ms *= len == 1 ? 100 :
3153  len == 2 ? 10 : 1;
3154 
3155  SKIP_THth(s, n->suffix);
3156  break;
3157  case DCH_US: /* microsecond */
3158  len = from_char_parse_int_len(&out->us, &s, 6, n);
3159 
/* same scaling as for MS above, for a field of up to six characters */
3160  out->us *= len == 1 ? 100000 :
3161  len == 2 ? 10000 :
3162  len == 3 ? 1000 :
3163  len == 4 ? 100 :
3164  len == 5 ? 10 : 1;
3165 
3166  SKIP_THth(s, n->suffix);
3167  break;
3168  case DCH_SSSS:
3169  from_char_parse_int(&out->ssss, &s, n);
3170  SKIP_THth(s, n->suffix);
3171  break;
3172  case DCH_tz:
3173  case DCH_TZ:
3174  case DCH_OF:
3175  ereport(ERROR,
3176  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3177  errmsg("formatting field \"%s\" is only supported in to_char",
3178  n->key->name)));
3179  break;
3180  case DCH_TZH:
3181 
3182  /*
3183  * Value of TZH might be negative. And the issue is that we
3184  * might swallow minus sign as the separator. So, if we have
3185  * skipped more characters than specified in the format
3186  * string, then we consider prepending last skipped minus to
3187  * TZH.
3188  */
/* a blank in the sign position is consumed too and counts as a plus sign */
3189  if (*s == '+' || *s == '-' || *s == ' ')
3190  {
3191  out->tzsign = *s == '-' ? -1 : +1;
3192  s++;
3193  }
3194  else
3195  {
3196  if (extra_skip > 0 && *(s - 1) == '-')
3197  out->tzsign = -1;
3198  else
3199  out->tzsign = +1;
3200  }
3201 
3202  from_char_parse_int_len(&out->tzh, &s, 2, n);
3203  break;
3204  case DCH_TZM:
3205  /* assign positive timezone sign if TZH was not seen before */
3206  if (!out->tzsign)
3207  out->tzsign = +1;
3208  from_char_parse_int_len(&out->tzm, &s, 2, n);
3209  break;
/* Era indicators: handled like the meridiem indicators, stored in out->bc */
3210  case DCH_A_D:
3211  case DCH_B_C:
3212  case DCH_a_d:
3213  case DCH_b_c:
3215  ALL_UPPER, n->key->len, n);
3216  from_char_set_int(&out->bc, value % 2, n);
3217  break;
3218  case DCH_AD:
3219  case DCH_BC:
3220  case DCH_ad:
3221  case DCH_bc:
3222  from_char_seq_search(&value, &s, adbc_strings,
3223  ALL_UPPER, n->key->len, n);
3224  from_char_set_int(&out->bc, value % 2, n);
3225  break;
/* month names: the table index is zero-based, so 1 is added to get the month */
3226  case DCH_MONTH:
3227  case DCH_Month:
3228  case DCH_month:
3230  MAX_MONTH_LEN, n);
3231  from_char_set_int(&out->mm, value + 1, n);
3232  break;
3233  case DCH_MON:
3234  case DCH_Mon:
3235  case DCH_mon:
3236  from_char_seq_search(&value, &s, months, ONE_UPPER,
3237  MAX_MON_LEN, n);
3238  from_char_set_int(&out->mm, value + 1, n);
3239  break;
3240  case DCH_MM:
3241  from_char_parse_int(&out->mm, &s, n);
3242  SKIP_THth(s, n->suffix);
3243  break;
/*
 * Day names: the zero-based table index is stored first and then
 * incremented, so out->d ends up one-based. The abbreviated forms search
 * the same 'days' table, limited to MAX_DY_LEN characters.
 */
3244  case DCH_DAY:
3245  case DCH_Day:
3246  case DCH_day:
3247  from_char_seq_search(&value, &s, days, ONE_UPPER,
3248  MAX_DAY_LEN, n);
3249  from_char_set_int(&out->d, value, n);
3250  out->d++;
3251  break;
3252  case DCH_DY:
3253  case DCH_Dy:
3254  case DCH_dy:
3255  from_char_seq_search(&value, &s, days, ONE_UPPER,
3256  MAX_DY_LEN, n);
3257  from_char_set_int(&out->d, value, n);
3258  out->d++;
3259  break;
3260  case DCH_DDD:
3261  from_char_parse_int(&out->ddd, &s, n);
3262  SKIP_THth(s, n->suffix);
3263  break;
/* IDDD is a 4-character keyword for a 3-character field, hence the explicit width */
3264  case DCH_IDDD:
3265  from_char_parse_int_len(&out->ddd, &s, 3, n);
3266  SKIP_THth(s, n->suffix);
3267  break;
3268  case DCH_DD:
3269  from_char_parse_int(&out->dd, &s, n);
3270  SKIP_THth(s, n->suffix);
3271  break;
3272  case DCH_D:
3273  from_char_parse_int(&out->d, &s, n);
3274  SKIP_THth(s, n->suffix);
3275  break;
3276  case DCH_ID:
3277  from_char_parse_int_len(&out->d, &s, 1, n);
3278  /* Shift numbering to match Gregorian where Sunday = 1 */
3279  if (++out->d > 7)
3280  out->d = 1;
3281  SKIP_THth(s, n->suffix);
3282  break;
/* WW and IW share out->ww */
3283  case DCH_WW:
3284  case DCH_IW:
3285  from_char_parse_int(&out->ww, &s, n);
3286  SKIP_THth(s, n->suffix);
3287  break;
3288  case DCH_Q:
3289 
3290  /*
3291  * We ignore 'Q' when converting to date because it is unclear
3292  * which date in the quarter to use, and some people specify
3293  * both quarter and month, so if it was honored it might
3294  * conflict with the supplied month. That is also why we don't
3295  * throw an error.
3296  *
3297  * We still parse the source string for an integer, but it
3298  * isn't stored anywhere in 'out'.
3299  */
3300  from_char_parse_int((int *) NULL, &s, n);
3301  SKIP_THth(s, n->suffix);
3302  break;
3303  case DCH_CC:
3304  from_char_parse_int(&out->cc, &s, n);
3305  SKIP_THth(s, n->suffix);
3306  break;
3307  case DCH_Y_YYY:
3308  {
3309  int matched,
3310  years,
3311  millennia,
3312  nch;
3313 
/*
 * Both numbers must convert; %n then records in 'nch' how many input
 * characters sscanf consumed, which is how far 's' is advanced below.
 */
3314  matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
3315  if (matched < 2)
3316  ereport(ERROR,
3317  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3318  errmsg("invalid input string for \"Y,YYY\"")));
3319  years += (millennia * 1000);
3320  from_char_set_int(&out->year, years, n);
3321  out->yysz = 4;
3322  s += nch;
3323  SKIP_THth(s, n->suffix);
3324  }
3325  break;
/* year fields record in out->yysz how many digits the keyword stands for */
3326  case DCH_YYYY:
3327  case DCH_IYYY:
3328  from_char_parse_int(&out->year, &s, n);
3329  out->yysz = 4;
3330  SKIP_THth(s, n->suffix);
3331  break;
/*
 * For the shorter year keywords, a value read from fewer than four
 * characters is passed through adjust_partial_year_to_2020().
 */
3332  case DCH_YYY:
3333  case DCH_IYY:
3334  if (from_char_parse_int(&out->year, &s, n) < 4)
3335  out->year = adjust_partial_year_to_2020(out->year);
3336  out->yysz = 3;
3337  SKIP_THth(s, n->suffix);
3338  break;
3339  case DCH_YY:
3340  case DCH_IY:
3341  if (from_char_parse_int(&out->year, &s, n) < 4)
3342  out->year = adjust_partial_year_to_2020(out->year);
3343  out->yysz = 2;
3344  SKIP_THth(s, n->suffix);
3345  break;
3346  case DCH_Y:
3347  case DCH_I:
3348  if (from_char_parse_int(&out->year, &s, n) < 4)
3349  out->year = adjust_partial_year_to_2020(out->year);
3350  out->yysz = 1;
3351  SKIP_THth(s, n->suffix);
3352  break;
/*
 * Roman-numeral months: the month is MONTHS_PER_YEAR minus the index found,
 * which suggests the lookup tables are ordered from the last month to the
 * first -- TODO confirm against the table definitions.
 */
3353  case DCH_RM:
3355  ALL_UPPER, MAX_RM_LEN, n);
3356  from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, n);
3357  break;
3358  case DCH_rm:
3360  ALL_LOWER, MAX_RM_LEN, n);
3361  from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, n);
3362  break;
3363  case DCH_W:
3364  from_char_parse_int(&out->w, &s, n);
3365  SKIP_THth(s, n->suffix);
3366  break;
3367  case DCH_J:
3368  from_char_parse_int(&out->j, &s, n);
3369  SKIP_THth(s, n->suffix);
3370  break;
3371  }
3372 
3373  /* Ignore all spaces after fields */
/* extra_skip restarts from zero here, so it only reflects whitespace after this field */
3374  if (!fx_mode)
3375  {
3376  extra_skip = 0;
3377  while (*s != '\0' && isspace((unsigned char) *s))
3378  {
3379  s++;
3380  extra_skip++;
3381  }
3382  }
3383  }
3384 }
3385 
3386 /*
3387  * The invariant for DCH cache entry management is that DCHCounter is equal
3388  * to the maximum age value among the existing entries, and we increment it
3389  * whenever an access occurs. If we approach overflow, deal with that by
3390  * halving all the age values, so that we retain a fairly accurate idea of
3391  * which entries are oldest.
3392  */
/*
 * NOTE(review): the embedded listing numbers jump from 3393 to 3395, so the
 * line carrying the function name and parameter list is absent from this
 * copy.
 */
3393 static inline void
3395 {
/* act only when the next increment of DCHCounter could reach INT_MAX */
3396  if (DCHCounter >= (INT_MAX - 1))
3397  {
/* halve every entry's age and the counter together, keeping their relative order */
3398  for (int i = 0; i < n_DCHCache; i++)
3399  DCHCache[i]->age >>= 1;
3400  DCHCounter >>= 1;
3401  }
3402 }
3403 
3404 /* select a DCHCacheEntry to hold the given format picture */
/*
 * Either branch returns an entry whose 'str' holds a copy of the format
 * picture, whose 'age' is the freshly incremented DCHCounter, and whose
 * 'valid' flag is false; the caller fills in the parsed format and sets
 * 'valid' afterwards.
 *
 * NOTE(review): the embedded listing numbers skip 3411, 3416 and 3453, so
 * the statement after the first comment, the condition that opens the
 * "cache is full" branch, and the tail of the allocation expression are
 * absent from this copy.
 */
3405 static DCHCacheEntry *
3406 DCH_cache_getnew(const char *str)
3407 {
3408  DCHCacheEntry *ent;
3409 
3410  /* Ensure we can advance DCHCounter below */
3412 
3413  /*
3414  * If cache is full, remove oldest entry (or recycle first not-valid one)
3415  */
3417  {
/* start with slot 0 as the candidate for replacement */
3418  DCHCacheEntry *old = DCHCache[0];
3419 
3420 #ifdef DEBUG_TO_FROM_CHAR
3421  elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
3422 #endif
/* if slot 0 is itself not valid it is reused directly, without scanning */
3423  if (old->valid)
3424  {
3425  for (int i = 1; i < DCH_CACHE_ENTRIES; i++)
3426  {
3427  ent = DCHCache[i];
/* the first not-valid entry wins immediately */
3428  if (!ent->valid)
3429  {
3430  old = ent;
3431  break;
3432  }
/* otherwise remember the entry with the smallest age seen so far */
3433  if (ent->age < old->age)
3434  old = ent;
3435  }
3436  }
3437 #ifdef DEBUG_TO_FROM_CHAR
3438  elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
3439 #endif
/* mark the slot not valid before overwriting its key */
3440  old->valid = false;
3441  StrNCpy(old->str, str, DCH_CACHE_SIZE + 1);
3442  old->age = (++DCHCounter);
3443  /* caller is expected to fill format, then set valid */
3444  return old;
3445  }
3446  else
3447  {
3448 #ifdef DEBUG_TO_FROM_CHAR
3449  elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
3450 #endif
/* the next unused slot must still be empty */
3451  Assert(DCHCache[n_DCHCache] == NULL);
3452  DCHCache[n_DCHCache] = ent = (DCHCacheEntry *)
3454  ent->valid = false;
3455  StrNCpy(ent->str, str, DCH_CACHE_SIZE + 1);
3456  ent->age = (++DCHCounter);
3457  /* caller is expected to fill format, then set valid */
/* count the new slot only after it has been set up */
3458  ++n_DCHCache;
3459  return ent;
3460  }
3461 }
3462 
3463 /* look for an existing DCHCacheEntry matching the given format picture */
3464 static DCHCacheEntry *
3465 DCH_cache_search(const char *str)
3466 {
3467  /* Ensure we can advance DCHCounter below */
3469 
3470  for (int i = 0; i < n_DCHCache; i++)
3471  {
3472  DCHCacheEntry *ent = DCHCache[i];
3473 
3474  if (ent->valid && strcmp(ent->str, str) == 0)
3475  {
3476  ent->age = (++DCHCounter);
3477  return ent;
3478  }
3479  }
3480 
3481  return NULL;
3482 }
3483 
3484 /* Find or create a DCHCacheEntry for the given format picture */
3485 static DCHCacheEntry *
3486 DCH_cache_fetch(const char *str)
3487 {
3488  DCHCacheEntry *ent;
3489 
3490  if ((ent = DCH_cache_search(str)) == NULL)
3491  {
3492  /*
3493  * Not in the cache, must run parser and save a new format-picture to
3494  * the cache. Do not mark the cache entry valid until parsing
3495  * succeeds.
3496  */
3497  ent = DCH_cache_getnew(str);
3498 
3499  parse_format(ent->format, str, DCH_keywords,
3500  DCH_suff, DCH_index, DCH_TYPE, NULL);
3501 
3502  ent->valid = true;
3503  }
3504  return ent;
3505 }
3506 
3507 /*
3508  * Format a date/time or interval into a string according to fmt.
3509  * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char
3510  * for formatting.
3511  */
3512 static text *
3513 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
3514 {
3515  FormatNode *format;
3516  char *fmt_str,
3517  *result;
3518  bool incache;
3519  int fmt_len;
3520  text *res;
3521 
3522  /*
3523  * Convert fmt to C string
3524  */
3525  fmt_str = text_to_cstring(fmt);
3526  fmt_len = strlen(fmt_str);
3527 
3528  /*
3529  * Allocate workspace for result as C string
3530  */
3531  result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
3532  *result = '\0';
3533 
3534  if (fmt_len > DCH_CACHE_SIZE)
3535  {
3536  /*
3537  * Allocate new memory if format picture is bigger than static cache
3538  * and do not use cache (call parser always)
3539  */
3540  incache = false;
3541 
3542  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
3543 
3544  parse_format(format, fmt_str, DCH_keywords,
3545  DCH_suff, DCH_index, DCH_TYPE, NULL);
3546  }
3547  else
3548  {
3549  /*
3550  * Use cache buffers
3551  */
3552  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str);
3553 
3554  incache = true;
3555  format = ent->format;
3556  }
3557 
3558  /* The real work is here */
3559  DCH_to_char(format, is_interval, tmtc, result, collid);
3560 
3561  if (!incache)
3562  pfree(format);
3563 
3564  pfree(fmt_str);
3565 
3566  /* convert C-string result to TEXT format */
3567  res = cstring_to_text(result);
3568 
3569  pfree(result);
3570  return res;
3571 }
3572 
3573 /****************************************************************************
3574  * Public routines
3575  ***************************************************************************/
3576 
3577 /* -------------------
3578  * TIMESTAMP to_char()
3579  * -------------------
3580  */
3581 Datum
3583 {
3585  text *fmt = PG_GETARG_TEXT_PP(1),
3586  *res;
3587  TmToChar tmtc;
3588  struct pg_tm *tm;
3589  int thisdate;
3590 
3591  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
3592  PG_RETURN_NULL();
3593 
3594  ZERO_tmtc(&tmtc);
3595  tm = tmtcTm(&tmtc);
3596 
3597  if (timestamp2tm(dt, NULL, tm, &tmtcFsec(&tmtc), NULL, NULL) != 0)
3598  ereport(ERROR,
3599  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3600  errmsg("timestamp out of range")));
3601 
3602  thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
3603  tm->tm_wday = (thisdate + 1) % 7;
3604  tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
3605 
3606  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
3607  PG_RETURN_NULL();
3608 
3609  PG_RETURN_TEXT_P(res);
3610 }
3611 
3612 Datum
3614 {
3616  text *fmt = PG_GETARG_TEXT_PP(1),
3617  *res;
3618  TmToChar tmtc;
3619  int tz;
3620  struct pg_tm *tm;
3621  int thisdate;
3622 
3623  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
3624  PG_RETURN_NULL();
3625 
3626  ZERO_tmtc(&tmtc);
3627  tm = tmtcTm(&tmtc);
3628 
3629  if (timestamp2tm(dt, &tz, tm, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
3630  ereport(ERROR,
3631  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3632  errmsg("timestamp out of range")));
3633 
3634  thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
3635  tm->tm_wday = (thisdate + 1) % 7;
3636  tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
3637 
3638  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
3639  PG_RETURN_NULL();
3640 
3641  PG_RETURN_TEXT_P(res);
3642 }
3643 
3644 
3645 /* -------------------
3646  * INTERVAL to_char()
3647  * -------------------
3648  */
3649 Datum
3651 {
3652  Interval *it = PG_GETARG_INTERVAL_P(0);
3653  text *fmt = PG_GETARG_TEXT_PP(1),
3654  *res;
3655  TmToChar tmtc;
3656  struct pg_tm *tm;
3657 
3658  if (VARSIZE_ANY_EXHDR(fmt) <= 0)
3659  PG_RETURN_NULL();
3660 
3661  ZERO_tmtc(&tmtc);
3662  tm = tmtcTm(&tmtc);
3663 
3664  if (interval2tm(*it, tm, &tmtcFsec(&tmtc)) != 0)
3665  PG_RETURN_NULL();
3666 
3667  /* wday is meaningless, yday approximates the total span in days */
3668  tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday;
3669 
3670  if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
3671  PG_RETURN_NULL();
3672 
3673  PG_RETURN_TEXT_P(res);
3674 }
3675 
3676 /* ---------------------
3677  * TO_TIMESTAMP()
3678  *
3679  * Make Timestamp from date_str which is formatted at argument 'fmt'
3680  * ( to_timestamp is reverse to_char() )
3681  * ---------------------
3682  */
3683 Datum
3685 {
3686  text *date_txt = PG_GETARG_TEXT_PP(0);
3687  text *fmt = PG_GETARG_TEXT_PP(1);
3688  Timestamp result;
3689  int tz;
3690  struct pg_tm tm;
3691  fsec_t fsec;
3692 
3693  do_to_timestamp(date_txt, fmt, &tm, &fsec);
3694 
3695  /* Use the specified time zone, if any. */
3696  if (tm.tm_zone)
3697  {
3698  int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), &tz);
3699 
3700  if (dterr)
3701  DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
3702  }
3703  else
3705 
3706  if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
3707  ereport(ERROR,
3708  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3709  errmsg("timestamp out of range")));
3710 
3711  PG_RETURN_TIMESTAMP(result);
3712 }
3713 
3714 /* ----------
3715  * TO_DATE
3716  * Make Date from date_str which is formatted at argument 'fmt'
3717  * ----------
3718  */
3719 Datum
3721 {
3722  text *date_txt = PG_GETARG_TEXT_PP(0);
3723  text *fmt = PG_GETARG_TEXT_PP(1);
3724  DateADT result;
3725  struct pg_tm tm;
3726  fsec_t fsec;
3727 
3728  do_to_timestamp(date_txt, fmt, &tm, &fsec);
3729 
3730  /* Prevent overflow in Julian-day routines */
3731  if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
3732  ereport(ERROR,
3733  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3734  errmsg("date out of range: \"%s\"",
3735  text_to_cstring(date_txt))));
3736 
3737  result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
3738 
3739  /* Now check for just-out-of-range dates */
3740  if (!IS_VALID_DATE(result))
3741  ereport(ERROR,
3742  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3743  errmsg("date out of range: \"%s\"",
3744  text_to_cstring(date_txt))));
3745 
3746  PG_RETURN_DATEADT(result);
3747 }
3748 
3749 /*
3750  * do_to_timestamp: shared code for to_timestamp and to_date
3751  *
3752  * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm
3753  * and fractional seconds.
3754  *
3755  * We parse 'fmt' into a list of FormatNodes, which is then passed to
3756  * DCH_from_char to populate a TmFromChar with the parsed contents of
3757  * 'date_txt'.
3758  *
3759  * The TmFromChar is then analysed and converted into the final results in
3760  * struct 'tm' and 'fsec'.
3761  */
3762 static void
3763 do_to_timestamp(text *date_txt, text *fmt,
3764  struct pg_tm *tm, fsec_t *fsec)
3765 {
3766  FormatNode *format;
3767  TmFromChar tmfc;
3768  int fmt_len;
3769  char *date_str;
3770  int fmask;
3771 
3772  date_str = text_to_cstring(date_txt);
3773 
3774  ZERO_tmfc(&tmfc);
3775  ZERO_tm(tm);
3776  *fsec = 0;
3777  fmask = 0; /* bit mask for ValidateDate() */
3778 
3779  fmt_len = VARSIZE_ANY_EXHDR(fmt);
3780 
3781  if (fmt_len)
3782  {
3783  char *fmt_str;
3784  bool incache;
3785 
3786  fmt_str = text_to_cstring(fmt);
3787 
3788  if (fmt_len > DCH_CACHE_SIZE)
3789  {
3790  /*
3791  * Allocate new memory if format picture is bigger than static
3792  * cache and do not use cache (call parser always)
3793  */
3794  incache = false;
3795 
3796  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
3797 
3798  parse_format(format, fmt_str, DCH_keywords,
3799  DCH_suff, DCH_index, DCH_TYPE, NULL);
3800  }
3801  else
3802  {
3803  /*
3804  * Use cache buffers
3805  */
3806  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str);
3807 
3808  incache = true;
3809  format = ent->format;
3810  }
3811 
3812 #ifdef DEBUG_TO_FROM_CHAR
3813  /* dump_node(format, fmt_len); */
3814  /* dump_index(DCH_keywords, DCH_index); */
3815 #endif
3816 
3817  DCH_from_char(format, date_str, &tmfc);
3818 
3819  pfree(fmt_str);
3820  if (!incache)
3821  pfree(format);
3822  }
3823 
3824  DEBUG_TMFC(&tmfc);
3825 
3826  /*
3827  * Convert to_date/to_timestamp input fields to standard 'tm'
3828  */
3829  if (tmfc.ssss)
3830  {
3831  int x = tmfc.ssss;
3832 
3833  tm->tm_hour = x / SECS_PER_HOUR;
3834  x %= SECS_PER_HOUR;
3835  tm->tm_min = x / SECS_PER_MINUTE;
3836  x %= SECS_PER_MINUTE;
3837  tm->tm_sec = x;
3838  }
3839 
3840  if (tmfc.ss)
3841  tm->tm_sec = tmfc.ss;
3842  if (tmfc.mi)
3843  tm->tm_min = tmfc.mi;
3844  if (tmfc.hh)
3845  tm->tm_hour = tmfc.hh;
3846 
3847  if (tmfc.clock == CLOCK_12_HOUR)
3848  {
3849  if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
3850  ereport(ERROR,
3851  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3852  errmsg("hour \"%d\" is invalid for the 12-hour clock",
3853  tm->tm_hour),
3854  errhint("Use the 24-hour clock, or give an hour between 1 and 12.")));
3855 
3856  if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
3857  tm->tm_hour += HOURS_PER_DAY / 2;
3858  else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
3859  tm->tm_hour = 0;
3860  }
3861 
3862  if (tmfc.year)
3863  {
3864  /*
3865  * If CC and YY (or Y) are provided, use YY as 2 low-order digits for
3866  * the year in the given century. Keep in mind that the 21st century
3867  * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
3868  * 600BC to 501BC.
3869  */
3870  if (tmfc.cc && tmfc.yysz <= 2)
3871  {
3872  if (tmfc.bc)
3873  tmfc.cc = -tmfc.cc;
3874  tm->tm_year = tmfc.year % 100;
3875  if (tm->tm_year)
3876  {
3877  if (tmfc.cc >= 0)
3878  tm->tm_year += (tmfc.cc - 1) * 100;
3879  else
3880  tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
3881  }
3882  else
3883  {
3884  /* find century year for dates ending in "00" */
3885  tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
3886  }
3887  }
3888  else
3889  {
3890  /* If a 4-digit year is provided, we use that and ignore CC. */
3891  tm->tm_year = tmfc.year;
3892  if (tmfc.bc && tm->tm_year > 0)
3893  tm->tm_year = -(tm->tm_year - 1);
3894  }
3895  fmask |= DTK_M(YEAR);
3896  }
3897  else if (tmfc.cc)
3898  {
3899  /* use first year of century */
3900  if (tmfc.bc)
3901  tmfc.cc = -tmfc.cc;
3902  if (tmfc.cc >= 0)
3903  /* +1 because 21st century started in 2001 */
3904  tm->tm_year = (tmfc.cc - 1) * 100 + 1;
3905  else
3906  /* +1 because year == 599 is 600 BC */
3907  tm->tm_year = tmfc.cc * 100 + 1;
3908  fmask |= DTK_M(YEAR);
3909  }
3910 
3911  if (tmfc.j)
3912  {
3913  j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
3914  fmask |= DTK_DATE_M;
3915  }
3916 
3917  if (tmfc.ww)
3918  {
3919  if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
3920  {
3921  /*
3922  * If tmfc.d is not set, then the date is left at the beginning of
3923  * the ISO week (Monday).
3924  */
3925  if (tmfc.d)
3926  isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
3927  else
3928  isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
3929  fmask |= DTK_DATE_M;
3930  }
3931  else
3932  tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
3933  }
3934 
3935  if (tmfc.w)
3936  tmfc.dd = (tmfc.w - 1) * 7 + 1;
3937  if (tmfc.dd)
3938  {
3939  tm->tm_mday = tmfc.dd;
3940  fmask |= DTK_M(DAY);
3941  }
3942  if (tmfc.mm)
3943  {
3944  tm->tm_mon = tmfc.mm;
3945  fmask |= DTK_M(MONTH);
3946  }
3947 
3948  if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
3949  {
3950  /*
3951  * The month and day field have not been set, so we use the
3952  * day-of-year field to populate them. Depending on the date mode,
3953  * this field may be interpreted as a Gregorian day-of-year, or an ISO
3954  * week date day-of-year.
3955  */
3956 
3957  if (!tm->tm_year && !tmfc.bc)
3958  ereport(ERROR,
3959  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3960  errmsg("cannot calculate day of year without year information")));
3961 
3962  if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
3963  {
3964  int j0; /* zeroth day of the ISO year, in Julian */
3965 
3966  j0 = isoweek2j(tm->tm_year, 1) - 1;
3967 
3968  j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
3969  fmask |= DTK_DATE_M;
3970  }
3971  else
3972  {
3973  const int *y;
3974  int i;
3975 
3976  static const int ysum[2][13] = {
3977  {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
3978  {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}};
3979 
3980  y = ysum[isleap(tm->tm_year)];
3981 
3982  for (i = 1; i <= MONTHS_PER_YEAR; i++)
3983  {
3984  if (tmfc.ddd <= y[i])
3985  break;
3986  }
3987  if (tm->tm_mon <= 1)
3988  tm->tm_mon = i;
3989 
3990  if (tm->tm_mday <= 1)
3991  tm->tm_mday = tmfc.ddd - y[i - 1];
3992 
3993  fmask |= DTK_M(MONTH) | DTK_M(DAY);
3994  }
3995  }
3996 
3997  if (tmfc.ms)
3998  *fsec += tmfc.ms * 1000;
3999  if (tmfc.us)
4000  *fsec += tmfc.us;
4001 
4002  /* Range-check date fields according to bit mask computed above */
4003  if (fmask != 0)
4004  {
4005  /* We already dealt with AD/BC, so pass isjulian = true */
4006  int dterr = ValidateDate(fmask, true, false, false, tm);
4007 
4008  if (dterr != 0)
4009  {
4010  /*
4011  * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate
4012  * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an
4013  * irrelevant hint about datestyle.
4014  */
4015  DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp");
4016  }
4017  }
4018 
4019  /* Range-check time fields too */
4020  if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY ||
4021  tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR ||
4022  tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE ||
4023  *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC)
4024  DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp");
4025 
4026  /* Save parsed time-zone into tm->tm_zone if it was specified */
4027  if (tmfc.tzsign)
4028  {
4029  char *tz;
4030 
4031  if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR ||
4032  tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR)
4033  DateTimeParseError(DTERR_TZDISP_OVERFLOW, date_str, "timestamp");
4034 
4035  tz = psprintf("%c%02d:%02d",
4036  tmfc.tzsign > 0 ? '+' : '-', tmfc.tzh, tmfc.tzm);
4037 
4038  tm->tm_zone = tz;
4039  }
4040 
4041  DEBUG_TM(tm);
4042 
4043  pfree(date_str);
4044 }
4045 
4046 
4047 /**********************************************************************
4048  * the NUMBER version part
4049  *********************************************************************/
4050 
4051 
/*
 * Fill the first 'max' bytes of 'str' with the character 'c' and terminate
 * the string right after them.
 *
 * The caller must supply a buffer of at least max + 1 bytes.  A negative
 * 'max' is treated as zero, yielding an empty string.  Returns 'str'.
 */
static char *
fill_str(char *str, int c, int max)
{
	/* a negative count would wrap to a huge size_t inside memset() */
	if (max < 0)
		max = 0;

	memset(str, c, max);
	str[max] = '\0';
	return str;
}
4059 
/*
 * Reset every field of a NUMDesc that the number-format parser fills in,
 * so that a description can be (re)built from scratch.
 */
#define zeroize_NUM(_n) \
do { \
	(_n)->flag = 0; \
	(_n)->lsign = 0; \
	(_n)->pre_lsign_num = 0; \
	(_n)->need_locale = 0; \
	(_n)->multi = 0; \
	(_n)->pre = 0; \
	(_n)->post = 0; \
	(_n)->zero_start = 0; \
	(_n)->zero_end = 0; \
} while (0)
4072 
4073 /* This works the same as DCH_prevent_counter_overflow */
4074 static inline void
4076 {
4077  if (NUMCounter >= (INT_MAX - 1))
4078  {
4079  for (int i = 0; i < n_NUMCache; i++)
4080  NUMCache[i]->age >>= 1;
4081  NUMCounter >>= 1;
4082  }
4083 }
4084 
4085 /* select a NUMCacheEntry to hold the given format picture */
4086 static NUMCacheEntry *
4087 NUM_cache_getnew(const char *str)
4088 {
4089  NUMCacheEntry *ent;
4090 
4091  /* Ensure we can advance NUMCounter below */
4093 
4094  /*
4095  * If cache is full, remove oldest entry (or recycle first not-valid one)
4096  */
4098  {
4099  NUMCacheEntry *old = NUMCache[0];
4100 
4101 #ifdef DEBUG_TO_FROM_CHAR
4102  elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache);
4103 #endif
4104  if (old->valid)
4105  {
4106  for (int i = 1; i < NUM_CACHE_ENTRIES; i++)
4107  {
4108  ent = NUMCache[i];
4109  if (!ent->valid)
4110  {
4111  old = ent;
4112  break;
4113  }
4114  if (ent->age < old->age)
4115  old = ent;
4116  }
4117  }
4118 #ifdef DEBUG_TO_FROM_CHAR
4119  elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age);
4120 #endif
4121  old->valid = false;
4122  StrNCpy(old->str, str, NUM_CACHE_SIZE + 1);
4123  old->age = (++NUMCounter);
4124  /* caller is expected to fill format and Num, then set valid */
4125  return old;
4126  }
4127  else
4128  {
4129 #ifdef DEBUG_TO_FROM_CHAR
4130  elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache);
4131 #endif
4132  Assert(NUMCache[n_NUMCache] == NULL);
4133  NUMCache[n_NUMCache] = ent = (NUMCacheEntry *)
4135  ent->valid = false;
4136  StrNCpy(ent->str, str, NUM_CACHE_SIZE + 1);
4137  ent->age = (++NUMCounter);
4138  /* caller is expected to fill format and Num, then set valid */
4139  ++n_NUMCache;
4140  return ent;
4141  }
4142 }
4143 
4144 /* look for an existing NUMCacheEntry matching the given format picture */
4145 static NUMCacheEntry *
4146 NUM_cache_search(const char *str)
4147 {
4148  /* Ensure we can advance NUMCounter below */
4150 
4151  for (int i = 0; i < n_NUMCache; i++)
4152  {
4153  NUMCacheEntry *ent = NUMCache[i];
4154 
4155  if (ent->valid && strcmp(ent->str, str) == 0)
4156  {
4157  ent->age = (++NUMCounter);
4158  return ent;
4159  }
4160  }
4161 
4162  return NULL;
4163 }
4164 
4165 /* Find or create a NUMCacheEntry for the given format picture */
4166 static NUMCacheEntry *
4167 NUM_cache_fetch(const char *str)
4168 {
4169  NUMCacheEntry *ent;
4170 
4171  if ((ent = NUM_cache_search(str)) == NULL)
4172  {
4173  /*
4174  * Not in the cache, must run parser and save a new format-picture to
4175  * the cache. Do not mark the cache entry valid until parsing
4176  * succeeds.
4177  */
4178  ent = NUM_cache_getnew(str);
4179 
4180  zeroize_NUM(&ent->Num);
4181 
4182  parse_format(ent->format, str, NUM_keywords,
4183  NULL, NUM_index, NUM_TYPE, &ent->Num);
4184 
4185  ent->valid = true;
4186  }
4187  return ent;
4188 }
4189 
4190 /* ----------
4191  * Cache routine for NUM to_char version
4192  * ----------
4193  */
4194 static FormatNode *
4195 NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
4196 {
4197  FormatNode *format = NULL;
4198  char *str;
4199 
4200  str = text_to_cstring(pars_str);
4201 
4202  if (len > NUM_CACHE_SIZE)
4203  {
4204  /*
4205  * Allocate new memory if format picture is bigger than static cache
4206  * and do not use cache (call parser always)
4207  */
4208  format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode));
4209 
4210  *shouldFree = true;
4211 
4212  zeroize_NUM(Num);
4213 
4214  parse_format(format, str, NUM_keywords,
4215  NULL, NUM_index, NUM_TYPE, Num);
4216  }
4217  else
4218  {
4219  /*
4220  * Use cache buffers
4221  */
4222  NUMCacheEntry *ent = NUM_cache_fetch(str);
4223 
4224  *shouldFree = false;
4225 
4226  format = ent->format;
4227 
4228  /*
4229  * Copy cache to used struct
4230  */
4231  Num->flag = ent->Num.flag;
4232  Num->lsign = ent->Num.lsign;
4233  Num->pre = ent->Num.pre;
4234  Num->post = ent->Num.post;
4235  Num->pre_lsign_num = ent->Num.pre_lsign_num;
4236  Num->need_locale = ent->Num.need_locale;
4237  Num->multi = ent->Num.multi;
4238  Num->zero_start = ent->Num.zero_start;
4239  Num->zero_end = ent->Num.zero_end;
4240  }
4241 
4242 #ifdef DEBUG_TO_FROM_CHAR
4243  /* dump_node(format, len); */
4244  dump_index(NUM_keywords, NUM_index);
4245 #endif
4246 
4247  pfree(str);
4248  return format;
4249 }
4250 
4251 
4252 static char *
4253 int_to_roman(int number)
4254 {
4255  int len = 0,
4256  num = 0;
4257  char *p = NULL,
4258  *result,
4259  numstr[12];
4260 
4261  result = (char *) palloc(16);
4262  *result = '\0';
4263 
4264  if (number > 3999 || number < 1)
4265  {
4266  fill_str(result, '#', 15);
4267  return result;
4268  }
4269  len = snprintf(numstr, sizeof(numstr), "%d", number);
4270 
4271  for (p = numstr; *p != '\0'; p++, --len)
4272  {
4273  num = *p - 49; /* 48 ascii + 1 */
4274  if (num < 0)
4275  continue;
4276 
4277  if (len > 3)
4278  {
4279  while (num-- != -1)
4280  strcat(result, "M");
4281  }
4282  else
4283  {
4284  if (len == 3)
4285  strcat(result, rm100[num]);
4286  else if (len == 2)
4287  strcat(result, rm10[num]);
4288  else if (len == 1)
4289  strcat(result, rm1[num]);
4290  }
4291  }
4292  return result;
4293 }
4294 
4295 
4296 
4297 /* ----------
4298  * Locale
4299  * ----------
4300  */
4301 static void
4303 {
4304  if (Np->Num->need_locale)
4305  {
4306  struct lconv *lconv;
4307 
4308  /*
4309  * Get locales
4310  */
4311  lconv = PGLC_localeconv();
4312 
4313  /*
4314  * Positive / Negative number sign
4315  */
4316  if (lconv->negative_sign && *lconv->negative_sign)
4317  Np->L_negative_sign = lconv->negative_sign;
4318  else
4319  Np->L_negative_sign = "-";
4320 
4321  if (lconv->positive_sign && *lconv->positive_sign)
4322  Np->L_positive_sign = lconv->positive_sign;
4323  else
4324  Np->L_positive_sign = "+";
4325 
4326  /*
4327  * Number decimal point
4328  */
4329  if (lconv->decimal_point && *lconv->decimal_point)
4330  Np->decimal = lconv->decimal_point;
4331 
4332  else
4333  Np->decimal = ".";
4334 
4335  if (!IS_LDECIMAL(Np->Num))
4336  Np->decimal = ".";
4337 
4338  /*
4339  * Number thousands separator
4340  *
4341  * Some locales (e.g. broken glibc pt_BR), have a comma for decimal,
4342  * but "" for thousands_sep, so we set the thousands_sep too.
4343  * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php
4344  */
4345  if (lconv->thousands_sep && *lconv->thousands_sep)
4346  Np->L_thousands_sep = lconv->thousands_sep;
4347  /* Make sure thousands separator doesn't match decimal point symbol. */
4348  else if (strcmp(Np->decimal, ",") !=0)
4349  Np->L_thousands_sep = ",";
4350  else
4351  Np->L_thousands_sep = ".";
4352 
4353  /*
4354  * Currency symbol
4355  */
4356  if (lconv->currency_symbol && *lconv->currency_symbol)
4357  Np->L_currency_symbol = lconv->currency_symbol;
4358  else
4359  Np->L_currency_symbol = " ";
4360  }
4361  else
4362  {
4363  /*
4364  * Default values
4365  */
4366  Np->L_negative_sign = "-";
4367  Np->L_positive_sign = "+";
4368  Np->decimal = ".";
4369 
4370  Np->L_thousands_sep = ",";
4371  Np->L_currency_symbol = " ";
4372  }
4373 }
4374 
/* ----------
 * Return pointer of last relevant number after decimal point
 *	12.0500 --> last relevant is '5'
 *	12.0000 --> last relevant is '.'
 * If there is no decimal point, return NULL (which will result in same
 * behavior as if FM hadn't been specified).
 *
 * The returned pointer points into 'num' itself.
 * ----------
 */
static char *
get_last_relevant_decnum(char *num)
{
	char	   *result,
			   *p = strchr(num, '.');

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, "get_last_relevant_decnum()");
#endif

	if (!p)
		return NULL;

	/* with only zeros (or nothing) after it, the point itself is the answer */
	result = p;

	while (*(++p))
	{
		if (*p != '0')
			result = p;
	}

	return result;
}
4406 
/*
 * Input-bounds helpers for NUM_processor() and its subsidiary routines.
 * Both expect "Np" and "input_len" to be in scope at the point of use.
 *
 * OVERLOAD_TEST: true if we've reached end of input string
 * AMOUNT_TEST(s): true if at least s bytes remain in string
 */
#define OVERLOAD_TEST	(!(Np->inout_p < Np->inout + input_len))
#define AMOUNT_TEST(s)	(Np->inout + (input_len - (s)) >= Np->inout_p)
4414 
4415 /* ----------
4416  * Number extraction for TO_NUMBER()
4417  * ----------
4418  */
4419 static void
4420 NUM_numpart_from_char(NUMProc *Np, int id, int input_len)
4421 {
4422  bool isread = false;
4423 
4424 #ifdef DEBUG_TO_FROM_CHAR
4425  elog(DEBUG_elog_output, " --- scan start --- id=%s",
4426  (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???");
4427 #endif
4428 
4429  if (OVERLOAD_TEST)
4430  return;
4431 
4432  if (*Np->inout_p == ' ')
4433  Np->inout_p++;
4434 
4435  if (OVERLOAD_TEST)
4436  return;
4437 
4438  /*
4439  * read sign before number
4440  */
4441  if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) &&
4442  (Np->read_pre + Np->read_post) == 0)
4443  {
4444 #ifdef DEBUG_TO_FROM_CHAR
4445  elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s",
4446  *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign);
4447 #endif
4448 
4449  /*
4450  * locale sign
4451  */
4452  if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE)
4453  {
4454  int x = 0;
4455 
4456 #ifdef DEBUG_TO_FROM_CHAR
4457  elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p);
4458 #endif
4459  if ((x = strlen(Np->L_negative_sign)) &&
4460  AMOUNT_TEST(x) &&
4461  strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
4462  {
4463  Np->inout_p += x;
4464  *Np->number = '-';
4465  }
4466  else if ((x = strlen(Np->L_positive_sign)) &&
4467  AMOUNT_TEST(x) &&
4468  strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
4469  {
4470  Np->inout_p += x;
4471  *Np->number = '+';
4472  }
4473  }
4474  else
4475  {
4476 #ifdef DEBUG_TO_FROM_CHAR
4477  elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p);
4478 #endif
4479 
4480  /*
4481  * simple + - < >
4482  */
4483  if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) &&
4484  *Np->inout_p == '<'))
4485  {
4486  *Np->number = '-'; /* set - */
4487  Np->inout_p++;
4488  }
4489  else if (*Np->inout_p == '+')
4490  {
4491  *Np->number = '+'; /* set + */
4492  Np->inout_p++;
4493  }
4494  }
4495  }
4496 
4497  if (OVERLOAD_TEST)
4498  return;
4499 
4500 #ifdef DEBUG_TO_FROM_CHAR
4501  elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number);
4502 #endif
4503 
4504  /*
4505  * read digit or decimal point
4506  */
4507  if (isdigit((unsigned char) *Np->inout_p))
4508  {
4509  if (Np->read_dec && Np->read_post == Np->Num->post)
4510  return;
4511 
4512  *Np->number_p = *Np->inout_p;
4513  Np->number_p++;
4514 
4515  if (Np->read_dec)
4516  Np->read_post++;
4517  else
4518  Np->read_pre++;
4519 
4520  isread = true;
4521 
4522 #ifdef DEBUG_TO_FROM_CHAR
4523  elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p);
4524 #endif
4525  }
4526  else if (IS_DECIMAL(Np->Num) && Np->read_dec == false)
4527  {
4528  /*
4529  * We need not test IS_LDECIMAL(Np->Num) explicitly here, because
4530  * Np->decimal is always just "." if we don't have a D format token.
4531  * So we just unconditionally match to Np->decimal.
4532  */
4533  int x = strlen(Np->decimal);
4534 
4535 #ifdef DEBUG_TO_FROM_CHAR
4536  elog(DEBUG_elog_output, "Try read decimal point (%c)",
4537  *Np->inout_p);
4538 #endif
4539  if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0)
4540  {
4541  Np->inout_p += x - 1;
4542  *Np->number_p = '.';
4543  Np->number_p++;
4544  Np->read_dec = true;
4545  isread = true;
4546  }
4547  }
4548 
4549  if (OVERLOAD_TEST)
4550  return;
4551 
4552  /*
4553  * Read sign behind "last" number
4554  *
4555  * We need sign detection because determine exact position of post-sign is
4556  * difficult:
4557  *
4558  * FM9999.9999999S -> 123.001- 9.9S -> .5- FM9.999999MI ->
4559  * 5.01-
4560  */
4561  if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0)
4562  {
4563  /*
4564  * locale sign (NUM_S) is always anchored behind a last number, if: -
4565  * locale sign expected - last read char was NUM_0/9 or NUM_DEC - and
4566  * next char is not digit
4567  */
4568  if (IS_LSIGN(Np->Num) && isread &&
4569  (Np->inout_p + 1) < Np->inout + input_len &&
4570  !isdigit((unsigned char) *(Np->inout_p + 1)))
4571  {
4572  int x;
4573  char *tmp = Np->inout_p++;
4574 
4575 #ifdef DEBUG_TO_FROM_CHAR
4576  elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p);
4577 #endif
4578  if ((x = strlen(Np->L_negative_sign)) &&
4579  AMOUNT_TEST(x) &&
4580  strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
4581  {
4582  Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
4583  *Np->number = '-';
4584  }
4585  else if ((x = strlen(Np->L_positive_sign)) &&
4586  AMOUNT_TEST(x) &&
4587  strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
4588  {
4589  Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
4590  *Np->number = '+';
4591  }
4592  if (*Np->number == ' ')
4593  /* no sign read */
4594  Np->inout_p = tmp;
4595  }
4596 
4597  /*
4598  * try read non-locale sign, it's happen only if format is not exact
4599  * and we cannot determine sign position of MI/PL/SG, an example:
4600  *
4601  * FM9.999999MI -> 5.01-
4602  *
4603  * if (.... && IS_LSIGN(Np->Num)==false) prevents read wrong formats
4604  * like to_number('1 -', '9S') where sign is not anchored to last
4605  * number.
4606  */
4607  else if (isread == false && IS_LSIGN(Np->Num) == false &&
4608  (IS_PLUS(Np->Num) || IS_MINUS(Np->Num)))
4609  {
4610 #ifdef DEBUG_TO_FROM_CHAR
4611  elog(DEBUG_elog_output, "Try read simple post-sign (%c)", *Np->inout_p);
4612 #endif
4613 
4614  /*
4615  * simple + -
4616  */
4617  if (*Np->inout_p == '-' || *Np->inout_p == '+')
4618  /* NUM_processor() do inout_p++ */
4619  *Np->number = *Np->inout_p;
4620  }
4621  }
4622 }
4623 
/*
 * True when the format has no zero-flag, nothing of the number string has
 * been consumed yet (number_p still at its start), the number string begins
 * with '0', and the format has digits after the decimal point.
 */
#define IS_PREDEC_SPACE(_n) \
		(!IS_ZERO((_n)->Num) && \
		 (_n)->number_p == (_n)->number && \
		 *(_n)->number == '0' && \
		 (_n)->Num->post != 0)
4629 
4630 /* ----------
4631  * Add digit or sign to number-string
4632  * ----------
4633  */
4634 static void
4636 {
4637  int end;
4638 
4639  if (IS_ROMAN(Np->Num))
4640  return;
4641 
4642  /* Note: in this elog() output not set '\0' in 'inout' */
4643 
4644 #ifdef DEBUG_TO_FROM_CHAR
4645 
4646  /*
4647  * Np->num_curr is number of current item in format-picture, it is not
4648  * current position in inout!
4649  */
4650  elog(DEBUG_elog_output,
4651  "SIGN_WROTE: %d, CURRENT: %d, NUMBER_P: \"%s\", INOUT: \"%s\"",
4652  Np->sign_wrote,
4653  Np->num_curr,
4654  Np->number_p,
4655  Np->inout);
4656 #endif
4657  Np->num_in = false;
4658 
4659  /*
4660  * Write sign if real number will write to output Note: IS_PREDEC_SPACE()
4661  * handle "9.9" --> " .1"
4662  */
4663  if (Np->sign_wrote == false &&
4664  (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) &&
4665  (IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.')))
4666  {
4667  if (IS_LSIGN(Np->Num))
4668  {
4669  if (Np->Num->lsign == NUM_LSIGN_PRE)
4670  {
4671  if (Np->sign == '-')
4672  strcpy(Np->inout_p, Np->L_negative_sign);
4673  else
4674  strcpy(Np->inout_p, Np->L_positive_sign);
4675  Np->inout_p += strlen(Np->inout_p);
4676  Np->sign_wrote = true;
4677  }
4678  }
4679  else if (IS_BRACKET(Np->Num))
4680  {
4681  *Np->inout_p = Np->sign == '+' ? ' ' : '<';
4682  ++Np->inout_p;
4683  Np->sign_wrote = true;
4684  }
4685  else if (Np->sign == '+')
4686  {
4687  if (!IS_FILLMODE(Np->Num))
4688  {
4689  *Np->inout_p = ' '; /* Write + */
4690  ++Np->inout_p;
4691  }
4692  Np->sign_wrote = true;
4693  }
4694  else if (Np->sign == '-')
4695  { /* Write - */
4696  *Np->inout_p = '-';
4697  ++Np->inout_p;
4698  Np->sign_wrote = true;
4699  }
4700  }
4701 
4702 
4703  /*
4704  * digits / FM / Zero / Dec. point
4705  */
4706  if (id == NUM_9 || id == NUM_0 || id == NUM_D || id == NUM_DEC)
4707  {
4708  if (Np->num_curr < Np->out_pre_spaces &&
4709  (Np->Num->zero_start > Np->num_curr || !IS_ZERO(Np->Num)))
4710  {
4711  /*
4712  * Write blank space
4713  */
4714  if (!IS_FILLMODE(Np->Num))
4715  {
4716  *Np->inout_p = ' '; /* Write ' ' */
4717  ++Np->inout_p;
4718  }
4719  }
4720  else if (IS_ZERO(Np->Num) &&
4721  Np->num_curr < Np->out_pre_spaces &&
4722  Np->Num->zero_start <= Np->num_curr)
4723  {
4724  /*
4725  * Write ZERO
4726  */
4727  *Np->inout_p = '0'; /* Write '0' */
4728  ++Np->inout_p;
4729  Np->num_in = true;
4730  }
4731  else
4732  {
4733  /*
4734  * Write Decimal point
4735  */
4736  if (*Np->number_p == '.')
4737  {
4738  if (!Np->last_relevant || *Np->last_relevant != '.')
4739  {
4740  strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */
4741  Np->inout_p += strlen(Np->inout_p);
4742  }
4743 
4744  /*
4745  * Ora 'n' -- FM9.9 --> 'n.'
4746  */
4747  else if (IS_FILLMODE(Np->Num) &&
4748  Np->last_relevant && *Np->last_relevant == '.')
4749  {
4750  strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */
4751  Np->inout_p += strlen(Np->inout_p);
4752  }
4753  }
4754  else
4755  {
4756  /*
4757  * Write Digits
4758  */
4759  if (Np->last_relevant && Np->number_p > Np->last_relevant &&
4760  id != NUM_0)
4761  ;
4762 
4763  /*
4764  * '0.1' -- 9.9 --> ' .1'
4765  */
4766  else if (IS_PREDEC_SPACE(Np))
4767  {
4768  if (!IS_FILLMODE(Np->Num))
4769  {
4770  *Np->inout_p = ' ';
4771  ++Np->inout_p;
4772  }
4773 
4774  /*
4775  * '0' -- FM9.9 --> '0.'
4776  */
4777  else if (Np->last_relevant && *Np->last_relevant == '.')
4778  {
4779  *Np->inout_p = '0';
4780  ++Np->inout_p;
4781  }
4782  }
4783  else
4784  {
4785  *Np->inout_p = *Np->number_p; /* Write DIGIT */
4786  ++Np->inout_p;
4787  Np->num_in = true;
4788  }
4789  }
4790  /* do no exceed string length */
4791  if (*Np->number_p)
4792  ++Np->number_p;
4793  }
4794 
4795  end = Np->num_count + (Np->out_pre_spaces ? 1 : 0) + (IS_DECIMAL(Np->Num) ? 1 : 0);
4796 
4797  if (Np->last_relevant && Np->last_relevant == Np->number_p)
4798  end = Np->num_curr;
4799 
4800  if (Np->num_curr + 1 == end)
4801  {
4802  if (Np->sign_wrote == true && IS_BRACKET(Np->Num))
4803  {
4804  *Np->inout_p = Np->sign == '+' ? ' ' : '>';
4805  ++Np->inout_p;
4806  }
4807  else if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_POST)
4808  {
4809  if (Np->sign == '-')
4810  strcpy(Np->inout_p, Np->L_negative_sign);
4811  else
4812  strcpy(Np->inout_p, Np->L_positive_sign);
4813  Np->inout_p += strlen(Np->inout_p);
4814  }
4815  }
4816  }
4817 
4818  ++Np->num_curr;
4819 }
4820 
4821 /*
4822  * Skip over "n" input characters, but only if they aren't numeric data
4823  */
4824 static void
4825 NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len)
4826 {
4827  while (n-- > 0)
4828  {
4829  if (OVERLOAD_TEST)
4830  break; /* end of input */
4831  if (strchr("0123456789.,+-", *Np->inout_p) != NULL)
4832  break; /* it's a data character */
4833  Np->inout_p += pg_mblen(Np->inout_p);
4834  }
4835 }
4836 
4837 static char *
4838 NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
4839  char *number, int input_len, int to_char_out_pre_spaces,
4840  int sign, bool is_to_char, Oid collid)
4841 {
4842  FormatNode *n;
4843  NUMProc _Np,
4844  *Np = &_Np;
4845  const char *pattern;
4846  int pattern_len;
4847 
4848  MemSet(Np, 0, sizeof(NUMProc));
4849 
4850  Np->Num = Num;
4851  Np->is_to_char = is_to_char;
4852  Np->number = number;
4853  Np->inout = inout;
4854  Np->last_relevant = NULL;
4855  Np->read_post = 0;
4856  Np->read_pre = 0;
4857  Np->read_dec = false;
4858 
4859  if (Np->Num->zero_start)
4860  --Np->Num->zero_start;
4861 
4862  if (IS_EEEE(Np->Num))
4863  {
4864  if (!Np->is_to_char)
4865  ereport(ERROR,
4866  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4867  errmsg("\"EEEE\" not supported for input")));
4868  return strcpy(inout, number);
4869  }
4870 
4871  /*
4872  * Roman correction
4873  */
4874  if (IS_ROMAN(Np->Num))
4875  {
4876  if (!Np->is_to_char)
4877  ereport(ERROR,
4878  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4879  errmsg("\"RN\" not supported for input")));
4880 
4881  Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post =
4882  Np->Num->pre = Np->out_pre_spaces = Np->sign = 0;
4883 
4884  if (IS_FILLMODE(Np->Num))
4885  {
4886  Np->Num->flag = 0;
4887  Np->Num->flag |= NUM_F_FILLMODE;
4888  }
4889  else
4890  Np->Num->flag = 0;
4891  Np->Num->flag |= NUM_F_ROMAN;
4892  }
4893 
4894  /*
4895  * Sign
4896  */
4897  if (is_to_char)
4898  {
4899  Np->sign = sign;
4900 
4901  /* MI/PL/SG - write sign itself and not in number */
4902  if (IS_PLUS(Np->Num) || IS_MINUS(Np->Num))
4903  {
4904  if (IS_PLUS(Np->Num) && IS_MINUS(Np->Num) == false)
4905  Np->sign_wrote = false; /* need sign */
4906  else
4907  Np->sign_wrote = true; /* needn't sign */
4908  }
4909  else
4910  {
4911  if (Np->sign != '-')
4912  {
4913  if (IS_BRACKET(Np->Num) && IS_FILLMODE(Np->Num))
4914  Np->Num->flag &= ~NUM_F_BRACKET;
4915  if (IS_MINUS(Np->Num))
4916  Np->Num->flag &= ~NUM_F_MINUS;
4917  }
4918  else if (Np->sign != '+' && IS_PLUS(Np->Num))
4919  Np->Num->flag &= ~NUM_F_PLUS;
4920 
4921  if (Np->sign == '+' && IS_FILLMODE(Np->Num) && IS_LSIGN(Np->Num) == false)
4922  Np->sign_wrote = true; /* needn't sign */
4923  else
4924  Np->sign_wrote = false; /* need sign */
4925 
4926  if (Np->Num->lsign == NUM_LSIGN_PRE && Np->Num->pre == Np->Num->pre_lsign_num)
4927  Np->Num->lsign = NUM_LSIGN_POST;
4928  }
4929  }
4930  else
4931  Np->sign = false;
4932 
4933  /*
4934  * Count
4935  */
4936  Np->num_count = Np->Num->post + Np->Num->pre - 1;
4937 
4938  if (is_to_char)
4939  {
4940  Np->out_pre_spaces = to_char_out_pre_spaces;
4941 
4942  if (IS_FILLMODE(Np->Num) && IS_DECIMAL(Np->Num))
4943  {
4945 
4946  /*
4947  * If any '0' specifiers are present, make sure we don't strip
4948  * those digits.
4949  */
4950  if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces)
4951  {
4952  char *last_zero;
4953 
4954  last_zero = Np->number + (Np->Num->zero_end - Np->out_pre_spaces);
4955  if (Np->last_relevant < last_zero)
4956  Np->last_relevant = last_zero;
4957  }
4958  }
4959 
4960  if (Np->sign_wrote == false && Np->out_pre_spaces == 0)
4961  ++Np->num_count;
4962  }
4963  else
4964  {
4965  Np->out_pre_spaces = 0;
4966  *Np->number = ' '; /* sign space */
4967  *(Np->number + 1) = '\0';
4968  }
4969 
4970  Np->num_in = 0;
4971  Np->num_curr = 0;
4972 
4973 #ifdef DEBUG_TO_FROM_CHAR
4974  elog(DEBUG_elog_output,
4975  "\n\tSIGN: '%c'\n\tNUM: '%s'\n\tPRE: %d\n\tPOST: %d\n\tNUM_COUNT: %d\n\tNUM_PRE: %d\n\tSIGN_WROTE: %s\n\tZERO: %s\n\tZERO_START: %d\n\tZERO_END: %d\n\tLAST_RELEVANT: %s\n\tBRACKET: %s\n\tPLUS: %s\n\tMINUS: %s\n\tFILLMODE: %s\n\tROMAN: %s\n\tEEEE: %s",
4976  Np->sign,
4977  Np->number,
4978  Np->Num->pre,
4979  Np->Num->post,
4980  Np->num_count,
4981  Np->out_pre_spaces,
4982  Np->sign_wrote ? "Yes" : "No",
4983  IS_ZERO(Np->Num) ? "Yes" : "No",
4984  Np->Num->zero_start,
4985  Np->Num->zero_end,
4986  Np->last_relevant ? Np->last_relevant : "<not set>",
4987  IS_BRACKET(Np->Num) ? "Yes" : "No",
4988  IS_PLUS(Np->Num) ? "Yes" : "No",
4989  IS_MINUS(Np->Num) ? "Yes" : "No",
4990  IS_FILLMODE(Np->Num) ? "Yes" : "No",
4991  IS_ROMAN(Np->Num) ? "Yes" : "No",
4992  IS_EEEE(Np->Num) ? "Yes" : "No"
4993  );
4994 #endif
4995 
4996  /*
4997  * Locale
4998  */
4999  NUM_prepare_locale(Np);
5000 
5001  /*
5002  * Processor direct cycle
5003  */
5004  if (Np->is_to_char)
5005  Np->number_p = Np->number;
5006  else
5007  Np->number_p = Np->number + 1; /* first char is space for sign */
5008 
5009  for (n = node, Np->inout_p = Np->inout; n->type != NODE_TYPE_END; n++)
5010  {
5011  if (!Np->is_to_char)
5012  {
5013  /*
5014  * Check at least one byte remains to be scanned. (In actions
5015  * below, must use AMOUNT_TEST if we want to read more bytes than
5016  * that.)
5017  */
5018  if (OVERLOAD_TEST)
5019  break;
5020  }
5021 
5022  /*
5023  * Format pictures actions
5024  */
5025  if (n->type == NODE_TYPE_ACTION)
5026  {
5027  /*
5028  * Create/read digit/zero/blank/sign/special-case
5029  *
5030  * 'NUM_S' note: The locale sign is anchored to number and we
5031  * read/write it when we work with first or last number
5032  * (NUM_0/NUM_9). This is why NUM_S is missing in switch().
5033  *
5034  * Notice the "Np->inout_p++" at the bottom of the loop. This is
5035  * why most of the actions advance inout_p one less than you might
5036  * expect. In cases where we don't want that increment to happen,
5037  * a switch case ends with "continue" not "break".
5038  */
5039  switch (n->key->id)
5040  {
5041  case NUM_9:
5042  case NUM_0:
5043  case NUM_DEC:
5044  case NUM_D:
5045  if (Np->is_to_char)
5046  {
5047  NUM_numpart_to_char(Np, n->key->id);
5048  continue; /* for() */
5049  }
5050  else
5051  {
5052  NUM_numpart_from_char(Np, n->key->id, input_len);
5053  break; /* switch() case: */
5054  }
5055 
5056  case NUM_COMMA:
5057  if (Np->is_to_char)
5058  {
5059  if (!Np->num_in)
5060  {
5061  if (IS_FILLMODE(Np->Num))
5062  continue;
5063  else
5064  *Np->inout_p = ' ';
5065  }
5066  else
5067  *Np->inout_p = ',';
5068  }
5069  else
5070  {
5071  if (!Np->num_in)
5072  {
5073  if (IS_FILLMODE(Np->Num))
5074  continue;
5075  }
5076  if (*Np->inout_p != ',')
5077  continue;
5078  }
5079  break;
5080 
5081  case NUM_G:
5082  pattern = Np->L_thousands_sep;
5083  pattern_len = strlen(pattern);
5084  if (Np->is_to_char)
5085  {
5086  if (!Np->num_in)
5087  {
5088  if (IS_FILLMODE(Np->Num))
5089  continue;
5090  else
5091  {
5092  /* just in case there are MB chars */
5093  pattern_len = pg_mbstrlen(pattern);
5094  memset(Np->inout_p, ' ', pattern_len);
5095  Np->inout_p += pattern_len - 1;
5096  }
5097  }
5098  else
5099  {
5100  strcpy(Np->inout_p, pattern);
5101  Np->inout_p += pattern_len - 1;
5102  }
5103  }
5104  else
5105  {
5106  if (!Np->num_in)
5107  {
5108  if (IS_FILLMODE(Np->Num))
5109  continue;
5110  }
5111 
5112  /*
5113  * Because L_thousands_sep typically contains data
5114  * characters (either '.' or ','), we can't use
5115  * NUM_eat_non_data_chars here. Instead skip only if
5116  * the input matches L_thousands_sep.
5117  */
5118  if (AMOUNT_TEST(pattern_len) &&
5119  strncmp(Np->inout_p, pattern, pattern_len) == 0)
5120  Np->inout_p += pattern_len - 1;
5121  else
5122  continue;
5123  }
5124  break;
5125 
5126  case NUM_L:
5127  pattern = Np->L_currency_symbol;
5128  if (Np->is_to_char)
5129  {
5130  strcpy(Np->inout_p, pattern);
5131  Np->inout_p += strlen(pattern) - 1;
5132  }
5133  else
5134  {
5135  NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len);
5136  continue;
5137  }
5138  break;
5139 
5140  case NUM_RN:
5141  if (IS_FILLMODE(Np->Num))
5142  {
5143  strcpy(Np->inout_p, Np->number_p);
5144  Np->inout_p += strlen(Np->inout_p) - 1;
5145  }
5146  else
5147  {
5148  sprintf(Np->inout_p, "%15s", Np->number_p);
5149  Np->inout_p += strlen(Np->inout_p) - 1;
5150  }
5151  break;
5152 
5153  case NUM_rn:
5154  if (IS_FILLMODE(Np->Num))
5155  {
5156  strcpy(Np->inout_p, asc_tolower_z(Np->number_p));
5157  Np->inout_p += strlen(Np->inout_p) - 1;
5158  }
5159  else
5160  {
5161  sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p));
5162  Np->inout_p += strlen(Np->inout_p) - 1;
5163  }
5164  break;
5165 
5166  case NUM_th:
5167  if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
5168  Np->sign == '-' || IS_DECIMAL(Np->Num))
5169  continue;
5170 
5171  if (Np->is_to_char)
5172  {
5173  strcpy(Np->inout_p, get_th(Np->number, TH_LOWER));
5174  Np->inout_p += 1;
5175  }
5176  else
5177  {
5178  /* All variants of 'th' occupy 2 characters */
5179  NUM_eat_non_data_chars(Np, 2, input_len);
5180  continue;
5181  }
5182  break;
5183 
5184  case NUM_TH:
5185  if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
5186  Np->sign == '-' || IS_DECIMAL(Np->Num))
5187  continue;
5188 
5189  if (Np->is_to_char)
5190  {
5191  strcpy(Np->inout_p, get_th(Np->number, TH_UPPER));
5192  Np->inout_p += 1;
5193  }
5194  else
5195  {
5196  /* All variants of 'TH' occupy 2 characters */
5197  NUM_eat_non_data_chars(Np, 2, input_len);
5198  continue;
5199  }
5200  break;
5201 
5202  case NUM_MI:
5203  if (Np->is_to_char)
5204  {
5205  if (Np->sign == '-')
5206  *Np->inout_p = '-';
5207  else if (IS_FILLMODE(Np->Num))
5208  continue;
5209  else
5210  *Np->inout_p = ' ';
5211  }
5212  else
5213  {
5214  if (*Np->inout_p == '-')
5215  *Np->number = '-';
5216  else
5217  {
5218  NUM_eat_non_data_chars(Np, 1, input_len);
5219  continue;
5220  }
5221  }
5222  break;
5223 
5224  case NUM_PL:
5225  if (Np->is_to_char)
5226  {
5227  if (Np->sign == '+')
5228  *Np->inout_p = '+';
5229  else if (IS_FILLMODE(Np->Num))
5230  continue;
5231  else
5232  *Np->inout_p = ' ';
5233  }
5234  else
5235  {
5236  if (*Np->inout_p == '+')
5237  *Np->number = '+';
5238  else
5239  {
5240  NUM_eat_non_data_chars(Np, 1, input_len);
5241  continue;
5242  }
5243  }
5244  break;
5245 
5246  case NUM_SG:
5247  if (Np->is_to_char)
5248  *Np->inout_p = Np->sign;
5249  else
5250  {
5251  if (*Np->inout_p == '-')
5252  *Np->number = '-';
5253  else if (*Np->inout_p == '+')
5254  *Np->number = '+';
5255  else
5256  {
5257  NUM_eat_non_data_chars(Np, 1, input_len);
5258  continue;
5259  }
5260  }
5261