PostgreSQL Source Code  git master
formatting.c
Go to the documentation of this file.
1 /* -----------------------------------------------------------------------
2  * formatting.c
3  *
4  * src/backend/utils/adt/formatting.c
5  *
6  *
7  * Portions Copyright (c) 1999-2021, PostgreSQL Global Development Group
8  *
9  *
10  * TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
11  *
12  * The PostgreSQL routines for a timestamp/int/float/numeric formatting,
13  * inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
14  *
15  *
16  * Cache & Memory:
17  * The routines maintain their own internal cache of format pictures.
18  *
19  * The cache uses a static buffer and is persistent across transactions. If
20  * the format-picture is bigger than the cache buffer, the parser is called
21  * always.
22  *
23  * NOTE for Number version:
24  * Everything in this version is implemented as keywords ( => suffixes
25  * are not used), because a format picture describes *one* item (number)
26  * only. This differs from the timestamp version, where each keyword can
27  * have a suffix.
28  *
29  * NOTE for Timestamp routines:
30  * In this module the POSIX 'struct tm' type is *not* used, but rather
31  * the PgSQL type, which has tm_mon starting at one (*not* zero) and a
32  * year that is *not* relative to 1900 but is the full year number.
33  * Module supports AD / BC / AM / PM.
34  *
35  * Supported types for to_char():
36  *
37  * Timestamp, Numeric, int4, int8, float4, float8
38  *
39  * Supported types for reverse conversion:
40  *
41  * Timestamp - to_timestamp()
42  * Date - to_date()
43  * Numeric - to_number()
44  *
45  *
46  * Karel Zak
47  *
48  * TODO
49  * - better number building (formatting) / parsing, now it isn't
50  * ideal code
51  * - use Assert()
52  * - add support for roman number to standard number conversion
53  * - add support for number spelling
54  * - add support for string to string formatting (we must be better
55  * than Oracle :-),
56  * to_char('Hello', 'X X X X X') -> 'H e l l o'
57  *
58  * -----------------------------------------------------------------------
59  */
60 
61 #ifdef DEBUG_TO_FROM_CHAR
62 #define DEBUG_elog_output DEBUG3
63 #endif
64 
65 #include "postgres.h"
66 
67 #include <ctype.h>
68 #include <unistd.h>
69 #include <math.h>
70 #include <float.h>
71 #include <limits.h>
72 
73 /*
74  * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
75  * declare them in <wchar.h>, so include that too.
76  */
77 #include <wchar.h>
78 #ifdef HAVE_WCTYPE_H
79 #include <wctype.h>
80 #endif
81 
82 #ifdef USE_ICU
83 #include <unicode/ustring.h>
84 #endif
85 
86 #include "catalog/pg_collation.h"
87 #include "catalog/pg_type.h"
88 #include "mb/pg_wchar.h"
89 #include "parser/scansup.h"
90 #include "utils/builtins.h"
91 #include "utils/date.h"
92 #include "utils/datetime.h"
93 #include "utils/float.h"
94 #include "utils/formatting.h"
95 #include "utils/int8.h"
96 #include "utils/memutils.h"
97 #include "utils/numeric.h"
98 #include "utils/pg_locale.h"
99 
/* ----------
 * Convenience macros for error handling
 * ----------
 *
 * The two macros below serve functions that take a 'bool *have_error'
 * argument.  When that argument is not NULL, the function is expected to
 * suppress ereport()s where possible: instead of throwing it sets
 * *have_error and returns some default value.
 *
 * RETURN_ERROR() is intended to wrap an ereport() call.  If have_error is
 * not NULL, it sets *have_error = true and jumps to the on_error label,
 * where the function is expected to free its resources and return a
 * default value.  If have_error is NULL, 'throw_error' is executed as is.
 *
 * CHECK_ERROR jumps to on_error when have_error is not NULL and *have_error
 * is set.  It is meant for an immediate exit right after calling another
 * function that takes a 'bool *have_error' argument.
 *
 * Both macros expand in the caller's scope: a variable named 'have_error'
 * and a label named 'on_error' must exist there.
 */
#define RETURN_ERROR(throw_error) \
do { \
	if (have_error) \
	{ \
		*have_error = true; \
		goto on_error; \
	} \
	else \
	{ \
		throw_error; \
	} \
} while (0)
130 
/*
 * CHECK_ERROR: jump to the enclosing function's on_error label when
 * have_error is not NULL and *have_error is set; otherwise do nothing.
 * Requires 'have_error' and 'on_error' to be visible at the point of use.
 */
#define CHECK_ERROR \
do { \
	if (have_error && *have_error) \
		goto on_error; \
} while (0)
136 
/* ----------
 * Routines flags
 *
 * Each flag is a distinct bit, so the values can be OR'ed together.
 * ----------
 */
#define DCH_FLAG	0x1			/* DATE-TIME flag */
#define NUM_FLAG	0x2			/* NUMBER flag */
#define STD_FLAG	0x4			/* STANDARD flag */
144 
/* ----------
 * KeyWord Index
 *
 * KeyWord_INDEX_SIZE is '~' - ' ' (94 in ASCII): one slot per character
 * code from ' ' (32) up to, but not including, '~' (126).
 *
 * KeyWord_INDEX_FILTER(c) is 1 only for characters strictly between ' '
 * and '~'; for ' ', '~' and everything outside that range it is 0.
 * Note that the argument is evaluated twice, so it must have no side
 * effects.
 * ----------
 */
#define KeyWord_INDEX_SIZE		('~' - ' ')
#define KeyWord_INDEX_FILTER(_c)	((_c) <= ' ' || (_c) >= '~' ? 0 : 1)
151 
/* ----------
 * Maximal length of one node
 *
 * NOTE(review): the NUM_MAX_ITEM_SIZ comment mentions 15 characters for RN
 * while the value is 8 -- confirm how users of this constant size buffers
 * for roman numerals.
 * ----------
 */
#define DCH_MAX_ITEM_SIZ	   12	/* max localized day name */
#define NUM_MAX_ITEM_SIZ		8	/* roman number (RN has 15 chars) */
158 
159 
/* ----------
 * Format parser structs
 * ----------
 */

/*
 * KeySuffix: one entry of a suffix table (see DCH_suff[]).
 */
typedef struct
{
	const char *name;			/* suffix string */
	int			len;			/* suffix length */
	int			id;				/* used in node->suffix */
	int			type;			/* prefix / postfix */
} KeySuffix;
171 
/* ----------
 * FromCharDateMode
 * ----------
 *
 * This value is used to nominate one of several distinct (and mutually
 * exclusive) date conventions that a keyword can belong to.
 */
typedef enum
{
	FROM_CHAR_DATE_NONE = 0,	/* Value does not affect date mode. */
	FROM_CHAR_DATE_GREGORIAN,	/* Gregorian (day, month, year) style date */
	FROM_CHAR_DATE_ISOWEEK		/* ISO 8601 week date */
} FromCharDateMode;
185 
186 typedef struct
187 {
188  const char *name;
189  int len;
190  int id;
191  bool is_digit;
193 } KeyWord;
194 
195 typedef struct
196 {
197  uint8 type; /* NODE_TYPE_XXX, see below */
198  char character[MAX_MULTIBYTE_CHAR_LEN + 1]; /* if type is CHAR */
199  uint8 suffix; /* keyword prefix/suffix code, if any */
200  const KeyWord *key; /* if type is ACTION */
201 } FormatNode;
202 
/* Values for FormatNode.type */
#define NODE_TYPE_END		1
#define NODE_TYPE_ACTION	2
#define NODE_TYPE_CHAR		3
#define NODE_TYPE_SEPARATOR 4
#define NODE_TYPE_SPACE		5

/* Values for KeySuffix.type */
#define SUFFTYPE_PREFIX		1
#define SUFFTYPE_POSTFIX	2

/* 12 or 24 hour clock selector */
#define CLOCK_24_HOUR		0
#define CLOCK_12_HOUR		1
214 
215 
/* ----------
 * Full months
 *
 * English month names, January first; the list is NULL-terminated.
 * ----------
 */
static const char *const months_full[] = {
	"January",
	"February",
	"March",
	"April",
	"May",
	"June",
	"July",
	"August",
	"September",
	"October",
	"November",
	"December",
	NULL
};
224 
/* Abbreviated English day names, Sunday first; NULL-terminated. */
static const char *const days_short[] = {
	"Sun",
	"Mon",
	"Tue",
	"Wed",
	"Thu",
	"Fri",
	"Sat",
	NULL
};
228 
/* ----------
 * AD / BC
 * ----------
 * There is no 0 AD: years go from 1 BC straight to 1 AD.  For non-interval
 * values ADJUST_YEAR turns a year <= 0 into the positive number 1 - year
 * (0 -> 1, -1 -> 2, ...), and leaves positive years untouched.  For
 * interval years the value is returned unchanged.
 *
 * (Presumably year 0 stands for 1 BC, -1 for 2 BC and so on, with the
 * AD / BC marker printed separately -- confirm against the callers.)
 *
 * 'year' is evaluated more than once; do not pass an expression with side
 * effects.
 */
#define ADJUST_YEAR(year, is_interval)	((is_interval) ? (year) : ((year) <= 0 ? -((year) - 1) : (year)))
237 
#define A_D_STR		"A.D."
#define a_d_STR		"a.d."
#define AD_STR		"AD"
#define ad_STR		"ad"

#define B_C_STR		"B.C."
#define b_c_STR		"b.c."
#define BC_STR		"BC"
#define bc_STR		"bc"

/*
 * AD / BC strings for seq_search.
 *
 * These are given in two variants, a long form with periods and a standard
 * form without.
 *
 * The arrays are laid out such that matches for AD have an even index, and
 * matches for BC have an odd index.  So the boolean value for BC is given by
 * taking the array index of the match, modulo 2.  Both arrays are
 * NULL-terminated.
 */
static const char *const adbc_strings[] = {
	ad_STR, bc_STR,
	AD_STR, BC_STR,
	NULL
};
static const char *const adbc_strings_long[] = {
	a_d_STR, b_c_STR,
	A_D_STR, B_C_STR,
	NULL
};
260 
/* ----------
 * AM / PM
 * ----------
 */
#define A_M_STR		"A.M."
#define a_m_STR		"a.m."
#define AM_STR		"AM"
#define am_STR		"am"

#define P_M_STR		"P.M."
#define p_m_STR		"p.m."
#define PM_STR		"PM"
#define pm_STR		"pm"

/*
 * AM / PM strings for seq_search.
 *
 * These are given in two variants, a long form with periods and a standard
 * form without.
 *
 * The arrays are laid out such that matches for AM have an even index, and
 * matches for PM have an odd index.  So the boolean value for PM is given by
 * taking the array index of the match, modulo 2.  Both arrays are
 * NULL-terminated.
 */
static const char *const ampm_strings[] = {
	am_STR, pm_STR,
	AM_STR, PM_STR,
	NULL
};
static const char *const ampm_strings_long[] = {
	a_m_STR, p_m_STR,
	A_M_STR, P_M_STR,
	NULL
};
287 
/* ----------
 * Months in roman-numeral
 * (Must be in reverse order for seq_search (in FROM_CHAR), because
 *	'VIII' must have higher precedence than 'V')
 *
 * Index 0 is month 12 ("XII"), index 11 is month 1 ("I"); both lists are
 * NULL-terminated.
 * ----------
 */
static const char *const rm_months_upper[] = {
	"XII", "XI", "X", "IX", "VIII", "VII",
	"VI", "V", "IV", "III", "II", "I",
	NULL
};

static const char *const rm_months_lower[] = {
	"xii", "xi", "x", "ix", "viii", "vii",
	"vi", "v", "iv", "iii", "ii", "i",
	NULL
};
299 
/* ----------
 * Roman numbers
 *
 * Entry [n - 1] of rm1 / rm10 / rm100 is the roman spelling of n, n * 10
 * and n * 100 respectively, for n = 1..9; each list is NULL-terminated.
 * ----------
 */
static const char *const rm1[] = {
	"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", NULL
};
static const char *const rm10[] = {
	"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC", NULL
};
static const char *const rm100[] = {
	"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", NULL
};
307 
/* ----------
 * Ordinal postfixes
 *
 * Upper- and lower-case variants in the same order; NULL-terminated.
 * ----------
 */
static const char *const numTH[] = {
	"ST", "ND", "RD", "TH", NULL
};
static const char *const numth[] = {
	"st", "nd", "rd", "th", NULL
};
314 
/* ----------
 * Flags & Options:
 *
 * Case selector for ordinal postfixes (see S_TH_TYPE()).
 * ----------
 */
#define TH_UPPER		1
#define TH_LOWER		2
321 
/* ----------
 * Number description struct
 * ----------
 */
typedef struct
{
	int			pre;			/* (count) numbers before decimal */
	int			post;			/* (count) numbers after decimal */
	int			lsign;			/* want locales sign */
	int			flag;			/* number parameters (NUM_F_xxx bits) */
	int			pre_lsign_num;	/* tmp value for lsign */
	int			multi;			/* multiplier for 'V' */
	int			zero_start;		/* position of first zero */
	int			zero_end;		/* position of last zero */
	int			need_locale;	/* needs it locale */
} NUMDesc;
338 
/* ----------
 * Flags for NUMBER version
 *
 * One bit each (bits 1..14), stored OR'ed together in NUMDesc.flag and
 * tested with the IS_xxx() macros.
 * ----------
 */
#define NUM_F_DECIMAL		(1 << 1)
#define NUM_F_LDECIMAL		(1 << 2)
#define NUM_F_ZERO			(1 << 3)
#define NUM_F_BLANK			(1 << 4)
#define NUM_F_FILLMODE		(1 << 5)
#define NUM_F_LSIGN			(1 << 6)
#define NUM_F_BRACKET		(1 << 7)
#define NUM_F_MINUS			(1 << 8)
#define NUM_F_PLUS			(1 << 9)
#define NUM_F_ROMAN			(1 << 10)
#define NUM_F_MULTI			(1 << 11)
#define NUM_F_PLUS_POST		(1 << 12)
#define NUM_F_MINUS_POST	(1 << 13)
#define NUM_F_EEEE			(1 << 14)

/* Values for NUMDesc.lsign */
#define NUM_LSIGN_PRE	(-1)
#define NUM_LSIGN_POST	1
#define NUM_LSIGN_NONE	0
361 
/* ----------
 * Tests
 *
 * Each macro takes a pointer to a struct with a 'flag' member (NUMDesc)
 * and yields the masked bit: zero if the flag is clear, the flag's bit
 * value (not necessarily 1) if it is set.
 * ----------
 */
#define IS_DECIMAL(_f)	((_f)->flag & NUM_F_DECIMAL)
#define IS_LDECIMAL(_f) ((_f)->flag & NUM_F_LDECIMAL)
#define IS_ZERO(_f)		((_f)->flag & NUM_F_ZERO)
#define IS_BLANK(_f)	((_f)->flag & NUM_F_BLANK)
#define IS_FILLMODE(_f) ((_f)->flag & NUM_F_FILLMODE)
#define IS_BRACKET(_f)	((_f)->flag & NUM_F_BRACKET)
#define IS_MINUS(_f)	((_f)->flag & NUM_F_MINUS)
#define IS_LSIGN(_f)	((_f)->flag & NUM_F_LSIGN)
#define IS_PLUS(_f)		((_f)->flag & NUM_F_PLUS)
#define IS_ROMAN(_f)	((_f)->flag & NUM_F_ROMAN)
#define IS_MULTI(_f)	((_f)->flag & NUM_F_MULTI)
#define IS_EEEE(_f)		((_f)->flag & NUM_F_EEEE)
378 
/* ----------
 * Format picture cache
 *
 * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long;
 * likewise number format pictures up to NUM_CACHE_SIZE bytes long.
 *
 * For simplicity, the cache entries are fixed-size, so they allow for the
 * worst case of a FormatNode for each byte in the picture string.
 *
 * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and
 * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that
 * we don't waste too much space by palloc'ing them individually.  Be sure
 * to adjust those macros if you add fields to those structs.
 *
 * NOTE(review): DCH_CACHE_OVERHEAD counts a single bool, while
 * DCHCacheEntry has two bool members (std, valid).  That is only harmless
 * as long as the MAXALIGN rounding absorbs the extra byte -- confirm.
 *
 * The max number of entries in each cache is DCH_CACHE_ENTRIES
 * resp. NUM_CACHE_ENTRIES.
 * ----------
 */
#define DCH_CACHE_OVERHEAD \
	MAXALIGN(sizeof(bool) + sizeof(int))
#define NUM_CACHE_OVERHEAD \
	MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc))

#define DCH_CACHE_SIZE \
	((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
#define NUM_CACHE_SIZE \
	((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)

#define DCH_CACHE_ENTRIES	20
#define NUM_CACHE_ENTRIES	20
409 
410 typedef struct
411 {
413  char str[DCH_CACHE_SIZE + 1];
414  bool std;
415  bool valid;
416  int age;
417 } DCHCacheEntry;
418 
419 typedef struct
420 {
422  char str[NUM_CACHE_SIZE + 1];
423  bool valid;
424  int age;
426 } NUMCacheEntry;
427 
428 /* global cache for date/time format pictures */
430 static int n_DCHCache = 0; /* current number of entries */
431 static int DCHCounter = 0; /* aging-event counter */
432 
433 /* global cache for number format pictures */
435 static int n_NUMCache = 0; /* current number of entries */
436 static int NUMCounter = 0; /* aging-event counter */
437 
438 /* ----------
439  * For char->date/time conversion
440  * ----------
441  */
442 typedef struct
443 {
445  int hh,
446  pm,
447  mi,
448  ss,
449  ssss,
450  d, /* stored as 1-7, Sunday = 1, 0 means missing */
451  dd,
452  ddd,
453  mm,
454  ms,
455  year,
456  bc,
457  ww,
458  w,
459  cc,
460  j,
461  us,
462  yysz, /* is it YY or YYYY ? */
463  clock, /* 12 or 24 hour clock? */
464  tzsign, /* +1, -1 or 0 if timezone info is absent */
465  tzh,
466  tzm,
467  ff; /* fractional precision */
468 } TmFromChar;
469 
470 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
471 
/* ----------
 * Debug
 *
 * With DEBUG_TO_FROM_CHAR defined, DEBUG_TMFC() / DEBUG_TM() log the
 * members of a TmFromChar / pg_tm-style struct via elog(); otherwise both
 * expand to nothing and their argument is not evaluated.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR
#define DEBUG_TMFC(_X) \
		elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d", \
			(_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
			(_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
			(_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
			(_X)->yysz, (_X)->clock)
/* The tm_isdst label used to read "nisdst" (stray 'n' after the newline). */
#define DEBUG_TM(_X) \
		elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nisdst %d\nmon %d\n",\
			(_X)->tm_sec, (_X)->tm_year,\
			(_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\
			(_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
#else
#define DEBUG_TMFC(_X)
#define DEBUG_TM(_X)
#endif
492 
493 /* ----------
494  * Datetime to char conversion
495  * ----------
496  */
497 typedef struct TmToChar
498 {
499  struct pg_tm tm; /* classic 'tm' struct */
500  fsec_t fsec; /* fractional seconds */
501  const char *tzn; /* timezone */
502 } TmToChar;
503 
504 #define tmtcTm(_X) (&(_X)->tm)
505 #define tmtcTzn(_X) ((_X)->tzn)
506 #define tmtcFsec(_X) ((_X)->fsec)
507 
/*
 * ZERO_tm: reset the struct that _X points to.  All numeric members become
 * 0, except tm_mday and tm_mon which become 1; tm_zone becomes NULL.
 */
#define ZERO_tm(_X) \
do {	\
	(_X)->tm_sec = 0; \
	(_X)->tm_min = 0; \
	(_X)->tm_hour = 0; \
	(_X)->tm_year = 0; \
	(_X)->tm_wday = 0; \
	(_X)->tm_yday = 0; \
	(_X)->tm_isdst = 0; \
	(_X)->tm_mday = 1; \
	(_X)->tm_mon = 1; \
	(_X)->tm_zone = NULL; \
} while(0)

/*
 * ZERO_tmtc: reset a TmToChar -- its tm part via ZERO_tm, the fractional
 * seconds to 0 and the timezone name to NULL.
 */
#define ZERO_tmtc(_X) \
do { \
	ZERO_tm( tmtcTm(_X) ); \
	tmtcFsec(_X) = 0; \
	tmtcTzn(_X) = NULL; \
} while(0)
522 
/*
 * to_char(time) appears to to_char() as an interval, so this check
 * is really for interval and time data types.
 *
 * Expands in the caller's scope and reads a variable named 'is_interval';
 * when it is true, an ERROR is reported.
 */
#define INVALID_FOR_INTERVAL  \
do { \
	if (is_interval) \
		ereport(ERROR, \
				(errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
				 errmsg("invalid format specification for an interval value"), \
				 errhint("Intervals are not tied to specific calendar dates."))); \
} while(0)
535 
536 /*****************************************************************************
537  * KeyWord definitions
538  *****************************************************************************/
539 
/* ----------
 * Suffixes (FormatNode.suffix is an OR of these codes)
 * ----------
 */
#define DCH_S_FM	0x01
#define DCH_S_TH	0x02
#define DCH_S_th	0x04
#define DCH_S_SP	0x08
#define DCH_S_TM	0x10

/* ----------
 * Suffix tests
 *
 * All of these yield exactly 0 or 1, except S_TH_TYPE() which yields
 * TH_UPPER when the TH bit is set and TH_LOWER otherwise.
 * ----------
 */
#define S_THth(_s)	((((_s) & DCH_S_TH) || ((_s) & DCH_S_th)) ? 1 : 0)
#define S_TH(_s)	(((_s) & DCH_S_TH) ? 1 : 0)
#define S_th(_s)	(((_s) & DCH_S_th) ? 1 : 0)
#define S_TH_TYPE(_s)	(((_s) & DCH_S_TH) ? TH_UPPER : TH_LOWER)

/* Oracle toggles FM behavior, we don't; see docs. */
#define S_FM(_s)	(((_s) & DCH_S_FM) ? 1 : 0)
#define S_SP(_s)	(((_s) & DCH_S_SP) ? 1 : 0)
#define S_TM(_s)	(((_s) & DCH_S_TM) ? 1 : 0)
563 
564 /* ----------
565  * Suffixes definition for DATE-TIME TO/FROM CHAR
566  * ----------
567  */
568 #define TM_SUFFIX_LEN 2
569 
570 static const KeySuffix DCH_suff[] = {
571  {"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
572  {"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
573  {"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX},
574  {"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
575  {"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
576  {"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
577  {"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
578  /* last */
579  {NULL, 0, 0, 0}
580 };
581 
582 
/* ----------
 * Format-pictures (KeyWord).
 *
 * The keyword tables are sorted alphabetically, *BUT* similar keywords are
 * ordered from complex to simple:
 *
 *	(example: "DDD", "DD", "Day", "D")
 *
 * This ordering is required by the sequential search, because the strings
 * being matched have no explicit end.  Which keyword is in "HH12blabla":
 * "HH" or "HH12"?  "HH12" must be tried first, because "HH" is also
 * contained in the string but is not the right match.
 *
 * (!)
 *	 - The position of a keyword in its table is the same as its position
 *	   in the DCH_poz / NUM_poz enum.
 * (!)
 *
 * For fast search an 'int index[]' array is used.  The index is an ASCII
 * table from position 32 (' ') to 126 (~); each slot holds the DCH_ / NUM_
 * enum value for that ASCII position, or -1 if no keyword starts with that
 * character.  Search example for string "MM":
 *	1) look at index['M' - 32],
 *	2) take the keyword position (enum DCH_MI) from the index,
 *	3) run a sequential search in keywords[] from this position.
 *
 * ----------
 */
610 
611 typedef enum
612 {
633  DCH_FX, /* global suffix */
722 
723  /* last */
725 } DCH_poz;
726 
727 typedef enum
728 {
765 
766  /* last */
768 } NUM_poz;
769 
770 /* ----------
771  * KeyWords for DATE-TIME version
772  * ----------
773  */
774 static const KeyWord DCH_keywords[] = {
775 /* name, len, id, is_digit, date_mode */
776  {"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE}, /* A */
777  {"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
778  {"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
779  {"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
780  {"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE}, /* B */
781  {"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
782  {"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* C */
783  {"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE}, /* D */
784  {"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
785  {"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
786  {"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
787  {"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
788  {"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
789  {"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
790  {"FF1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* F */
791  {"FF2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
792  {"FF3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
793  {"FF4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
794  {"FF5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
795  {"FF6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
796  {"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
797  {"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* H */
798  {"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
799  {"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
800  {"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* I */
801  {"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
802  {"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
803  {"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
804  {"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
805  {"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
806  {"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
807  {"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
808  {"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* M */
809  {"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
810  {"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
811  {"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
812  {"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
813  {"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
814  {"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
815  {"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* O */
816  {"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE}, /* P */
817  {"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
818  {"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
819  {"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
820  {"SSSSS", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* S */
821  {"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
822  {"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
823  {"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* T */
824  {"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
825  {"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
826  {"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* U */
827  {"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* W */
828  {"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
829  {"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* Y */
830  {"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
831  {"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
832  {"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
833  {"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
834  {"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE}, /* a */
835  {"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
836  {"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
837  {"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
838  {"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE}, /* b */
839  {"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
840  {"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* c */
841  {"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE}, /* d */
842  {"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
843  {"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
844  {"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
845  {"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
846  {"ff1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* f */
847  {"ff2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
848  {"ff3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
849  {"ff4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
850  {"ff5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
851  {"ff6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
852  {"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
853  {"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* h */
854  {"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
855  {"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
856  {"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* i */
857  {"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
858  {"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
859  {"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
860  {"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
861  {"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
862  {"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
863  {"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
864  {"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* m */
865  {"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
866  {"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
867  {"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
868  {"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
869  {"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE}, /* p */
870  {"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
871  {"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
872  {"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
873  {"sssss", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* s */
874  {"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
875  {"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
876  {"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE}, /* t */
877  {"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* u */
878  {"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* w */
879  {"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
880  {"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* y */
881  {"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
882  {"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
883  {"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
884  {"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
885 
886  /* last */
887  {NULL, 0, 0, 0, 0}
888 };
889 
890 /* ----------
891  * KeyWords for NUMBER version
892  *
893  * The is_digit and date_mode fields are not relevant here.
894  * ----------
895  */
896 static const KeyWord NUM_keywords[] = {
897 /* name, len, id is in Index */
898  {",", 1, NUM_COMMA}, /* , */
899  {".", 1, NUM_DEC}, /* . */
900  {"0", 1, NUM_0}, /* 0 */
901  {"9", 1, NUM_9}, /* 9 */
902  {"B", 1, NUM_B}, /* B */
903  {"C", 1, NUM_C}, /* C */
904  {"D", 1, NUM_D}, /* D */
905  {"EEEE", 4, NUM_E}, /* E */
906  {"FM", 2, NUM_FM}, /* F */
907  {"G", 1, NUM_G}, /* G */
908  {"L", 1, NUM_L}, /* L */
909  {"MI", 2, NUM_MI}, /* M */
910  {"PL", 2, NUM_PL}, /* P */
911  {"PR", 2, NUM_PR},
912  {"RN", 2, NUM_RN}, /* R */
913  {"SG", 2, NUM_SG}, /* S */
914  {"SP", 2, NUM_SP},
915  {"S", 1, NUM_S},
916  {"TH", 2, NUM_TH}, /* T */
917  {"V", 1, NUM_V}, /* V */
918  {"b", 1, NUM_B}, /* b */
919  {"c", 1, NUM_C}, /* c */
920  {"d", 1, NUM_D}, /* d */
921  {"eeee", 4, NUM_E}, /* e */
922  {"fm", 2, NUM_FM}, /* f */
923  {"g", 1, NUM_G}, /* g */
924  {"l", 1, NUM_L}, /* l */
925  {"mi", 2, NUM_MI}, /* m */
926  {"pl", 2, NUM_PL}, /* p */
927  {"pr", 2, NUM_PR},
928  {"rn", 2, NUM_rn}, /* r */
929  {"sg", 2, NUM_SG}, /* s */
930  {"sp", 2, NUM_SP},
931  {"s", 1, NUM_S},
932  {"th", 2, NUM_th}, /* t */
933  {"v", 1, NUM_V}, /* v */
934 
935  /* last */
936  {NULL, 0, 0}
937 };
938 
939 
940 /* ----------
941  * KeyWords index for DATE-TIME version
942  * ----------
943  */
944 static const int DCH_index[KeyWord_INDEX_SIZE] = {
945 /*
946 0 1 2 3 4 5 6 7 8 9
947 */
948  /*---- first 0..31 chars are skipped ----*/
949 
950  -1, -1, -1, -1, -1, -1, -1, -1,
951  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
952  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
953  -1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,
954  DCH_FF1, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF,
956  -1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,
957  DCH_day, -1, DCH_ff1, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,
958  -1, -1, DCH_p_m, DCH_q, DCH_rm, DCH_sssss, DCH_tz, DCH_us, -1, DCH_ww,
959  -1, DCH_y_yyy, -1, -1, -1, -1
960 
961  /*---- chars over 126 are skipped ----*/
962 };
963 
964 /* ----------
965  * KeyWords index for NUMBER version
966  * ----------
967  */
968 static const int NUM_index[KeyWord_INDEX_SIZE] = {
969 /*
970 0 1 2 3 4 5 6 7 8 9
971 */
972  /*---- first 0..31 chars are skipped ----*/
973 
974  -1, -1, -1, -1, -1, -1, -1, -1,
975  -1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,
976  -1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,
977  -1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E,
978  NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,
979  NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,
980  -1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,
981  NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi,
982  -1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,
983  -1, -1, -1, -1, -1, -1
984 
985  /*---- chars over 126 are skipped ----*/
986 };
987 
988 /* ----------
989  * Number processor struct
990  * ----------
991  */
992 typedef struct NUMProc
993 {
995  NUMDesc *Num; /* number description */
996 
997  int sign, /* '-' or '+' */
998  sign_wrote, /* was sign write */
999  num_count, /* number of write digits */
1000  num_in, /* is inside number */
1001  num_curr, /* current position in number */
1002  out_pre_spaces, /* spaces before first digit */
1003 
1004  read_dec, /* to_number - was read dec. point */
1005  read_post, /* to_number - number of dec. digit */
1006  read_pre; /* to_number - number non-dec. digit */
1007 
1008  char *number, /* string with number */
1009  *number_p, /* pointer to current number position */
1010  *inout, /* in / out buffer */
1011  *inout_p, /* pointer to current inout position */
1012  *last_relevant, /* last relevant number after decimal point */
1013 
1014  *L_negative_sign, /* Locale */
1015  *L_positive_sign,
1016  *decimal,
1017  *L_thousands_sep,
1018  *L_currency_symbol;
1019 } NUMProc;
1020 
/*
 * Return flags for DCH_from_char().  Distinct bits, so they can be OR'ed.
 */
#define DCH_DATED	0x01
#define DCH_TIMED	0x02
#define DCH_ZONED	0x04
1025 
1026 /* ----------
1027  * Functions
1028  * ----------
1029  */
1030 static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
1031  const int *index);
1032 static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
1033 static bool is_separator_char(const char *str);
1034 static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
/* Format-picture parser: turns a format string into an array of FormatNodes */
1035 static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1036  const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num);
1037 
/* Date/time conversion in both directions, driven by a parsed FormatNode array */
1038 static void DCH_to_char(FormatNode *node, bool is_interval,
1039  TmToChar *in, char *out, Oid collid);
1040 static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
1041  Oid collid, bool std, bool *have_error);
1042 
/* Debug-only dump helpers, compiled only when DEBUG_TO_FROM_CHAR is defined */
1043 #ifdef DEBUG_TO_FROM_CHAR
1044 static void dump_index(const KeyWord *k, const int *index);
1045 static void dump_node(FormatNode *node, int max);
1046 #endif
1047 
/* Ordinal-suffix (ST/ND/RD/TH) helpers */
1048 static const char *get_th(char *num, int type);
1049 static char *str_numth(char *dest, char *num, int type);
/* Small helpers for the string-to-date/time direction */
1050 static int adjust_partial_year_to_2020(int year);
1051 static int strspace_len(const char *str);
1052 static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode,
1053  bool *have_error);
1054 static void from_char_set_int(int *dest, const int value, const FormatNode *node,
1055  bool *have_error);
1056 static int from_char_parse_int_len(int *dest, const char **src, const int len,
1057  FormatNode *node, bool *have_error);
1058 static int from_char_parse_int(int *dest, const char **src, FormatNode *node,
1059  bool *have_error);
1060 static int seq_search_ascii(const char *name, const char *const *array, int *len);
1061 static int seq_search_localized(const char *name, char **array, int *len,
1062  Oid collid);
1063 static int from_char_seq_search(int *dest, const char **src,
1064  const char *const *array,
1065  char **localized_array, Oid collid,
1066  FormatNode *node, bool *have_error);
1067 static void do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
1068  struct pg_tm *tm, fsec_t *fsec, int *fprec,
1069  uint32 *flags, bool *have_error);
/* Number-formatting helpers (NOTE(review): grouping inferred from the NUM_ prefix) */
1070 static char *fill_str(char *str, int c, int max);
1071 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
1072 static char *int_to_roman(int number);
1073 static void NUM_prepare_locale(NUMProc *Np);
1074 static char *get_last_relevant_decnum(char *num);
1075 static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
1076 static void NUM_numpart_to_char(NUMProc *Np, int id);
1077 static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
1078  char *number, int input_len, int to_char_out_pre_spaces,
1079  int sign, bool is_to_char, Oid collid);
/* Format-picture cache accessors for the date/time (DCH) and number (NUM) variants */
1080 static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std);
1081 static DCHCacheEntry *DCH_cache_search(const char *str, bool std);
1082 static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std);
1083 static NUMCacheEntry *NUM_cache_getnew(const char *str);
1084 static NUMCacheEntry *NUM_cache_search(const char *str);
1085 static NUMCacheEntry *NUM_cache_fetch(const char *str);
1086 
1087 
1088 /* ----------
1089  * Fast sequential search, use index for data selection which
1090  * go to seq. cycle (it is very fast for unwanted strings)
1091  * (can't be used binary search in format parsing)
1092  * ----------
1093  */
1094 static const KeyWord *
1095 index_seq_search(const char *str, const KeyWord *kw, const int *index)
1096 {
1097  int poz;
1098 
1099  if (!KeyWord_INDEX_FILTER(*str))
1100  return NULL;
1101 
1102  if ((poz = *(index + (*str - ' '))) > -1)
1103  {
1104  const KeyWord *k = kw + poz;
1105 
1106  do
1107  {
1108  if (strncmp(str, k->name, k->len) == 0)
1109  return k;
1110  k++;
1111  if (!k->name)
1112  return NULL;
1113  } while (*str == *k->name);
1114  }
1115  return NULL;
1116 }
1117 
1118 static const KeySuffix *
1119 suff_search(const char *str, const KeySuffix *suf, int type)
1120 {
1121  const KeySuffix *s;
1122 
1123  for (s = suf; s->name != NULL; s++)
1124  {
1125  if (s->type != type)
1126  continue;
1127 
1128  if (strncmp(str, s->name, s->len) == 0)
1129  return s;
1130  }
1131  return NULL;
1132 }
1133 
/*
 * Report whether the first byte of 'str' is a separator character, i.e. a
 * printable ASCII character (0x21..0x7E) that is neither a letter nor a
 * digit.  Space, control characters, DEL and the terminating NUL are not
 * separators.
 */
static bool
is_separator_char(const char *str)
{
	/* ASCII printable character, but not letter or digit */
	return (*str > 0x20 && *str < 0x7F &&
			!(*str >= 'A' && *str <= 'Z') &&
			!(*str >= 'a' && *str <= 'z') &&
			!(*str >= '0' && *str <= '9'));
}
1143 
1144 /* ----------
1145  * Prepare NUMDesc (number description struct) via FormatNode struct
1146  * ----------
1147  */
1148 static void
1150 {
1151  if (n->type != NODE_TYPE_ACTION)
1152  return;
1153 
1154  if (IS_EEEE(num) && n->key->id != NUM_E)
1155  ereport(ERROR,
1156  (errcode(ERRCODE_SYNTAX_ERROR),
1157  errmsg("\"EEEE\" must be the last pattern used")));
1158 
1159  switch (n->key->id)
1160  {
1161  case NUM_9:
1162  if (IS_BRACKET(num))
1163  ereport(ERROR,
1164  (errcode(ERRCODE_SYNTAX_ERROR),
1165  errmsg("\"9\" must be ahead of \"PR\"")));
1166  if (IS_MULTI(num))
1167  {
1168  ++num->multi;
1169  break;
1170  }
1171  if (IS_DECIMAL(num))
1172  ++num->post;
1173  else
1174  ++num->pre;
1175  break;
1176 
1177  case NUM_0:
1178  if (IS_BRACKET(num))
1179  ereport(ERROR,
1180  (errcode(ERRCODE_SYNTAX_ERROR),
1181  errmsg("\"0\" must be ahead of \"PR\"")));
1182  if (!IS_ZERO(num) && !IS_DECIMAL(num))
1183  {
1184  num->flag |= NUM_F_ZERO;
1185  num->zero_start = num->pre + 1;
1186  }
1187  if (!IS_DECIMAL(num))
1188  ++num->pre;
1189  else
1190  ++num->post;
1191 
1192  num->zero_end = num->pre + num->post;
1193  break;
1194 
1195  case NUM_B:
1196  if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
1197  num->flag |= NUM_F_BLANK;
1198  break;
1199 
1200  case NUM_D:
1201  num->flag |= NUM_F_LDECIMAL;
1202  num->need_locale = true;
1203  /* FALLTHROUGH */
1204  case NUM_DEC:
1205  if (IS_DECIMAL(num))
1206  ereport(ERROR,
1207  (errcode(ERRCODE_SYNTAX_ERROR),
1208  errmsg("multiple decimal points")));
1209  if (IS_MULTI(num))
1210  ereport(ERROR,
1211  (errcode(ERRCODE_SYNTAX_ERROR),
1212  errmsg("cannot use \"V\" and decimal point together")));
1213  num->flag |= NUM_F_DECIMAL;
1214  break;
1215 
1216  case NUM_FM:
1217  num->flag |= NUM_F_FILLMODE;
1218  break;
1219 
1220  case NUM_S:
1221  if (IS_LSIGN(num))
1222  ereport(ERROR,
1223  (errcode(ERRCODE_SYNTAX_ERROR),
1224  errmsg("cannot use \"S\" twice")));
1225  if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
1226  ereport(ERROR,
1227  (errcode(ERRCODE_SYNTAX_ERROR),
1228  errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
1229  if (!IS_DECIMAL(num))
1230  {
1231  num->lsign = NUM_LSIGN_PRE;
1232  num->pre_lsign_num = num->pre;
1233  num->need_locale = true;
1234  num->flag |= NUM_F_LSIGN;
1235  }
1236  else if (num->lsign == NUM_LSIGN_NONE)
1237  {
1238  num->lsign = NUM_LSIGN_POST;
1239  num->need_locale = true;
1240  num->flag |= NUM_F_LSIGN;
1241  }
1242  break;
1243 
1244  case NUM_MI:
1245  if (IS_LSIGN(num))
1246  ereport(ERROR,
1247  (errcode(ERRCODE_SYNTAX_ERROR),
1248  errmsg("cannot use \"S\" and \"MI\" together")));
1249  num->flag |= NUM_F_MINUS;
1250  if (IS_DECIMAL(num))
1251  num->flag |= NUM_F_MINUS_POST;
1252  break;
1253 
1254  case NUM_PL:
1255  if (IS_LSIGN(num))
1256  ereport(ERROR,
1257  (errcode(ERRCODE_SYNTAX_ERROR),
1258  errmsg("cannot use \"S\" and \"PL\" together")));
1259  num->flag |= NUM_F_PLUS;
1260  if (IS_DECIMAL(num))
1261  num->flag |= NUM_F_PLUS_POST;
1262  break;
1263 
1264  case NUM_SG:
1265  if (IS_LSIGN(num))
1266  ereport(ERROR,
1267  (errcode(ERRCODE_SYNTAX_ERROR),
1268  errmsg("cannot use \"S\" and \"SG\" together")));
1269  num->flag |= NUM_F_MINUS;
1270  num->flag |= NUM_F_PLUS;
1271  break;
1272 
1273  case NUM_PR:
1274  if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
1275  ereport(ERROR,
1276  (errcode(ERRCODE_SYNTAX_ERROR),
1277  errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
1278  num->flag |= NUM_F_BRACKET;
1279  break;
1280 
1281  case NUM_rn:
1282  case NUM_RN:
1283  num->flag |= NUM_F_ROMAN;
1284  break;
1285 
1286  case NUM_L:
1287  case NUM_G:
1288  num->need_locale = true;
1289  break;
1290 
1291  case NUM_V:
1292  if (IS_DECIMAL(num))
1293  ereport(ERROR,
1294  (errcode(ERRCODE_SYNTAX_ERROR),
1295  errmsg("cannot use \"V\" and decimal point together")));
1296  num->flag |= NUM_F_MULTI;
1297  break;
1298 
1299  case NUM_E:
1300  if (IS_EEEE(num))
1301  ereport(ERROR,
1302  (errcode(ERRCODE_SYNTAX_ERROR),
1303  errmsg("cannot use \"EEEE\" twice")));
1304  if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
1305  IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
1306  IS_ROMAN(num) || IS_MULTI(num))
1307  ereport(ERROR,
1308  (errcode(ERRCODE_SYNTAX_ERROR),
1309  errmsg("\"EEEE\" is incompatible with other formats"),
1310  errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
1311  num->flag |= NUM_F_EEEE;
1312  break;
1313  }
1314 }
1315 
1316 /* ----------
1317  * Format parser, search small keywords and keyword's suffixes, and make
1318  * format-node tree.
1319  *
1320  * for DATE-TIME & NUMBER version
1321  * ----------
1322  */
1323 static void
1324 parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1325  const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num)
1326 {
1327  FormatNode *n;
1328 
1329 #ifdef DEBUG_TO_FROM_CHAR
1330  elog(DEBUG_elog_output, "to_char/number(): run parser");
1331 #endif
1332 
1333  n = node;
1334 
1335  while (*str)
1336  {
1337  int suffix = 0;
1338  const KeySuffix *s;
1339 
1340  /*
1341  * Prefix
1342  */
1343  if ((flags & DCH_FLAG) &&
1344  (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
1345  {
1346  suffix |= s->id;
1347  if (s->len)
1348  str += s->len;
1349  }
1350 
1351  /*
1352  * Keyword
1353  */
1354  if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
1355  {
1356  n->type = NODE_TYPE_ACTION;
1357  n->suffix = suffix;
1358  if (n->key->len)
1359  str += n->key->len;
1360 
1361  /*
1362  * NUM version: Prepare global NUMDesc struct
1363  */
1364  if (flags & NUM_FLAG)
1365  NUMDesc_prepare(Num, n);
1366 
1367  /*
1368  * Postfix
1369  */
1370  if ((flags & DCH_FLAG) && *str &&
1371  (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
1372  {
1373  n->suffix |= s->id;
1374  if (s->len)
1375  str += s->len;
1376  }
1377 
1378  n++;
1379  }
1380  else if (*str)
1381  {
1382  int chlen;
1383 
1384  if ((flags & STD_FLAG) && *str != '"')
1385  {
1386  /*
1387  * Standard mode, allow only following separators: "-./,':; ".
1388  * However, we support double quotes even in standard mode
1389  * (see below). This is our extension of standard mode.
1390  */
1391  if (strchr("-./,':; ", *str) == NULL)
1392  ereport(ERROR,
1393  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
1394  errmsg("invalid datetime format separator: \"%s\"",
1395  pnstrdup(str, pg_mblen(str)))));
1396 
1397  if (*str == ' ')
1398  n->type = NODE_TYPE_SPACE;
1399  else
1401 
1402  n->character[0] = *str;
1403  n->character[1] = '\0';
1404  n->key = NULL;
1405  n->suffix = 0;
1406  n++;
1407  str++;
1408  }
1409  else if (*str == '"')
1410  {
1411  /*
1412  * Process double-quoted literal string, if any
1413  */
1414  str++;
1415  while (*str)
1416  {
1417  if (*str == '"')
1418  {
1419  str++;
1420  break;
1421  }
1422  /* backslash quotes the next character, if any */
1423  if (*str == '\\' && *(str + 1))
1424  str++;
1425  chlen = pg_mblen(str);
1426  n->type = NODE_TYPE_CHAR;
1427  memcpy(n->character, str, chlen);
1428  n->character[chlen] = '\0';
1429  n->key = NULL;
1430  n->suffix = 0;
1431  n++;
1432  str += chlen;
1433  }
1434  }
1435  else
1436  {
1437  /*
1438  * Outside double-quoted strings, backslash is only special if
1439  * it immediately precedes a double quote.
1440  */
1441  if (*str == '\\' && *(str + 1) == '"')
1442  str++;
1443  chlen = pg_mblen(str);
1444 
1445  if ((flags & DCH_FLAG) && is_separator_char(str))
1447  else if (isspace((unsigned char) *str))
1448  n->type = NODE_TYPE_SPACE;
1449  else
1450  n->type = NODE_TYPE_CHAR;
1451 
1452  memcpy(n->character, str, chlen);
1453  n->character[chlen] = '\0';
1454  n->key = NULL;
1455  n->suffix = 0;
1456  n++;
1457  str += chlen;
1458  }
1459  }
1460  }
1461 
1462  n->type = NODE_TYPE_END;
1463  n->suffix = 0;
1464 }
1465 
/* ----------
 * DEBUG: dump a FormatNode array to the debug log.
 *
 * Prints one line per node, for at most max + 1 nodes, and stops right
 * after the NODE_TYPE_END node if that comes first.  Node types other than
 * ACTION, CHAR and END are reported as unknown.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR

#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
#define DUMP_FM(_suf) (S_FM(_suf) ? "FM" : " ")

static void
dump_node(FormatNode *node, int max)
{
	int			pos;

	elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");

	for (pos = 0; pos <= max; pos++)
	{
		FormatNode *cur = node + pos;

		if (cur->type == NODE_TYPE_ACTION)
		{
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
				 pos, cur->key->name,
				 DUMP_THth(cur->suffix), DUMP_FM(cur->suffix));
			continue;
		}

		if (cur->type == NODE_TYPE_CHAR)
		{
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
				 pos, cur->character);
			continue;
		}

		if (cur->type == NODE_TYPE_END)
		{
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", pos);
			return;
		}

		elog(DEBUG_elog_output, "%d:\t unknown NODE!", pos);
	}
}
#endif							/* DEBUG */
1501 
1502 /*****************************************************************************
1503  * Private utils
1504  *****************************************************************************/
1505 
1506 /* ----------
1507  * Return ST/ND/RD/TH for simple (1..9) numbers
1508  * type --> 0 upper, 1 lower
1509  * ----------
1510  */
1511 static const char *
1512 get_th(char *num, int type)
1513 {
1514  int len = strlen(num),
1515  last;
1516 
1517  last = *(num + (len - 1));
1518  if (!isdigit((unsigned char) last))
1519  ereport(ERROR,
1520  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1521  errmsg("\"%s\" is not a number", num)));
1522 
1523  /*
1524  * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
1525  * 'ST/st', 'ND/nd', 'RD/rd', respectively
1526  */
1527  if ((len > 1) && (num[len - 2] == '1'))
1528  last = 0;
1529 
1530  switch (last)
1531  {
1532  case '1':
1533  if (type == TH_UPPER)
1534  return numTH[0];
1535  return numth[0];
1536  case '2':
1537  if (type == TH_UPPER)
1538  return numTH[1];
1539  return numth[1];
1540  case '3':
1541  if (type == TH_UPPER)
1542  return numTH[2];
1543  return numth[2];
1544  default:
1545  if (type == TH_UPPER)
1546  return numTH[3];
1547  return numth[3];
1548  }
1549 }
1550 
/* ----------
 * Turn a number string into an ordinal number string by appending the
 * suffix chosen by get_th(num, type).
 *
 * 'dest' may be the same buffer as 'num' (in-place append); otherwise
 * 'num' is copied to 'dest' first.  The caller must make sure 'dest' has
 * room for the suffix.  Returns 'dest'.
 * ----------
 */
static char *
str_numth(char *dest, char *num, int type)
{
	if (num != dest)
		strcpy(dest, num);

	/* strcat() hands back its destination, i.e. dest */
	return strcat(dest, get_th(num, type));
}
1564 
1565 /*****************************************************************************
1566  * upper/lower/initcap functions
1567  *****************************************************************************/
1568 
1569 #ifdef USE_ICU
1570 
1571 typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
1572  const UChar *src, int32_t srcLength,
1573  const char *locale,
1574  UErrorCode *pErrorCode);
1575 
1576 static int32_t
1577 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
1578  UChar **buff_dest, UChar *buff_source, int32_t len_source)
1579 {
1580  UErrorCode status;
1581  int32_t len_dest;
1582 
1583  len_dest = len_source; /* try first with same length */
1584  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1585  status = U_ZERO_ERROR;
1586  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1587  mylocale->info.icu.locale, &status);
1588  if (status == U_BUFFER_OVERFLOW_ERROR)
1589  {
1590  /* try again with adjusted length */
1591  pfree(*buff_dest);
1592  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1593  status = U_ZERO_ERROR;
1594  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1595  mylocale->info.icu.locale, &status);
1596  }
1597  if (U_FAILURE(status))
1598  ereport(ERROR,
1599  (errmsg("case conversion failed: %s", u_errorName(status))));
1600  return len_dest;
1601 }
1602 
1603 static int32_t
1604 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
1605  const UChar *src, int32_t srcLength,
1606  const char *locale,
1607  UErrorCode *pErrorCode)
1608 {
1609  return u_strToTitle(dest, destCapacity, src, srcLength,
1610  NULL, locale, pErrorCode);
1611 }
1612 
1613 #endif /* USE_ICU */
1614 
1615 /*
1616  * If the system provides the needed functions for wide-character manipulation
1617  * (which are all standardized by C99), then we implement upper/lower/initcap
1618  * using wide-character functions, if necessary. Otherwise we use the
1619  * traditional <ctype.h> functions, which of course will not work as desired
1620  * in multibyte character sets. Note that in either case we are effectively
1621  * assuming that the database character encoding matches the encoding implied
1622  * by LC_CTYPE.
1623  *
1624  * If the system provides locale_t and associated functions (which are
1625  * standardized by Open Group's XBD), we can support collations that are
1626  * neither default nor C. The code is written to handle both combinations
1627  * of have-wide-characters and have-locale_t, though it's rather unlikely
1628  * a platform would have the latter without the former.
1629  */
1630 
1631 /*
1632  * collation-aware, wide-character-aware lower function
1633  *
1634  * We pass the number of bytes so we can pass varlena and char*
1635  * to this function. The result is a palloc'd, null-terminated string.
1636  */
1637 char *
1638 str_tolower(const char *buff, size_t nbytes, Oid collid)
1639 {
1640  char *result;
1641 
1642  if (!buff)
1643  return NULL;
1644 
1645  /* C/POSIX collations use this path regardless of database encoding */
1646  if (lc_ctype_is_c(collid))
1647  {
1648  result = asc_tolower(buff, nbytes);
1649  }
1650  else
1651  {
1652  pg_locale_t mylocale = 0;
1653 
1654  if (collid != DEFAULT_COLLATION_OID)
1655  {
1656  if (!OidIsValid(collid))
1657  {
1658  /*
1659  * This typically means that the parser could not resolve a
1660  * conflict of implicit collations, so report it that way.
1661  */
1662  ereport(ERROR,
1663  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1664  errmsg("could not determine which collation to use for %s function",
1665  "lower()"),
1666  errhint("Use the COLLATE clause to set the collation explicitly.")));
1667  }
1668  mylocale = pg_newlocale_from_collation(collid);
1669  }
1670 
1671 #ifdef USE_ICU
1672  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1673  {
1674  int32_t len_uchar;
1675  int32_t len_conv;
1676  UChar *buff_uchar;
1677  UChar *buff_conv;
1678 
1679  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1680  len_conv = icu_convert_case(u_strToLower, mylocale,
1681  &buff_conv, buff_uchar, len_uchar);
1682  icu_from_uchar(&result, buff_conv, len_conv);
1683  pfree(buff_uchar);
1684  pfree(buff_conv);
1685  }
1686  else
1687 #endif
1688  {
1690  {
1691  wchar_t *workspace;
1692  size_t curr_char;
1693  size_t result_size;
1694 
1695  /* Overflow paranoia */
1696  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1697  ereport(ERROR,
1698  (errcode(ERRCODE_OUT_OF_MEMORY),
1699  errmsg("out of memory")));
1700 
1701  /* Output workspace cannot have more codes than input bytes */
1702  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1703 
1704  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1705 
1706  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1707  {
1708 #ifdef HAVE_LOCALE_T
1709  if (mylocale)
1710  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1711  else
1712 #endif
1713  workspace[curr_char] = towlower(workspace[curr_char]);
1714  }
1715 
1716  /*
1717  * Make result large enough; case change might change number
1718  * of bytes
1719  */
1720  result_size = curr_char * pg_database_encoding_max_length() + 1;
1721  result = palloc(result_size);
1722 
1723  wchar2char(result, workspace, result_size, mylocale);
1724  pfree(workspace);
1725  }
1726  else
1727  {
1728  char *p;
1729 
1730  result = pnstrdup(buff, nbytes);
1731 
1732  /*
1733  * Note: we assume that tolower_l() will not be so broken as
1734  * to need an isupper_l() guard test. When using the default
1735  * collation, we apply the traditional Postgres behavior that
1736  * forces ASCII-style treatment of I/i, but in non-default
1737  * collations you get exactly what the collation says.
1738  */
1739  for (p = result; *p; p++)
1740  {
1741 #ifdef HAVE_LOCALE_T
1742  if (mylocale)
1743  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1744  else
1745 #endif
1746  *p = pg_tolower((unsigned char) *p);
1747  }
1748  }
1749  }
1750  }
1751 
1752  return result;
1753 }
1754 
1755 /*
1756  * collation-aware, wide-character-aware upper function
1757  *
1758  * We pass the number of bytes so we can pass varlena and char*
1759  * to this function. The result is a palloc'd, null-terminated string.
1760  */
1761 char *
1762 str_toupper(const char *buff, size_t nbytes, Oid collid)
1763 {
1764  char *result;
1765 
1766  if (!buff)
1767  return NULL;
1768 
1769  /* C/POSIX collations use this path regardless of database encoding */
1770  if (lc_ctype_is_c(collid))
1771  {
1772  result = asc_toupper(buff, nbytes);
1773  }
1774  else
1775  {
1776  pg_locale_t mylocale = 0;
1777 
1778  if (collid != DEFAULT_COLLATION_OID)
1779  {
1780  if (!OidIsValid(collid))
1781  {
1782  /*
1783  * This typically means that the parser could not resolve a
1784  * conflict of implicit collations, so report it that way.
1785  */
1786  ereport(ERROR,
1787  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1788  errmsg("could not determine which collation to use for %s function",
1789  "upper()"),
1790  errhint("Use the COLLATE clause to set the collation explicitly.")));
1791  }
1792  mylocale = pg_newlocale_from_collation(collid);
1793  }
1794 
1795 #ifdef USE_ICU
1796  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1797  {
1798  int32_t len_uchar,
1799  len_conv;
1800  UChar *buff_uchar;
1801  UChar *buff_conv;
1802 
1803  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1804  len_conv = icu_convert_case(u_strToUpper, mylocale,
1805  &buff_conv, buff_uchar, len_uchar);
1806  icu_from_uchar(&result, buff_conv, len_conv);
1807  pfree(buff_uchar);
1808  pfree(buff_conv);
1809  }
1810  else
1811 #endif
1812  {
1814  {
1815  wchar_t *workspace;
1816  size_t curr_char;
1817  size_t result_size;
1818 
1819  /* Overflow paranoia */
1820  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1821  ereport(ERROR,
1822  (errcode(ERRCODE_OUT_OF_MEMORY),
1823  errmsg("out of memory")));
1824 
1825  /* Output workspace cannot have more codes than input bytes */
1826  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1827 
1828  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1829 
1830  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1831  {
1832 #ifdef HAVE_LOCALE_T
1833  if (mylocale)
1834  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1835  else
1836 #endif
1837  workspace[curr_char] = towupper(workspace[curr_char]);
1838  }
1839 
1840  /*
1841  * Make result large enough; case change might change number
1842  * of bytes
1843  */
1844  result_size = curr_char * pg_database_encoding_max_length() + 1;
1845  result = palloc(result_size);
1846 
1847  wchar2char(result, workspace, result_size, mylocale);
1848  pfree(workspace);
1849  }
1850  else
1851  {
1852  char *p;
1853 
1854  result = pnstrdup(buff, nbytes);
1855 
1856  /*
1857  * Note: we assume that toupper_l() will not be so broken as
1858  * to need an islower_l() guard test. When using the default
1859  * collation, we apply the traditional Postgres behavior that
1860  * forces ASCII-style treatment of I/i, but in non-default
1861  * collations you get exactly what the collation says.
1862  */
1863  for (p = result; *p; p++)
1864  {
1865 #ifdef HAVE_LOCALE_T
1866  if (mylocale)
1867  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1868  else
1869 #endif
1870  *p = pg_toupper((unsigned char) *p);
1871  }
1872  }
1873  }
1874  }
1875 
1876  return result;
1877 }
1878 
1879 /*
1880  * collation-aware, wide-character-aware initcap function
1881  *
1882  * We pass the number of bytes so we can pass varlena and char*
1883  * to this function. The result is a palloc'd, null-terminated string.
1884  */
1885 char *
1886 str_initcap(const char *buff, size_t nbytes, Oid collid)
1887 {
1888  char *result;
1889  int wasalnum = false;
1890 
1891  if (!buff)
1892  return NULL;
1893 
1894  /* C/POSIX collations use this path regardless of database encoding */
1895  if (lc_ctype_is_c(collid))
1896  {
1897  result = asc_initcap(buff, nbytes);
1898  }
1899  else
1900  {
1901  pg_locale_t mylocale = 0;
1902 
1903  if (collid != DEFAULT_COLLATION_OID)
1904  {
1905  if (!OidIsValid(collid))
1906  {
1907  /*
1908  * This typically means that the parser could not resolve a
1909  * conflict of implicit collations, so report it that way.
1910  */
1911  ereport(ERROR,
1912  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1913  errmsg("could not determine which collation to use for %s function",
1914  "initcap()"),
1915  errhint("Use the COLLATE clause to set the collation explicitly.")));
1916  }
1917  mylocale = pg_newlocale_from_collation(collid);
1918  }
1919 
1920 #ifdef USE_ICU
1921  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1922  {
1923  int32_t len_uchar,
1924  len_conv;
1925  UChar *buff_uchar;
1926  UChar *buff_conv;
1927 
1928  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1929  len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
1930  &buff_conv, buff_uchar, len_uchar);
1931  icu_from_uchar(&result, buff_conv, len_conv);
1932  pfree(buff_uchar);
1933  pfree(buff_conv);
1934  }
1935  else
1936 #endif
1937  {
1939  {
1940  wchar_t *workspace;
1941  size_t curr_char;
1942  size_t result_size;
1943 
1944  /* Overflow paranoia */
1945  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1946  ereport(ERROR,
1947  (errcode(ERRCODE_OUT_OF_MEMORY),
1948  errmsg("out of memory")));
1949 
1950  /* Output workspace cannot have more codes than input bytes */
1951  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1952 
1953  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1954 
1955  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1956  {
1957 #ifdef HAVE_LOCALE_T
1958  if (mylocale)
1959  {
1960  if (wasalnum)
1961  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1962  else
1963  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1964  wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
1965  }
1966  else
1967 #endif
1968  {
1969  if (wasalnum)
1970  workspace[curr_char] = towlower(workspace[curr_char]);
1971  else
1972  workspace[curr_char] = towupper(workspace[curr_char]);
1973  wasalnum = iswalnum(workspace[curr_char]);
1974  }
1975  }
1976 
1977  /*
1978  * Make result large enough; case change might change number
1979  * of bytes
1980  */
1981  result_size = curr_char * pg_database_encoding_max_length() + 1;
1982  result = palloc(result_size);
1983 
1984  wchar2char(result, workspace, result_size, mylocale);
1985  pfree(workspace);
1986  }
1987  else
1988  {
1989  char *p;
1990 
1991  result = pnstrdup(buff, nbytes);
1992 
1993  /*
1994  * Note: we assume that toupper_l()/tolower_l() will not be so
1995  * broken as to need guard tests. When using the default
1996  * collation, we apply the traditional Postgres behavior that
1997  * forces ASCII-style treatment of I/i, but in non-default
1998  * collations you get exactly what the collation says.
1999  */
2000  for (p = result; *p; p++)
2001  {
2002 #ifdef HAVE_LOCALE_T
2003  if (mylocale)
2004  {
2005  if (wasalnum)
2006  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
2007  else
2008  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
2009  wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
2010  }
2011  else
2012 #endif
2013  {
2014  if (wasalnum)
2015  *p = pg_tolower((unsigned char) *p);
2016  else
2017  *p = pg_toupper((unsigned char) *p);
2018  wasalnum = isalnum((unsigned char) *p);
2019  }
2020  }
2021  }
2022  }
2023  }
2024 
2025  return result;
2026 }
2027 
/*
 * ASCII-only lower function
 *
 * Takes a byte count rather than relying on a terminator, so both varlena
 * payloads and char* strings can be passed.  Returns a palloc'd,
 * null-terminated copy in which each byte has been passed through
 * pg_ascii_tolower(); returns NULL when 'buff' is NULL.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *copy;
	size_t		i;

	if (buff == NULL)
		return NULL;

	copy = pnstrdup(buff, nbytes);

	/* the copy is NUL-terminated, so scan up to its terminator */
	for (i = 0; copy[i] != '\0'; i++)
		copy[i] = pg_ascii_tolower((unsigned char) copy[i]);

	return copy;
}
2050 
/*
 * ASCII-only upper function
 *
 * Takes a byte count rather than relying on a terminator, so both varlena
 * payloads and char* strings can be passed.  Returns a palloc'd,
 * null-terminated copy in which each byte has been passed through
 * pg_ascii_toupper(); returns NULL when 'buff' is NULL.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *copy;
	size_t		i;

	if (buff == NULL)
		return NULL;

	copy = pnstrdup(buff, nbytes);

	/* the copy is NUL-terminated, so scan up to its terminator */
	for (i = 0; copy[i] != '\0'; i++)
		copy[i] = pg_ascii_toupper((unsigned char) copy[i]);

	return copy;
}
2073 
/*
 * ASCII-only initcap function
 *
 * Takes a byte count rather than relying on a terminator, so both varlena
 * payloads and char* strings can be passed.  Returns a palloc'd,
 * null-terminated copy in which a byte is upper-cased when the preceding
 * (already converted) byte is not an ASCII letter or digit, and lower-cased
 * otherwise.  Returns NULL when 'buff' is NULL.
 */
char *
asc_initcap(const char *buff, size_t nbytes)
{
	char	   *copy;
	char	   *cur;
	int			in_word = false;

	if (buff == NULL)
		return NULL;

	copy = pnstrdup(buff, nbytes);

	for (cur = copy; *cur != '\0'; cur++)
	{
		unsigned char in = (unsigned char) *cur;
		char		out;

		if (in_word)
			out = pg_ascii_tolower(in);
		else
			out = pg_ascii_toupper(in);
		*cur = out;

		/* classify by explicit ASCII ranges; isalnum() is not trusted here */
		in_word = ((out >= 'A' && out <= 'Z') ||
				   (out >= 'a' && out <= 'z') ||
				   (out >= '0' && out <= '9'));
	}

	return copy;
}
2108 
2109 /* convenience routines for when the input is null-terminated */
2110 
2111 static char *
2112 str_tolower_z(const char *buff, Oid collid)
2113 {
2114  return str_tolower(buff, strlen(buff), collid);
2115 }
2116 
2117 static char *
2118 str_toupper_z(const char *buff, Oid collid)
2119 {
2120  return str_toupper(buff, strlen(buff), collid);
2121 }
2122 
2123 static char *
2124 str_initcap_z(const char *buff, Oid collid)
2125 {
2126  return str_initcap(buff, strlen(buff), collid);
2127 }
2128 
/*
 * asc_tolower() for a null-terminated input string.
 *
 * Returns NULL for a NULL input, as asc_tolower() does, instead of handing
 * the NULL pointer to strlen().
 */
static char *
asc_tolower_z(const char *buff)
{
	if (buff == NULL)
		return NULL;

	return asc_tolower(buff, strlen(buff));
}
2134 
/*
 * asc_toupper() for a null-terminated input string.
 *
 * Returns NULL for a NULL input, as asc_toupper() does, instead of handing
 * the NULL pointer to strlen().
 */
static char *
asc_toupper_z(const char *buff)
{
	if (buff == NULL)
		return NULL;

	return asc_toupper(buff, strlen(buff));
}
2140 
2141 /* asc_initcap_z is not currently needed */
2142 
2143 
/* ----------
 * Skip TH / th in FROM_CHAR
 *
 * If S_THth is on for the suffix bits '_suf', advance 'ptr' over up to two
 * characters (each measured with pg_mblen), never moving past the end of
 * the string.
 * ----------
 */
#define SKIP_THth(ptr, _suf) \
	do { \
		if (S_THth(_suf)) \
		{ \
			int		_skip_n; \
			for (_skip_n = 0; _skip_n < 2 && *(ptr); _skip_n++) \
				(ptr) += pg_mblen(ptr); \
		} \
	} while (0)
2158 
2159 
#ifdef DEBUG_TO_FROM_CHAR
/* -----------
 * DEBUG: dump a keyword index, for debugging and index checking.
 *
 * For every index slot, logs the character the slot stands for (slot
 * number + 32) and either the keyword it points to or, for slots holding
 * -1, the raw slot contents.  Finishes with the number of used and free
 * slots.
 * ----------
 */
static void
dump_index(const KeyWord *k, const int *index)
{
	int			used = 0;
	int			unused = 0;
	int			slot;

	elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");

	for (slot = 0; slot < KeyWord_INDEX_SIZE; slot++)
	{
		if (index[slot] == -1)
		{
			unused++;
			elog(DEBUG_elog_output, "\t(%d) %c %d", slot, slot + 32, index[slot]);
			continue;
		}

		elog(DEBUG_elog_output, "\t%c: %s, ", slot + 32, k[index[slot]].name);
		used++;
	}

	elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
		 used, unused);
}
#endif							/* DEBUG */
2192 
2193 /* ----------
2194  * Return true if next format picture is not digit value
2195  * ----------
2196  */
2197 static bool
2199 {
2200  if (n->type == NODE_TYPE_END)
2201  return false;
2202 
2203  if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
2204  return true;
2205 
2206  /*
2207  * Next node
2208  */
2209  n++;
2210 
2211  /* end of format string is treated like a non-digit separator */
2212  if (n->type == NODE_TYPE_END)
2213  return true;
2214 
2215  if (n->type == NODE_TYPE_ACTION)
2216  {
2217  if (n->key->is_digit)
2218  return false;
2219 
2220  return true;
2221  }
2222  else if (n->character[1] == '\0' &&
2223  isdigit((unsigned char) n->character[0]))
2224  return false;
2225 
2226  return true; /* some non-digit input (separator) */
2227 }
2228 
2229 
/*
 * Expand a partially specified year to a full year close to 2020.
 *
 * Values below 1000 are mapped into a century as described by the ranges
 * below; values of 1000 and above are returned unchanged.
 */
static int
adjust_partial_year_to_2020(int year)
{
	/*
	 * Adjust all dates toward 2020; this is effectively what happens when we
	 * assume '70' is 1970 and '69' is 2069.
	 */
	/* Force 0-69 into the 2000's */
	if (year < 70)
		return year + 2000;
	/* Force 70-99 into the 1900's */
	else if (year < 100)
		return year + 1900;
	/* Force 100-519 into the 2000's */
	else if (year < 520)
		return year + 2000;
	/* Force 520-999 into the 1000's */
	else if (year < 1000)
		return year + 1000;
	else
		return year;
}
2252 
2253 
/*
 * Return the number of leading bytes of 'str' for which isspace() is true.
 */
static int
strspace_len(const char *str)
{
	const char *scan = str;

	while (*scan != '\0' && isspace((unsigned char) *scan))
		scan++;

	return (int) (scan - str);
}
2266 
2267 /*
2268  * Set the date mode of a from-char conversion.
2269  *
2270  * Puke if the date mode has already been set, and the caller attempts to set
2271  * it to a conflicting mode.
2272  *
2273  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
2274  */
2275 static void
2276 from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode, bool *have_error)
2277 {
2278  if (mode != FROM_CHAR_DATE_NONE)
2279  {
2280  if (tmfc->mode == FROM_CHAR_DATE_NONE)
2281  tmfc->mode = mode;
2282  else if (tmfc->mode != mode)
2284  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2285  errmsg("invalid combination of date conventions"),
2286  errhint("Do not mix Gregorian and ISO week date "
2287  "conventions in a formatting template."))));
2288  }
2289 
2290 on_error:
2291  return;
2292 }
2293 
2294 /*
2295  * Set the integer pointed to by 'dest' to the given value.
2296  *
2297  * Puke if the destination integer has previously been set to some other
2298  * non-zero value.
2299  *
2300  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
2301  */
2302 static void
2303 from_char_set_int(int *dest, const int value, const FormatNode *node,
2304  bool *have_error)
2305 {
2306  if (*dest != 0 && *dest != value)
2308  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2309  errmsg("conflicting values for \"%s\" field in "
2310  "formatting string",
2311  node->key->name),
2312  errdetail("This value contradicts a previous setting "
2313  "for the same field type."))));
2314  *dest = value;
2315 
2316 on_error:
2317  return;
2318 }
2319 
2320 /*
2321  * Read a single integer from the source string, into the int pointed to by
2322  * 'dest'. If 'dest' is NULL, the result is discarded.
2323  *
2324  * In fixed-width mode (the node does not have the FM suffix), consume at most
2325  * 'len' characters. However, any leading whitespace isn't counted in 'len'.
2326  *
2327  * We use strtol() to recover the integer value from the source string, in
2328  * accordance with the given FormatNode.
2329  *
2330  * If the conversion completes successfully, src will have been advanced to
2331  * point at the character immediately following the last character used in the
2332  * conversion.
2333  *
2334  * Return the number of characters consumed.
2335  *
2336  * Note that from_char_parse_int() provides a more convenient wrapper where
2337  * the length of the field is the same as the length of the format keyword (as
2338  * with DD and MI).
2339  *
2340  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
2341  * and -1 is returned.
2342  */
2343 static int
2344 from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node,
2345  bool *have_error)
2346 {
2347  long result;
2348  char copy[DCH_MAX_ITEM_SIZ + 1];
2349  const char *init = *src;
2350  int used;
2351 
2352  /*
2353  * Skip any whitespace before parsing the integer.
2354  */
2355  *src += strspace_len(*src);
2356 
2357  Assert(len <= DCH_MAX_ITEM_SIZ);
2358  used = (int) strlcpy(copy, *src, len + 1);
2359 
2360  if (S_FM(node->suffix) || is_next_separator(node))
2361  {
2362  /*
2363  * This node is in Fill Mode, or the next node is known to be a
2364  * non-digit value, so we just slurp as many characters as we can get.
2365  */
2366  char *endptr;
2367 
2368  errno = 0;
2369  result = strtol(init, &endptr, 10);
2370  *src = endptr;
2371  }
2372  else
2373  {
2374  /*
2375  * We need to pull exactly the number of characters given in 'len' out
2376  * of the string, and convert those.
2377  */
2378  char *last;
2379 
2380  if (used < len)
2382  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2383  errmsg("source string too short for \"%s\" "
2384  "formatting field",
2385  node->key->name),
2386  errdetail("Field requires %d characters, "
2387  "but only %d remain.",
2388  len, used),
2389  errhint("If your source string is not fixed-width, "
2390  "try using the \"FM\" modifier."))));
2391 
2392  errno = 0;
2393  result = strtol(copy, &last, 10);
2394  used = last - copy;
2395 
2396  if (used > 0 && used < len)
2398  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2399  errmsg("invalid value \"%s\" for \"%s\"",
2400  copy, node->key->name),
2401  errdetail("Field requires %d characters, "
2402  "but only %d could be parsed.",
2403  len, used),
2404  errhint("If your source string is not fixed-width, "
2405  "try using the \"FM\" modifier."))));
2406 
2407  *src += used;
2408  }
2409 
2410  if (*src == init)
2412  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2413  errmsg("invalid value \"%s\" for \"%s\"",
2414  copy, node->key->name),
2415  errdetail("Value must be an integer."))));
2416 
2417  if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
2419  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2420  errmsg("value for \"%s\" in source string is out of range",
2421  node->key->name),
2422  errdetail("Value must be in the range %d to %d.",
2423  INT_MIN, INT_MAX))));
2424 
2425  if (dest != NULL)
2426  {
2427  from_char_set_int(dest, (int) result, node, have_error);
2428  CHECK_ERROR;
2429  }
2430 
2431  return *src - init;
2432 
2433 on_error:
2434  return -1;
2435 }
2436 
2437 /*
2438  * Call from_char_parse_int_len(), using the length of the format keyword as
2439  * the expected length of the field.
2440  *
2441  * Don't call this function if the field differs in length from the format
2442  * keyword (as with HH24; the keyword length is 4, but the field length is 2).
2443  * In such cases, call from_char_parse_int_len() instead to specify the
2444  * required length explicitly.
2445  */
2446 static int
2447 from_char_parse_int(int *dest, const char **src, FormatNode *node, bool *have_error)
2448 {
2449  return from_char_parse_int_len(dest, src, node->key->len, node, have_error);
2450 }
2451 
2452 /*
2453  * Sequentially search null-terminated "array" for a case-insensitive match
2454  * to the initial character(s) of "name".
2455  *
2456  * Returns array index of match, or -1 for no match.
2457  *
2458  * *len is set to the length of the match, or 0 for no match.
2459  *
2460  * Case-insensitivity is defined per pg_ascii_tolower, so this is only
2461  * suitable for comparisons to ASCII strings.
2462  */
2463 static int
2464 seq_search_ascii(const char *name, const char *const *array, int *len)
2465 {
2466  unsigned char firstc;
2467  const char *const *a;
2468 
2469  *len = 0;
2470 
2471  /* empty string can't match anything */
2472  if (!*name)
2473  return -1;
2474 
2475  /* we handle first char specially to gain some speed */
2476  firstc = pg_ascii_tolower((unsigned char) *name);
2477 
2478  for (a = array; *a != NULL; a++)
2479  {
2480  const char *p;
2481  const char *n;
2482 
2483  /* compare first chars */
2484  if (pg_ascii_tolower((unsigned char) **a) != firstc)
2485  continue;
2486 
2487  /* compare rest of string */
2488  for (p = *a + 1, n = name + 1;; p++, n++)
2489  {
2490  /* return success if we matched whole array entry */
2491  if (*p == '\0')
2492  {
2493  *len = n - name;
2494  return a - array;
2495  }
2496  /* else, must have another character in "name" ... */
2497  if (*n == '\0')
2498  break;
2499  /* ... and it must match */
2500  if (pg_ascii_tolower((unsigned char) *p) !=
2501  pg_ascii_tolower((unsigned char) *n))
2502  break;
2503  }
2504  }
2505 
2506  return -1;
2507 }
2508 
2509 /*
2510  * Sequentially search an array of possibly non-English words for
2511  * a case-insensitive match to the initial character(s) of "name".
2512  *
2513  * This has the same API as seq_search_ascii(), but we use a more general
2514  * case-folding transformation to achieve case-insensitivity. Case folding
2515  * is done per the rules of the collation identified by "collid".
2516  *
2517  * The array is treated as const, but we don't declare it that way because
2518  * the arrays exported by pg_locale.c aren't const.
2519  */
2520 static int
2521 seq_search_localized(const char *name, char **array, int *len, Oid collid)
2522 {
2523  char **a;
2524  char *upper_name;
2525  char *lower_name;
2526 
2527  *len = 0;
2528 
2529  /* empty string can't match anything */
2530  if (!*name)
2531  return -1;
2532 
2533  /*
2534  * The case-folding processing done below is fairly expensive, so before
2535  * doing that, make a quick pass to see if there is an exact match.
2536  */
2537  for (a = array; *a != NULL; a++)
2538  {
2539  int element_len = strlen(*a);
2540 
2541  if (strncmp(name, *a, element_len) == 0)
2542  {
2543  *len = element_len;
2544  return a - array;
2545  }
2546  }
2547 
2548  /*
2549  * Fold to upper case, then to lower case, so that we can match reliably
2550  * even in languages in which case conversions are not injective.
2551  */
2552  upper_name = str_toupper(unconstify(char *, name), strlen(name), collid);
2553  lower_name = str_tolower(upper_name, strlen(upper_name), collid);
2554  pfree(upper_name);
2555 
2556  for (a = array; *a != NULL; a++)
2557  {
2558  char *upper_element;
2559  char *lower_element;
2560  int element_len;
2561 
2562  /* Likewise upper/lower-case array element */
2563  upper_element = str_toupper(*a, strlen(*a), collid);
2564  lower_element = str_tolower(upper_element, strlen(upper_element),
2565  collid);
2566  pfree(upper_element);
2567  element_len = strlen(lower_element);
2568 
2569  /* Match? */
2570  if (strncmp(lower_name, lower_element, element_len) == 0)
2571  {
2572  *len = element_len;
2573  pfree(lower_element);
2574  pfree(lower_name);
2575  return a - array;
2576  }
2577  pfree(lower_element);
2578  }
2579 
2580  pfree(lower_name);
2581  return -1;
2582 }
2583 
2584 /*
2585  * Perform a sequential search in 'array' (or 'localized_array', if that's
2586  * not NULL) for an entry matching the first character(s) of the 'src'
2587  * string case-insensitively.
2588  *
2589  * The 'array' is presumed to be English words (all-ASCII), but
2590  * if 'localized_array' is supplied, that might be non-English
2591  * so we need a more expensive case-folding transformation
2592  * (which will follow the rules of the collation 'collid').
2593  *
2594  * If a match is found, copy the array index of the match into the integer
2595  * pointed to by 'dest', advance 'src' to the end of the part of the string
2596  * which matched, and return the number of characters consumed.
2597  *
2598  * If the string doesn't match, throw an error if 'have_error' is NULL,
2599  * otherwise set '*have_error' and return -1.
2600  *
2601  * 'node' is used only for error reports: node->key->name identifies the
2602  * field type we were searching for.
2603  */
2604 static int
2605 from_char_seq_search(int *dest, const char **src, const char *const *array,
2606  char **localized_array, Oid collid,
2607  FormatNode *node, bool *have_error)
2608 {
2609  int len;
2610 
2611  if (localized_array == NULL)
2612  *dest = seq_search_ascii(*src, array, &len);
2613  else
2614  *dest = seq_search_localized(*src, localized_array, &len, collid);
2615 
2616  if (len <= 0)
2617  {
2618  /*
2619  * In the error report, truncate the string at the next whitespace (if
2620  * any) to avoid including irrelevant data.
2621  */
2622  char *copy = pstrdup(*src);
2623  char *c;
2624 
2625  for (c = copy; *c; c++)
2626  {
2627  if (scanner_isspace(*c))
2628  {
2629  *c = '\0';
2630  break;
2631  }
2632  }
2633 
2635  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2636  errmsg("invalid value \"%s\" for \"%s\"",
2637  copy, node->key->name),
2638  errdetail("The given value did not match any of "
2639  "the allowed values for this field."))));
2640  }
2641  *src += len;
2642  return len;
2643 
2644 on_error:
2645  return -1;
2646 }
2647 
2648 /* ----------
2649  * Process a TmToChar struct as denoted by a list of FormatNodes.
2650  * The formatted data is written to the string pointed to by 'out'.
2651  * ----------
2652  */
2653 static void
2654 DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
2655 {
2656  FormatNode *n;
2657  char *s;
2658  struct pg_tm *tm = &in->tm;
2659  int i;
2660 
2661  /* cache localized days and months */
2663 
2664  s = out;
2665  for (n = node; n->type != NODE_TYPE_END; n++)
2666  {
2667  if (n->type != NODE_TYPE_ACTION)
2668  {
2669  strcpy(s, n->character);
2670  s += strlen(s);
2671  continue;
2672  }
2673 
2674  switch (n->key->id)
2675  {
2676  case DCH_A_M:
2677  case DCH_P_M:
2678  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2679  ? P_M_STR : A_M_STR);
2680  s += strlen(s);
2681  break;
2682  case DCH_AM:
2683  case DCH_PM:
2684  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2685  ? PM_STR : AM_STR);
2686  s += strlen(s);
2687  break;
2688  case DCH_a_m:
2689  case DCH_p_m:
2690  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2691  ? p_m_STR : a_m_STR);
2692  s += strlen(s);
2693  break;
2694  case DCH_am:
2695  case DCH_pm:
2696  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2697  ? pm_STR : am_STR);
2698  s += strlen(s);
2699  break;
2700  case DCH_HH:
2701  case DCH_HH12:
2702 
2703  /*
2704  * display time as shown on a 12-hour clock, even for
2705  * intervals
2706  */
2707  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2708  tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ? HOURS_PER_DAY / 2 :
2709  tm->tm_hour % (HOURS_PER_DAY / 2));
2710  if (S_THth(n->suffix))
2711  str_numth(s, s, S_TH_TYPE(n->suffix));
2712  s += strlen(s);
2713  break;
2714  case DCH_HH24:
2715  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2716  tm->tm_hour);
2717  if (S_THth(n->suffix))
2718  str_numth(s, s, S_TH_TYPE(n->suffix));
2719  s += strlen(s);
2720  break;
2721  case DCH_MI:
2722  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
2723  tm->tm_min);
2724  if (S_THth(n->suffix))
2725  str_numth(s, s, S_TH_TYPE(n->suffix));
2726  s += strlen(s);
2727  break;
2728  case DCH_SS:
2729  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
2730  tm->tm_sec);
2731  if (S_THth(n->suffix))
2732  str_numth(s, s, S_TH_TYPE(n->suffix));
2733  s += strlen(s);
2734  break;
2735 
2736 #define DCH_to_char_fsec(frac_fmt, frac_val) \
2737  sprintf(s, frac_fmt, (int) (frac_val)); \
2738  if (S_THth(n->suffix)) \
2739  str_numth(s, s, S_TH_TYPE(n->suffix)); \
2740  s += strlen(s)
2741 
2742  case DCH_FF1: /* tenth of second */
2743  DCH_to_char_fsec("%01d", in->fsec / 100000);
2744  break;
2745  case DCH_FF2: /* hundredth of second */
2746  DCH_to_char_fsec("%02d", in->fsec / 10000);
2747  break;
2748  case DCH_FF3:
2749  case DCH_MS: /* millisecond */
2750  DCH_to_char_fsec("%03d", in->fsec / 1000);
2751  break;
2752  case DCH_FF4: /* tenth of a millisecond */
2753  DCH_to_char_fsec("%04d", in->fsec / 100);
2754  break;
2755  case DCH_FF5: /* hundredth of a millisecond */
2756  DCH_to_char_fsec("%05d", in->fsec / 10);
2757  break;
2758  case DCH_FF6:
2759  case DCH_US: /* microsecond */
2760  DCH_to_char_fsec("%06d", in->fsec);
2761  break;
2762 #undef DCH_to_char_fsec
2763  case DCH_SSSS:
2764  sprintf(s, "%d", tm->tm_hour * SECS_PER_HOUR +
2765  tm->tm_min * SECS_PER_MINUTE +
2766  tm->tm_sec);
2767  if (S_THth(n->suffix))
2768  str_numth(s, s, S_TH_TYPE(n->suffix));
2769  s += strlen(s);
2770  break;
2771  case DCH_tz:
2773  if (tmtcTzn(in))
2774  {
2775  /* We assume here that timezone names aren't localized */
2776  char *p = asc_tolower_z(tmtcTzn(in));
2777 
2778  strcpy(s, p);
2779  pfree(p);
2780  s += strlen(s);
2781  }
2782  break;
2783  case DCH_TZ:
2785  if (tmtcTzn(in))
2786  {
2787  strcpy(s, tmtcTzn(in));
2788  s += strlen(s);
2789  }
2790  break;
2791  case DCH_TZH:
2793  sprintf(s, "%c%02d",
2794  (tm->tm_gmtoff >= 0) ? '+' : '-',
2795  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2796  s += strlen(s);
2797  break;
2798  case DCH_TZM:
2800  sprintf(s, "%02d",
2801  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2802  s += strlen(s);
2803  break;
2804  case DCH_OF:
2806  sprintf(s, "%c%0*d",
2807  (tm->tm_gmtoff >= 0) ? '+' : '-',
2808  S_FM(n->suffix) ? 0 : 2,
2809  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2810  s += strlen(s);
2811  if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
2812  {
2813  sprintf(s, ":%02d",
2814  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2815  s += strlen(s);
2816  }
2817  break;
2818  case DCH_A_D:
2819  case DCH_B_C:
2821  strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
2822  s += strlen(s);
2823  break;
2824  case DCH_AD:
2825  case DCH_BC:
2827  strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
2828  s += strlen(s);
2829  break;
2830  case DCH_a_d:
2831  case DCH_b_c:
2833  strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
2834  s += strlen(s);
2835  break;
2836  case DCH_ad:
2837  case DCH_bc:
2839  strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
2840  s += strlen(s);
2841  break;
2842  case DCH_MONTH:
2844  if (!tm->tm_mon)
2845  break;
2846  if (S_TM(n->suffix))
2847  {
2848  char *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid);
2849 
2850  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2851  strcpy(s, str);
2852  else
2853  ereport(ERROR,
2854  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2855  errmsg("localized string format value too long")));
2856  }
2857  else
2858  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2859  asc_toupper_z(months_full[tm->tm_mon - 1]));
2860  s += strlen(s);
2861  break;
2862  case DCH_Month:
2864  if (!tm->tm_mon)
2865  break;
2866  if (S_TM(n->suffix))
2867  {
2868  char *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid);
2869 
2870  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2871  strcpy(s, str);
2872  else
2873  ereport(ERROR,
2874  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2875  errmsg("localized string format value too long")));
2876  }
2877  else
2878  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2879  months_full[tm->tm_mon - 1]);
2880  s += strlen(s);
2881  break;
2882  case DCH_month:
2884  if (!tm->tm_mon)
2885  break;
2886  if (S_TM(n->suffix))
2887  {
2888  char *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid);
2889 
2890  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2891  strcpy(s, str);
2892  else
2893  ereport(ERROR,
2894  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2895  errmsg("localized string format value too long")));
2896  }
2897  else
2898  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2899  asc_tolower_z(months_full[tm->tm_mon - 1]));
2900  s += strlen(s);
2901  break;
2902  case DCH_MON:
2904  if (!tm->tm_mon)
2905  break;
2906  if (S_TM(n->suffix))
2907  {
2908  char *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2909 
2910  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2911  strcpy(s, str);
2912  else
2913  ereport(ERROR,
2914  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2915  errmsg("localized string format value too long")));
2916  }
2917  else
2918  strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
2919  s += strlen(s);
2920  break;
2921  case DCH_Mon:
2923  if (!tm->tm_mon)
2924  break;
2925  if (S_TM(n->suffix))
2926  {
2927  char *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2928 
2929  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2930  strcpy(s, str);
2931  else
2932  ereport(ERROR,
2933  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2934  errmsg("localized string format value too long")));
2935  }
2936  else
2937  strcpy(s, months[tm->tm_mon - 1]);
2938  s += strlen(s);
2939  break;
2940  case DCH_mon:
2942  if (!tm->tm_mon)
2943  break;
2944  if (S_TM(n->suffix))
2945  {
2946  char *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2947 
2948  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2949  strcpy(s, str);
2950  else
2951  ereport(ERROR,
2952  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2953  errmsg("localized string format value too long")));
2954  }
2955  else
2956  strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
2957  s += strlen(s);
2958  break;
2959  case DCH_MM:
2960  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
2961  tm->tm_mon);
2962  if (S_THth(n->suffix))
2963  str_numth(s, s, S_TH_TYPE(n->suffix));
2964  s += strlen(s);
2965  break;
2966  case DCH_DAY:
2968  if (S_TM(n->suffix))
2969  {
2970  char *str = str_toupper_z(localized_full_days[tm->tm_wday], collid);
2971 
2972  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2973  strcpy(s, str);
2974  else
2975  ereport(ERROR,
2976  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2977  errmsg("localized string format value too long")));
2978  }
2979  else
2980  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2981  asc_toupper_z(days[tm->tm_wday]));
2982  s += strlen(s);
2983  break;
2984  case DCH_Day:
2986  if (S_TM(n->suffix))
2987  {
2988  char *str = str_initcap_z(localized_full_days[tm->tm_wday], collid);
2989 
2990  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2991  strcpy(s, str);
2992  else
2993  ereport(ERROR,
2994  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2995  errmsg("localized string format value too long")));
2996  }
2997  else
2998  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2999  days[tm->tm_wday]);
3000  s += strlen(s);
3001  break;
3002  case DCH_day:
3004  if (S_TM(n->suffix))
3005  {
3006  char *str = str_tolower_z(localized_full_days[tm->tm_wday], collid);
3007 
3008  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3009  strcpy(s, str);
3010  else
3011  ereport(ERROR,
3012  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3013  errmsg("localized string format value too long")));
3014  }
3015  else
3016  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3017  asc_tolower_z(days[tm->tm_wday]));
3018  s += strlen(s);
3019  break;
3020  case DCH_DY:
3022  if (S_TM(n->suffix))
3023  {
3024  char *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid);
3025 
3026  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3027  strcpy(s, str);
3028  else
3029  ereport(ERROR,
3030  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3031  errmsg("localized string format value too long")));
3032  }
3033  else
3034  strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
3035  s += strlen(s);
3036  break;
3037  case DCH_Dy:
3039  if (S_TM(n->suffix))
3040  {
3041  char *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid);
3042 
3043  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3044  strcpy(s, str);
3045  else
3046  ereport(ERROR,
3047  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3048  errmsg("localized string format value too long")));
3049  }
3050  else
3051  strcpy(s, days_short[tm->tm_wday]);
3052  s += strlen(s);
3053  break;
3054  case DCH_dy:
3056  if (S_TM(n->suffix))
3057  {
3058  char *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid);
3059 
3060  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3061  strcpy(s, str);
3062  else
3063  ereport(ERROR,
3064  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3065  errmsg("localized string format value too long")));
3066  }
3067  else
3068  strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
3069  s += strlen(s);
3070  break;
3071  case DCH_DDD:
3072  case DCH_IDDD:
3073  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
3074  (n->key->id == DCH_DDD) ?
3075  tm->tm_yday :
3076  date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday));
3077  if (S_THth(n->suffix))
3078  str_numth(s, s, S_TH_TYPE(n->suffix));
3079  s += strlen(s);
3080  break;
3081  case DCH_DD:
3082  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
3083  if (S_THth(n->suffix))
3084  str_numth(s, s, S_TH_TYPE(n->suffix));
3085  s += strlen(s);
3086  break;
3087  case DCH_D:
3089  sprintf(s, "%d", tm->tm_wday + 1);
3090  if (S_THth(n->suffix))
3091  str_numth(s, s, S_TH_TYPE(n->suffix));
3092  s += strlen(s);
3093  break;
3094  case DCH_ID:
3096  sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
3097  if (S_THth(n->suffix))
3098  str_numth(s, s, S_TH_TYPE(n->suffix));
3099  s += strlen(s);
3100  break;
3101  case DCH_WW:
3102  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3103  (tm->tm_yday - 1) / 7 + 1);
3104  if (S_THth(n->suffix))
3105  str_numth(s, s, S_TH_TYPE(n->suffix));
3106  s += strlen(s);
3107  break;
3108  case DCH_IW:
3109  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3110  date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday));
3111  if (S_THth(n->suffix))
3112  str_numth(s, s, S_TH_TYPE(n->suffix));
3113  s += strlen(s);
3114  break;
3115  case DCH_Q:
3116  if (!tm->tm_mon)
3117  break;
3118  sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
3119  if (S_THth(n->suffix))
3120  str_numth(s, s, S_TH_TYPE(n->suffix));
3121  s += strlen(s);
3122  break;
3123  case DCH_CC:
3124  if (is_interval) /* straight calculation */
3125  i = tm->tm_year / 100;
3126  else
3127  {
3128  if (tm->tm_year > 0)
3129  /* Century 20 == 1901 - 2000 */
3130  i = (tm->tm_year - 1) / 100 + 1;
3131  else
3132  /* Century 6BC == 600BC - 501BC */
3133  i = tm->tm_year / 100 - 1;
3134  }
3135  if (i <= 99 && i >= -99)
3136  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
3137  else
3138  sprintf(s, "%d", i);
3139  if (S_THth(n->suffix))
3140  str_numth(s, s, S_TH_TYPE(n->suffix));
3141  s += strlen(s);
3142  break;
3143  case DCH_Y_YYY:
3144  i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
3145  sprintf(s, "%d,%03d", i,
3146  ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
3147  if (S_THth(n->suffix))
3148  str_numth(s, s, S_TH_TYPE(n->suffix));
3149  s += strlen(s);
3150  break;
3151  case DCH_YYYY:
3152  case DCH_IYYY:
3153  sprintf(s, "%0*d",
3154  S_FM(n->suffix) ? 0 :
3155  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
3156  (n->key->id == DCH_YYYY ?
3157  ADJUST_YEAR(tm->tm_year, is_interval) :
3159  tm->tm_mon,
3160  tm->tm_mday),
3161  is_interval)));
3162  if (S_THth(n->suffix))
3163  str_numth(s, s, S_TH_TYPE(n->suffix));
3164  s += strlen(s);
3165  break;
3166  case DCH_YYY:
3167  case DCH_IYY:
3168  sprintf(s, "%0*d",
3169  S_FM(n->suffix) ? 0 :
3170  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
3171  (n->key->id == DCH_YYY ?
3172  ADJUST_YEAR(tm->tm_year, is_interval) :
3174  tm->tm_mon,
3175  tm->tm_mday),
3176  is_interval)) % 1000);
3177  if (S_THth(n->suffix))
3178  str_numth(s, s, S_TH_TYPE(n->suffix));
3179  s += strlen(s);
3180  break;
3181  case DCH_YY:
3182  case DCH_IY:
3183  sprintf(s, "%0*d",
3184  S_FM(n->suffix) ? 0 :
3185  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
3186  (n->key->id == DCH_YY ?
3187  ADJUST_YEAR(tm->tm_year, is_interval) :
3189  tm->tm_mon,
3190  tm->tm_mday),
3191  is_interval)) % 100);
3192  if (S_THth(n->suffix))
3193  str_numth(s, s, S_TH_TYPE(n->suffix));
3194  s += strlen(s);
3195  break;
3196  case DCH_Y:
3197  case DCH_I:
3198  sprintf(s, "%1d",
3199  (n->key->id == DCH_Y ?
3200  ADJUST_YEAR(tm->tm_year, is_interval) :
3202  tm->tm_mon,
3203  tm->tm_mday),
3204  is_interval)) % 10);
3205  if (S_THth(n->suffix))
3206  str_numth(s, s, S_TH_TYPE(n->suffix));
3207  s += strlen(s);
3208  break;
3209  case DCH_RM:
3210  /* FALLTHROUGH */
3211  case DCH_rm:
3212 
3213  /*
3214  * For intervals, values like '12 month' will be reduced to 0
3215  * month and some years. These should be processed.
3216  */
3217  if (!tm->tm_mon && !tm->tm_year)
3218  break;
3219  else
3220  {
3221  int mon = 0;
3222  const char *const *months;
3223 
3224  if (n->key->id == DCH_RM)
3225  months = rm_months_upper;
3226  else
3227  months = rm_months_lower;
3228 
3229  /*
3230  * Compute the position in the roman-numeral array. Note
3231  * that the contents of the array are reversed, December
3232  * being first and January last.
3233  */
3234  if (tm->tm_mon == 0)
3235  {
3236  /*
3237  * This case is special, and tracks the case of full
3238  * interval years.
3239  */
3240  mon = tm->tm_year >= 0 ? 0 : MONTHS_PER_YEAR - 1;
3241  }
3242  else if (tm->tm_mon < 0)
3243  {
3244  /*
3245  * Negative case. In this case, the calculation is
3246  * reversed, where -1 means December, -2 November,
3247  * etc.
3248  */
3249  mon = -1 * (tm->tm_mon + 1);
3250  }
3251  else
3252  {
3253  /*
3254  * Common case, with a strictly positive value. The
3255  * position in the array matches with the value of
3256  * tm_mon.
3257  */
3258  mon = MONTHS_PER_YEAR - tm->tm_mon;
3259  }
3260 
3261  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
3262  months[mon]);
3263  s += strlen(s);
3264  }
3265  break;
3266  case DCH_W:
3267  sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
3268  if (S_THth(n->suffix))
3269  str_numth(s, s, S_TH_TYPE(n->suffix));
3270  s += strlen(s);
3271  break;
3272  case DCH_J:
3273  sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
3274  if (S_THth(n->suffix))
3275  str_numth(s, s, S_TH_TYPE(n->suffix));
3276  s += strlen(s);
3277  break;
3278  }
3279  }
3280 
3281  *s = '\0';
3282 }
3283 
3284 /*
3285  * Process the string 'in' as denoted by the array of FormatNodes 'node[]'.
3286  * The TmFromChar struct pointed to by 'out' is populated with the results.
3287  *
3288  * 'collid' identifies the collation to use, if needed.
3289  * 'std' specifies standard parsing mode.
3290  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
3291  *
3292  * Note: we currently don't have any to_interval() function, so there
3293  * is no need here for INVALID_FOR_INTERVAL checks.
3294  */
3295 static void
3296 DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
3297  Oid collid, bool std, bool *have_error)
3298 {
3299  FormatNode *n;
3300  const char *s;
3301  int len,
3302  value;
3303  bool fx_mode = std;
3304 
3305  /* number of extra skipped characters (more than given in format string) */
3306  int extra_skip = 0;
3307 
3308  /* cache localized days and months */
3310 
3311  for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
3312  {
3313  /*
3314  * Ignore spaces at the beginning of the string and before fields when
3315  * not in FX (fixed width) mode.
3316  */
3317  if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) &&
3318  (n->type == NODE_TYPE_ACTION || n == node))
3319  {
3320  while (*s != '\0' && isspace((unsigned char) *s))
3321  {
3322  s++;
3323  extra_skip++;
3324  }
3325  }
3326 
3327  if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
3328  {
3329  if (std)
3330  {
3331  /*
3332  * Standard mode requires strict matching between format
3333  * string separators/spaces and input string.
3334  */
3335  Assert(n->character[0] && !n->character[1]);
3336 
3337  if (*s == n->character[0])
3338  s++;
3339  else
3341  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3342  errmsg("unmatched format separator \"%c\"",
3343  n->character[0]))));
3344  }
3345  else if (!fx_mode)
3346  {
3347  /*
3348  * In non FX (fixed format) mode one format string space or
3349  * separator match to one space or separator in input string.
3350  * Or match nothing if there is no space or separator in the
3351  * current position of input string.
3352  */
3353  extra_skip--;
3354  if (isspace((unsigned char) *s) || is_separator_char(s))
3355  {
3356  s++;
3357  extra_skip++;
3358  }
3359  }
3360  else
3361  {
3362  /*
3363  * In FX mode, on format string space or separator we consume
3364  * exactly one character from input string. Notice we don't
3365  * insist that the consumed character match the format's
3366  * character.
3367  */
3368  s += pg_mblen(s);
3369  }
3370  continue;
3371  }
3372  else if (n->type != NODE_TYPE_ACTION)
3373  {
3374  /*
3375  * Text character, so consume one character from input string.
3376  * Notice we don't insist that the consumed character match the
3377  * format's character.
3378  */
3379  if (!fx_mode)
3380  {
3381  /*
3382  * In non FX mode we might have skipped some extra characters
3383  * (more than specified in format string) before. In this
3384  * case we don't skip input string character, because it might
3385  * be part of field.
3386  */
3387  if (extra_skip > 0)
3388  extra_skip--;
3389  else
3390  s += pg_mblen(s);
3391  }
3392  else
3393  {
3394  int chlen = pg_mblen(s);
3395 
3396  /*
3397  * Standard mode requires strict match of format characters.
3398  */
3399  if (std && n->type == NODE_TYPE_CHAR &&
3400  strncmp(s, n->character, chlen) != 0)
3402  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3403  errmsg("unmatched format character \"%s\"",
3404  n->character))));
3405 
3406  s += chlen;
3407  }
3408  continue;
3409  }
3410 
3411  from_char_set_mode(out, n->key->date_mode, have_error);
3412  CHECK_ERROR;
3413 
3414  switch (n->key->id)
3415  {
3416  case DCH_FX:
3417  fx_mode = true;
3418  break;
3419  case DCH_A_M:
3420  case DCH_P_M:
3421  case DCH_a_m:
3422  case DCH_p_m:
3424  NULL, InvalidOid,
3425  n, have_error);
3426  CHECK_ERROR;
3427  from_char_set_int(&out->pm, value % 2, n, have_error);
3428  CHECK_ERROR;
3429  out->clock = CLOCK_12_HOUR;
3430  break;
3431  case DCH_AM:
3432  case DCH_PM:
3433  case DCH_am:
3434  case DCH_pm:
3435  from_char_seq_search(&value, &s, ampm_strings,
3436  NULL, InvalidOid,
3437  n, have_error);
3438  CHECK_ERROR;
3439  from_char_set_int(&out->pm, value % 2, n, have_error);
3440  CHECK_ERROR;
3441  out->clock = CLOCK_12_HOUR;
3442  break;
3443  case DCH_HH:
3444  case DCH_HH12:
3445  from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
3446  CHECK_ERROR;
3447  out->clock = CLOCK_12_HOUR;
3448  SKIP_THth(s, n->suffix);
3449  break;
3450  case DCH_HH24:
3451  from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
3452  CHECK_ERROR;
3453  SKIP_THth(s, n->suffix);
3454  break;
3455  case DCH_MI:
3456  from_char_parse_int(&out->mi, &s, n, have_error);
3457  CHECK_ERROR;
3458  SKIP_THth(s, n->suffix);
3459  break;
3460  case DCH_SS:
3461  from_char_parse_int(&out->ss, &s, n, have_error);
3462  CHECK_ERROR;
3463  SKIP_THth(s, n->suffix);
3464  break;
3465  case DCH_MS: /* millisecond */
3466  len = from_char_parse_int_len(&out->ms, &s, 3, n, have_error);
3467  CHECK_ERROR;
3468 
3469  /*
3470  * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
3471  */
3472  out->ms *= len == 1 ? 100 :
3473  len == 2 ? 10 : 1;
3474 
3475  SKIP_THth(s, n->suffix);
3476  break;
3477  case DCH_FF1:
3478  case DCH_FF2:
3479  case DCH_FF3:
3480  case DCH_FF4:
3481  case DCH_FF5:
3482  case DCH_FF6:
3483  out->ff = n->key->id - DCH_FF1 + 1;
3484  /* fall through */
3485  case DCH_US: /* microsecond */
3486  len = from_char_parse_int_len(&out->us, &s,
3487  n->key->id == DCH_US ? 6 :
3488  out->ff, n, have_error);
3489  CHECK_ERROR;
3490 
3491  out->us *= len == 1 ? 100000 :
3492  len == 2 ? 10000 :
3493  len == 3 ? 1000 :
3494  len == 4 ? 100 :
3495  len == 5 ? 10 : 1;
3496 
3497  SKIP_THth(s, n->suffix);
3498  break;
3499  case DCH_SSSS:
3500  from_char_parse_int(&out->ssss, &s, n, have_error);
3501  CHECK_ERROR;
3502  SKIP_THth(s, n->suffix);
3503  break;
3504  case DCH_tz:
3505  case DCH_TZ:
3506  case DCH_OF:
3508  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3509  errmsg("formatting field \"%s\" is only supported in to_char",
3510  n->key->name))));
3511  CHECK_ERROR;
3512  break;
3513  case DCH_TZH:
3514 
3515  /*
3516  * The value of TZH might be negative, but its minus sign may
3517  * already have been swallowed as a separator. So, if we have
3518  * skipped more characters than specified in the format
3519  * string, we treat a minus sign skipped just before this
3520  * position as belonging to TZH.
3521  */
3522  if (*s == '+' || *s == '-' || *s == ' ')
3523  {
3524  out->tzsign = *s == '-' ? -1 : +1;
3525  s++;
3526  }
3527  else
3528  {
3529  if (extra_skip > 0 && *(s - 1) == '-')
3530  out->tzsign = -1;
3531  else
3532  out->tzsign = +1;
3533  }
3534 
3535  from_char_parse_int_len(&out->tzh, &s, 2, n, have_error);
3536  CHECK_ERROR;
3537  break;
3538  case DCH_TZM:
3539  /* assign positive timezone sign if TZH was not seen before */
3540  if (!out->tzsign)
3541  out->tzsign = +1;
3542  from_char_parse_int_len(&out->tzm, &s, 2, n, have_error);
3543  CHECK_ERROR;
3544  break;
3545  case DCH_A_D:
3546  case DCH_B_C:
3547  case DCH_a_d:
3548  case DCH_b_c:
3550  NULL, InvalidOid,
3551  n, have_error);
3552  CHECK_ERROR;
3553  from_char_set_int(&out->bc, value % 2, n, have_error);
3554  CHECK_ERROR;
3555  break;
3556  case DCH_AD:
3557  case DCH_BC:
3558  case DCH_ad:
3559  case DCH_bc:
3560  from_char_seq_search(&value, &s, adbc_strings,
3561  NULL, InvalidOid,
3562  n, have_error);
3563  CHECK_ERROR;
3564  from_char_set_int(&out->bc, value % 2, n, have_error);
3565  CHECK_ERROR;
3566  break;
3567  case DCH_MONTH:
3568  case DCH_Month:
3569  case DCH_month:
3570  from_char_seq_search(&value, &s, months_full,
3571  S_TM(n->suffix) ? localized_full_months : NULL,
3572  collid,
3573  n, have_error);
3574  CHECK_ERROR;
3575  from_char_set_int(&out->mm, value + 1, n, have_error);
3576  CHECK_ERROR;
3577  break;
3578  case DCH_MON:
3579  case DCH_Mon:
3580  case DCH_mon:
3581  from_char_seq_search(&value, &s, months,
3582  S_TM(n->suffix) ? localized_abbrev_months : NULL,
3583  collid,
3584  n, have_error);
3585  CHECK_ERROR;
3586  from_char_set_int(&out->mm, value + 1, n, have_error);
3587  CHECK_ERROR;
3588  break;
3589  case DCH_MM:
3590  from_char_parse_int(&out->mm, &s, n, have_error);
3591  CHECK_ERROR;
3592  SKIP_THth(s, n->suffix);
3593  break;
3594  case DCH_DAY:
3595  case DCH_Day:
3596  case DCH_day:
3597  from_char_seq_search(&value, &s, days,
3598  S_TM(n->suffix) ? localized_full_days : NULL,
3599  collid,
3600  n, have_error);
3601  CHECK_ERROR;
3602  from_char_set_int(&out->d, value, n, have_error);
3603  CHECK_ERROR;
3604  out->d++;
3605  break;
3606  case DCH_DY:
3607  case DCH_Dy:
3608  case DCH_dy:
3609  from_char_seq_search(&value, &s, days_short,
3610  S_TM(n->suffix) ? localized_abbrev_days : NULL,
3611  collid,
3612  n, have_error);
3613  CHECK_ERROR;
3614  from_char_set_int(&out->d, value, n, have_error);
3615  CHECK_ERROR;
3616  out->d++;
3617  break;
3618  case DCH_DDD:
3619  from_char_parse_int(&out->ddd, &s, n, have_error);
3620  CHECK_ERROR;
3621  SKIP_THth(s, n->suffix);
3622  break;
3623  case DCH_IDDD:
3624  from_char_parse_int_len(&out->ddd, &s, 3, n, have_error);
3625  CHECK_ERROR;
3626  SKIP_THth(s, n->suffix);
3627  break;
3628  case DCH_DD:
3629  from_char_parse_int(&out->dd, &s, n, have_error);
3630  CHECK_ERROR;
3631  SKIP_THth(s, n->suffix);
3632  break;
3633  case DCH_D:
3634  from_char_parse_int(&out->d, &s, n, have_error);
3635  CHECK_ERROR;
3636  SKIP_THth(s, n->suffix);
3637  break;
3638  case DCH_ID:
3639  from_char_parse_int_len(&out->d, &s, 1, n, have_error);
3640  CHECK_ERROR;
3641  /* Shift numbering to match Gregorian where Sunday = 1 */
3642  if (++out->d > 7)
3643  out->d = 1;
3644  SKIP_THth(s, n->suffix);
3645  break;
3646  case DCH_WW:
3647  case DCH_IW:
3648  from_char_parse_int(&out->ww, &s, n, have_error);
3649  CHECK_ERROR;
3650  SKIP_THth(s, n->suffix);
3651  break;
3652  case DCH_Q:
3653 
3654  /*
3655  * We ignore 'Q' when converting to date because it is unclear
3656  * which date in the quarter to use, and some people specify
3657  * both quarter and month, so if it was honored it might
3658  * conflict with the supplied month. That is also why we don't
3659  * throw an error.
3660  *
3661  * We still parse the source string for an integer, but it
3662  * isn't stored anywhere in 'out'.
3663  */
3664  from_char_parse_int((int *) NULL, &s, n, have_error);
3665  CHECK_ERROR;
3666  SKIP_THth(s, n->suffix);
3667  break;
3668  case DCH_CC:
3669  from_char_parse_int(&out->cc, &s, n, have_error);
3670  CHECK_ERROR;
3671  SKIP_THth(s, n->suffix);
3672  break;
3673  case DCH_Y_YYY:
3674  {
3675  int matched,
3676  years,
3677  millennia,
3678  nch;
3679 
3680  matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
3681  if (matched < 2)
3683  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3684  errmsg("invalid input string for \"Y,YYY\""))));
3685  years += (millennia * 1000);
3686  from_char_set_int(&out->year, years, n, have_error);
3687  CHECK_ERROR;
3688  out->yysz = 4;
3689  s += nch;
3690  SKIP_THth(s, n->suffix);
3691  }
3692  break;
3693  case DCH_YYYY:
3694  case DCH_IYYY:
3695  from_char_parse_int(&out->year, &s, n, have_error);
3696  CHECK_ERROR;
3697  out->yysz = 4;
3698  SKIP_THth(s, n->suffix);
3699  break;
3700  case DCH_YYY:
3701  case DCH_IYY:
3702  len = from_char_parse_int(&out->year, &s, n, have_error);
3703  CHECK_ERROR;
3704  if (len < 4)
3705  out->year = adjust_partial_year_to_2020(out->year);
3706  out->yysz = 3;
3707  SKIP_THth(s, n->suffix);
3708  break;
3709  case DCH_YY:
3710  case DCH_IY:
3711  len = from_char_parse_int(&out->year, &s, n, have_error);
3712  CHECK_ERROR;
3713  if (len < 4)
3714  out->year = adjust_partial_year_to_2020(out->year);
3715  out->yysz = 2;
3716  SKIP_THth(s, n->suffix);
3717  break;
3718  case DCH_Y:
3719  case DCH_I:
3720  len = from_char_parse_int(&out->year, &s, n, have_error);
3721  CHECK_ERROR;
3722  if (len < 4)
3723  out->year = adjust_partial_year_to_2020(out->year);
3724  out->yysz = 1;
3725  SKIP_THth(s, n->suffix);
3726  break;
3727  case DCH_RM:
3728  case DCH_rm:
3730  NULL, InvalidOid,
3731  n, have_error);
3732  CHECK_ERROR;
3733  from_char_set_int(&out->mm, MONTHS_PER_YEAR - value,
3734  n, have_error);
3735  CHECK_ERROR;
3736  break;
3737  case DCH_W:
3738  from_char_parse_int(&out->w, &s, n, have_error);
3739  CHECK_ERROR;
3740  SKIP_THth(s, n->suffix);
3741  break;
3742  case DCH_J:
3743  from_char_parse_int(&out->j, &s, n, have_error);
3744  CHECK_ERROR;
3745  SKIP_THth(s, n->suffix);
3746  break;
3747  }
3748 
3749  /* Ignore all spaces after fields */
3750  if (!fx_mode)
3751  {
3752  extra_skip = 0;
3753  while (*s != '\0' && isspace((unsigned char) *s))
3754  {
3755  s++;
3756  extra_skip++;
3757  }
3758  }
3759  }
3760 
3761  /*
3762  * Standard parsing mode doesn't allow unmatched format patterns or
3763  * trailing characters in the input string.
3764  */
3765  if (std)
3766  {
3767  if (n->type != NODE_TYPE_END)
3769  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3770  errmsg("input string is too short for datetime format"))));
3771 
3772  while (*s != '\0' && isspace((unsigned char) *s))
3773  s++;
3774 
3775  if (*s != '\0')
3777  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3778  errmsg("trailing characters remain in input string "
3779  "after datetime format"))));
3780  }
3781 
3782 on_error:
3783  return;
3784 }
3785 
3786 /*
3787  * The invariant for DCH cache entry management is that DCHCounter is equal
3788  * to the maximum age value among the existing entries, and we increment it
3789  * whenever an access occurs. If we approach overflow, deal with that by
3790  * halving all the age values, so that we retain a fairly accurate idea of
3791  * which entries are oldest.
      *
      * Returns nothing; the only effects are on DCHCounter and on the age field
      * of the first n_DCHCache entries of DCHCache[].
      *
      * NOTE(review): listing line 3794, which should carry this function's name
      * and parameter list, is absent from this extract.
3792  */
3793 static inline void
3795 {
      /* Rescale once the counter is within one step of INT_MAX. */
3796  if (DCHCounter >= (INT_MAX - 1))
3797  {
      /* Halve every entry's age and the counter together. */
3798  for (int i = 0; i < n_DCHCache; i++)
3799  DCHCache[i]->age >>= 1;
3800  DCHCounter >>= 1;
3801  }
3802 }
3803 
3804 /*
3805  * Get mask of date/time/zone components present in format nodes.
3806  *
      * Scans 'node' up to its NODE_TYPE_END terminator, looking only at
      * NODE_TYPE_ACTION nodes, and returns the OR of DCH_DATED, DCH_TIMED and
      * DCH_ZONED for the fields found. DCH_FX contributes nothing. The fields
      * tz, TZ and OF are reported with ERRCODE_FEATURE_NOT_SUPPORTED because
      * they are only supported in to_char.
      *
3807  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
3808  */
3809 static int
3810 DCH_datetime_type(FormatNode *node, bool *have_error)
3811 {
3812  FormatNode *n;
3813  int flags = 0;
3814 
3815  for (n = node; n->type != NODE_TYPE_END; n++)
3816  {
      /* separators, spaces and literal text carry no type information */
3817  if (n->type != NODE_TYPE_ACTION)
3818  continue;
3819 
3820  switch (n->key->id)
3821  {
3822  case DCH_FX:
3823  break;
      /* time-of-day fields */
3824  case DCH_A_M:
3825  case DCH_P_M:
3826  case DCH_a_m:
3827  case DCH_p_m:
3828  case DCH_AM:
3829  case DCH_PM:
3830  case DCH_am:
3831  case DCH_pm:
3832  case DCH_HH:
3833  case DCH_HH12:
3834  case DCH_HH24:
3835  case DCH_MI:
3836  case DCH_SS:
3837  case DCH_MS: /* millisecond */
3838  case DCH_US: /* microsecond */
3839  case DCH_FF1:
3840  case DCH_FF2:
3841  case DCH_FF3:
3842  case DCH_FF4:
3843  case DCH_FF5:
3844  case DCH_FF6:
3845  case DCH_SSSS:
3846  flags |= DCH_TIMED;
3847  break;
      /* zone fields that only to_char supports */
3848  case DCH_tz:
3849  case DCH_TZ:
3850  case DCH_OF:
3852  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3853  errmsg("formatting field \"%s\" is only supported in to_char",
3854  n->key->name))));
3855  flags |= DCH_ZONED;
3856  break;
      /* zone fields that can be parsed */
3857  case DCH_TZH:
3858  case DCH_TZM:
3859  flags |= DCH_ZONED;
3860  break;
      /* calendar (date) fields */
3861  case DCH_A_D:
3862  case DCH_B_C:
3863  case DCH_a_d:
3864  case DCH_b_c:
3865  case DCH_AD:
3866  case DCH_BC:
3867  case DCH_ad:
3868  case DCH_bc:
3869  case DCH_MONTH:
3870  case DCH_Month:
3871  case DCH_month:
3872  case DCH_MON:
3873  case DCH_Mon:
3874  case DCH_mon:
3875  case DCH_MM:
3876  case DCH_DAY:
3877  case DCH_Day:
3878  case DCH_day:
3879  case DCH_DY:
3880  case DCH_Dy:
3881  case DCH_dy:
3882  case DCH_DDD:
3883  case DCH_IDDD:
3884  case DCH_DD:
3885  case DCH_D:
3886  case DCH_ID:
3887  case DCH_WW:
      case DCH_IW: /* ISO week number is a date field, parsed like WW */
3888  case DCH_Q:
3889  case DCH_CC:
3890  case DCH_Y_YYY:
3891  case DCH_YYYY:
3892  case DCH_IYYY:
3893  case DCH_YYY:
3894  case DCH_IYY:
3895  case DCH_YY:
3896  case DCH_IY:
3897  case DCH_Y:
3898  case DCH_I:
3899  case DCH_RM:
3900  case DCH_rm:
3901  case DCH_W:
3902  case DCH_J:
3903  flags |= DCH_DATED;
3904  break;
3905  }
3906  }
3907 
3908 on_error:
3909  return flags;
3910 }
3911 
3912 /*
      * Select a DCHCacheEntry to hold the given format picture.
      *
      * 'str' is the format picture and 'std' is the parsing mode it belongs to;
      * both are recorded in the entry, because DCH_cache_search() matches on
      * both. The entry is returned with valid == false and a fresh age; the
      * caller is expected to fill its format and then set valid.
      *
      * When the cache is full an existing entry is recycled: entry 0 if it is
      * not valid, otherwise the first not-valid entry found, otherwise the
      * valid entry with the smallest age. When there is room, the slot
      * DCHCache[n_DCHCache] is populated and n_DCHCache is incremented.
      */
3913 static DCHCacheEntry *
3914 DCH_cache_getnew(const char *str, bool std)
3915 {
3916  DCHCacheEntry *ent;
3917 
3918  /* Ensure we can advance DCHCounter below */
3920 
3921  /*
3922  * If cache is full, remove oldest entry (or recycle first not-valid one)
3923  */
3925  {
3926  DCHCacheEntry *old = DCHCache[0];
3927 
3928 #ifdef DEBUG_TO_FROM_CHAR
3929  elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
3930 #endif
3931  if (old->valid)
3932  {
3933  for (int i = 1; i < DCH_CACHE_ENTRIES; i++)
3934  {
3935  ent = DCHCache[i];
      /* a not-valid entry can be reused at once */
3936  if (!ent->valid)
3937  {
3938  old = ent;
3939  break;
3940  }
      /* otherwise keep track of the smallest age seen so far */
3941  if (ent->age < old->age)
3942  old = ent;
3943  }
3944  }
3945 #ifdef DEBUG_TO_FROM_CHAR
3946  elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
3947 #endif
3948  old->valid = false;
3949  strlcpy(old->str, str, DCH_CACHE_SIZE + 1);
      /*
       * Record the parsing mode along with the picture. Lookups compare
       * both str and std, so a stale std left over from the recycled entry
       * would let this entry match a request made in the other mode.
       */
      old->std = std;
3950  old->age = (++DCHCounter);
3951  /* caller is expected to fill format, then set valid */
3952  return old;
3953  }
3954  else
3955  {
3956 #ifdef DEBUG_TO_FROM_CHAR
3957  elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
3958 #endif
3959  Assert(DCHCache[n_DCHCache] == NULL);
3960  DCHCache[n_DCHCache] = ent = (DCHCacheEntry *)
3962  ent->valid = false;
3963  strlcpy(ent->str, str, DCH_CACHE_SIZE + 1);
3964  ent->std = std;
3965  ent->age = (++DCHCounter);
3966  /* caller is expected to fill format, then set valid */
3967  ++n_DCHCache;
3968  return ent;
3969  }
3970 }
3971 
3972 /*
      * Look for an existing DCHCacheEntry matching the given format picture.
      *
      * An entry matches only if it is valid, its str equals 'str' and its std
      * flag equals 'std'. A hit is stamped with the next DCHCounter value as
      * its age and returned; if nothing matches, NULL is returned.
      */
3973 static DCHCacheEntry *
3974 DCH_cache_search(const char *str, bool std)
3975 {
3976  /* Ensure we can advance DCHCounter below */
3978 
      /* only the first n_DCHCache slots are examined */
3979  for (int i = 0; i < n_DCHCache; i++)
3980  {
3981  DCHCacheEntry *ent = DCHCache[i];
3982 
3983  if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std)
3984  {
      /* mark the entry as the most recently used one */
3985  ent->age = (++DCHCounter);
3986  return ent;
3987  }
3988  }
3989 
3990  return NULL;
3991 }
3992 
3993 /*
      * Find or create a DCHCacheEntry for the given format picture.
      *
      * 'str' is the format picture and 'std' selects standard parsing mode.
      * Returns the entry found by DCH_cache_search(), or, on a miss, an entry
      * obtained from DCH_cache_getnew() whose format has just been filled in
      * by parse_format().
      */
3994 static DCHCacheEntry *
3995 DCH_cache_fetch(const char *str, bool std)
3996 {
3997  DCHCacheEntry *ent;
3998 
3999  if ((ent = DCH_cache_search(str, std)) == NULL)
4000  {
4001  /*
4002  * Not in the cache, must run parser and save a new format-picture to
4003  * the cache. Do not mark the cache entry valid until parsing
4004  * succeeds.
      *
      * If parse_format() does not return normally, the entry stays
      * not-valid, so DCH_cache_search() will ignore it.
4005  */
4006  ent = DCH_cache_getnew(str, std);
4007 
      /* STD_FLAG is added to DCH_FLAG only in standard parsing mode */
4008  parse_format(ent->format, str, DCH_keywords, DCH_suff, DCH_index,
4009  DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4010 
4011  ent->valid = true;
4012  }
4013  return ent;
4014 }
4015 
4016 /*
4017  * Format a date/time or interval into a string according to fmt.
4018  * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char
4019  * for formatting.
      *
      * 'tmtc', 'is_interval' and 'collid' are handed to DCH_to_char() unchanged;
      * 'fmt' is the format picture as a text value.
      *
      * Returns the formatted output as a new text value built by
      * cstring_to_text(). The C-string copy of fmt, the result workspace and
      * any privately allocated node array are released before returning.
4020  */
4021 static text *
4022 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
4023 {
4024  FormatNode *format;
4025  char *fmt_str,
4026  *result;
4027  bool incache;
4028  int fmt_len;
4029  text *res;
4030 
4031  /*
4032  * Convert fmt to C string
4033  */
4034  fmt_str = text_to_cstring(fmt);
4035  fmt_len = strlen(fmt_str);
4036 
4037  /*
4038  * Allocate workspace for result as C string
      *
      * NOTE(review): the size presumably relies on no format byte producing
      * more than DCH_MAX_ITEM_SIZ bytes of output - confirm.
4039  */
4040  result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
4041  *result = '\0';
4042 
4043  if (fmt_len > DCH_CACHE_SIZE)
4044  {
4045  /*
4046  * Allocate new memory if format picture is bigger than static cache
4047  * and do not use cache (call parser always)
4048  */
4049  incache = false;
4050 
4051  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4052 
      /* plain DCH_FLAG: no STD_FLAG is passed on this path */
4053  parse_format(format, fmt_str, DCH_keywords,
4054  DCH_suff, DCH_index, DCH_FLAG, NULL);
4055  }
4056  else
4057  {
4058  /*
4059  * Use cache buffers
      *
      * The lookup is made with std = false.
4060  */
4061  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
4062 
4063  incache = true;
4064  format = ent->format;
4065  }
4066 
4067  /* The real work is here */
4068  DCH_to_char(format, is_interval, tmtc, result, collid);
4069 
      /* a cached node array belongs to its cache entry; free only our own */
4070  if (!incache)
4071  pfree(format);
4072 
4073  pfree(fmt_str);
4074 
4075  /* convert C-string result to TEXT format */
4076  res = cstring_to_text(result);
4077 
4078  pfree(result);
4079  return res;
4080 }
4081 
4082 /****************************************************************************
4083  * Public routines
4084  ***************************************************************************/
4085 
4086 /* -------------------
4087  * TIMESTAMP to_char()
      *
      * Formats the timestamp 'dt' according to the text format in argument 1.
      * Returns SQL NULL if the format is empty or 'dt' is not finite, and
      * raises "timestamp out of range" if timestamp2tm() fails.
      *
      * NOTE(review): listing lines 4091 and 4093 are absent from this extract;
      * presumably they hold the function name and the declaration of 'dt'.
4088  * -------------------
4089  */
4090 Datum
4092 {
4094  text *fmt = PG_GETARG_TEXT_PP(1),
4095  *res;
4096  TmToChar tmtc;
4097  struct pg_tm *tm;
4098  int thisdate;
4099 
4100  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4101  PG_RETURN_NULL();
4102 
4103  ZERO_tmtc(&tmtc);
4104  tm = tmtcTm(&tmtc);
4105 
      /* no time zone output arguments are requested here (NULLs passed) */
4106  if (timestamp2tm(dt, NULL, tm, &tmtcFsec(&tmtc), NULL, NULL) != 0)
4107  ereport(ERROR,
4108  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4109  errmsg("timestamp out of range")));
4110 
      /* fill in day of week and 1-based day of year from date2j() values */
4111  thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
4112  tm->tm_wday = (thisdate + 1) % 7;
4113  tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
4114 
4115  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4116  PG_RETURN_NULL();
4117 
4118  PG_RETURN_TEXT_P(res);
4119 }
4120 
/*
 * to_char() variant that also asks timestamp2tm() for the time zone offset
 * ('tz') and the zone name stored in the TmToChar.
 *
 * Returns SQL NULL if the format (argument 1) is empty or 'dt' is not
 * finite, and raises "timestamp out of range" if timestamp2tm() fails.
 *
 * NOTE(review): listing lines 4122 and 4124 are absent from this extract;
 * presumably they hold the function name and the declaration of 'dt'.
 */
4121 Datum
4123 {
4125  text *fmt = PG_GETARG_TEXT_PP(1),
4126  *res;
4127  TmToChar tmtc;
4128  int tz;
4129  struct pg_tm *tm;
4130  int thisdate;
4131 
4132  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4133  PG_RETURN_NULL();
4134 
4135  ZERO_tmtc(&tmtc);
4136  tm = tmtcTm(&tmtc);
4137 
4138  if (timestamp2tm(dt, &tz, tm, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
4139  ereport(ERROR,
4140  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4141  errmsg("timestamp out of range")));
4142 
      /* fill in day of week and 1-based day of year from date2j() values */
4143  thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
4144  tm->tm_wday = (thisdate + 1) % 7;
4145  tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
4146 
4147  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4148  PG_RETURN_NULL();
4149 
4150  PG_RETURN_TEXT_P(res);
4151 }
4152 
4153 
4154 /* -------------------
4155  * INTERVAL to_char()
      *
      * Formats the interval in argument 0 according to the text format in
      * argument 1, calling datetime_to_char_body() with is_interval = true.
      * Returns SQL NULL if the format is empty or interval2tm() fails.
      *
      * NOTE(review): listing line 4159, which should carry the function name,
      * is absent from this extract.
4156  * -------------------
4157  */
4158 Datum
4160 {
4161  Interval *it = PG_GETARG_INTERVAL_P(0);
4162  text *fmt = PG_GETARG_TEXT_PP(1),
4163  *res;
4164  TmToChar tmtc;
4165  struct pg_tm *tm;
4166 
4167  if (VARSIZE_ANY_EXHDR(fmt) <= 0)
4168  PG_RETURN_NULL();
4169 
4170  ZERO_tmtc(&tmtc);
4171  tm = tmtcTm(&tmtc);
4172 
4173  if (interval2tm(*it, tm, &tmtcFsec(&tmtc)) != 0)
4174  PG_RETURN_NULL();
4175 
4176  /* wday is meaningless, yday approximates the total span in days */
4177  tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday;
4178 
4179  if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
4180  PG_RETURN_NULL();
4181 
4182  PG_RETURN_TEXT_P(res);
4183 }
4184 
4185 /* ---------------------
4186  * TO_TIMESTAMP()
4187  *
4188  * Make a timestamp from the text in argument 0, interpreted according to
4189  * the format picture in argument 1 (to_timestamp is the reverse of to_char()).
      *
      * Parsing is delegated to do_to_timestamp() with standard mode off and
      * with NULL for both 'flags' and 'have_error', so parse errors are thrown.
      * Raises "timestamp out of range" if tm2timestamp() fails.
      *
      * NOTE(review): listing lines 4193 (function name) and 4216 (the body of
      * the 'else' that supplies tz when the input has no zone) are absent from
      * this extract.
4190  * ---------------------
4191  */
4192 Datum
4194 {
4195  text *date_txt = PG_GETARG_TEXT_PP(0);
4196  text *fmt = PG_GETARG_TEXT_PP(1);
4197  Oid collid = PG_GET_COLLATION();
4198  Timestamp result;
4199  int tz;
4200  struct pg_tm tm;
4201  fsec_t fsec;
4202  int fprec;
4203 
4204  do_to_timestamp(date_txt, fmt, collid, false,
4205  &tm, &fsec, &fprec, NULL, NULL);
4206 
4207  /* Use the specified time zone, if any. */
4208  if (tm.tm_zone)
4209  {
4210  int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), &tz);
4211 
4212  if (dterr)
4213  DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
4214  }
4215  else
4217 
4218  if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
4219  ereport(ERROR,
4220  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4221  errmsg("timestamp out of range")));
4222 
4223  /* Use the specified fractional precision, if any. */
4224  if (fprec)
4225  AdjustTimestampForTypmod(&result, fprec);
4226 
4227  PG_RETURN_TIMESTAMP(result);
4228 }
4229 
4230 /* ----------
4231  * TO_DATE
4232  * Make a date from the text in argument 0, interpreted according to the
4233  * format picture in argument 1.
      *
      * Parsing is delegated to do_to_timestamp() with standard mode off and
      * with NULL for 'fprec', 'flags' and 'have_error', so parse errors are
      * thrown. Only the year, month and day of the parsed result are used.
      * Raises "date out of range" if they fail IS_VALID_JULIAN or if the
      * resulting date fails IS_VALID_DATE.
      *
      * NOTE(review): listing line 4236, which should carry the function name,
      * is absent from this extract.
4233  * ----------
4234  */
4235 Datum
4237 {
4238  text *date_txt = PG_GETARG_TEXT_PP(0);
4239  text *fmt = PG_GETARG_TEXT_PP(1);
4240  Oid collid = PG_GET_COLLATION();
4241  DateADT result;
4242  struct pg_tm tm;
4243  fsec_t fsec;
4244 
4245  do_to_timestamp(date_txt, fmt, collid, false,
4246  &tm, &fsec, NULL, NULL, NULL);
4247 
4248  /* Prevent overflow in Julian-day routines */
4249  if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4250  ereport(ERROR,
4251  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4252  errmsg("date out of range: \"%s\"",
4253  text_to_cstring(date_txt))));
4254 
      /* date2j() value, rebased by subtracting POSTGRES_EPOCH_JDATE */
4255  result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
4256 
4257  /* Now check for just-out-of-range dates */
4258  if (!IS_VALID_DATE(result))
4259  ereport(ERROR,
4260  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4261  errmsg("date out of range: \"%s\"",
4262  text_to_cstring(date_txt))));
4263 
4264  PG_RETURN_DATEADT(result);
4265 }
4266 
4267 /*
4268  * Convert the 'date_txt' input to a datetime type using argument 'fmt'
4269  * as a format string. The collation 'collid' may be used for case-folding
4270  * rules in some cases. 'strict' specifies standard parsing mode.
4271  *
4272  * The actual data type (returned in 'typid', 'typmod') is determined by
4273  * the presence of date/time/zone components in the format string.
      * The flags reported by do_to_timestamp() select the result as follows:
      *
      *   dated, timed, zoned    -> TIMESTAMPTZOID
      *   dated, timed           -> TIMESTAMPOID
      *   dated only             -> DATEOID
      *   dated, zoned           -> error: format is zoned but not timed
      *   timed, zoned           -> TIMETZOID
      *   timed only             -> TIMEOID
      *   neither dated nor timed -> error
      *
      * '*typmod' is the fractional-second precision found by the parser, or -1
      * if that precision is zero.
4274  *
4275  * When timezone component is present, the corresponding offset is
4276  * returned in '*tz'.
4277  *
4278  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
4279  * and zero value is returned.
4280  */
4281 Datum
4282 parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
4283  Oid *typid, int32 *typmod, int *tz,
4284  bool *have_error)
4285 {
4286  struct pg_tm tm;
4287  fsec_t fsec;
4288  int fprec;
4289  uint32 flags;
4290 
      /* 'strict' is passed as the parser's standard-mode argument */
4291  do_to_timestamp(date_txt, fmt, collid, strict,
4292  &tm, &fsec, &fprec, &flags, have_error);
4293  CHECK_ERROR;
4294 
4295  *typmod = fprec ? fprec : -1; /* fractional part precision */
4296 
4297  if (flags & DCH_DATED)
4298  {
4299  if (flags & DCH_TIMED)
4300  {
4301  if (flags & DCH_ZONED)
4302  {
      /* date + time + zone: timestamptz */
4303  TimestampTz result;
4304 
4305  if (tm.tm_zone)
4306  {
4307  int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
4308 
      /*
       * Report through have_error when the caller asked for that, in
       * the same way as the timetz branch below does.
       */
4309  if (dterr)
4310  RETURN_ERROR(DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz"));
4311  }
4312  else
4313  {
4314  /*
4315  * Time zone is present in format string, but not in input
4316  * string. Assuming do_to_timestamp() triggers no error
4317  * this should be possible only in non-strict case.
4318  */
4319  Assert(!strict);
4320 
4322  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4323  errmsg("missing time zone in input string for type timestamptz"))));
4324  }
4325 
4326  if (tm2timestamp(&tm, fsec, tz, &result) != 0)
4328  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4329  errmsg("timestamptz out of range"))));
4330 
4331  AdjustTimestampForTypmod(&result, *typmod);
4332 
4333  *typid = TIMESTAMPTZOID;
4334  return TimestampTzGetDatum(result);
4335  }
4336  else
4337  {
      /* date + time, no zone: timestamp */
4338  Timestamp result;
4339 
4340  if (tm2timestamp(&tm, fsec, NULL, &result) != 0)
4342  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4343  errmsg("timestamp out of range"))));
4344 
4345  AdjustTimestampForTypmod(&result, *typmod);
4346 
4347  *typid = TIMESTAMPOID;
4348  return TimestampGetDatum(result);
4349  }
4350  }
4351  else
4352  {
4353  if (flags & DCH_ZONED)
4354  {
      /* a zone without any time-of-day field is rejected */
4356  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4357  errmsg("datetime format is zoned but not timed"))));
4358  }
4359  else
4360  {
      /* date only */
4361  DateADT result;
4362 
4363  /* Prevent overflow in Julian-day routines */
4364  if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4366  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4367  errmsg("date out of range: \"%s\"",
4368  text_to_cstring(date_txt)))));
4369 
4370  result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) -
4372 
4373  /* Now check for just-out-of-range dates */
4374  if (!IS_VALID_DATE(result))
4376  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4377  errmsg("date out of range: \"%s\"",
4378  text_to_cstring(date_txt)))));
4379 
4380  *typid = DATEOID;
4381  return DateADTGetDatum(result);
4382  }
4383  }
4384  }
4385  else if (flags & DCH_TIMED)
4386  {
4387  if (flags & DCH_ZONED)
4388  {
      /* time + zone, no date: timetz (result is palloc'd) */
4389  TimeTzADT *result = palloc(sizeof(TimeTzADT));
4390 
4391  if (tm.tm_zone)
4392  {
4393  int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
4394 
4395  if (dterr)
4396  RETURN_ERROR(DateTimeParseError(dterr, text_to_cstring(date_txt), "timetz"));
4397  }
4398  else
4399  {
4400  /*
4401  * Time zone is present in format string, but not in input
4402  * string. Assuming do_to_timestamp() triggers no error this
4403  * should be possible only in non-strict case.
4404  */
4405  Assert(!strict);
4406 
4408  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4409  errmsg("missing time zone in input string for type timetz"))));
4410  }
4411 
4412  if (tm2timetz(&tm, fsec, *tz, result) != 0)
4414  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4415  errmsg("timetz out of range"))));
4416 
4417  AdjustTimeForTypmod(&result->time, *typmod);
4418 
4419  *typid = TIMETZOID;
4420  return TimeTzADTPGetDatum(result);
4421  }
4422  else
4423  {
      /* time only */
4424  TimeADT result;
4425 
4426  if (tm2time(&tm, fsec, &result) != 0)
4428  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4429  errmsg("time out of range"))));
4430 
4431  AdjustTimeForTypmod(&result, *typmod);
4432 
4433  *typid = TIMEOID;
4434  return TimeADTGetDatum(result);
4435  }
4436  }
4437  else
4438  {
      /* neither date nor time fields were found in the format */
4440  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4441  errmsg("datetime format is not dated and not timed"))));
4442  }
4443 
4444 on_error:
4445  return (Datum) 0;
4446 }
4447 
4448 /*
4449  * do_to_timestamp: shared code for to_timestamp and to_date
4450  *
4451  * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm,
4452  * fractional seconds, and fractional precision.
4453  *
4454  * 'collid' identifies the collation to use, if needed.
4455  * 'std' specifies standard parsing mode.
4456  * Bit mask of date/time/zone components found in 'fmt' is returned in 'flags',
4457  * if that is not NULL.
4458  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
4459  *
4460  * We parse 'fmt' into a list of FormatNodes, which is then passed to
4461  * DCH_from_char to populate a TmFromChar with the parsed contents of
4462  * 'date_txt'.
4463  *
4464  * The TmFromChar is then analysed and converted into the final results in
4465  * struct 'tm', 'fsec', and 'fprec'.
4466  */
4467 static void
4468 do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
4469  struct pg_tm *tm, fsec_t *fsec, int *fprec,
4470  uint32 *flags, bool *have_error)
4471 {
4472  FormatNode *format = NULL;
4473  TmFromChar tmfc;
4474  int fmt_len;
4475  char *date_str;
4476  int fmask;
4477  bool incache = false;
4478 
4479  Assert(tm != NULL);
4480  Assert(fsec != NULL);
4481 
4482  date_str = text_to_cstring(date_txt);
4483 
4484  ZERO_tmfc(&tmfc);
4485  ZERO_tm(tm);
4486  *fsec = 0;
4487  if (fprec)
4488  *fprec = 0;
4489  if (flags)
4490  *flags = 0;
4491  fmask = 0; /* bit mask for ValidateDate() */
4492 
4493  fmt_len = VARSIZE_ANY_EXHDR(fmt);
4494 
4495  if (fmt_len)
4496  {
4497  char *fmt_str;
4498 
4499  fmt_str = text_to_cstring(fmt);
4500 
4501  if (fmt_len > DCH_CACHE_SIZE)
4502  {
4503  /*
4504  * Allocate new memory if format picture is bigger than static
4505  * cache and do not use cache (call parser always)
4506  */
4507  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4508 
4509  parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index,
4510  DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4511  }
4512  else
4513  {
4514  /*
4515  * Use cache buffers
4516  */
4517  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std);
4518 
4519  incache = true;
4520  format = ent->format;
4521  }
4522 
4523 #ifdef DEBUG_TO_FROM_CHAR
4524  /* dump_node(format, fmt_len); */
4525  /* dump_index(DCH_keywords, DCH_index); */
4526 #endif
4527 
4528  DCH_from_char(format, date_str, &tmfc, collid, std, have_error);
4529  CHECK_ERROR;
4530 
4531  pfree(fmt_str);
4532 
4533  if (flags)
4534  *flags = DCH_datetime_type(format, have_error);
4535 
4536  if (!incache)
4537  {
4538  pfree(format);
4539  format = NULL;
4540  }
4541 
4542  CHECK_ERROR;
4543  }
4544 
4545  DEBUG_TMFC(&tmfc);
4546 
4547  /*
4548  * Convert to_date/to_timestamp input fields to standard 'tm'
4549  */
4550  if (tmfc.ssss)
4551  {
4552  int x = tmfc.ssss;
4553 
4554  tm->tm_hour = x / SECS_PER_HOUR;
4555  x %= SECS_PER_HOUR;
4556  tm->tm_min = x / SECS_PER_MINUTE;
4557  x %= SECS_PER_MINUTE;
4558  tm->tm_sec = x;
4559  }
4560 
4561  if (tmfc.ss)
4562  tm->tm_sec = tmfc.ss;
4563  if (tmfc.mi)
4564  tm->tm_min = tmfc.mi;
4565  if (tmfc.hh)
4566  tm->tm_hour = tmfc.hh;
4567 
4568  if (tmfc.clock == CLOCK_12_HOUR)
4569  {
4570  if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
4571  {
4573  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4574  errmsg("hour \"%d\" is invalid for the 12-hour clock",
4575  tm->tm_hour),
4576  errhint("Use the 24-hour clock, or give an hour between 1 and 12."))));
4577  }
4578 
4579  if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
4580  tm->tm_hour += HOURS_PER_DAY / 2;
4581  else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
4582  tm->tm_hour = 0;
4583  }
4584 
4585  if (tmfc.year)
4586  {
4587  /*
4588  * If CC and YY (or Y) are provided, use YY as 2 low-order digits for
4589  * the year in the given century. Keep in mind that the 21st century
4590  * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
4591  * 600BC to 501BC.
4592  */
4593  if (tmfc.cc && tmfc.yysz <= 2)
4594  {
4595  if (tmfc.bc)
4596  tmfc.cc = -tmfc.cc;
4597  tm->tm_year = tmfc.year % 100;
4598  if (tm->tm_year)
4599  {
4600  if (tmfc.cc >= 0)
4601  tm->tm_year += (tmfc.cc - 1) * 100;
4602  else
4603  tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
4604  }
4605  else
4606  {
4607  /* find century year for dates ending in "00" */
4608  tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
4609  }
4610  }
4611  else
4612  {
4613  /* If a 4-digit year is provided, we use that and ignore CC. */
4614  tm->tm_year = tmfc.year;
4615  if (tmfc.bc)
4616  tm->tm_year = -tm->tm_year;
4617  /* correct for our representation of BC years */
4618  if (tm->tm_year < 0)
4619  tm->tm_year++;
4620  }
4621  fmask |= DTK_M(YEAR);
4622  }
4623  else if (tmfc.cc)
4624  {
4625  /* use first year of century */
4626  if (tmfc.bc)
4627  tmfc.cc = -tmfc.cc;
4628  if (tmfc.cc >= 0)
4629  /* +1 because 21st century started in 2001 */
4630  tm->tm_year = (tmfc.cc - 1) * 100 + 1;
4631  else
4632  /* +1 because year == 599 is 600 BC */
4633  tm->tm_year = tmfc.cc * 100 + 1;
4634  fmask |= DTK_M(YEAR);
4635  }
4636 
4637  if (tmfc.j)
4638  {
4639  j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4640  fmask |= DTK_DATE_M;
4641  }
4642 
4643  if (tmfc.ww)
4644  {
4645  if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4646  {
4647  /*
4648  * If tmfc.d is not set, then the date is left at the beginning of
4649  * the ISO week (Monday).
4650  */
4651  if (tmfc.d)
4652  isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4653  else
4654  isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4655  fmask |= DTK_DATE_M;
4656  }
4657  else
4658  tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
4659  }
4660 
4661  if (tmfc.w)
4662  tmfc.dd = (tmfc.w - 1) * 7 + 1;
4663  if (tmfc.dd)
4664  {
4665  tm->tm_mday = tmfc.dd;
4666  fmask |= DTK_M(DAY);
4667  }
4668  if (tmfc.mm)
4669  {
4670  tm->tm_mon = tmfc.mm;
4671  fmask |= DTK_M(MONTH);
4672  }
4673 
4674  if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
4675  {
4676  /*
4677  * The month and day field have not been set, so we use the
4678  * day-of-year field to populate them. Depending on the date mode,
4679  * this field may be interpreted as a Gregorian day-of-year, or an ISO
4680  * week date day-of-year.
4681  */
4682 
4683  if (!tm->tm_year && !tmfc.bc)
4684  {
4686  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4687  errmsg("cannot calculate day of year without year information"))));
4688  }
4689 
4690  if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4691  {
4692  int j0; /* zeroth day of the ISO year, in Julian */
4693 
4694  j0 = isoweek2j(tm->tm_year, 1) - 1;
4695 
4696  j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4697  fmask |= DTK_DATE_M;
4698  }
4699  else
4700  {
4701  const int *y;
4702  int i;
4703 
4704  static const int ysum[2][13] = {
4705  {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
4706  {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}};
4707 
4708  y = ysum[isleap(tm->tm_year)];
4709 
4710  for (i = 1; i <= MONTHS_PER_YEAR; i++)
4711  {
4712  if (tmfc.ddd <= y[i])
4713  break;
4714  }
4715  if (tm->tm_mon <= 1)
4716  tm->tm_mon = i;
4717 
4718  if (tm->tm_mday <= 1)
4719  tm->tm_mday = tmfc.ddd - y[i - 1];
4720 
4721  fmask |= DTK_M(MONTH) | DTK_M(DAY);
4722  }
4723  }
4724 
4725  if (tmfc.ms)
4726  *fsec += tmfc.ms * 1000;
4727  if (tmfc.us)
4728  *fsec += tmfc.us;
4729  if (fprec)
4730  *fprec = tmfc.ff; /* fractional precision, if specified */
4731 
4732  /* Range-check date fields according to bit mask computed above */
4733  if (fmask != 0)
4734  {
4735  /* We already dealt with AD/BC, so pass isjulian = true */
4736  int dterr = ValidateDate(fmask, true, false, false, tm);
4737 
4738  if (dterr != 0)
4739  {
4740  /*
4741  * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate
4742  * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an
4743  * irrelevant hint about datestyle.
4744  */
4745  RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
4746  }
4747  }
4748 
4749  /* Range-check time fields too */
4750  if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY ||
4751  tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR ||
4752  tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE ||
4753  *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC)
4754  {
4755  RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
4756  }
4757 
4758  /* Save parsed time-zone into tm->tm_zone if it was specified */
4759  if (tmfc.tzsign)
4760  {
4761  char *tz;
4762 
4763  if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR ||
4764  tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR)
4765  {
4766  RETURN_ERROR(DateTimeParseError(DTERR_TZDISP_OVERFLOW, date_str, "timestamp"));
4767  }
4768 
4769  tz = psprintf("%c%02d:%02d",
4770  tmfc.tzsign > 0 ? '+' : '-', tmfc.tzh, tmfc.tzm);
4771 
4772  tm->tm_zone = tz;
4773  }
4774 
4775  DEBUG_TM(tm);
4776 
4777 on_error:
4778 
4779  if (format && !incache)
4780  pfree(format);
4781 
4782  pfree(date_str);
4783 }
4784 
4785 
4786 /**********************************************************************
4787  * the NUMBER version part
4788  *********************************************************************/
4789 
4790 
/*
 * Fill the first "max" bytes of str with the character c and terminate the
 * string right after them.  The caller must supply a buffer of at least
 * max + 1 bytes.  Returns str for convenience.
 */
static char *
fill_str(char *str, int c, int max)
{
	memset(str, c, max);
	str[max] = '\0';

	return str;
}
4798 
/*
 * Reset every field of the number description pointed to by _n to zero.
 * Note that the argument is evaluated once per field.
 */
#define zeroize_NUM(_n) \
do { \
	/* general flags and locale requirements */ \
	(_n)->flag = 0; \
	(_n)->need_locale = 0; \
	/* sign placement */ \
	(_n)->lsign = 0; \
	(_n)->pre_lsign_num = 0; \
	/* digit counts before and after the decimal point */ \
	(_n)->pre = 0; \
	(_n)->post = 0; \
	/* multiplier and zero-padding positions */ \
	(_n)->multi = 0; \
	(_n)->zero_start = 0; \
	(_n)->zero_end = 0; \
} while(0)
4811 
4812 /* This works the same as DCH_prevent_counter_overflow */
4813 static inline void
4815 {
4816  if (NUMCounter >= (INT_MAX - 1))
4817  {
4818  for (int i = 0; i < n_NUMCache; i++)
4819  NUMCache[i]->age >>= 1;
4820  NUMCounter >>= 1;
4821  }
4822 }
4823 
4824 /* select a NUMCacheEntry to hold the given format picture */
4825 static NUMCacheEntry *
4826 NUM_cache_getnew(const char *str)
4827 {
4828  NUMCacheEntry *ent;
4829 
4830  /* Ensure we can advance NUMCounter below */
4832 
4833  /*
4834  * If cache is full, remove oldest entry (or recycle first not-valid one)
4835  */
4837  {
4838  NUMCacheEntry *old = NUMCache[0];
4839 
4840 #ifdef DEBUG_TO_FROM_CHAR
4841  elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache);
4842 #endif
4843  if (old->valid)
4844  {
4845  for (int i = 1; i < NUM_CACHE_ENTRIES; i++)
4846  {
4847  ent = NUMCache[i];
4848  if (!ent->valid)
4849  {
4850  old = ent;
4851  break;
4852  }
4853  if (ent->age < old->age)
4854  old = ent;
4855  }
4856  }
4857 #ifdef DEBUG_TO_FROM_CHAR
4858  elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age);
4859 #endif
4860  old->valid = false;
4861  strlcpy(old->str, str, NUM_CACHE_SIZE + 1);
4862  old->age = (++NUMCounter);
4863  /* caller is expected to fill format and Num, then set valid */
4864  return old;
4865  }
4866  else
4867  {
4868 #ifdef DEBUG_TO_FROM_CHAR
4869  elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache);
4870 #endif
4871  Assert(NUMCache[n_NUMCache] == NULL);
4872  NUMCache[n_NUMCache] = ent = (NUMCacheEntry *)
4874  ent->valid = false;
4875  strlcpy(ent->str, str, NUM_CACHE_SIZE + 1);
4876  ent->age = (++NUMCounter);
4877  /* caller is expected to fill format and Num, then set valid */
4878  ++n_NUMCache;
4879  return ent;
4880  }
4881 }
4882 
4883 /* look for an existing NUMCacheEntry matching the given format picture */
4884 static NUMCacheEntry *
4885 NUM_cache_search(const char *str)
4886 {
4887  /* Ensure we can advance NUMCounter below */
4889 
4890  for (int i = 0; i < n_NUMCache; i++)
4891  {
4892  NUMCacheEntry *ent = NUMCache[i];
4893 
4894  if (ent->valid && strcmp(ent->str, str) == 0)
4895  {
4896  ent->age = (++NUMCounter);
4897  return ent;
4898  }
4899  }
4900 
4901  return NULL;
4902 }
4903 
4904 /* Find or create a NUMCacheEntry for the given format picture */
4905 static NUMCacheEntry *
4906 NUM_cache_fetch(const char *str)
4907 {
4908  NUMCacheEntry *ent;
4909 
4910  if ((ent = NUM_cache_search(str)) == NULL)
4911  {
4912  /*
4913  * Not in the cache, must run parser and save a new format-picture to
4914  * the cache. Do not mark the cache entry valid until parsing
4915  * succeeds.
4916  */
4917  ent = NUM_cache_getnew(str);
4918 
4919  zeroize_NUM(&ent->Num);
4920 
4921  parse_format(ent->format, str, NUM_keywords,
4922  NULL, NUM_index, NUM_FLAG, &ent->Num);
4923 
4924  ent->valid = true;
4925  }
4926  return ent;
4927 }
4928 
4929 /* ----------
4930  * Cache routine for NUM to_char version
4931  * ----------
4932  */
4933 static FormatNode *
4934 NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
4935 {
4936  FormatNode *format = NULL;
4937  char *str;
4938 
4939  str = text_to_cstring(pars_str);
4940 
4941  if (len > NUM_CACHE_SIZE)
4942  {
4943  /*
4944  * Allocate new memory if format picture is bigger than static cache
4945  * and do not use cache (call parser always)
4946  */
4947  format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode));
4948 
4949  *shouldFree = true;
4950 
4951  zeroize_NUM(Num);
4952 
4953  parse_format(format, str, NUM_keywords,
4954  NULL, NUM_index, NUM_FLAG, Num);
4955  }
4956  else
4957  {
4958  /*
4959  * Use cache buffers
4960  */
4961  NUMCacheEntry *ent = NUM_cache_fetch(str);
4962 
4963  *shouldFree = false;
4964 
4965  format = ent->format;
4966 
4967  /*
4968  * Copy cache to used struct
4969  */
4970  Num->flag = ent->Num.flag;
4971  Num->lsign = ent->Num.lsign;
4972  Num->pre = ent->Num.pre;
4973  Num->post = ent->Num.post;
4974  Num->pre_lsign_num = ent->Num.pre_lsign_num;
4975  Num->need_locale = ent->Num.need_locale;
4976  Num->multi = ent->Num.multi;
4977  Num->zero_start = ent->Num.zero_start;
4978  Num->zero_end = ent->Num.zero_end;
4979  }
4980 
4981 #ifdef DEBUG_TO_FROM_CHAR
4982  /* dump_node(format, len); */
4983  dump_index(NUM_keywords, NUM_index);
4984 #endif
4985 
4986  pfree(str);
4987  return format;
4988 }
4989 
4990 
4991 static char *
4992 int_to_roman(int number)
4993 {
4994  int len,
4995  num;
4996  char *p,
4997  *result,
4998  numstr[12];
4999 
5000  result = (char *) palloc(16);
5001  *result = '\0';
5002 
5003  if (number > 3999 || number < 1)
5004  {
5005  fill_str(result, '#', 15);
5006  return result;
5007  }
5008  len = snprintf(numstr, sizeof(numstr), "%d", number);
5009 
5010  for (p = numstr; *p != '\0'; p++, --len)
5011  {
5012  num = *p - ('0' + 1);
5013  if (num < 0)
5014  continue;
5015 
5016  if (len > 3)
5017  {
5018  while (num-- != -1)
5019  strcat(result, "M");
5020  }
5021  else
5022  {
5023  if (len == 3)
5024  strcat(result, rm100[num]);
5025  else if (len == 2)
5026  strcat(result, rm10[num]);
5027  else if (len == 1)
5028  strcat(result, rm1[num]);
5029  }
5030  }
5031  return result;
5032 }
5033 
5034 
5035 
5036 /* ----------
5037  * Locale
5038  * ----------
5039  */
5040 static void
5042 {
5043  if (Np->Num->need_locale)
5044  {
5045  struct lconv *lconv;
5046 
5047  /*
5048  * Get locales
5049  */
5050  lconv = PGLC_localeconv();
5051 
5052  /*
5053  * Positive / Negative number sign
5054  */
5055  if (lconv->negative_sign && *lconv->negative_sign)
5056  Np->L_negative_sign = lconv->negative_sign;
5057  else
5058  Np->L_negative_sign = "-";
5059 
5060  if (lconv->positive_sign && *lconv->positive_sign)
5061  Np->L_positive_sign = lconv->positive_sign;
5062  else
5063  Np->L_positive_sign = "+";
5064 
5065  /*
5066  * Number decimal point
5067  */
5068  if (lconv->decimal_point && *lconv->decimal_point)
5069  Np->decimal = lconv->decimal_point;
5070 
5071  else
5072  Np->decimal = ".";
5073 
5074  if (!IS_LDECIMAL(Np->Num))
5075  Np->decimal = ".";
5076 
5077  /*
5078  * Number thousands separator
5079  *
5080  * Some locales (e.g. broken glibc pt_BR), have a comma for decimal,
5081  * but "" for thousands_sep, so we set the thousands_sep too.
5082  * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php
5083  */
5084  if (lconv->thousands_sep && *lconv->thousands_sep)
5085  Np->L_thousands_sep = lconv->thousands_sep;
5086  /* Make sure thousands separator doesn't match decimal point symbol. */
5087  else if (strcmp(Np->decimal, ",") != 0)
5088  Np->L_thousands_sep = ",";
5089  else
5090  Np->L_thousands_sep = ".";
5091 
5092  /*
5093  * Currency symbol
5094  */
5095  if (lconv->currency_symbol && *lconv->currency_symbol)
5096  Np->L_currency_symbol = lconv->currency_symbol;
5097  else
5098  Np->L_currency_symbol = " ";
5099  }
5100  else
5101  {
5102  /*
5103  * Default values
5104  */
5105  Np->L_negative_sign = "-";
5106  Np->L_positive_sign = "+";
5107  Np->decimal = ".";
5108 
5109  Np->L_thousands_sep = ",";
5110  Np->L_currency_symbol = " ";
5111  }
5112 }
5113 
/* ----------
 * Return pointer of last relevant number after decimal point
 *	12.0500 --> last relevant is '5'
 *	12.0000 --> last relevant is '.'
 * If there is no decimal point, return NULL (which will result in same
 * behavior as if FM hadn't been specified).
 *
 * The returned pointer, when not NULL, points into the string passed in.
 * ----------
 */
static char *
get_last_relevant_decnum(char *num)
{
	char	   *result,
			   *p = strchr(num, '.');

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, "get_last_relevant_decnum()");
#endif

	if (!p)
		return NULL;

	/* start at the decimal point; advance to each later non-'0' character */
	result = p;

	while (*(++p))
	{
		if (*p != '0')
			result = p;
	}

	return result;
}
5145 
/*
 * These macros are used in NUM_processor() and its subsidiary routines.
 * Both expect variables named "Np" and "input_len" to be in scope where
 * they are used.
 *
 * OVERLOAD_TEST: true if we've reached end of input string
 * AMOUNT_TEST(s): true if at least s bytes remain in string
 *
 * The tests compare the current offset into the input rather than building
 * a limit pointer, so AMOUNT_TEST never computes an address in front of the
 * input buffer when s is larger than input_len.
 */
#define OVERLOAD_TEST	((Np->inout_p - Np->inout) >= input_len)
#define AMOUNT_TEST(s)	((Np->inout_p - Np->inout) + (s) <= input_len)
5153 
5154 /* ----------
5155  * Number extraction for TO_NUMBER()
5156  * ----------
5157  */
5158 static void
5159 NUM_numpart_from_char(NUMProc *Np, int id, int input_len)
5160 {
5161  bool isread = false;
5162 
5163 #ifdef DEBUG_TO_FROM_CHAR
5164  elog(DEBUG_elog_output, " --- scan start --- id=%s",
5165  (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???");
5166 #endif
5167 
5168  if (OVERLOAD_TEST)
5169  return;
5170 
5171  if (*Np->inout_p == ' ')
5172  Np->inout_p++;
5173 
5174  if (OVERLOAD_TEST)
5175  return;
5176 
5177  /*
5178  * read sign before number
5179  */
5180  if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) &&
5181  (Np->read_pre + Np->read_post) == 0)
5182  {
5183 #ifdef DEBUG_TO_FROM_CHAR
5184  elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s",
5185  *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign);
5186 #endif
5187 
5188  /*
5189  * locale sign
5190  */
5191  if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE)
5192  {
5193  int x = 0;
5194 
5195 #ifdef DEBUG_TO_FROM_CHAR
5196  elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p);
5197 #endif
5198  if ((x = strlen(Np->L_negative_sign)) &&
5199  AMOUNT_TEST(x) &&
5200  strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
5201  {
5202  Np->inout_p += x;
5203  *Np->number = '-';
5204  }
5205  else if ((x = strlen(Np->L_positive_sign)) &&
5206  AMOUNT_TEST(x) &&
5207  strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
5208  {
5209  Np->inout_p += x;
5210  *Np->number = '+';
5211  }
5212  }
5213  else
5214  {
5215 #ifdef DEBUG_TO_FROM_CHAR
5216  elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p);
5217 #endif
5218 
5219  /*
5220  * simple + - < >
5221  */
5222  if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) &&
5223  *Np->inout_p == '<'))
5224  {
5225  *Np->number = '-'; /* set - */
5226  Np->inout_p++;
5227  }
5228  else if (*Np->inout_p == '+')
5229  {
5230  *Np->number = '+'; /* set + */
5231  Np->inout_p++;
5232  }
5233  }
5234  }
5235 
5236  if (OVERLOAD_TEST)
5237  return;
5238 
5239 #ifdef DEBUG_TO_FROM_CHAR
5240  elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number);
5241 #endif
5242 
5243  /*
5244  * read digit or decimal point
5245  */
5246  if (isdigit((unsigned char) *Np->inout_p))
5247  {
5248  if (Np->read_dec && Np->read_post == Np->Num->post)
5249  return;
5250 
5251  *Np->number_p = *Np->inout_p;
5252  Np->number_p++;
5253 
5254  if (Np->read_dec)
5255  Np->read_post++;
5256  else
5257  Np->read_pre++;
5258 
5259  isread = true;
5260 
5261 #ifdef DEBUG_TO_FROM_CHAR
5262  elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p);
5263 #endif
5264  }
5265  else if (IS_DECIMAL(Np->Num) && Np->read_dec == false)
5266  {
5267  /*
5268  * We need not test IS_LDECIMAL(Np->Num) explicitly here, because
5269  * Np->decimal is always just "." if we don't have a D format token.
5270  * So we just unconditionally match to Np->decimal.
5271  */
5272  int x = strlen(Np->decimal);
5273 
5274 #ifdef DEBUG_TO_FROM_CHAR
5275  elog(DEBUG_elog_output, "Try read decimal point (%c)",
5276  *Np->inout_p);
5277 #endif
5278  if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0)
5279  {
5280  Np->inout_p += x - 1;
5281  *Np->number_p = '.';
5282  Np->number_p++;
5283  Np->read_dec = true;
5284  isread = true;
5285  }
5286  }
5287 
5288  if (OVERLOAD_TEST)