PostgreSQL Source Code  git master
formatting.c
Go to the documentation of this file.
1 /* -----------------------------------------------------------------------
2  * formatting.c
3  *
4  * src/backend/utils/adt/formatting.c
5  *
6  *
7  * Portions Copyright (c) 1999-2024, PostgreSQL Global Development Group
8  *
9  *
10  * TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
11  *
12  * The PostgreSQL routines for a timestamp/int/float/numeric formatting,
13  * inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
14  *
15  *
16  * Cache & Memory:
17  * These routines keep their own internal cache of format pictures.
18  *
19  * The cache uses a static buffer and is persistent across transactions. If
20  * the format picture is bigger than the cache buffer, the parser is called
21  * every time.
22  *
23  * NOTE for Number version:
24  * Everything in this version is implemented as keywords (=> suffixes
25  * are not used), because a format picture describes *one* item (number)
26  * only. This differs from the timestamp version, where each keyword can
27  * have a suffix.
28  *
29  * NOTE for Timestamp routines:
30  * In this module the POSIX 'struct tm' type is *not* used, but rather
31  * the PgSQL type, which has tm_mon based on one (*not* zero) and a
32  * year that is *not* based on 1900; the full year number is used.
33  * The module supports AD / BC / AM / PM.
34  *
35  * Supported types for to_char():
36  *
37  * Timestamp, Numeric, int4, int8, float4, float8
38  *
39  * Supported types for reverse conversion:
40  *
41  * Timestamp - to_timestamp()
42  * Date - to_date()
43  * Numeric - to_number()
44  *
45  *
46  * Karel Zak
47  *
48  * TODO
49  * - better number building (formatting) / parsing, now it isn't
50  * ideal code
51  * - use Assert()
52  * - add support for roman number to standard number conversion
53  * - add support for number spelling
54  * - add support for string to string formatting (we must be better
55  * than Oracle :-),
56  * to_char('Hello', 'X X X X X') -> 'H e l l o'
57  *
58  * -----------------------------------------------------------------------
59  */
60 
61 #ifdef DEBUG_TO_FROM_CHAR
62 #define DEBUG_elog_output DEBUG3
63 #endif
64 
65 #include "postgres.h"
66 
67 #include <ctype.h>
68 #include <unistd.h>
69 #include <math.h>
70 #include <float.h>
71 #include <limits.h>
72 #include <wctype.h>
73 
74 #ifdef USE_ICU
75 #include <unicode/ustring.h>
76 #endif
77 
78 #include "catalog/pg_collation.h"
79 #include "catalog/pg_type.h"
80 #include "common/unicode_case.h"
82 #include "mb/pg_wchar.h"
83 #include "nodes/miscnodes.h"
84 #include "parser/scansup.h"
85 #include "utils/builtins.h"
86 #include "utils/date.h"
87 #include "utils/datetime.h"
88 #include "utils/formatting.h"
89 #include "utils/memutils.h"
90 #include "utils/numeric.h"
91 #include "utils/pg_locale.h"
92 #include "varatt.h"
93 
94 
95 /* ----------
96  * Routines flags
97  * ----------
98  */
99 #define DCH_FLAG 0x1 /* DATE-TIME flag */
100 #define NUM_FLAG 0x2 /* NUMBER flag */
101 #define STD_FLAG 0x4 /* STANDARD flag */
102 
/* ----------
 * KeyWord index
 *
 * An index array has KeyWord_INDEX_SIZE slots and is addressed by
 * (character - ' ').  KeyWord_INDEX_FILTER() yields 1 only for characters
 * strictly between ' ' and '~', and 0 for everything else, so a character
 * that passes the filter always maps to a slot inside the array.
 * ----------
 */
#define KeyWord_INDEX_SIZE		('~' - ' ')
#define KeyWord_INDEX_FILTER(_c)	(((_c) > ' ' && (_c) < '~') ? 1 : 0)
109 
110 /* ----------
111  * Maximal length of one node
112  * ----------
113  */
114 #define DCH_MAX_ITEM_SIZ 12 /* max localized day name */
115 #define NUM_MAX_ITEM_SIZ 8 /* roman number (RN has 15 chars) */
116 
117 
118 /* ----------
119  * Format parser structs
120  * ----------
121  */
/* One entry of a suffix table (a table ends with a NULL-name entry). */
typedef struct
{
	const char *name;	/* text of the suffix */
	int			len;	/* length of the suffix text */
	int			id;		/* code used in node->suffix */
	int			type;	/* prefix / postfix */
} KeySuffix;
129 
130 /* ----------
131  * FromCharDateMode
132  * ----------
133  *
134  * This value is used to nominate one of several distinct (and mutually
135  * exclusive) date conventions that a keyword can belong to.
136  */
/* Date convention a keyword belongs to; the conventions exclude each other. */
typedef enum
{
	FROM_CHAR_DATE_NONE = 0,	/* Value does not affect date mode. */
	FROM_CHAR_DATE_GREGORIAN,	/* Gregorian (day, month, year) style date */
	FROM_CHAR_DATE_ISOWEEK,		/* ISO 8601 week date */
} FromCharDateMode;

143 
144 typedef struct
145 {
146  const char *name;
147  int len;
148  int id;
149  bool is_digit;
151 } KeyWord;
152 
153 typedef struct
154 {
155  uint8 type; /* NODE_TYPE_XXX, see below */
156  char character[MAX_MULTIBYTE_CHAR_LEN + 1]; /* if type is CHAR */
157  uint8 suffix; /* keyword prefix/suffix code, if any */
158  const KeyWord *key; /* if type is ACTION */
159 } FormatNode;
160 
161 #define NODE_TYPE_END 1
162 #define NODE_TYPE_ACTION 2
163 #define NODE_TYPE_CHAR 3
164 #define NODE_TYPE_SEPARATOR 4
165 #define NODE_TYPE_SPACE 5
166 
167 #define SUFFTYPE_PREFIX 1
168 #define SUFFTYPE_POSTFIX 2
169 
170 #define CLOCK_24_HOUR 0
171 #define CLOCK_12_HOUR 1
172 
173 
174 /* ----------
175  * Full months
176  * ----------
177  */
178 static const char *const months_full[] = {
179  "January", "February", "March", "April", "May", "June", "July",
180  "August", "September", "October", "November", "December", NULL
181 };
182 
183 static const char *const days_short[] = {
184  "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
185 };
186 
/* ----------
 * AD / BC
 * ----------
 * There is no 0 AD; years go from 1 BC straight to 1 AD.  For a
 * non-interval value, a year <= 0 is therefore turned into the positive
 * number 1 - year (0 -> 1, -1 -> 2, ...), while a positive year is
 * returned as is.  For interval years, we just return the year.
 */
#define ADJUST_YEAR(year, is_interval) \
	(((is_interval) || (year) > 0) ? (year) : 1 - (year))
195 
196 #define A_D_STR "A.D."
197 #define a_d_STR "a.d."
198 #define AD_STR "AD"
199 #define ad_STR "ad"
200 
201 #define B_C_STR "B.C."
202 #define b_c_STR "b.c."
203 #define BC_STR "BC"
204 #define bc_STR "bc"
205 
206 /*
207  * AD / BC strings for seq_search.
208  *
209  * These are given in two variants, a long form with periods and a standard
210  * form without.
211  *
212  * The array is laid out such that matches for AD have an even index, and
213  * matches for BC have an odd index. So the boolean value for BC is given by
214  * taking the array index of the match, modulo 2.
215  */
216 static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR, NULL};
217 static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR, NULL};
218 
219 /* ----------
220  * AM / PM
221  * ----------
222  */
223 #define A_M_STR "A.M."
224 #define a_m_STR "a.m."
225 #define AM_STR "AM"
226 #define am_STR "am"
227 
228 #define P_M_STR "P.M."
229 #define p_m_STR "p.m."
230 #define PM_STR "PM"
231 #define pm_STR "pm"
232 
233 /*
234  * AM / PM strings for seq_search.
235  *
236  * These are given in two variants, a long form with periods and a standard
237  * form without.
238  *
239  * The array is laid out such that matches for AM have an even index, and
240  * matches for PM have an odd index. So the boolean value for PM is given by
241  * taking the array index of the match, modulo 2.
242  */
243 static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR, NULL};
244 static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR, NULL};
245 
246 /* ----------
247  * Months in roman-numeral
248  * (Must be in reverse order for seq_search (in FROM_CHAR), because
249  * 'VIII' must have higher precedence than 'V')
250  * ----------
251  */
252 static const char *const rm_months_upper[] =
253 {"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I", NULL};
254 
255 static const char *const rm_months_lower[] =
256 {"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL};
257 
258 /* ----------
259  * Roman numbers
260  * ----------
261  */
262 static const char *const rm1[] = {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", NULL};
263 static const char *const rm10[] = {"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC", NULL};
264 static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", NULL};
265 
266 /* ----------
267  * Ordinal postfixes
268  * ----------
269  */
270 static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL};
271 static const char *const numth[] = {"st", "nd", "rd", "th", NULL};
272 
273 /* ----------
274  * Flags & Options:
275  * ----------
276  */
277 #define TH_UPPER 1
278 #define TH_LOWER 2
279 
280 /* ----------
281  * Number description struct
282  * ----------
283  */
typedef struct
{
	int			pre;			/* (count) numbers before decimal */
	int			post;			/* (count) numbers after decimal */
	int			lsign;			/* want locale's sign */
	int			flag;			/* number parameters (NUM_F_xxx bits) */
	int			pre_lsign_num;	/* tmp value for lsign */
	int			multi;			/* multiplier for 'V' */
	int			zero_start;		/* position of first zero */
	int			zero_end;		/* position of last zero */
	int			need_locale;	/* locale is needed */
} NUMDesc;
296 
297 /* ----------
298  * Flags for NUMBER version
299  * ----------
300  */
301 #define NUM_F_DECIMAL (1 << 1)
302 #define NUM_F_LDECIMAL (1 << 2)
303 #define NUM_F_ZERO (1 << 3)
304 #define NUM_F_BLANK (1 << 4)
305 #define NUM_F_FILLMODE (1 << 5)
306 #define NUM_F_LSIGN (1 << 6)
307 #define NUM_F_BRACKET (1 << 7)
308 #define NUM_F_MINUS (1 << 8)
309 #define NUM_F_PLUS (1 << 9)
310 #define NUM_F_ROMAN (1 << 10)
311 #define NUM_F_MULTI (1 << 11)
312 #define NUM_F_PLUS_POST (1 << 12)
313 #define NUM_F_MINUS_POST (1 << 13)
314 #define NUM_F_EEEE (1 << 14)
315 
316 #define NUM_LSIGN_PRE (-1)
317 #define NUM_LSIGN_POST 1
318 #define NUM_LSIGN_NONE 0
319 
320 /* ----------
321  * Tests
322  * ----------
323  */
324 #define IS_DECIMAL(_f) ((_f)->flag & NUM_F_DECIMAL)
325 #define IS_LDECIMAL(_f) ((_f)->flag & NUM_F_LDECIMAL)
326 #define IS_ZERO(_f) ((_f)->flag & NUM_F_ZERO)
327 #define IS_BLANK(_f) ((_f)->flag & NUM_F_BLANK)
328 #define IS_FILLMODE(_f) ((_f)->flag & NUM_F_FILLMODE)
329 #define IS_BRACKET(_f) ((_f)->flag & NUM_F_BRACKET)
330 #define IS_MINUS(_f) ((_f)->flag & NUM_F_MINUS)
331 #define IS_LSIGN(_f) ((_f)->flag & NUM_F_LSIGN)
332 #define IS_PLUS(_f) ((_f)->flag & NUM_F_PLUS)
333 #define IS_ROMAN(_f) ((_f)->flag & NUM_F_ROMAN)
334 #define IS_MULTI(_f) ((_f)->flag & NUM_F_MULTI)
335 #define IS_EEEE(_f) ((_f)->flag & NUM_F_EEEE)
336 
337 /* ----------
338  * Format picture cache
339  *
340  * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long;
341  * likewise number format pictures up to NUM_CACHE_SIZE bytes long.
342  *
343  * For simplicity, the cache entries are fixed-size, so they allow for the
344  * worst case of a FormatNode for each byte in the picture string.
345  *
346  * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and
347  * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that
348  * we don't waste too much space by palloc'ing them individually. Be sure
349  * to adjust those macros if you add fields to those structs.
350  *
351  * The max number of entries in each cache is DCH_CACHE_ENTRIES
352  * resp. NUM_CACHE_ENTRIES.
353  * ----------
354  */
355 #define DCH_CACHE_OVERHEAD \
356  MAXALIGN(sizeof(bool) + sizeof(int))
357 #define NUM_CACHE_OVERHEAD \
358  MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc))
359 
360 #define DCH_CACHE_SIZE \
361  ((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
362 #define NUM_CACHE_SIZE \
363  ((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
364 
365 #define DCH_CACHE_ENTRIES 20
366 #define NUM_CACHE_ENTRIES 20
367 
368 typedef struct
369 {
371  char str[DCH_CACHE_SIZE + 1];
372  bool std;
373  bool valid;
374  int age;
375 } DCHCacheEntry;
376 
377 typedef struct
378 {
380  char str[NUM_CACHE_SIZE + 1];
381  bool valid;
382  int age;
384 } NUMCacheEntry;
385 
386 /* global cache for date/time format pictures */
388 static int n_DCHCache = 0; /* current number of entries */
389 static int DCHCounter = 0; /* aging-event counter */
390 
391 /* global cache for number format pictures */
393 static int n_NUMCache = 0; /* current number of entries */
394 static int NUMCounter = 0; /* aging-event counter */
395 
396 /* ----------
397  * For char->date/time conversion
398  * ----------
399  */
400 typedef struct
401 {
403  int hh,
404  pm,
405  mi,
406  ss,
408  d, /* stored as 1-7, Sunday = 1, 0 means missing */
409  dd,
411  mm,
412  ms,
414  bc,
415  ww,
416  w,
417  cc,
418  j,
419  us,
420  yysz, /* is it YY or YYYY ? */
421  clock, /* 12 or 24 hour clock? */
422  tzsign, /* +1, -1, or 0 if no TZH/TZM fields */
425  ff; /* fractional precision */
426  bool has_tz; /* was there a TZ field? */
427  int gmtoffset; /* GMT offset of fixed-offset zone abbrev */
428  pg_tz *tzp; /* pg_tz for dynamic abbrev */
429  char *abbrev; /* dynamic abbrev */
430 } TmFromChar;
431 
432 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
433 
434 struct fmt_tz /* do_to_timestamp's timezone info output */
435 {
436  bool has_tz; /* was there any TZ/TZH/TZM field? */
437  int gmtoffset; /* GMT offset in seconds */
438 };
439 
440 /* ----------
441  * Debug
442  * ----------
443  */
444 #ifdef DEBUG_TO_FROM_CHAR
445 #define DEBUG_TMFC(_X) \
446  elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d", \
447  (_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
448  (_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
449  (_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
450  (_X)->yysz, (_X)->clock)
451 #define DEBUG_TM(_X) \
452  elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nnisdst %d\nmon %d\n",\
453  (_X)->tm_sec, (_X)->tm_year,\
454  (_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\
455  (_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
456 #else
457 #define DEBUG_TMFC(_X)
458 #define DEBUG_TM(_X)
459 #endif
460 
461 /* ----------
462  * Datetime to char conversion
463  *
464  * To support intervals as well as timestamps, we use a custom "tm" struct
465  * that is almost like struct pg_tm, but has a 64-bit tm_hour field.
466  * We omit the tm_isdst and tm_zone fields, which are not used here.
467  * ----------
468  */
469 struct fmt_tm
470 {
471  int tm_sec;
472  int tm_min;
473  int64 tm_hour;
474  int tm_mday;
475  int tm_mon;
476  int tm_year;
477  int tm_wday;
478  int tm_yday;
479  long int tm_gmtoff;
480 };
481 
482 typedef struct TmToChar
483 {
484  struct fmt_tm tm; /* almost the classic 'tm' struct */
485  fsec_t fsec; /* fractional seconds */
486  const char *tzn; /* timezone */
488 
/* Accessors for the members of a TmToChar */
#define tmtcTm(_X)		(&(_X)->tm)
#define tmtcTzn(_X)		((_X)->tzn)
#define tmtcFsec(_X)	((_X)->fsec)

/*
 * Copy the broken-down time member by member.
 *
 * Note: this is used to copy pg_tm to fmt_tm, so it is not quite a bitwise
 * copy.
 */
#define COPY_tm(_DST, _SRC) \
	do \
	{ \
		(_DST)->tm_sec = (_SRC)->tm_sec; \
		(_DST)->tm_min = (_SRC)->tm_min; \
		(_DST)->tm_hour = (_SRC)->tm_hour; \
		(_DST)->tm_mday = (_SRC)->tm_mday; \
		(_DST)->tm_mon = (_SRC)->tm_mon; \
		(_DST)->tm_year = (_SRC)->tm_year; \
		(_DST)->tm_wday = (_SRC)->tm_wday; \
		(_DST)->tm_yday = (_SRC)->tm_yday; \
		(_DST)->tm_gmtoff = (_SRC)->tm_gmtoff; \
	} while (0)

/*
 * Clear the whole struct, then set month and day-of-month to 1.
 *
 * Caution: this is used to zero both pg_tm and fmt_tm structs.
 */
#define ZERO_tm(_X) \
	do \
	{ \
		memset(_X, 0, sizeof(*(_X))); \
		(_X)->tm_mon = 1; \
		(_X)->tm_mday = 1; \
	} while (0)

/* Reset a TmToChar: ZERO_tm() its tm member, clear fsec and tzn. */
#define ZERO_tmtc(_X) \
	do \
	{ \
		ZERO_tm(&(_X)->tm); \
		(_X)->fsec = 0; \
		(_X)->tzn = NULL; \
	} while (0)
520 
521 /*
522  * to_char(time) appears to to_char() as an interval, so this check
523  * is really for interval and time data types.
524  */
525 #define INVALID_FOR_INTERVAL \
526 do { \
527  if (is_interval) \
528  ereport(ERROR, \
529  (errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
530  errmsg("invalid format specification for an interval value"), \
531  errhint("Intervals are not tied to specific calendar dates."))); \
532 } while(0)
533 
534 /*****************************************************************************
535  * KeyWord definitions
536  *****************************************************************************/
537 
/* ----------
 * Suffix codes; FormatNode.suffix holds an OR of these bits
 * ----------
 */
#define DCH_S_FM	(1 << 0)
#define DCH_S_TH	(1 << 1)
#define DCH_S_th	(1 << 2)
#define DCH_S_SP	(1 << 3)
#define DCH_S_TM	(1 << 4)

/* ----------
 * Suffix tests: each yields 1 if the suffix is present in _s, else 0
 * (S_TH_TYPE yields TH_UPPER or TH_LOWER instead)
 * ----------
 */
#define S_THth(_s)		((((_s) & DCH_S_TH) != 0) || (((_s) & DCH_S_th) != 0))
#define S_TH(_s)		(((_s) & DCH_S_TH) != 0)
#define S_th(_s)		(((_s) & DCH_S_th) != 0)
#define S_TH_TYPE(_s)	(((_s) & DCH_S_TH) != 0 ? TH_UPPER : TH_LOWER)

/* Oracle toggles FM behavior, we don't; see docs. */
#define S_FM(_s)		(((_s) & DCH_S_FM) != 0)
#define S_SP(_s)		(((_s) & DCH_S_SP) != 0)
#define S_TM(_s)		(((_s) & DCH_S_TM) != 0)
561 
562 /* ----------
563  * Suffixes definition for DATE-TIME TO/FROM CHAR
564  * ----------
565  */
566 #define TM_SUFFIX_LEN 2
567 
568 static const KeySuffix DCH_suff[] = {
569  {"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
570  {"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
572  {"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
573  {"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
574  {"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
575  {"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
576  /* last */
577  {NULL, 0, 0, 0}
578 };
579 
580 
581 /* ----------
582  * Format-pictures (KeyWord).
583  *
584  * The KeyWord arrays are sorted alphabetically, *BUT* keywords that start
585  * alike are sorted from complicated to simple:
586  *
587  * (example: "DDD","DD","Day","D" )
588  *
589  * (This specific ordering is required by the sequential search, because the
590  * input has no explicit end of keyword: which keyword is in "HH12blabla",
591  * "HH" or "HH12"? "HH12" must be tried first, because "HH" also matches the
592  * string but is the wrong answer.)
593  *
594  * (!)
595  * - Position for the keyword is similar as position in the enum DCH/NUM_poz.
596  * (!)
597  *
598  * For fast search is used the 'int index[]', index is ascii table from position
599  * 32 (' ') to 126 (~), in this index is DCH_ / NUM_ enums for each ASCII
600  * position or -1 if char is not used in the KeyWord. Search example for
601  * string "MM":
602  * 1) see in index to index['M' - 32],
603  * 2) take keywords position (enum DCH_MI) from index
604  * 3) run sequential search in keywords[] from this position
605  *
606  * ----------
607  */
608 
609 typedef enum
610 {
631  DCH_FX, /* global suffix */
723 
724  /* last */
725  _DCH_last_
727 
728 typedef enum
729 {
766 
767  /* last */
768  _NUM_last_
770 
771 /* ----------
772  * KeyWords for DATE-TIME version
773  * ----------
774  */
775 static const KeyWord DCH_keywords[] = {
776 /* name, len, id, is_digit, date_mode */
777  {"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE}, /* A */
778  {"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
779  {"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
780  {"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
781  {"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE}, /* B */
782  {"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
783  {"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* C */
784  {"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE}, /* D */
785  {"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
786  {"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
787  {"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
788  {"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
789  {"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
790  {"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
791  {"FF1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* F */
792  {"FF2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
793  {"FF3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
794  {"FF4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
795  {"FF5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
796  {"FF6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
797  {"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
798  {"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* H */
799  {"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
800  {"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
801  {"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* I */
802  {"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
803  {"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
804  {"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
805  {"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
806  {"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
807  {"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
808  {"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
809  {"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* M */
810  {"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
811  {"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
812  {"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
813  {"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
814  {"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
815  {"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
816  {"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* O */
817  {"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE}, /* P */
818  {"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
819  {"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
820  {"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
821  {"SSSSS", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* S */
822  {"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
823  {"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
824  {"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* T */
825  {"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
826  {"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
827  {"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* U */
828  {"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* W */
829  {"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
830  {"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* Y */
831  {"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
832  {"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
833  {"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
834  {"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
835  {"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE}, /* a */
836  {"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
837  {"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
838  {"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
839  {"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE}, /* b */
840  {"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
841  {"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* c */
842  {"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE}, /* d */
843  {"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
844  {"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
845  {"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
846  {"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
847  {"ff1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* f */
848  {"ff2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
849  {"ff3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
850  {"ff4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
851  {"ff5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
852  {"ff6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
853  {"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
854  {"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* h */
855  {"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
856  {"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
857  {"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* i */
858  {"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
859  {"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
860  {"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
861  {"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
862  {"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
863  {"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
864  {"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
865  {"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* m */
866  {"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
867  {"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
868  {"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
869  {"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
870  {"of", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* o */
871  {"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE}, /* p */
872  {"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
873  {"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
874  {"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
875  {"sssss", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* s */
876  {"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
877  {"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
878  {"tzh", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* t */
879  {"tzm", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
880  {"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE},
881  {"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* u */
882  {"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* w */
883  {"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
884  {"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* y */
885  {"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
886  {"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
887  {"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
888  {"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
889 
890  /* last */
891  {NULL, 0, 0, 0, 0}
892 };
893 
894 /* ----------
895  * KeyWords for NUMBER version
896  *
897  * The is_digit and date_mode fields are not relevant here.
898  * ----------
899  */
900 static const KeyWord NUM_keywords[] = {
901 /* name, len, id is in Index */
902  {",", 1, NUM_COMMA}, /* , */
903  {".", 1, NUM_DEC}, /* . */
904  {"0", 1, NUM_0}, /* 0 */
905  {"9", 1, NUM_9}, /* 9 */
906  {"B", 1, NUM_B}, /* B */
907  {"C", 1, NUM_C}, /* C */
908  {"D", 1, NUM_D}, /* D */
909  {"EEEE", 4, NUM_E}, /* E */
910  {"FM", 2, NUM_FM}, /* F */
911  {"G", 1, NUM_G}, /* G */
912  {"L", 1, NUM_L}, /* L */
913  {"MI", 2, NUM_MI}, /* M */
914  {"PL", 2, NUM_PL}, /* P */
915  {"PR", 2, NUM_PR},
916  {"RN", 2, NUM_RN}, /* R */
917  {"SG", 2, NUM_SG}, /* S */
918  {"SP", 2, NUM_SP},
919  {"S", 1, NUM_S},
920  {"TH", 2, NUM_TH}, /* T */
921  {"V", 1, NUM_V}, /* V */
922  {"b", 1, NUM_B}, /* b */
923  {"c", 1, NUM_C}, /* c */
924  {"d", 1, NUM_D}, /* d */
925  {"eeee", 4, NUM_E}, /* e */
926  {"fm", 2, NUM_FM}, /* f */
927  {"g", 1, NUM_G}, /* g */
928  {"l", 1, NUM_L}, /* l */
929  {"mi", 2, NUM_MI}, /* m */
930  {"pl", 2, NUM_PL}, /* p */
931  {"pr", 2, NUM_PR},
932  {"rn", 2, NUM_rn}, /* r */
933  {"sg", 2, NUM_SG}, /* s */
934  {"sp", 2, NUM_SP},
935  {"s", 1, NUM_S},
936  {"th", 2, NUM_th}, /* t */
937  {"v", 1, NUM_V}, /* v */
938 
939  /* last */
940  {NULL, 0, 0}
941 };
942 
943 
944 /* ----------
945  * KeyWords index for DATE-TIME version
946  * ----------
947  */
948 static const int DCH_index[KeyWord_INDEX_SIZE] = {
949 /*
950 0 1 2 3 4 5 6 7 8 9
951 */
952  /*---- first 0..31 chars are skipped ----*/
953 
954  -1, -1, -1, -1, -1, -1, -1, -1,
955  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
956  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
957  -1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,
958  DCH_FF1, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF,
960  -1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,
961  DCH_day, -1, DCH_ff1, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,
963  -1, DCH_y_yyy, -1, -1, -1, -1
964 
965  /*---- chars over 126 are skipped ----*/
966 };
967 
968 /* ----------
969  * KeyWords index for NUMBER version
970  * ----------
971  */
972 static const int NUM_index[KeyWord_INDEX_SIZE] = {
973 /*
974 0 1 2 3 4 5 6 7 8 9
975 */
976  /*---- first 0..31 chars are skipped ----*/
977 
978  -1, -1, -1, -1, -1, -1, -1, -1,
979  -1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,
980  -1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,
981  -1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E,
982  NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,
983  NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,
984  -1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,
985  NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi,
986  -1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,
987  -1, -1, -1, -1, -1, -1
988 
989  /*---- chars over 126 are skipped ----*/
990 };
991 
992 /* ----------
993  * Number processor struct
994  * ----------
995  */
996 typedef struct NUMProc
997 {
999  NUMDesc *Num; /* number description */
1000 
1001  int sign, /* '-' or '+' */
1002  sign_wrote, /* was sign write */
1003  num_count, /* number of write digits */
1004  num_in, /* is inside number */
1005  num_curr, /* current position in number */
1006  out_pre_spaces, /* spaces before first digit */
1007 
1008  read_dec, /* to_number - was read dec. point */
1009  read_post, /* to_number - number of dec. digit */
1010  read_pre; /* to_number - number non-dec. digit */
1011 
1012  char *number, /* string with number */
1013  *number_p, /* pointer to current number position */
1014  *inout, /* in / out buffer */
1015  *inout_p, /* pointer to current inout position */
1016  *last_relevant, /* last relevant number after decimal point */
1017 
1018  *L_negative_sign, /* Locale */
1024 
1025 /* Return flags for DCH_from_char() */
1026 #define DCH_DATED 0x01
1027 #define DCH_TIMED 0x02
1028 #define DCH_ZONED 0x04
1029 
1030 /* ----------
1031  * Functions
1032  * ----------
1033  */
1034 static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
1035  const int *index);
1036 static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
1037 static bool is_separator_char(const char *str);
1038 static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
1039 static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1040  const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num);
1041 
1042 static void DCH_to_char(FormatNode *node, bool is_interval,
1043  TmToChar *in, char *out, Oid collid);
1044 static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
1045  Oid collid, bool std, Node *escontext);
1046 
1047 #ifdef DEBUG_TO_FROM_CHAR
1048 static void dump_index(const KeyWord *k, const int *index);
1049 static void dump_node(FormatNode *node, int max);
1050 #endif
1051 
1052 static const char *get_th(char *num, int type);
1053 static char *str_numth(char *dest, char *num, int type);
1054 static int adjust_partial_year_to_2020(int year);
1055 static int strspace_len(const char *str);
1056 static bool from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode,
1057  Node *escontext);
1058 static bool from_char_set_int(int *dest, const int value, const FormatNode *node,
1059  Node *escontext);
1060 static int from_char_parse_int_len(int *dest, const char **src, const int len,
1061  FormatNode *node, Node *escontext);
1062 static int from_char_parse_int(int *dest, const char **src, FormatNode *node,
1063  Node *escontext);
1064 static int seq_search_ascii(const char *name, const char *const *array, int *len);
1065 static int seq_search_localized(const char *name, char **array, int *len,
1066  Oid collid);
1067 static bool from_char_seq_search(int *dest, const char **src,
1068  const char *const *array,
1069  char **localized_array, Oid collid,
1070  FormatNode *node, Node *escontext);
1071 static bool do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
1072  struct pg_tm *tm, fsec_t *fsec, struct fmt_tz *tz,
1073  int *fprec, uint32 *flags, Node *escontext);
1074 static char *fill_str(char *str, int c, int max);
1075 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
1076 static char *int_to_roman(int number);
1077 static void NUM_prepare_locale(NUMProc *Np);
1078 static char *get_last_relevant_decnum(char *num);
1079 static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
1080 static void NUM_numpart_to_char(NUMProc *Np, int id);
1081 static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
1082  char *number, int input_len, int to_char_out_pre_spaces,
1083  int sign, bool is_to_char, Oid collid);
1084 static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std);
1085 static DCHCacheEntry *DCH_cache_search(const char *str, bool std);
1086 static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std);
1087 static NUMCacheEntry *NUM_cache_getnew(const char *str);
1088 static NUMCacheEntry *NUM_cache_search(const char *str);
1089 static NUMCacheEntry *NUM_cache_fetch(const char *str);
1090 
1091 
1092 /* ----------
1093  * Fast sequential search, use index for data selection which
1094  * go to seq. cycle (it is very fast for unwanted strings)
1095  * (can't be used binary search in format parsing)
1096  * ----------
1097  */
1098 static const KeyWord *
1099 index_seq_search(const char *str, const KeyWord *kw, const int *index)
1100 {
1101  int poz;
1102 
1103  if (!KeyWord_INDEX_FILTER(*str))
1104  return NULL;
1105 
1106  if ((poz = *(index + (*str - ' '))) > -1)
1107  {
1108  const KeyWord *k = kw + poz;
1109 
1110  do
1111  {
1112  if (strncmp(str, k->name, k->len) == 0)
1113  return k;
1114  k++;
1115  if (!k->name)
1116  return NULL;
1117  } while (*str == *k->name);
1118  }
1119  return NULL;
1120 }
1121 
1122 static const KeySuffix *
1123 suff_search(const char *str, const KeySuffix *suf, int type)
1124 {
1125  const KeySuffix *s;
1126 
1127  for (s = suf; s->name != NULL; s++)
1128  {
1129  if (s->type != type)
1130  continue;
1131 
1132  if (strncmp(str, s->name, s->len) == 0)
1133  return s;
1134  }
1135  return NULL;
1136 }
1137 
/*
 * Return true if the first byte of "str" is an ASCII printable character
 * that is neither a letter nor a digit (and not a space).
 *
 * Only the first byte is examined; the terminating NUL yields false.
 */
static bool
is_separator_char(const char *str)
{
	/* ASCII printable character, but not letter or digit */
	return (*str > 0x20 && *str < 0x7F &&
			!(*str >= 'A' && *str <= 'Z') &&
			!(*str >= 'a' && *str <= 'z') &&
			!(*str >= '0' && *str <= '9'));
}
1147 
1148 /* ----------
1149  * Prepare NUMDesc (number description struct) via FormatNode struct
1150  * ----------
1151  */
1152 static void
1154 {
1155  if (n->type != NODE_TYPE_ACTION)
1156  return;
1157 
1158  if (IS_EEEE(num) && n->key->id != NUM_E)
1159  ereport(ERROR,
1160  (errcode(ERRCODE_SYNTAX_ERROR),
1161  errmsg("\"EEEE\" must be the last pattern used")));
1162 
1163  switch (n->key->id)
1164  {
1165  case NUM_9:
1166  if (IS_BRACKET(num))
1167  ereport(ERROR,
1168  (errcode(ERRCODE_SYNTAX_ERROR),
1169  errmsg("\"9\" must be ahead of \"PR\"")));
1170  if (IS_MULTI(num))
1171  {
1172  ++num->multi;
1173  break;
1174  }
1175  if (IS_DECIMAL(num))
1176  ++num->post;
1177  else
1178  ++num->pre;
1179  break;
1180 
1181  case NUM_0:
1182  if (IS_BRACKET(num))
1183  ereport(ERROR,
1184  (errcode(ERRCODE_SYNTAX_ERROR),
1185  errmsg("\"0\" must be ahead of \"PR\"")));
1186  if (!IS_ZERO(num) && !IS_DECIMAL(num))
1187  {
1188  num->flag |= NUM_F_ZERO;
1189  num->zero_start = num->pre + 1;
1190  }
1191  if (!IS_DECIMAL(num))
1192  ++num->pre;
1193  else
1194  ++num->post;
1195 
1196  num->zero_end = num->pre + num->post;
1197  break;
1198 
1199  case NUM_B:
1200  if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
1201  num->flag |= NUM_F_BLANK;
1202  break;
1203 
1204  case NUM_D:
1205  num->flag |= NUM_F_LDECIMAL;
1206  num->need_locale = true;
1207  /* FALLTHROUGH */
1208  case NUM_DEC:
1209  if (IS_DECIMAL(num))
1210  ereport(ERROR,
1211  (errcode(ERRCODE_SYNTAX_ERROR),
1212  errmsg("multiple decimal points")));
1213  if (IS_MULTI(num))
1214  ereport(ERROR,
1215  (errcode(ERRCODE_SYNTAX_ERROR),
1216  errmsg("cannot use \"V\" and decimal point together")));
1217  num->flag |= NUM_F_DECIMAL;
1218  break;
1219 
1220  case NUM_FM:
1221  num->flag |= NUM_F_FILLMODE;
1222  break;
1223 
1224  case NUM_S:
1225  if (IS_LSIGN(num))
1226  ereport(ERROR,
1227  (errcode(ERRCODE_SYNTAX_ERROR),
1228  errmsg("cannot use \"S\" twice")));
1229  if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
1230  ereport(ERROR,
1231  (errcode(ERRCODE_SYNTAX_ERROR),
1232  errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
1233  if (!IS_DECIMAL(num))
1234  {
1235  num->lsign = NUM_LSIGN_PRE;
1236  num->pre_lsign_num = num->pre;
1237  num->need_locale = true;
1238  num->flag |= NUM_F_LSIGN;
1239  }
1240  else if (num->lsign == NUM_LSIGN_NONE)
1241  {
1242  num->lsign = NUM_LSIGN_POST;
1243  num->need_locale = true;
1244  num->flag |= NUM_F_LSIGN;
1245  }
1246  break;
1247 
1248  case NUM_MI:
1249  if (IS_LSIGN(num))
1250  ereport(ERROR,
1251  (errcode(ERRCODE_SYNTAX_ERROR),
1252  errmsg("cannot use \"S\" and \"MI\" together")));
1253  num->flag |= NUM_F_MINUS;
1254  if (IS_DECIMAL(num))
1255  num->flag |= NUM_F_MINUS_POST;
1256  break;
1257 
1258  case NUM_PL:
1259  if (IS_LSIGN(num))
1260  ereport(ERROR,
1261  (errcode(ERRCODE_SYNTAX_ERROR),
1262  errmsg("cannot use \"S\" and \"PL\" together")));
1263  num->flag |= NUM_F_PLUS;
1264  if (IS_DECIMAL(num))
1265  num->flag |= NUM_F_PLUS_POST;
1266  break;
1267 
1268  case NUM_SG:
1269  if (IS_LSIGN(num))
1270  ereport(ERROR,
1271  (errcode(ERRCODE_SYNTAX_ERROR),
1272  errmsg("cannot use \"S\" and \"SG\" together")));
1273  num->flag |= NUM_F_MINUS;
1274  num->flag |= NUM_F_PLUS;
1275  break;
1276 
1277  case NUM_PR:
1278  if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
1279  ereport(ERROR,
1280  (errcode(ERRCODE_SYNTAX_ERROR),
1281  errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
1282  num->flag |= NUM_F_BRACKET;
1283  break;
1284 
1285  case NUM_rn:
1286  case NUM_RN:
1287  num->flag |= NUM_F_ROMAN;
1288  break;
1289 
1290  case NUM_L:
1291  case NUM_G:
1292  num->need_locale = true;
1293  break;
1294 
1295  case NUM_V:
1296  if (IS_DECIMAL(num))
1297  ereport(ERROR,
1298  (errcode(ERRCODE_SYNTAX_ERROR),
1299  errmsg("cannot use \"V\" and decimal point together")));
1300  num->flag |= NUM_F_MULTI;
1301  break;
1302 
1303  case NUM_E:
1304  if (IS_EEEE(num))
1305  ereport(ERROR,
1306  (errcode(ERRCODE_SYNTAX_ERROR),
1307  errmsg("cannot use \"EEEE\" twice")));
1308  if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
1309  IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
1310  IS_ROMAN(num) || IS_MULTI(num))
1311  ereport(ERROR,
1312  (errcode(ERRCODE_SYNTAX_ERROR),
1313  errmsg("\"EEEE\" is incompatible with other formats"),
1314  errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
1315  num->flag |= NUM_F_EEEE;
1316  break;
1317  }
1318 }
1319 
1320 /* ----------
1321  * Format parser, search small keywords and keyword's suffixes, and make
1322  * format-node tree.
1323  *
1324  * for DATE-TIME & NUMBER version
1325  * ----------
1326  */
1327 static void
1328 parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1329  const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num)
1330 {
1331  FormatNode *n;
1332 
1333 #ifdef DEBUG_TO_FROM_CHAR
1334  elog(DEBUG_elog_output, "to_char/number(): run parser");
1335 #endif
1336 
1337  n = node;
1338 
1339  while (*str)
1340  {
1341  int suffix = 0;
1342  const KeySuffix *s;
1343 
1344  /*
1345  * Prefix
1346  */
1347  if ((flags & DCH_FLAG) &&
1348  (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
1349  {
1350  suffix |= s->id;
1351  if (s->len)
1352  str += s->len;
1353  }
1354 
1355  /*
1356  * Keyword
1357  */
1358  if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
1359  {
1360  n->type = NODE_TYPE_ACTION;
1361  n->suffix = suffix;
1362  if (n->key->len)
1363  str += n->key->len;
1364 
1365  /*
1366  * NUM version: Prepare global NUMDesc struct
1367  */
1368  if (flags & NUM_FLAG)
1369  NUMDesc_prepare(Num, n);
1370 
1371  /*
1372  * Postfix
1373  */
1374  if ((flags & DCH_FLAG) && *str &&
1375  (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
1376  {
1377  n->suffix |= s->id;
1378  if (s->len)
1379  str += s->len;
1380  }
1381 
1382  n++;
1383  }
1384  else if (*str)
1385  {
1386  int chlen;
1387 
1388  if ((flags & STD_FLAG) && *str != '"')
1389  {
1390  /*
1391  * Standard mode, allow only following separators: "-./,':; ".
1392  * However, we support double quotes even in standard mode
1393  * (see below). This is our extension of standard mode.
1394  */
1395  if (strchr("-./,':; ", *str) == NULL)
1396  ereport(ERROR,
1397  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
1398  errmsg("invalid datetime format separator: \"%s\"",
1399  pnstrdup(str, pg_mblen(str)))));
1400 
1401  if (*str == ' ')
1402  n->type = NODE_TYPE_SPACE;
1403  else
1405 
1406  n->character[0] = *str;
1407  n->character[1] = '\0';
1408  n->key = NULL;
1409  n->suffix = 0;
1410  n++;
1411  str++;
1412  }
1413  else if (*str == '"')
1414  {
1415  /*
1416  * Process double-quoted literal string, if any
1417  */
1418  str++;
1419  while (*str)
1420  {
1421  if (*str == '"')
1422  {
1423  str++;
1424  break;
1425  }
1426  /* backslash quotes the next character, if any */
1427  if (*str == '\\' && *(str + 1))
1428  str++;
1429  chlen = pg_mblen(str);
1430  n->type = NODE_TYPE_CHAR;
1431  memcpy(n->character, str, chlen);
1432  n->character[chlen] = '\0';
1433  n->key = NULL;
1434  n->suffix = 0;
1435  n++;
1436  str += chlen;
1437  }
1438  }
1439  else
1440  {
1441  /*
1442  * Outside double-quoted strings, backslash is only special if
1443  * it immediately precedes a double quote.
1444  */
1445  if (*str == '\\' && *(str + 1) == '"')
1446  str++;
1447  chlen = pg_mblen(str);
1448 
1449  if ((flags & DCH_FLAG) && is_separator_char(str))
1451  else if (isspace((unsigned char) *str))
1452  n->type = NODE_TYPE_SPACE;
1453  else
1454  n->type = NODE_TYPE_CHAR;
1455 
1456  memcpy(n->character, str, chlen);
1457  n->character[chlen] = '\0';
1458  n->key = NULL;
1459  n->suffix = 0;
1460  n++;
1461  str += chlen;
1462  }
1463  }
1464  }
1465 
1466  n->type = NODE_TYPE_END;
1467  n->suffix = 0;
1468 }
1469 
/* ----------
 * DEBUG: print the parsed FormatNode array, one elog line per node.
 *
 * Stops at the NODE_TYPE_END node, or after the node at position "max",
 * whichever comes first.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR

#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
#define DUMP_FM(_suf)	(S_FM(_suf) ? "FM" : " ")

static void
dump_node(FormatNode *node, int max)
{
	FormatNode *cur = node;
	int			pos = 0;

	elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");

	while (pos <= max)
	{
		switch (cur->type)
		{
			case NODE_TYPE_ACTION:
				elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
					 pos, cur->key->name,
					 DUMP_THth(cur->suffix), DUMP_FM(cur->suffix));
				break;

			case NODE_TYPE_CHAR:
				elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
					 pos, cur->character);
				break;

			case NODE_TYPE_END:
				elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", pos);
				return;

			default:
				elog(DEBUG_elog_output, "%d:\t unknown NODE!", pos);
				break;
		}

		cur++;
		pos++;
	}
}
#endif							/* DEBUG */
1505 
1506 /*****************************************************************************
1507  * Private utils
1508  *****************************************************************************/
1509 
1510 /* ----------
1511  * Return ST/ND/RD/TH for simple (1..9) numbers
1512  * type --> 0 upper, 1 lower
1513  * ----------
1514  */
1515 static const char *
1516 get_th(char *num, int type)
1517 {
1518  int len = strlen(num),
1519  last;
1520 
1521  last = *(num + (len - 1));
1522  if (!isdigit((unsigned char) last))
1523  ereport(ERROR,
1524  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1525  errmsg("\"%s\" is not a number", num)));
1526 
1527  /*
1528  * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
1529  * 'ST/st', 'ND/nd', 'RD/rd', respectively
1530  */
1531  if ((len > 1) && (num[len - 2] == '1'))
1532  last = 0;
1533 
1534  switch (last)
1535  {
1536  case '1':
1537  if (type == TH_UPPER)
1538  return numTH[0];
1539  return numth[0];
1540  case '2':
1541  if (type == TH_UPPER)
1542  return numTH[1];
1543  return numth[1];
1544  case '3':
1545  if (type == TH_UPPER)
1546  return numTH[2];
1547  return numth[2];
1548  default:
1549  if (type == TH_UPPER)
1550  return numTH[3];
1551  return numth[3];
1552  }
1553 }
1554 
/* ----------
 * Convert string-number to ordinal string-number
 *
 * Copies "num" into "dest" (unless they are the same buffer) and appends
 * the ordinal suffix chosen by get_th(num, type).  "dest" must have room
 * for the number plus the suffix.  Returns "dest".
 * ----------
 */
static char *
str_numth(char *dest, char *num, int type)
{
	const char *suffix;

	if (dest != num)
		strcpy(dest, num);

	suffix = get_th(num, type);
	strcat(dest, suffix);

	return dest;
}
1568 
1569 /*****************************************************************************
1570  * upper/lower/initcap functions
1571  *****************************************************************************/
1572 
1573 #ifdef USE_ICU
1574 
1575 typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
1576  const UChar *src, int32_t srcLength,
1577  const char *locale,
1578  UErrorCode *pErrorCode);
1579 
1580 static int32_t
1581 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
1582  UChar **buff_dest, UChar *buff_source, int32_t len_source)
1583 {
1584  UErrorCode status;
1585  int32_t len_dest;
1586 
1587  len_dest = len_source; /* try first with same length */
1588  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1589  status = U_ZERO_ERROR;
1590  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1591  mylocale->info.icu.locale, &status);
1592  if (status == U_BUFFER_OVERFLOW_ERROR)
1593  {
1594  /* try again with adjusted length */
1595  pfree(*buff_dest);
1596  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1597  status = U_ZERO_ERROR;
1598  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1599  mylocale->info.icu.locale, &status);
1600  }
1601  if (U_FAILURE(status))
1602  ereport(ERROR,
1603  (errmsg("case conversion failed: %s", u_errorName(status))));
1604  return len_dest;
1605 }
1606 
1607 static int32_t
1608 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
1609  const UChar *src, int32_t srcLength,
1610  const char *locale,
1611  UErrorCode *pErrorCode)
1612 {
1613  return u_strToTitle(dest, destCapacity, src, srcLength,
1614  NULL, locale, pErrorCode);
1615 }
1616 
1617 #endif /* USE_ICU */
1618 
1619 /*
1620  * If the system provides the needed functions for wide-character manipulation
1621  * (which are all standardized by C99), then we implement upper/lower/initcap
1622  * using wide-character functions, if necessary. Otherwise we use the
1623  * traditional <ctype.h> functions, which of course will not work as desired
1624  * in multibyte character sets. Note that in either case we are effectively
1625  * assuming that the database character encoding matches the encoding implied
1626  * by LC_CTYPE.
1627  */
1628 
1629 /*
1630  * collation-aware, wide-character-aware lower function
1631  *
1632  * We pass the number of bytes so we can pass varlena and char*
1633  * to this function. The result is a palloc'd, null-terminated string.
1634  */
1635 char *
1636 str_tolower(const char *buff, size_t nbytes, Oid collid)
1637 {
1638  char *result;
1639 
1640  if (!buff)
1641  return NULL;
1642 
1643  if (!OidIsValid(collid))
1644  {
1645  /*
1646  * This typically means that the parser could not resolve a conflict
1647  * of implicit collations, so report it that way.
1648  */
1649  ereport(ERROR,
1650  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1651  errmsg("could not determine which collation to use for %s function",
1652  "lower()"),
1653  errhint("Use the COLLATE clause to set the collation explicitly.")));
1654  }
1655 
1656  /* C/POSIX collations use this path regardless of database encoding */
1657  if (lc_ctype_is_c(collid))
1658  {
1659  result = asc_tolower(buff, nbytes);
1660  }
1661  else
1662  {
1663  pg_locale_t mylocale;
1664 
1665  mylocale = pg_newlocale_from_collation(collid);
1666 
1667 #ifdef USE_ICU
1668  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1669  {
1670  int32_t len_uchar;
1671  int32_t len_conv;
1672  UChar *buff_uchar;
1673  UChar *buff_conv;
1674 
1675  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1676  len_conv = icu_convert_case(u_strToLower, mylocale,
1677  &buff_conv, buff_uchar, len_uchar);
1678  icu_from_uchar(&result, buff_conv, len_conv);
1679  pfree(buff_uchar);
1680  pfree(buff_conv);
1681  }
1682  else
1683 #endif
1684  if (mylocale && mylocale->provider == COLLPROVIDER_BUILTIN)
1685  {
1686  const char *src = buff;
1687  size_t srclen = nbytes;
1688  size_t dstsize;
1689  char *dst;
1690  size_t needed;
1691 
1693 
1694  /* first try buffer of equal size plus terminating NUL */
1695  dstsize = srclen + 1;
1696  dst = palloc(dstsize);
1697 
1698  needed = unicode_strlower(dst, dstsize, src, srclen);
1699  if (needed + 1 > dstsize)
1700  {
1701  /* grow buffer if needed and retry */
1702  dstsize = needed + 1;
1703  dst = repalloc(dst, dstsize);
1704  needed = unicode_strlower(dst, dstsize, src, srclen);
1705  Assert(needed + 1 == dstsize);
1706  }
1707 
1708  Assert(dst[needed] == '\0');
1709  result = dst;
1710  }
1711  else
1712  {
1713  Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC);
1714 
1716  {
1717  wchar_t *workspace;
1718  size_t curr_char;
1719  size_t result_size;
1720 
1721  /* Overflow paranoia */
1722  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1723  ereport(ERROR,
1724  (errcode(ERRCODE_OUT_OF_MEMORY),
1725  errmsg("out of memory")));
1726 
1727  /* Output workspace cannot have more codes than input bytes */
1728  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1729 
1730  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1731 
1732  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1733  {
1734  if (mylocale)
1735  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1736  else
1737  workspace[curr_char] = towlower(workspace[curr_char]);
1738  }
1739 
1740  /*
1741  * Make result large enough; case change might change number
1742  * of bytes
1743  */
1744  result_size = curr_char * pg_database_encoding_max_length() + 1;
1745  result = palloc(result_size);
1746 
1747  wchar2char(result, workspace, result_size, mylocale);
1748  pfree(workspace);
1749  }
1750  else
1751  {
1752  char *p;
1753 
1754  result = pnstrdup(buff, nbytes);
1755 
1756  /*
1757  * Note: we assume that tolower_l() will not be so broken as
1758  * to need an isupper_l() guard test. When using the default
1759  * collation, we apply the traditional Postgres behavior that
1760  * forces ASCII-style treatment of I/i, but in non-default
1761  * collations you get exactly what the collation says.
1762  */
1763  for (p = result; *p; p++)
1764  {
1765  if (mylocale)
1766  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1767  else
1768  *p = pg_tolower((unsigned char) *p);
1769  }
1770  }
1771  }
1772  }
1773 
1774  return result;
1775 }
1776 
1777 /*
1778  * collation-aware, wide-character-aware upper function
1779  *
1780  * We pass the number of bytes so we can pass varlena and char*
1781  * to this function. The result is a palloc'd, null-terminated string.
1782  */
1783 char *
1784 str_toupper(const char *buff, size_t nbytes, Oid collid)
1785 {
1786  char *result;
1787 
1788  if (!buff)
1789  return NULL;
1790 
1791  if (!OidIsValid(collid))
1792  {
1793  /*
1794  * This typically means that the parser could not resolve a conflict
1795  * of implicit collations, so report it that way.
1796  */
1797  ereport(ERROR,
1798  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1799  errmsg("could not determine which collation to use for %s function",
1800  "upper()"),
1801  errhint("Use the COLLATE clause to set the collation explicitly.")));
1802  }
1803 
1804  /* C/POSIX collations use this path regardless of database encoding */
1805  if (lc_ctype_is_c(collid))
1806  {
1807  result = asc_toupper(buff, nbytes);
1808  }
1809  else
1810  {
1811  pg_locale_t mylocale;
1812 
1813  mylocale = pg_newlocale_from_collation(collid);
1814 
1815 #ifdef USE_ICU
1816  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1817  {
1818  int32_t len_uchar,
1819  len_conv;
1820  UChar *buff_uchar;
1821  UChar *buff_conv;
1822 
1823  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1824  len_conv = icu_convert_case(u_strToUpper, mylocale,
1825  &buff_conv, buff_uchar, len_uchar);
1826  icu_from_uchar(&result, buff_conv, len_conv);
1827  pfree(buff_uchar);
1828  pfree(buff_conv);
1829  }
1830  else
1831 #endif
1832  if (mylocale && mylocale->provider == COLLPROVIDER_BUILTIN)
1833  {
1834  const char *src = buff;
1835  size_t srclen = nbytes;
1836  size_t dstsize;
1837  char *dst;
1838  size_t needed;
1839 
1841 
1842  /* first try buffer of equal size plus terminating NUL */
1843  dstsize = srclen + 1;
1844  dst = palloc(dstsize);
1845 
1846  needed = unicode_strupper(dst, dstsize, src, srclen);
1847  if (needed + 1 > dstsize)
1848  {
1849  /* grow buffer if needed and retry */
1850  dstsize = needed + 1;
1851  dst = repalloc(dst, dstsize);
1852  needed = unicode_strupper(dst, dstsize, src, srclen);
1853  Assert(needed + 1 == dstsize);
1854  }
1855 
1856  Assert(dst[needed] == '\0');
1857  result = dst;
1858  }
1859  else
1860  {
1861  Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC);
1862 
1864  {
1865  wchar_t *workspace;
1866  size_t curr_char;
1867  size_t result_size;
1868 
1869  /* Overflow paranoia */
1870  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1871  ereport(ERROR,
1872  (errcode(ERRCODE_OUT_OF_MEMORY),
1873  errmsg("out of memory")));
1874 
1875  /* Output workspace cannot have more codes than input bytes */
1876  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1877 
1878  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1879 
1880  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1881  {
1882  if (mylocale)
1883  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1884  else
1885  workspace[curr_char] = towupper(workspace[curr_char]);
1886  }
1887 
1888  /*
1889  * Make result large enough; case change might change number
1890  * of bytes
1891  */
1892  result_size = curr_char * pg_database_encoding_max_length() + 1;
1893  result = palloc(result_size);
1894 
1895  wchar2char(result, workspace, result_size, mylocale);
1896  pfree(workspace);
1897  }
1898  else
1899  {
1900  char *p;
1901 
1902  result = pnstrdup(buff, nbytes);
1903 
1904  /*
1905  * Note: we assume that toupper_l() will not be so broken as
1906  * to need an islower_l() guard test. When using the default
1907  * collation, we apply the traditional Postgres behavior that
1908  * forces ASCII-style treatment of I/i, but in non-default
1909  * collations you get exactly what the collation says.
1910  */
1911  for (p = result; *p; p++)
1912  {
1913  if (mylocale)
1914  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1915  else
1916  *p = pg_toupper((unsigned char) *p);
1917  }
1918  }
1919  }
1920  }
1921 
1922  return result;
1923 }
1924 
/* Iteration state for initcap_wbnext() */
struct WordBoundaryState
{
	const char *str;			/* string being scanned */
	size_t		len;			/* length of str, in bytes */
	size_t		offset;			/* byte offset of the next character to examine */
	bool		init;			/* has the first boundary been reported yet? */
	bool		prev_alnum;		/* alnum-ness recorded at the last reported
								 * boundary */
};
1933 
1934 /*
1935  * Simple word boundary iterator that draws boundaries each time the result of
1936  * pg_u_isalnum() changes.
1937  */
1938 static size_t
1940 {
1941  struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
1942 
1943  while (wbstate->offset < wbstate->len &&
1944  wbstate->str[wbstate->offset] != '\0')
1945  {
1946  pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str +
1947  wbstate->offset);
1948  bool curr_alnum = pg_u_isalnum(u, true);
1949 
1950  if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
1951  {
1952  size_t prev_offset = wbstate->offset;
1953 
1954  wbstate->init = true;
1955  wbstate->offset += unicode_utf8len(u);
1956  wbstate->prev_alnum = curr_alnum;
1957  return prev_offset;
1958  }
1959 
1960  wbstate->offset += unicode_utf8len(u);
1961  }
1962 
1963  return wbstate->len;
1964 }
1965 
1966 /*
1967  * collation-aware, wide-character-aware initcap function
1968  *
1969  * We pass the number of bytes so we can pass varlena and char*
1970  * to this function. The result is a palloc'd, null-terminated string.
1971  */
1972 char *
1973 str_initcap(const char *buff, size_t nbytes, Oid collid)
1974 {
1975  char *result;
1976  int wasalnum = false;
1977 
1978  if (!buff)
1979  return NULL;
1980 
1981  if (!OidIsValid(collid))
1982  {
1983  /*
1984  * This typically means that the parser could not resolve a conflict
1985  * of implicit collations, so report it that way.
1986  */
1987  ereport(ERROR,
1988  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1989  errmsg("could not determine which collation to use for %s function",
1990  "initcap()"),
1991  errhint("Use the COLLATE clause to set the collation explicitly.")));
1992  }
1993 
1994  /* C/POSIX collations use this path regardless of database encoding */
1995  if (lc_ctype_is_c(collid))
1996  {
1997  result = asc_initcap(buff, nbytes);
1998  }
1999  else
2000  {
2001  pg_locale_t mylocale;
2002 
2003  mylocale = pg_newlocale_from_collation(collid);
2004 
2005 #ifdef USE_ICU
2006  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
2007  {
2008  int32_t len_uchar,
2009  len_conv;
2010  UChar *buff_uchar;
2011  UChar *buff_conv;
2012 
2013  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
2014  len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
2015  &buff_conv, buff_uchar, len_uchar);
2016  icu_from_uchar(&result, buff_conv, len_conv);
2017  pfree(buff_uchar);
2018  pfree(buff_conv);
2019  }
2020  else
2021 #endif
2022  if (mylocale && mylocale->provider == COLLPROVIDER_BUILTIN)
2023  {
2024  const char *src = buff;
2025  size_t srclen = nbytes;
2026  size_t dstsize;
2027  char *dst;
2028  size_t needed;
2029  struct WordBoundaryState wbstate = {
2030  .str = src,
2031  .len = srclen,
2032  .offset = 0,
2033  .init = false,
2034  .prev_alnum = false,
2035  };
2036 
2038 
2039  /* first try buffer of equal size plus terminating NUL */
2040  dstsize = srclen + 1;
2041  dst = palloc(dstsize);
2042 
2043  needed = unicode_strtitle(dst, dstsize, src, srclen,
2044  initcap_wbnext, &wbstate);
2045  if (needed + 1 > dstsize)
2046  {
2047  /* reset iterator */
2048  wbstate.offset = 0;
2049  wbstate.init = false;
2050 
2051  /* grow buffer if needed and retry */
2052  dstsize = needed + 1;
2053  dst = repalloc(dst, dstsize);
2054  needed = unicode_strtitle(dst, dstsize, src, srclen,
2055  initcap_wbnext, &wbstate);
2056  Assert(needed + 1 == dstsize);
2057  }
2058 
2059  result = dst;
2060  }
2061  else
2062  {
2063  Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC);
2064 
2066  {
2067  wchar_t *workspace;
2068  size_t curr_char;
2069  size_t result_size;
2070 
2071  /* Overflow paranoia */
2072  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
2073  ereport(ERROR,
2074  (errcode(ERRCODE_OUT_OF_MEMORY),
2075  errmsg("out of memory")));
2076 
2077  /* Output workspace cannot have more codes than input bytes */
2078  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
2079 
2080  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
2081 
2082  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
2083  {
2084  if (mylocale)
2085  {
2086  if (wasalnum)
2087  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
2088  else
2089  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
2090  wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
2091  }
2092  else
2093  {
2094  if (wasalnum)
2095  workspace[curr_char] = towlower(workspace[curr_char]);
2096  else
2097  workspace[curr_char] = towupper(workspace[curr_char]);
2098  wasalnum = iswalnum(workspace[curr_char]);
2099  }
2100  }
2101 
2102  /*
2103  * Make result large enough; case change might change number
2104  * of bytes
2105  */
2106  result_size = curr_char * pg_database_encoding_max_length() + 1;
2107  result = palloc(result_size);
2108 
2109  wchar2char(result, workspace, result_size, mylocale);
2110  pfree(workspace);
2111  }
2112  else
2113  {
2114  char *p;
2115 
2116  result = pnstrdup(buff, nbytes);
2117 
2118  /*
2119  * Note: we assume that toupper_l()/tolower_l() will not be so
2120  * broken as to need guard tests. When using the default
2121  * collation, we apply the traditional Postgres behavior that
2122  * forces ASCII-style treatment of I/i, but in non-default
2123  * collations you get exactly what the collation says.
2124  */
2125  for (p = result; *p; p++)
2126  {
2127  if (mylocale)
2128  {
2129  if (wasalnum)
2130  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
2131  else
2132  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
2133  wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
2134  }
2135  else
2136  {
2137  if (wasalnum)
2138  *p = pg_tolower((unsigned char) *p);
2139  else
2140  *p = pg_toupper((unsigned char) *p);
2141  wasalnum = isalnum((unsigned char) *p);
2142  }
2143  }
2144  }
2145  }
2146  }
2147 
2148  return result;
2149 }
2150 
/*
 * ASCII-only lower function
 *
 * The byte count is passed explicitly so that both varlena data and plain
 * char* strings can be handled.  Returns a palloc'd, null-terminated copy
 * with each byte run through pg_ascii_tolower(), or NULL if buff is NULL.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *out;
	size_t		i;

	if (buff == NULL)
		return NULL;

	out = pnstrdup(buff, nbytes);

	for (i = 0; out[i] != '\0'; i++)
		out[i] = pg_ascii_tolower((unsigned char) out[i]);

	return out;
}
2173 
/*
 * ASCII-only upper function
 *
 * The byte count is passed explicitly so that both varlena data and plain
 * char* strings can be handled.  Returns a palloc'd, null-terminated copy
 * with each byte run through pg_ascii_toupper(), or NULL if buff is NULL.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *out;
	size_t		i;

	if (buff == NULL)
		return NULL;

	out = pnstrdup(buff, nbytes);

	for (i = 0; out[i] != '\0'; i++)
		out[i] = pg_ascii_toupper((unsigned char) out[i]);

	return out;
}
2196 
/*
 * ASCII-only initcap function
 *
 * The byte count is passed explicitly so that both varlena data and plain
 * char* strings can be handled.  Returns a palloc'd, null-terminated copy
 * in which a byte is upper-cased when the preceding byte was not an ASCII
 * letter or digit and lower-cased otherwise, or NULL if buff is NULL.
 */
char *
asc_initcap(const char *buff, size_t nbytes)
{
	char	   *out;
	size_t		i;
	int			in_word = false;

	if (buff == NULL)
		return NULL;

	out = pnstrdup(buff, nbytes);

	for (i = 0; out[i] != '\0'; i++)
	{
		char		c;

		if (in_word)
			c = pg_ascii_tolower((unsigned char) out[i]);
		else
			c = pg_ascii_toupper((unsigned char) out[i]);
		out[i] = c;

		/* we don't trust isalnum() here */
		in_word = ((c >= '0' && c <= '9') ||
				   (c >= 'A' && c <= 'Z') ||
				   (c >= 'a' && c <= 'z'));
	}

	return out;
}
2231 
2232 /* convenience routines for when the input is null-terminated */
2233 
2234 static char *
2235 str_tolower_z(const char *buff, Oid collid)
2236 {
2237  return str_tolower(buff, strlen(buff), collid);
2238 }
2239 
2240 static char *
2241 str_toupper_z(const char *buff, Oid collid)
2242 {
2243  return str_toupper(buff, strlen(buff), collid);
2244 }
2245 
2246 static char *
2247 str_initcap_z(const char *buff, Oid collid)
2248 {
2249  return str_initcap(buff, strlen(buff), collid);
2250 }
2251 
/* asc_tolower() for null-terminated input: the length is taken with strlen() */
static char *
asc_tolower_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_tolower(buff, nbytes);
}
2257 
/* asc_toupper() for null-terminated input: the length is taken with strlen() */
static char *
asc_toupper_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_toupper(buff, nbytes);
}
2263 
2264 /* asc_initcap_z is not currently needed */
2265 
2266 
/* ----------
 * Skip TH / th in FROM_CHAR
 *
 * If S_THth is on, advance ptr over up to two characters (each measured
 * with pg_mblen), stopping early at the end of the string.
 * ----------
 */
#define SKIP_THth(ptr, _suf) \
	do { \
		if (S_THth(_suf) && *(ptr)) \
		{ \
			(ptr) += pg_mblen(ptr); \
			if (*(ptr)) \
				(ptr) += pg_mblen(ptr); \
		} \
	} while (0)
2281 
2282 
#ifdef DEBUG_TO_FROM_CHAR
/* -----------
 * DEBUG: dump the keyword index, for debugging and index checking.
 *
 * For every index position, shows the ASCII character it stands for and
 * either the keyword it points to or the fact that it is unused, followed
 * by totals of used and free positions.
 * ----------
 */
static void
dump_index(const KeyWord *k, const int *index)
{
	int			pos;
	int			used = 0;
	int			unused = 0;

	elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");

	for (pos = 0; pos < KeyWord_INDEX_SIZE; pos++)
	{
		if (index[pos] == -1)
		{
			unused++;
			elog(DEBUG_elog_output, "\t(%d) %c %d", pos, pos + 32, index[pos]);
			continue;
		}

		elog(DEBUG_elog_output, "\t%c: %s, ", pos + 32, k[index[pos]].name);
		used++;
	}

	elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
		 used, unused);
}
#endif							/* DEBUG */
2315 
2316 /* ----------
2317  * Return true if next format picture is not digit value
2318  * ----------
2319  */
2320 static bool
2322 {
2323  if (n->type == NODE_TYPE_END)
2324  return false;
2325 
2326  if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
2327  return true;
2328 
2329  /*
2330  * Next node
2331  */
2332  n++;
2333 
2334  /* end of format string is treated like a non-digit separator */
2335  if (n->type == NODE_TYPE_END)
2336  return true;
2337 
2338  if (n->type == NODE_TYPE_ACTION)
2339  {
2340  if (n->key->is_digit)
2341  return false;
2342 
2343  return true;
2344  }
2345  else if (n->character[1] == '\0' &&
2346  isdigit((unsigned char) n->character[0]))
2347  return false;
2348 
2349  return true; /* some non-digit input (separator) */
2350 }
2351 
2352 
/*
 * Expand a partial (one- to three-digit) year into a full year.
 *
 * Values below 1000 are shifted into the century/millennium nearest to 2020;
 * values of 1000 and above are returned unchanged.
 */
static int
adjust_partial_year_to_2020(int year)
{
	/*
	 * Adjust all dates toward 2020; this is effectively what happens when we
	 * assume '70' is 1970 and '69' is 2069.
	 */
	/* Force 0-69 into the 2000's */
	if (year < 70)
		return year + 2000;
	/* Force 70-99 into the 1900's */
	else if (year < 100)
		return year + 1900;
	/* Force 100-519 into the 2000's */
	else if (year < 520)
		return year + 2000;
	/* Force 520-999 into the 1000's */
	else if (year < 1000)
		return year + 1000;
	else
		return year;
}
2375 
2376 
/*
 * Count the whitespace characters (per isspace()) at the start of 'str'.
 * Counting stops at the first non-whitespace character or at the end of
 * the string.
 */
static int
strspace_len(const char *str)
{
	const char *p;

	for (p = str; *p != '\0' && isspace((unsigned char) *p); p++)
		;						/* just advance over leading whitespace */

	return (int) (p - str);
}
2389 
2390 /*
2391  * Set the date mode of a from-char conversion.
2392  *
2393  * Puke if the date mode has already been set, and the caller attempts to set
2394  * it to a conflicting mode.
2395  *
2396  * Returns true on success, false on failure (if escontext points to an
2397  * ErrorSaveContext; otherwise errors are thrown).
2398  */
2399 static bool
2401  Node *escontext)
2402 {
2403  if (mode != FROM_CHAR_DATE_NONE)
2404  {
2405  if (tmfc->mode == FROM_CHAR_DATE_NONE)
2406  tmfc->mode = mode;
2407  else if (tmfc->mode != mode)
2408  ereturn(escontext, false,
2409  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2410  errmsg("invalid combination of date conventions"),
2411  errhint("Do not mix Gregorian and ISO week date "
2412  "conventions in a formatting template.")));
2413  }
2414  return true;
2415 }
2416 
2417 /*
2418  * Set the integer pointed to by 'dest' to the given value.
2419  *
2420  * Puke if the destination integer has previously been set to some other
2421  * non-zero value.
2422  *
2423  * Returns true on success, false on failure (if escontext points to an
2424  * ErrorSaveContext; otherwise errors are thrown).
2425  */
2426 static bool
2427 from_char_set_int(int *dest, const int value, const FormatNode *node,
2428  Node *escontext)
2429 {
2430  if (*dest != 0 && *dest != value)
2431  ereturn(escontext, false,
2432  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2433  errmsg("conflicting values for \"%s\" field in formatting string",
2434  node->key->name),
2435  errdetail("This value contradicts a previous setting "
2436  "for the same field type.")));
2437  *dest = value;
2438  return true;
2439 }
2440 
2441 /*
2442  * Read a single integer from the source string, into the int pointed to by
2443  * 'dest'. If 'dest' is NULL, the result is discarded.
2444  *
2445  * In fixed-width mode (the node does not have the FM suffix), consume at most
2446  * 'len' characters. However, any leading whitespace isn't counted in 'len'.
2447  *
2448  * We use strtol() to recover the integer value from the source string, in
2449  * accordance with the given FormatNode.
2450  *
2451  * If the conversion completes successfully, src will have been advanced to
2452  * point at the character immediately following the last character used in the
2453  * conversion.
2454  *
2455  * Returns the number of characters consumed, or -1 on error (if escontext
2456  * points to an ErrorSaveContext; otherwise errors are thrown).
2457  *
2458  * Note that from_char_parse_int() provides a more convenient wrapper where
2459  * the length of the field is the same as the length of the format keyword (as
2460  * with DD and MI).
2461  */
2462 static int
2463 from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node,
2464  Node *escontext)
2465 {
2466  long result;
2467  char copy[DCH_MAX_ITEM_SIZ + 1];
2468  const char *init = *src;
2469  int used;
2470 
2471  /*
2472  * Skip any whitespace before parsing the integer.
2473  */
2474  *src += strspace_len(*src);
2475 
2477  used = (int) strlcpy(copy, *src, len + 1);
2478 
2479  if (S_FM(node->suffix) || is_next_separator(node))
2480  {
2481  /*
2482  * This node is in Fill Mode, or the next node is known to be a
2483  * non-digit value, so we just slurp as many characters as we can get.
2484  */
2485  char *endptr;
2486 
2487  errno = 0;
2488  result = strtol(init, &endptr, 10);
2489  *src = endptr;
2490  }
2491  else
2492  {
2493  /*
2494  * We need to pull exactly the number of characters given in 'len' out
2495  * of the string, and convert those.
2496  */
2497  char *last;
2498 
2499  if (used < len)
2500  ereturn(escontext, -1,
2501  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2502  errmsg("source string too short for \"%s\" formatting field",
2503  node->key->name),
2504  errdetail("Field requires %d characters, but only %d remain.",
2505  len, used),
2506  errhint("If your source string is not fixed-width, "
2507  "try using the \"FM\" modifier.")));
2508 
2509  errno = 0;
2510  result = strtol(copy, &last, 10);
2511  used = last - copy;
2512 
2513  if (used > 0 && used < len)
2514  ereturn(escontext, -1,
2515  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2516  errmsg("invalid value \"%s\" for \"%s\"",
2517  copy, node->key->name),
2518  errdetail("Field requires %d characters, but only %d could be parsed.",
2519  len, used),
2520  errhint("If your source string is not fixed-width, "
2521  "try using the \"FM\" modifier.")));
2522 
2523  *src += used;
2524  }
2525 
2526  if (*src == init)
2527  ereturn(escontext, -1,
2528  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2529  errmsg("invalid value \"%s\" for \"%s\"",
2530  copy, node->key->name),
2531  errdetail("Value must be an integer.")));
2532 
2533  if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
2534  ereturn(escontext, -1,
2535  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2536  errmsg("value for \"%s\" in source string is out of range",
2537  node->key->name),
2538  errdetail("Value must be in the range %d to %d.",
2539  INT_MIN, INT_MAX)));
2540 
2541  if (dest != NULL)
2542  {
2543  if (!from_char_set_int(dest, (int) result, node, escontext))
2544  return -1;
2545  }
2546 
2547  return *src - init;
2548 }
2549 
2550 /*
2551  * Call from_char_parse_int_len(), using the length of the format keyword as
2552  * the expected length of the field.
2553  *
2554  * Don't call this function if the field differs in length from the format
2555  * keyword (as with HH24; the keyword length is 4, but the field length is 2).
2556  * In such cases, call from_char_parse_int_len() instead to specify the
2557  * required length explicitly.
2558  */
2559 static int
2560 from_char_parse_int(int *dest, const char **src, FormatNode *node,
2561  Node *escontext)
2562 {
2563  return from_char_parse_int_len(dest, src, node->key->len, node, escontext);
2564 }
2565 
/*
 * Walk the NULL-terminated "array" in order, looking for the first entry
 * that is a case-insensitive prefix of "name".
 *
 * Returns the array index of that entry, or -1 if there is none.
 *
 * *len receives the length of the matched entry, or 0 when nothing matched.
 *
 * Case-insensitivity is defined per pg_ascii_tolower, so this is only
 * suitable for comparisons to ASCII strings.
 */
static int
seq_search_ascii(const char *name, const char *const *array, int *len)
{
	unsigned char lead;
	int			idx;

	*len = 0;

	/* an empty input cannot match any entry */
	if (*name == '\0')
		return -1;

	/* fold the first input character once, as a cheap pre-filter */
	lead = pg_ascii_tolower((unsigned char) *name);

	for (idx = 0; array[idx] != NULL; idx++)
	{
		const char *entry = array[idx];
		const char *e;
		const char *n;

		/* entries whose first character differs are rejected right away */
		if (pg_ascii_tolower((unsigned char) *entry) != lead)
			continue;

		/* advance through both strings for as long as they agree */
		e = entry + 1;
		n = name + 1;
		while (*e != '\0' && *n != '\0' &&
			   pg_ascii_tolower((unsigned char) *e) ==
			   pg_ascii_tolower((unsigned char) *n))
		{
			e++;
			n++;
		}

		/* reaching the end of the entry means all of it was matched */
		if (*e == '\0')
		{
			*len = n - name;
			return idx;
		}
	}

	return -1;
}
2622 
2623 /*
2624  * Sequentially search an array of possibly non-English words for
2625  * a case-insensitive match to the initial character(s) of "name".
2626  *
2627  * This has the same API as seq_search_ascii(), but we use a more general
2628  * case-folding transformation to achieve case-insensitivity. Case folding
2629  * is done per the rules of the collation identified by "collid".
2630  *
2631  * The array is treated as const, but we don't declare it that way because
2632  * the arrays exported by pg_locale.c aren't const.
2633  */
2634 static int
2635 seq_search_localized(const char *name, char **array, int *len, Oid collid)
2636 {
2637  char **a;
2638  char *upper_name;
2639  char *lower_name;
2640 
2641  *len = 0;
2642 
2643  /* empty string can't match anything */
2644  if (!*name)
2645  return -1;
2646 
2647  /*
2648  * The case-folding processing done below is fairly expensive, so before
2649  * doing that, make a quick pass to see if there is an exact match.
2650  */
2651  for (a = array; *a != NULL; a++)
2652  {
2653  int element_len = strlen(*a);
2654 
2655  if (strncmp(name, *a, element_len) == 0)
2656  {
2657  *len = element_len;
2658  return a - array;
2659  }
2660  }
2661 
2662  /*
2663  * Fold to upper case, then to lower case, so that we can match reliably
2664  * even in languages in which case conversions are not injective.
2665  */
2666  upper_name = str_toupper(unconstify(char *, name), strlen(name), collid);
2667  lower_name = str_tolower(upper_name, strlen(upper_name), collid);
2668  pfree(upper_name);
2669 
2670  for (a = array; *a != NULL; a++)
2671  {
2672  char *upper_element;
2673  char *lower_element;
2674  int element_len;
2675 
2676  /* Likewise upper/lower-case array element */
2677  upper_element = str_toupper(*a, strlen(*a), collid);
2678  lower_element = str_tolower(upper_element, strlen(upper_element),
2679  collid);
2680  pfree(upper_element);
2681  element_len = strlen(lower_element);
2682 
2683  /* Match? */
2684  if (strncmp(lower_name, lower_element, element_len) == 0)
2685  {
2686  *len = element_len;
2687  pfree(lower_element);
2688  pfree(lower_name);
2689  return a - array;
2690  }
2691  pfree(lower_element);
2692  }
2693 
2694  pfree(lower_name);
2695  return -1;
2696 }
2697 
2698 /*
2699  * Perform a sequential search in 'array' (or 'localized_array', if that's
2700  * not NULL) for an entry matching the first character(s) of the 'src'
2701  * string case-insensitively.
2702  *
2703  * The 'array' is presumed to be English words (all-ASCII), but
2704  * if 'localized_array' is supplied, that might be non-English
2705  * so we need a more expensive case-folding transformation
2706  * (which will follow the rules of the collation 'collid').
2707  *
2708  * If a match is found, copy the array index of the match into the integer
2709  * pointed to by 'dest' and advance 'src' to the end of the part of the string
2710  * which matched.
2711  *
2712  * Returns true on match, false on failure (if escontext points to an
2713  * ErrorSaveContext; otherwise errors are thrown).
2714  *
2715  * 'node' is used only for error reports: node->key->name identifies the
2716  * field type we were searching for.
2717  */
2718 static bool
2719 from_char_seq_search(int *dest, const char **src, const char *const *array,
2720  char **localized_array, Oid collid,
2721  FormatNode *node, Node *escontext)
2722 {
2723  int len;
2724 
2725  if (localized_array == NULL)
2726  *dest = seq_search_ascii(*src, array, &len);
2727  else
2728  *dest = seq_search_localized(*src, localized_array, &len, collid);
2729 
2730  if (len <= 0)
2731  {
2732  /*
2733  * In the error report, truncate the string at the next whitespace (if
2734  * any) to avoid including irrelevant data.
2735  */
2736  char *copy = pstrdup(*src);
2737  char *c;
2738 
2739  for (c = copy; *c; c++)
2740  {
2741  if (scanner_isspace(*c))
2742  {
2743  *c = '\0';
2744  break;
2745  }
2746  }
2747 
2748  ereturn(escontext, false,
2749  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2750  errmsg("invalid value \"%s\" for \"%s\"",
2751  copy, node->key->name),
2752  errdetail("The given value did not match any of "
2753  "the allowed values for this field.")));
2754  }
2755  *src += len;
2756  return true;
2757 }
2758 
2759 /* ----------
2760  * Process a TmToChar struct as denoted by a list of FormatNodes.
2761  * The formatted data is written to the string pointed to by 'out'.
2762  * ----------
2763  */
2764 static void
2765 DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
2766 {
2767  FormatNode *n;
2768  char *s;
2769  struct fmt_tm *tm = &in->tm;
2770  int i;
2771 
2772  /* cache localized days and months */
2774 
2775  s = out;
2776  for (n = node; n->type != NODE_TYPE_END; n++)
2777  {
2778  if (n->type != NODE_TYPE_ACTION)
2779  {
2780  strcpy(s, n->character);
2781  s += strlen(s);
2782  continue;
2783  }
2784 
2785  switch (n->key->id)
2786  {
2787  case DCH_A_M:
2788  case DCH_P_M:
2789  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2790  ? P_M_STR : A_M_STR);
2791  s += strlen(s);
2792  break;
2793  case DCH_AM:
2794  case DCH_PM:
2795  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2796  ? PM_STR : AM_STR);
2797  s += strlen(s);
2798  break;
2799  case DCH_a_m:
2800  case DCH_p_m:
2801  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2802  ? p_m_STR : a_m_STR);
2803  s += strlen(s);
2804  break;
2805  case DCH_am:
2806  case DCH_pm:
2807  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2808  ? pm_STR : am_STR);
2809  s += strlen(s);
2810  break;
2811  case DCH_HH:
2812  case DCH_HH12:
2813 
2814  /*
2815  * display time as shown on a 12-hour clock, even for
2816  * intervals
2817  */
2818  sprintf(s, "%0*lld", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2819  tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ?
2820  (long long) (HOURS_PER_DAY / 2) :
2821  (long long) (tm->tm_hour % (HOURS_PER_DAY / 2)));
2822  if (S_THth(n->suffix))
2823  str_numth(s, s, S_TH_TYPE(n->suffix));
2824  s += strlen(s);
2825  break;
2826  case DCH_HH24:
2827  sprintf(s, "%0*lld", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2828  (long long) tm->tm_hour);
2829  if (S_THth(n->suffix))
2830  str_numth(s, s, S_TH_TYPE(n->suffix));
2831  s += strlen(s);
2832  break;
2833  case DCH_MI:
2834  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
2835  tm->tm_min);
2836  if (S_THth(n->suffix))
2837  str_numth(s, s, S_TH_TYPE(n->suffix));
2838  s += strlen(s);
2839  break;
2840  case DCH_SS:
2841  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
2842  tm->tm_sec);
2843  if (S_THth(n->suffix))
2844  str_numth(s, s, S_TH_TYPE(n->suffix));
2845  s += strlen(s);
2846  break;
2847 
2848 #define DCH_to_char_fsec(frac_fmt, frac_val) \
2849  sprintf(s, frac_fmt, (int) (frac_val)); \
2850  if (S_THth(n->suffix)) \
2851  str_numth(s, s, S_TH_TYPE(n->suffix)); \
2852  s += strlen(s)
2853 
2854  case DCH_FF1: /* tenth of second */
2855  DCH_to_char_fsec("%01d", in->fsec / 100000);
2856  break;
2857  case DCH_FF2: /* hundredth of second */
2858  DCH_to_char_fsec("%02d", in->fsec / 10000);
2859  break;
2860  case DCH_FF3:
2861  case DCH_MS: /* millisecond */
2862  DCH_to_char_fsec("%03d", in->fsec / 1000);
2863  break;
2864  case DCH_FF4: /* tenth of a millisecond */
2865  DCH_to_char_fsec("%04d", in->fsec / 100);
2866  break;
2867  case DCH_FF5: /* hundredth of a millisecond */
2868  DCH_to_char_fsec("%05d", in->fsec / 10);
2869  break;
2870  case DCH_FF6:
2871  case DCH_US: /* microsecond */
2872  DCH_to_char_fsec("%06d", in->fsec);
2873  break;
2874 #undef DCH_to_char_fsec
2875  case DCH_SSSS:
2876  sprintf(s, "%lld",
2877  (long long) (tm->tm_hour * SECS_PER_HOUR +
2879  tm->tm_sec));
2880  if (S_THth(n->suffix))
2881  str_numth(s, s, S_TH_TYPE(n->suffix));
2882  s += strlen(s);
2883  break;
2884  case DCH_tz:
2886  if (tmtcTzn(in))
2887  {
2888  /* We assume here that timezone names aren't localized */
2889  char *p = asc_tolower_z(tmtcTzn(in));
2890 
2891  strcpy(s, p);
2892  pfree(p);
2893  s += strlen(s);
2894  }
2895  break;
2896  case DCH_TZ:
2898  if (tmtcTzn(in))
2899  {
2900  strcpy(s, tmtcTzn(in));
2901  s += strlen(s);
2902  }
2903  break;
2904  case DCH_TZH:
2906  sprintf(s, "%c%02d",
2907  (tm->tm_gmtoff >= 0) ? '+' : '-',
2908  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2909  s += strlen(s);
2910  break;
2911  case DCH_TZM:
2913  sprintf(s, "%02d",
2914  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2915  s += strlen(s);
2916  break;
2917  case DCH_OF:
2919  sprintf(s, "%c%0*d",
2920  (tm->tm_gmtoff >= 0) ? '+' : '-',
2921  S_FM(n->suffix) ? 0 : 2,
2922  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2923  s += strlen(s);
2924  if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
2925  {
2926  sprintf(s, ":%02d",
2927  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2928  s += strlen(s);
2929  }
2930  break;
2931  case DCH_A_D:
2932  case DCH_B_C:
2934  strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
2935  s += strlen(s);
2936  break;
2937  case DCH_AD:
2938  case DCH_BC:
2940  strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
2941  s += strlen(s);
2942  break;
2943  case DCH_a_d:
2944  case DCH_b_c:
2946  strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
2947  s += strlen(s);
2948  break;
2949  case DCH_ad:
2950  case DCH_bc:
2952  strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
2953  s += strlen(s);
2954  break;
2955  case DCH_MONTH:
2957  if (!tm->tm_mon)
2958  break;
2959  if (S_TM(n->suffix))
2960  {
2962 
2963  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2964  strcpy(s, str);
2965  else
2966  ereport(ERROR,
2967  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2968  errmsg("localized string format value too long")));
2969  }
2970  else
2971  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2973  s += strlen(s);
2974  break;
2975  case DCH_Month:
2977  if (!tm->tm_mon)
2978  break;
2979  if (S_TM(n->suffix))
2980  {
2982 
2983  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2984  strcpy(s, str);
2985  else
2986  ereport(ERROR,
2987  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2988  errmsg("localized string format value too long")));
2989  }
2990  else
2991  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2992  months_full[tm->tm_mon - 1]);
2993  s += strlen(s);
2994  break;
2995  case DCH_month:
2997  if (!tm->tm_mon)
2998  break;
2999  if (S_TM(n->suffix))
3000  {
3002 
3003  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3004  strcpy(s, str);
3005  else
3006  ereport(ERROR,
3007  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3008  errmsg("localized string format value too long")));
3009  }
3010  else
3011  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3013  s += strlen(s);
3014  break;
3015  case DCH_MON:
3017  if (!tm->tm_mon)
3018  break;
3019  if (S_TM(n->suffix))
3020  {
3022 
3023  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3024  strcpy(s, str);
3025  else
3026  ereport(ERROR,
3027  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3028  errmsg("localized string format value too long")));
3029  }
3030  else
3031  strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
3032  s += strlen(s);
3033  break;
3034  case DCH_Mon:
3036  if (!tm->tm_mon)
3037  break;
3038  if (S_TM(n->suffix))
3039  {
3041 
3042  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3043  strcpy(s, str);
3044  else
3045  ereport(ERROR,
3046  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3047  errmsg("localized string format value too long")));
3048  }
3049  else
3050  strcpy(s, months[tm->tm_mon - 1]);
3051  s += strlen(s);
3052  break;
3053  case DCH_mon:
3055  if (!tm->tm_mon)
3056  break;
3057  if (S_TM(n->suffix))
3058  {
3060 
3061  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3062  strcpy(s, str);
3063  else
3064  ereport(ERROR,
3065  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3066  errmsg("localized string format value too long")));
3067  }
3068  else
3069  strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
3070  s += strlen(s);
3071  break;
3072  case DCH_MM:
3073  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
3074  tm->tm_mon);
3075  if (S_THth(n->suffix))
3076  str_numth(s, s, S_TH_TYPE(n->suffix));
3077  s += strlen(s);
3078  break;
3079  case DCH_DAY:
3081  if (S_TM(n->suffix))
3082  {
3084 
3085  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3086  strcpy(s, str);
3087  else
3088  ereport(ERROR,
3089  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3090  errmsg("localized string format value too long")));
3091  }
3092  else
3093  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3095  s += strlen(s);
3096  break;
3097  case DCH_Day:
3099  if (S_TM(n->suffix))
3100  {
3102 
3103  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3104  strcpy(s, str);
3105  else
3106  ereport(ERROR,
3107  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3108  errmsg("localized string format value too long")));
3109  }
3110  else
3111  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3112  days[tm->tm_wday]);
3113  s += strlen(s);
3114  break;
3115  case DCH_day:
3117  if (S_TM(n->suffix))
3118  {
3120 
3121  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3122  strcpy(s, str);
3123  else
3124  ereport(ERROR,
3125  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3126  errmsg("localized string format value too long")));
3127  }
3128  else
3129  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3131  s += strlen(s);
3132  break;
3133  case DCH_DY:
3135  if (S_TM(n->suffix))
3136  {
3138 
3139  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3140  strcpy(s, str);
3141  else
3142  ereport(ERROR,
3143  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3144  errmsg("localized string format value too long")));
3145  }
3146  else
3147  strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
3148  s += strlen(s);
3149  break;
3150  case DCH_Dy:
3152  if (S_TM(n->suffix))
3153  {
3155 
3156  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3157  strcpy(s, str);
3158  else
3159  ereport(ERROR,
3160  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3161  errmsg("localized string format value too long")));
3162  }
3163  else
3164  strcpy(s, days_short[tm->tm_wday]);
3165  s += strlen(s);
3166  break;
3167  case DCH_dy:
3169  if (S_TM(n->suffix))
3170  {
3172 
3173  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3174  strcpy(s, str);
3175  else
3176  ereport(ERROR,
3177  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3178  errmsg("localized string format value too long")));
3179  }
3180  else
3181  strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
3182  s += strlen(s);
3183  break;
3184  case DCH_DDD:
3185  case DCH_IDDD:
3186  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
3187  (n->key->id == DCH_DDD) ?
3188  tm->tm_yday :
3190  if (S_THth(n->suffix))
3191  str_numth(s, s, S_TH_TYPE(n->suffix));
3192  s += strlen(s);
3193  break;
3194  case DCH_DD:
3195  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
3196  if (S_THth(n->suffix))
3197  str_numth(s, s, S_TH_TYPE(n->suffix));
3198  s += strlen(s);
3199  break;
3200  case DCH_D:
3202  sprintf(s, "%d", tm->tm_wday + 1);
3203  if (S_THth(n->suffix))
3204  str_numth(s, s, S_TH_TYPE(n->suffix));
3205  s += strlen(s);
3206  break;
3207  case DCH_ID:
3209  sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
3210  if (S_THth(n->suffix))
3211  str_numth(s, s, S_TH_TYPE(n->suffix));
3212  s += strlen(s);
3213  break;
3214  case DCH_WW:
3215  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3216  (tm->tm_yday - 1) / 7 + 1);
3217  if (S_THth(n->suffix))
3218  str_numth(s, s, S_TH_TYPE(n->suffix));
3219  s += strlen(s);
3220  break;
3221  case DCH_IW:
3222  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3224  if (S_THth(n->suffix))
3225  str_numth(s, s, S_TH_TYPE(n->suffix));
3226  s += strlen(s);
3227  break;
3228  case DCH_Q:
3229  if (!tm->tm_mon)
3230  break;
3231  sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
3232  if (S_THth(n->suffix))
3233  str_numth(s, s, S_TH_TYPE(n->suffix));
3234  s += strlen(s);
3235  break;
3236  case DCH_CC:
3237  if (is_interval) /* straight calculation */
3238  i = tm->tm_year / 100;
3239  else
3240  {
3241  if (tm->tm_year > 0)
3242  /* Century 20 == 1901 - 2000 */
3243  i = (tm->tm_year - 1) / 100 + 1;
3244  else
3245  /* Century 6BC == 600BC - 501BC */
3246  i = tm->tm_year / 100 - 1;
3247  }
3248  if (i <= 99 && i >= -99)
3249  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
3250  else
3251  sprintf(s, "%d", i);
3252  if (S_THth(n->suffix))
3253  str_numth(s, s, S_TH_TYPE(n->suffix));
3254  s += strlen(s);
3255  break;
3256  case DCH_Y_YYY:
3257  i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
3258  sprintf(s, "%d,%03d", i,
3259  ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
3260  if (S_THth(n->suffix))
3261  str_numth(s, s, S_TH_TYPE(n->suffix));
3262  s += strlen(s);
3263  break;
3264  case DCH_YYYY:
3265  case DCH_IYYY:
3266  sprintf(s, "%0*d",
3267  S_FM(n->suffix) ? 0 :
3268  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
3269  (n->key->id == DCH_YYYY ?
3270  ADJUST_YEAR(tm->tm_year, is_interval) :
3272  tm->tm_mon,
3273  tm->tm_mday),
3274  is_interval)));
3275  if (S_THth(n->suffix))
3276  str_numth(s, s, S_TH_TYPE(n->suffix));
3277  s += strlen(s);
3278  break;
3279  case DCH_YYY:
3280  case DCH_IYY:
3281  sprintf(s, "%0*d",
3282  S_FM(n->suffix) ? 0 :
3283  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
3284  (n->key->id == DCH_YYY ?
3285  ADJUST_YEAR(tm->tm_year, is_interval) :
3287  tm->tm_mon,
3288  tm->tm_mday),
3289  is_interval)) % 1000);
3290  if (S_THth(n->suffix))
3291  str_numth(s, s, S_TH_TYPE(n->suffix));
3292  s += strlen(s);
3293  break;
3294  case DCH_YY:
3295  case DCH_IY:
3296  sprintf(s, "%0*d",
3297  S_FM(n->suffix) ? 0 :
3298  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
3299  (n->key->id == DCH_YY ?
3300  ADJUST_YEAR(tm->tm_year, is_interval) :
3302  tm->tm_mon,
3303  tm->tm_mday),
3304  is_interval)) % 100);
3305  if (S_THth(n->suffix))
3306  str_numth(s, s, S_TH_TYPE(n->suffix));
3307  s += strlen(s);
3308  break;
3309  case DCH_Y:
3310  case DCH_I:
3311  sprintf(s, "%1d",
3312  (n->key->id == DCH_Y ?
3313  ADJUST_YEAR(tm->tm_year, is_interval) :
3315  tm->tm_mon,
3316  tm->tm_mday),
3317  is_interval)) % 10);
3318  if (S_THth(n->suffix))
3319  str_numth(s, s, S_TH_TYPE(n->suffix));
3320  s += strlen(s);
3321  break;
3322  case DCH_RM:
3323  /* FALLTHROUGH */
3324  case DCH_rm:
3325 
3326  /*
3327  * For intervals, values like '12 month' will be reduced to 0
3328  * month and some years. These should be processed.
3329  */
3330  if (!tm->tm_mon && !tm->tm_year)
3331  break;
3332  else
3333  {
3334  int mon = 0;
3335  const char *const *months;
3336 
3337  if (n->key->id == DCH_RM)
3339  else
3341 
3342  /*
3343  * Compute the position in the roman-numeral array. Note
3344  * that the contents of the array are reversed, December
3345  * being first and January last.
3346  */
3347  if (tm->tm_mon == 0)
3348  {
3349  /*
3350  * This case is special, and tracks the case of full
3351  * interval years.
3352  */
3353  mon = tm->tm_year >= 0 ? 0 : MONTHS_PER_YEAR - 1;
3354  }
3355  else if (tm->tm_mon < 0)
3356  {
3357  /*
3358  * Negative case. In this case, the calculation is
3359  * reversed, where -1 means December, -2 November,
3360  * etc.
3361  */
3362  mon = -1 * (tm->tm_mon + 1);
3363  }
3364  else
3365  {
3366  /*
3367  * Common case, with a strictly positive value. The
3368  * position in the array matches with the value of
3369  * tm_mon.
3370  */
3371  mon = MONTHS_PER_YEAR - tm->tm_mon;
3372  }
3373 
3374  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
3375  months[mon]);
3376  s += strlen(s);
3377  }
3378  break;
3379  case DCH_W:
3380  sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
3381  if (S_THth(n->suffix))
3382  str_numth(s, s, S_TH_TYPE(n->suffix));
3383  s += strlen(s);
3384  break;
3385  case DCH_J:
3386  sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
3387  if (S_THth(n->suffix))
3388  str_numth(s, s, S_TH_TYPE(n->suffix));
3389  s += strlen(s);
3390  break;
3391  }
3392  }
3393 
3394  *s = '\0';
3395 }
3396 
3397 /*
3398  * Process the string 'in' as denoted by the array of FormatNodes 'node[]'.
3399  * The TmFromChar struct pointed to by 'out' is populated with the results.
3400  *
3401  * 'collid' identifies the collation to use, if needed.
3402  * 'std' specifies standard parsing mode.
3403  *
3404  * If escontext points to an ErrorSaveContext, data errors will be reported
3405  * by filling that struct; the caller must test SOFT_ERROR_OCCURRED() to see
3406  * whether an error occurred. Otherwise, errors are thrown.
3407  *
3408  * Note: we currently don't have any to_interval() function, so there
3409  * is no need here for INVALID_FOR_INTERVAL checks.
3410  */
3411 static void
3412 DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
3413  Oid collid, bool std, Node *escontext)
3414 {
3415  FormatNode *n;
3416  const char *s;
3417  int len,
3418  value;
3419  bool fx_mode = std;
3420 
3421  /* number of extra skipped characters (more than given in format string) */
3422  int extra_skip = 0;
3423 
3424  /* cache localized days and months */
3426 
3427  for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
3428  {
3429  /*
3430  * Ignore spaces at the beginning of the string and before fields when
3431  * not in FX (fixed width) mode.
3432  */
3433  if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) &&
3434  (n->type == NODE_TYPE_ACTION || n == node))
3435  {
3436  while (*s != '\0' && isspace((unsigned char) *s))
3437  {
3438  s++;
3439  extra_skip++;
3440  }
3441  }
3442 
3443  if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
3444  {
3445  if (std)
3446  {
3447  /*
3448  * Standard mode requires strict matching between format
3449  * string separators/spaces and input string.
3450  */
3451  Assert(n->character[0] && !n->character[1]);
3452 
3453  if (*s == n->character[0])
3454  s++;
3455  else
3456  ereturn(escontext,,
3457  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3458  errmsg("unmatched format separator \"%c\"",
3459  n->character[0])));
3460  }
3461  else if (!fx_mode)
3462  {
3463  /*
3464  * In non FX (fixed format) mode one format string space or
3465  * separator match to one space or separator in input string.
3466  * Or match nothing if there is no space or separator in the
3467  * current position of input string.
3468  */
3469  extra_skip--;
3470  if (isspace((unsigned char) *s) || is_separator_char(s))
3471  {
3472  s++;
3473  extra_skip++;
3474  }
3475  }
3476  else
3477  {
3478  /*
3479  * In FX mode, on format string space or separator we consume
3480  * exactly one character from input string. Notice we don't
3481  * insist that the consumed character match the format's
3482  * character.
3483  */
3484  s += pg_mblen(s);
3485  }
3486  continue;
3487  }
3488  else if (n->type != NODE_TYPE_ACTION)
3489  {
3490  /*
3491  * Text character, so consume one character from input string.
3492  * Notice we don't insist that the consumed character match the
3493  * format's character.
3494  */
3495  if (!fx_mode)
3496  {
3497  /*
3498  * In non FX mode we might have skipped some extra characters
3499  * (more than specified in format string) before. In this
3500  * case we don't skip input string character, because it might
3501  * be part of field.
3502  */
3503  if (extra_skip > 0)
3504  extra_skip--;
3505  else
3506  s += pg_mblen(s);
3507  }
3508  else
3509  {
3510  int chlen = pg_mblen(s);
3511 
3512  /*
3513  * Standard mode requires strict match of format characters.
3514  */
3515  if (std && n->type == NODE_TYPE_CHAR &&
3516  strncmp(s, n->character, chlen) != 0)
3517  ereturn(escontext,,
3518  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3519  errmsg("unmatched format character \"%s\"",
3520  n->character)));
3521 
3522  s += chlen;
3523  }
3524  continue;
3525  }
3526 
3527  if (!from_char_set_mode(out, n->key->date_mode, escontext))
3528  return;
3529 
3530  switch (n->key->id)
3531  {
3532  case DCH_FX:
3533  fx_mode = true;
3534  break;
3535  case DCH_A_M:
3536  case DCH_P_M:
3537  case DCH_a_m:
3538  case DCH_p_m:
3540  NULL, InvalidOid,
3541  n, escontext))
3542  return;
3543  if (!from_char_set_int(&out->pm, value % 2, n, escontext))
3544  return;
3545  out->clock = CLOCK_12_HOUR;
3546  break;
3547  case DCH_AM:
3548  case DCH_PM:
3549  case DCH_am:
3550  case DCH_pm:
3552  NULL, InvalidOid,
3553  n, escontext))
3554  return;
3555  if (!from_char_set_int(&out->pm, value % 2, n, escontext))
3556  return;
3557  out->clock = CLOCK_12_HOUR;
3558  break;
3559  case DCH_HH:
3560  case DCH_HH12:
3561  if (from_char_parse_int_len(&out->hh, &s, 2, n, escontext) < 0)
3562  return;
3563  out->clock = CLOCK_12_HOUR;
3564  SKIP_THth(s, n->suffix);
3565  break;
3566  case DCH_HH24:
3567  if (from_char_parse_int_len(&out->hh, &s, 2, n, escontext) < 0)
3568  return;
3569  SKIP_THth(s, n->suffix);
3570  break;
3571  case DCH_MI:
3572  if (from_char_parse_int(&out->mi, &s, n, escontext) < 0)
3573  return;
3574  SKIP_THth(s, n->suffix);
3575  break;
3576  case DCH_SS:
3577  if (from_char_parse_int(&out->ss, &s, n, escontext) < 0)
3578  return;
3579  SKIP_THth(s, n->suffix);
3580  break;
3581  case DCH_MS: /* millisecond */
3582  len = from_char_parse_int_len(&out->ms, &s, 3, n, escontext);
3583  if (len < 0)
3584  return;
3585 
3586  /*
3587  * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
3588  */
3589  out->ms *= len == 1 ? 100 :
3590  len == 2 ? 10 : 1;
3591 
3592  SKIP_THth(s, n->suffix);
3593  break;
3594  case DCH_FF1:
3595  case DCH_FF2:
3596  case DCH_FF3:
3597  case DCH_FF4:
3598  case DCH_FF5:
3599  case DCH_FF6:
3600  out->ff = n->key->id - DCH_FF1 + 1;
3601  /* FALLTHROUGH */
3602  case DCH_US: /* microsecond */
3603  len = from_char_parse_int_len(&out->us, &s,
3604  n->key->id == DCH_US ? 6 :
3605  out->ff, n, escontext);
3606  if (len < 0)
3607  return;
3608 
3609  out->us *= len == 1 ? 100000 :
3610  len == 2 ? 10000 :
3611  len == 3 ? 1000 :
3612  len == 4 ? 100 :
3613  len == 5 ? 10 : 1;
3614 
3615  SKIP_THth(s, n->suffix);
3616  break;
3617  case DCH_SSSS:
3618  if (from_char_parse_int(&out->ssss, &s, n, escontext) < 0)
3619  return;
3620  SKIP_THth(s, n->suffix);
3621  break;
3622  case DCH_tz:
3623  case DCH_TZ:
3624  {
3625  int tzlen;
3626 
3627  tzlen = DecodeTimezoneAbbrevPrefix(s,
3628  &out->gmtoffset,
3629  &out->tzp);
3630  if (tzlen > 0)
3631  {
3632  out->has_tz = true;
3633  /* we only need the zone abbrev for DYNTZ case */
3634  if (out->tzp)
3635  out->abbrev = pnstrdup(s, tzlen);
3636  out->tzsign = 0; /* drop any earlier TZH/TZM info */
3637  s += tzlen;
3638  break;
3639  }
3640  else if (isalpha((unsigned char) *s))
3641  {
3642  /*
3643  * It doesn't match any abbreviation, but it starts
3644  * with a letter. OF format certainly won't succeed;
3645  * assume it's a misspelled abbreviation and complain
3646  * accordingly.
3647  */
3648  ereturn(escontext,,
3649  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3650  errmsg("invalid value \"%s\" for \"%s\"",
3651  s, n->key->name),
3652  errdetail("Time zone abbreviation is not recognized.")));
3653  }
3654  /* otherwise parse it like OF */
3655  }
3656  /* FALLTHROUGH */
3657  case DCH_OF:
3658  /* OF is equivalent to TZH or TZH:TZM */
3659  /* see TZH comments below */
3660  if (*s == '+' || *s == '-' || *s == ' ')
3661  {
3662  out->tzsign = *s == '-' ? -1 : +1;
3663  s++;
3664  }
3665  else
3666  {
3667  if (extra_skip > 0 && *(s - 1) == '-')
3668  out->tzsign = -1;
3669  else
3670  out->tzsign = +1;
3671  }
3672  if (from_char_parse_int_len(&out->tzh, &s, 2, n, escontext) < 0)
3673  return;
3674  if (*s == ':')
3675  {
3676  s++;
3677  if (from_char_parse_int_len(&out->tzm, &s, 2, n,
3678  escontext) < 0)
3679  return;
3680  }
3681  break;
3682  case DCH_TZH:
3683 
3684  /*
3685  * Value of TZH might be negative. And the issue is that we
3686  * might swallow minus sign as the separator. So, if we have
3687  * skipped more characters than specified in the format
3688  * string, then we consider prepending last skipped minus to
3689  * TZH.
3690  */
3691  if (*s == '+' || *s == '-' || *s == ' ')
3692  {
3693  out->tzsign = *s == '-' ? -1 : +1;
3694  s++;
3695  }
3696  else
3697  {
3698  if (extra_skip > 0 && *(s - 1) == '-')
3699  out->tzsign = -1;
3700  else
3701  out->tzsign = +1;
3702  }
3703 
3704  if (from_char_parse_int_len(&out->tzh, &s, 2, n, escontext) < 0)
3705  return;
3706  break;
3707  case DCH_TZM:
3708  /* assign positive timezone sign if TZH was not seen before */
3709  if (!out->tzsign)
3710  out->tzsign = +1;
3711  if (from_char_parse_int_len(&out->tzm, &s, 2, n, escontext) < 0)
3712  return;
3713  break;
3714  case DCH_A_D:
3715  case DCH_B_C:
3716  case DCH_a_d:
3717  case DCH_b_c:
3719  NULL, InvalidOid,
3720  n, escontext))
3721  return;
3722  if (!from_char_set_int(&out->bc, value % 2, n, escontext))
3723  return;
3724  break;
3725  case DCH_AD:
3726  case DCH_BC:
3727  case DCH_ad:
3728  case DCH_bc:
3730  NULL, InvalidOid,
3731  n, escontext))
3732  return;
3733  if (!from_char_set_int(&out->bc, value % 2, n, escontext))
3734  return;
3735  break;
3736  case DCH_MONTH:
3737  case DCH_Month:
3738  case DCH_month:
3740  S_TM(n->suffix) ? localized_full_months : NULL,
3741  collid,
3742  n, escontext))
3743  return;
3744  if (!from_char_set_int(&out->mm, value + 1, n, escontext))
3745  return;
3746  break;
3747  case DCH_MON:
3748  case DCH_Mon:
3749  case DCH_mon:
3750  if (!from_char_seq_search(&value, &s, months,
3751  S_TM(n->suffix) ? localized_abbrev_months : NULL,
3752  collid,
3753  n, escontext))
3754  return;
3755  if (!from_char_set_int(&out->mm, value + 1, n, escontext))
3756  return;
3757  break;
3758  case DCH_MM:
3759  if (from_char_parse_int(&out->mm, &s, n, escontext) < 0)
3760  return;
3761  SKIP_THth(s, n->suffix);
3762  break;
3763  case DCH_DAY:
3764  case DCH_Day:
3765  case DCH_day:
3766  if (!from_char_seq_search(&value, &s, days,
3767  S_TM(n->suffix) ? localized_full_days : NULL,
3768  collid,
3769  n, escontext))
3770  return;
3771  if (!from_char_set_int(&out->d, value, n, escontext))
3772  return;
3773  out->d++;
3774  break;
3775  case DCH_DY:
3776  case DCH_Dy:
3777  case DCH_dy:
3779  S_TM(n->suffix) ? localized_abbrev_days : NULL,
3780  collid,
3781  n, escontext))
3782  return;
3783  if (!from_char_set_int(&out->d, value, n, escontext))
3784  return;
3785  out->d++;
3786  break;
3787  case DCH_DDD:
3788  if (from_char_parse_int(&out->ddd, &s, n, escontext) < 0)
3789  return;
3790  SKIP_THth(s, n->suffix);
3791  break;
3792  case DCH_IDDD:
3793  if (from_char_parse_int_len(&out->ddd, &s, 3, n, escontext) < 0)
3794  return;
3795  SKIP_THth(s, n->suffix);
3796  break;
3797  case DCH_DD:
3798  if (from_char_parse_int(&out->dd, &s, n, escontext) < 0)
3799  return;
3800  SKIP_THth(s, n->suffix);
3801  break;
3802  case DCH_D:
3803  if (from_char_parse_int(&out->d, &s, n, escontext) < 0)
3804  return;
3805  SKIP_THth(s, n->suffix);
3806  break;
3807  case DCH_ID:
3808  if (from_char_parse_int_len(&out->d, &s, 1, n, escontext) < 0)
3809  return;
3810  /* Shift numbering to match Gregorian where Sunday = 1 */
3811  if (++out->d > 7)
3812  out->d = 1;
3813  SKIP_THth(s, n->suffix);
3814  break;
3815  case DCH_WW:
3816  case DCH_IW:
3817  if (from_char_parse_int(&out->ww, &s, n, escontext) < 0)
3818  return;
3819  SKIP_THth(s, n->suffix);
3820  break;
3821  case DCH_Q:
3822 
3823  /*
3824  * We ignore 'Q' when converting to date because it is unclear
3825  * which date in the quarter to use, and some people specify
3826  * both quarter and month, so if it was honored it might
3827  * conflict with the supplied month. That is also why we don't
3828  * throw an error.
3829  *
3830  * We still parse the source string for an integer, but it
3831  * isn't stored anywhere in 'out'.
3832  */
3833  if (from_char_parse_int((int *) NULL, &s, n, escontext) < 0)
3834  return;
3835  SKIP_THth(s, n->suffix);
3836  break;
3837  case DCH_CC:
3838  if (from_char_parse_int(&out->cc, &s, n, escontext) < 0)
3839  return;
3840  SKIP_THth(s, n->suffix);
3841  break;
3842  case DCH_Y_YYY:
3843  {
3844  int matched,
3845  years,
3846  millennia,
3847  nch;
3848 
3849  matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
3850  if (matched < 2)
3851  ereturn(escontext,,
3852  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3853  errmsg("invalid input string for \"Y,YYY\"")));
3854  years += (millennia * 1000);
3855  if (!from_char_set_int(&out->year, years, n, escontext))
3856  return;
3857  out->yysz = 4;
3858  s += nch;
3859  SKIP_THth(s, n->suffix);
3860  }
3861  break;
3862  case DCH_YYYY:
3863  case DCH_IYYY:
3864  if (from_char_parse_int(&out->year, &s, n, escontext) < 0)
3865  return;
3866  out->yysz = 4;
3867  SKIP_THth(s, n->suffix);
3868  break;
3869  case DCH_YYY:
3870  case DCH_IYY:
3871  len = from_char_parse_int(&out->year, &s, n, escontext);
3872  if (len < 0)
3873  return;
3874  if (len < 4)
3875  out->year = adjust_partial_year_to_2020(out->year);
3876  out->yysz = 3;
3877  SKIP_THth(s, n->suffix);
3878  break;
3879  case DCH_YY:
3880  case DCH_IY:
3881  len = from_char_parse_int(&out->year, &s, n, escontext);
3882  if (len < 0)
3883  return;
3884  if (len < 4)
3885  out->year = adjust_partial_year_to_2020(out->year);
3886  out->yysz = 2;
3887  SKIP_THth(s, n->suffix);
3888  break;
3889  case DCH_Y:
3890  case DCH_I:
3891  len = from_char_parse_int(&out->year, &s, n, escontext);
3892  if (len < 0)
3893  return;
3894  if (len < 4)
3895  out->year = adjust_partial_year_to_2020(out->year);
3896  out->yysz = 1;
3897  SKIP_THth(s, n->suffix);
3898  break;
3899  case DCH_RM:
3900  case DCH_rm:
3902  NULL, InvalidOid,
3903  n, escontext))
3904  return;
3905  if (!from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, n,
3906  escontext))
3907  return;
3908  break;
3909  case DCH_W:
3910  if (from_char_parse_int(&out->w, &s, n, escontext) < 0)
3911  return;
3912  SKIP_THth(s, n->suffix);
3913  break;
3914  case DCH_J:
3915  if (from_char_parse_int(&out->j, &s, n, escontext) < 0)
3916  return;
3917  SKIP_THth(s, n->suffix);
3918  break;
3919  }
3920 
3921  /* Ignore all spaces after fields */
3922  if (!fx_mode)
3923  {
3924  extra_skip = 0;
3925  while (*s != '\0' && isspace((unsigned char) *s))
3926  {
3927  s++;
3928  extra_skip++;
3929  }
3930  }
3931  }
3932 
3933  /*
3934  * Standard parsing mode doesn't allow unmatched format patterns or
3935  * trailing characters in the input string.
3936  */
3937  if (std)
3938  {
3939  if (n->type != NODE_TYPE_END)
3940  ereturn(escontext,,
3941  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3942  errmsg("input string is too short for datetime format")));
3943 
3944  while (*s != '\0' && isspace((unsigned char) *s))
3945  s++;
3946 
3947  if (*s != '\0')
3948  ereturn(escontext,,
3949  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3950  errmsg("trailing characters remain in input string after datetime format")));
3951  }
3952 }
3953 
3954 /*
3955  * The invariant for DCH cache entry management is that DCHCounter is equal
3956  * to the maximum age value among the existing entries, and we increment it
3957  * whenever an access occurs. If we approach overflow, deal with that by
3958  * halving all the age values, so that we retain a fairly accurate idea of
3959  * which entries are oldest.
3960  */
3961 static inline void
3963 {
3964  if (DCHCounter >= (INT_MAX - 1))
3965  {
3966  for (int i = 0; i < n_DCHCache; i++)
3967  DCHCache[i]->age >>= 1;
3968  DCHCounter >>= 1;
3969  }
3970 }
3971 
3972 /*
3973  * Get mask of date/time/zone components present in format nodes.
3974  */
3975 static int
3977 {
3978  FormatNode *n;
3979  int flags = 0;
3980 
3981  for (n = node; n->type != NODE_TYPE_END; n++)
3982  {
3983  if (n->type != NODE_TYPE_ACTION)
3984  continue;
3985 
3986  switch (n->key->id)
3987  {
3988  case DCH_FX:
3989  break;
3990  case DCH_A_M:
3991  case DCH_P_M:
3992  case DCH_a_m:
3993  case DCH_p_m:
3994  case DCH_AM:
3995  case DCH_PM:
3996  case DCH_am:
3997  case DCH_pm:
3998  case DCH_HH:
3999  case DCH_HH12:
4000  case DCH_HH24:
4001  case DCH_MI:
4002  case DCH_SS:
4003  case DCH_MS: /* millisecond */
4004  case DCH_US: /* microsecond */
4005  case DCH_FF1:
4006  case DCH_FF2:
4007  case DCH_FF3:
4008  case DCH_FF4:
4009  case DCH_FF5:
4010  case DCH_FF6:
4011  case DCH_SSSS:
4012  flags |= DCH_TIMED;
4013  break;
4014  case DCH_tz:
4015  case DCH_TZ:
4016  case DCH_OF:
4017  case DCH_TZH:
4018  case DCH_TZM:
4019  flags |= DCH_ZONED;
4020  break;
4021  case DCH_A_D:
4022  case DCH_B_C:
4023  case DCH_a_d:
4024  case DCH_b_c:
4025  case DCH_AD:
4026  case DCH_BC:
4027  case DCH_ad:
4028  case DCH_bc:
4029  case DCH_MONTH:
4030  case DCH_Month:
4031  case DCH_month:
4032  case DCH_MON:
4033  case DCH_Mon:
4034  case DCH_mon:
4035  case DCH_MM:
4036  case DCH_DAY:
4037  case DCH_Day:
4038  case DCH_day:
4039  case DCH_DY:
4040  case DCH_Dy:
4041  case DCH_dy:
4042  case DCH_DDD:
4043  case DCH_IDDD:
4044  case DCH_DD:
4045  case DCH_D:
4046  case DCH_ID:
4047  case DCH_WW:
4048  case DCH_Q:
4049  case DCH_CC:
4050  case DCH_Y_YYY:
4051  case DCH_YYYY:
4052  case DCH_IYYY:
4053  case DCH_YYY:
4054  case DCH_IYY:
4055  case DCH_YY:
4056  case DCH_IY:
4057  case DCH_Y:
4058  case DCH_I:
4059  case DCH_RM:
4060  case DCH_rm:
4061  case DCH_W:
4062  case DCH_J:
4063  flags |= DCH_DATED;
4064  break;
4065  }
4066  }
4067 
4068  return flags;
4069 }
4070 
4071 /* select a DCHCacheEntry to hold the given format picture */
4072 static DCHCacheEntry *
4073 DCH_cache_getnew(const char *str, bool std)
4074 {
4075  DCHCacheEntry *ent;
4076 
4077  /* Ensure we can advance DCHCounter below */
4079 
4080  /*
4081  * If cache is full, remove oldest entry (or recycle first not-valid one)
4082  */
4084  {
4085  DCHCacheEntry *old = DCHCache[0];
4086 
4087 #ifdef DEBUG_TO_FROM_CHAR
4088  elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
4089 #endif
4090  if (old->valid)
4091  {
4092  for (int i = 1; i < DCH_CACHE_ENTRIES; i++)
4093  {
4094  ent = DCHCache[i];
4095  if (!ent->valid)
4096  {
4097  old = ent;
4098  break;
4099  }
4100  if (ent->age < old->age)
4101  old = ent;
4102  }
4103  }
4104 #ifdef DEBUG_TO_FROM_CHAR
4105  elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
4106 #endif
4107  old->valid = false;
4108  strlcpy(old->str, str, DCH_CACHE_SIZE + 1);
4109  old->age = (++DCHCounter);
4110  /* caller is expected to fill format, then set valid */
4111  return old;
4112  }
4113  else
4114  {
4115 #ifdef DEBUG_TO_FROM_CHAR
4116  elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
4117 #endif
4118  Assert(DCHCache[n_DCHCache] == NULL);
4119  DCHCache[n_DCHCache] = ent = (DCHCacheEntry *)
4121  ent->valid = false;
4122  strlcpy(ent->str, str, DCH_CACHE_SIZE + 1);
4123  ent->std = std;
4124  ent->age = (++DCHCounter);
4125  /* caller is expected to fill format, then set valid */
4126  ++n_DCHCache;
4127  return ent;
4128  }
4129 }
4130 
4131 /* look for an existing DCHCacheEntry matching the given format picture */
4132 static DCHCacheEntry *
4133 DCH_cache_search(const char *str, bool std)
4134 {
4135  /* Ensure we can advance DCHCounter below */
4137 
4138  for (int i = 0; i < n_DCHCache; i++)
4139  {
4140  DCHCacheEntry *ent = DCHCache[i];
4141 
4142  if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std)
4143  {
4144  ent->age = (++DCHCounter);
4145  return ent;
4146  }
4147  }
4148 
4149  return NULL;
4150 }
4151 
4152 /* Find or create a DCHCacheEntry for the given format picture */
4153 static DCHCacheEntry *
4154 DCH_cache_fetch(const char *str, bool std)
4155 {
4156  DCHCacheEntry *ent;
4157 
4158  if ((ent = DCH_cache_search(str, std)) == NULL)
4159  {
4160  /*
4161  * Not in the cache, must run parser and save a new format-picture to
4162  * the cache. Do not mark the cache entry valid until parsing
4163  * succeeds.
4164  */
4165  ent = DCH_cache_getnew(str, std);
4166 
4168  DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4169 
4170  ent->valid = true;
4171  }
4172  return ent;
4173 }
4174 
4175 /*
4176  * Format a date/time or interval into a string according to fmt.
4177  * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char
4178  * for formatting.
4179  */
4180 static text *
4181 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
4182 {
4183  FormatNode *format;
4184  char *fmt_str,
4185  *result;
4186  bool incache;
4187  int fmt_len;
4188  text *res;
4189 
4190  /*
4191  * Convert fmt to C string
4192  */
4193  fmt_str = text_to_cstring(fmt);
4194  fmt_len = strlen(fmt_str);
4195 
4196  /*
4197  * Allocate workspace for result as C string
4198  */
4199  result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
4200  *result = '\0';
4201 
4202  if (fmt_len > DCH_CACHE_SIZE)
4203  {
4204  /*
4205  * Allocate new memory if format picture is bigger than static cache
4206  * and do not use cache (call parser always)
4207  */
4208  incache = false;
4209 
4210  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4211 
4212  parse_format(format, fmt_str, DCH_keywords,
4213  DCH_suff, DCH_index, DCH_FLAG, NULL);
4214  }
4215  else
4216  {
4217  /*
4218  * Use cache buffers
4219  */
4220  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
4221 
4222  incache = true;
4223  format = ent->format;
4224  }
4225 
4226  /* The real work is here */
4227  DCH_to_char(format, is_interval, tmtc, result, collid);
4228 
4229  if (!incache)
4230  pfree(format);
4231 
4232  pfree(fmt_str);
4233 
4234  /* convert C-string result to TEXT format */
4235  res = cstring_to_text(result);
4236 
4237  pfree(result);
4238  return res;
4239 }
4240 
4241 /****************************************************************************
4242  * Public routines
4243  ***************************************************************************/
4244 
4245 /* -------------------
4246  * TIMESTAMP to_char()
4247  * -------------------
4248  */
4249 Datum
4251 {
4253  text *fmt = PG_GETARG_TEXT_PP(1),
4254  *res;
4255  TmToChar tmtc;
4256  struct pg_tm tt;
4257  struct fmt_tm *tm;
4258  int thisdate;
4259 
4260  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4261  PG_RETURN_NULL();
4262 
4263  ZERO_tmtc(&tmtc);
4264  tm = tmtcTm(&tmtc);
4265 
4266  if (timestamp2tm(dt, NULL, &tt, &tmtcFsec(&tmtc), NULL, NULL) != 0)
4267  ereport(ERROR,
4268  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4269  errmsg("timestamp out of range")));
4270 
4271  /* calculate wday and yday, because timestamp2tm doesn't */
4272  thisdate = date2j(tt.tm_year, tt.tm_mon, tt.tm_mday);
4273  tt.tm_wday = (thisdate + 1) % 7;
4274  tt.tm_yday = thisdate - date2j(tt.tm_year, 1, 1) + 1;
4275 
4276  COPY_tm(tm, &tt);
4277 
4278  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4279  PG_RETURN_NULL();
4280 
4282 }
4283 
4284 Datum
4286 {
4288  text *fmt = PG_GETARG_TEXT_PP(1),
4289  *res;
4290  TmToChar tmtc;
4291  int tz;
4292  struct pg_tm tt;
4293  struct fmt_tm *tm;
4294  int thisdate;
4295 
4296  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4297  PG_RETURN_NULL();
4298 
4299  ZERO_tmtc(&tmtc);
4300  tm = tmtcTm(&tmtc);
4301 
4302  if (timestamp2tm(dt, &tz, &tt, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
4303  ereport(ERROR,
4304  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4305  errmsg("timestamp out of range")));
4306 
4307  /* calculate wday and yday, because timestamp2tm doesn't */
4308  thisdate = date2j(tt.tm_year, tt.tm_mon, tt.tm_mday);
4309  tt.tm_wday = (thisdate + 1) % 7;
4310  tt.tm_yday = thisdate - date2j(tt.tm_year, 1, 1) + 1;
4311 
4312  COPY_tm(tm, &tt);
4313 
4314  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4315  PG_RETURN_NULL();
4316 
4318 }
4319 
4320 
4321 /* -------------------
4322  * INTERVAL to_char()
4323  * -------------------
4324  */
4325 Datum
4327 {
4328  Interval *it = PG_GETARG_INTERVAL_P(0);
4329  text *fmt = PG_GETARG_TEXT_PP(1),
4330  *res;
4331  TmToChar tmtc;
4332  struct fmt_tm *tm;
4333  struct pg_itm tt,
4334  *itm = &tt;
4335 
4336  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || INTERVAL_NOT_FINITE(it))
4337  PG_RETURN_NULL();
4338 
4339  ZERO_tmtc(&tmtc);
4340  tm = tmtcTm(&tmtc);
4341 
4342  interval2itm(*it, itm);
4343  tmtc.fsec = itm->tm_usec;
4344  tm->tm_sec = itm->tm_sec;
4345  tm->tm_min = itm->tm_min;
4346  tm->tm_hour = itm->tm_hour;
4347  tm->tm_mday = itm->tm_mday;
4348  tm->tm_mon = itm->tm_mon;
4349  tm->tm_year = itm->tm_year;
4350 
4351  /* wday is meaningless, yday approximates the total span in days */
4353 
4354  if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
4355  PG_RETURN_NULL();
4356 
4358 }
4359 
4360 /* ---------------------
4361  * TO_TIMESTAMP()
4362  *
4363  * Make Timestamp from date_str which is formatted at argument 'fmt'
4364  * ( to_timestamp is reverse to_char() )
4365  * ---------------------
4366  */
4367 Datum
4369 {
4370  text *date_txt = PG_GETARG_TEXT_PP(0);
4371  text *fmt = PG_GETARG_TEXT_PP(1);
4373  Timestamp result;
4374  int tz;
4375  struct pg_tm tm;
4376  struct fmt_tz ftz;
4377  fsec_t fsec;
4378  int fprec;
4379 
4380  do_to_timestamp(date_txt, fmt, collid, false,
4381  &tm, &fsec, &ftz, &fprec, NULL, NULL);
4382 
4383  /* Use the specified time zone, if any. */
4384  if (ftz.has_tz)
4385  tz = ftz.gmtoffset;
4386  else
4388 
4389  if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
4390  ereport(ERROR,
4391  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4392  errmsg("timestamp out of range")));
4393 
4394  /* Use the specified fractional precision, if any. */
4395  if (fprec)
4396  AdjustTimestampForTypmod(&result, fprec, NULL);
4397 
4398  PG_RETURN_TIMESTAMP(result);
4399 }
4400 
4401 /* ----------
4402  * TO_DATE
4403  * Make Date from date_str which is formatted at argument 'fmt'
4404  * ----------
4405  */
4406 Datum
4408 {
4409  text *date_txt = PG_GETARG_TEXT_PP(0);
4410  text *fmt = PG_GETARG_TEXT_PP(1);
4412  DateADT result;
4413  struct pg_tm tm;
4414  struct fmt_tz ftz;
4415  fsec_t fsec;
4416 
4417  do_to_timestamp(date_txt, fmt, collid, false,
4418  &tm, &fsec, &ftz, NULL, NULL, NULL);
4419 
4420  /* Prevent overflow in Julian-day routines */
4422  ereport(ERROR,
4423  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4424  errmsg("date out of range: \"%s\"",
4425  text_to_cstring(date_txt))));
4426 
4428 
4429  /* Now check for just-out-of-range dates */
4430  if (!IS_VALID_DATE(result))
4431  ereport(ERROR,
4432  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4433  errmsg("date out of range: \"%s\"",
4434  text_to_cstring(date_txt))));
4435 
4436  PG_RETURN_DATEADT(result);
4437 }
4438 
4439 /*
4440  * Convert the 'date_txt' input to a datetime type using argument 'fmt'
4441  * as a format string. The collation 'collid' may be used for case-folding
4442  * rules in some cases. 'strict' specifies standard parsing mode.
4443  *
4444  * The actual data type (returned in 'typid', 'typmod') is determined by
4445  * the presence of date/time/zone components in the format string.
4446  *
4447  * When a timezone component is present, the corresponding offset is
4448  * returned in '*tz'.
4449  *
4450  * If escontext points to an ErrorSaveContext, data errors will be reported
4451  * by filling that struct; the caller must test SOFT_ERROR_OCCURRED() to see
4452  * whether an error occurred. Otherwise, errors are thrown.
4453  */
4454 Datum
4455 parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
4456  Oid *typid, int32 *typmod, int *tz,
4457  Node *escontext)
4458 {
4459  struct pg_tm tm;
4460  struct fmt_tz ftz;
4461  fsec_t fsec;
4462  int fprec;
4463  uint32 flags;
4464 
4465  if (!do_to_timestamp(date_txt, fmt, collid, strict,
4466  &tm, &fsec, &ftz, &fprec, &flags, escontext))
4467  return (Datum) 0;
4468 
4469  *typmod = fprec ? fprec : -1; /* fractional part precision */
4470 
4471  if (flags & DCH_DATED)
4472  {
4473  if (flags & DCH_TIMED)
4474  {
4475  if (flags & DCH_ZONED)
4476  {
4477  TimestampTz result;
4478 
4479  if (ftz.has_tz)
4480  {
4481  *tz = ftz.gmtoffset;
4482  }
4483  else
4484  {
4485  /*
4486  * Time zone is present in format string, but not in input
4487  * string. Assuming do_to_timestamp() triggers no error
4488  * this should be possible only in non-strict case.
4489  */
4490  Assert(!strict);
4491 
4492  ereturn(escontext, (Datum) 0,
4493  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4494  errmsg("missing time zone in input string for type timestamptz")));
4495  }
4496 
4497  if (tm2timestamp(&tm, fsec, tz, &result) != 0)
4498  ereturn(escontext, (Datum) 0,
4499  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4500  errmsg("timestamptz out of range")));
4501 
4502  AdjustTimestampForTypmod(&result, *typmod, escontext);
4503 
4504  *typid = TIMESTAMPTZOID;
4505  return TimestampTzGetDatum(result);
4506  }
4507  else
4508  {
4509  Timestamp result;
4510 
4511  if (tm2timestamp(&tm, fsec, NULL, &result) != 0)
4512  ereturn(escontext, (Datum) 0,
4513  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4514  errmsg("timestamp out of range")));
4515 
4516  AdjustTimestampForTypmod(&result, *typmod, escontext);
4517 
4518  *typid = TIMESTAMPOID;
4519  return TimestampGetDatum(result);
4520  }
4521  }
4522  else
4523  {
4524  if (flags & DCH_ZONED)
4525  {
4526  ereturn(escontext, (Datum) 0,
4527  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4528  errmsg("datetime format is zoned but not timed")));
4529  }
4530  else
4531  {
4532  DateADT result;
4533 
4534  /* Prevent overflow in Julian-day routines */
4536  ereturn(escontext, (Datum) 0,
4537  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4538  errmsg("date out of range: \"%s\"",
4539  text_to_cstring(date_txt))));
4540 
4541  result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) -
4543 
4544  /* Now check for just-out-of-range dates */
4545  if (!IS_VALID_DATE(result))
4546  ereturn(escontext, (Datum) 0,
4547  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4548  errmsg("date out of range: \"%s\"",
4549  text_to_cstring(date_txt))));
4550 
4551  *typid = DATEOID;
4552  return DateADTGetDatum(result);
4553  }
4554  }
4555  }
4556  else if (flags & DCH_TIMED)
4557  {
4558  if (flags & DCH_ZONED)
4559  {
4560  TimeTzADT *result = palloc(sizeof(TimeTzADT));
4561 
4562  if (ftz.has_tz)
4563  {
4564  *tz = ftz.gmtoffset;
4565  }
4566  else
4567  {
4568  /*
4569  * Time zone is present in format string, but not in input
4570  * string. Assuming do_to_timestamp() triggers no error this
4571  * should be possible only in non-strict case.
4572  */
4573  Assert(!strict);
4574 
4575  ereturn(escontext, (Datum) 0,
4576  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4577  errmsg("missing time zone in input string for type timetz")));
4578  }
4579 
4580  if (tm2timetz(&tm, fsec, *tz, result) != 0)
4581  ereturn(escontext, (Datum) 0,
4582  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4583  errmsg("timetz out of range")));
4584 
4585  AdjustTimeForTypmod(&result->time, *typmod);
4586 
4587  *typid = TIMETZOID;
4588  return TimeTzADTPGetDatum(result);
4589  }
4590  else
4591  {
4592  TimeADT result;
4593 
4594  if (tm2time(&tm, fsec, &result) != 0)
4595  ereturn(escontext, (Datum) 0,
4596  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4597  errmsg("time out of range")));
4598 
4599  AdjustTimeForTypmod(&result, *typmod);
4600 
4601  *typid = TIMEOID;
4602  return TimeADTGetDatum(result);
4603  }
4604  }
4605  else
4606  {
4607  ereturn(escontext, (Datum) 0,
4608  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4609  errmsg("datetime format is not dated and not timed")));
4610  }
4611 }
4612 
4613 /*
4614  * Parses the datetime format string in 'fmt_str' and returns true if it
4615  * contains a timezone specifier, false if not.
4616  */
4617 bool
4618 datetime_format_has_tz(const char *fmt_str)
4619 {
4620  bool incache;
4621  int fmt_len = strlen(fmt_str);
4622  int result;
4623  FormatNode *format;
4624 
4625  if (fmt_len > DCH_CACHE_SIZE)
4626  {
4627  /*
4628  * Allocate new memory if format picture is bigger than static cache
4629  * and do not use cache (call parser always)
4630  */
4631  incache = false;
4632 
4633  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4634 
4635  parse_format(format, fmt_str, DCH_keywords,
4636  DCH_suff, DCH_index, DCH_FLAG, NULL);
4637  }
4638  else
4639  {
4640  /*
4641  * Use cache buffers
4642  */
4643  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
4644 
4645  incache = true;
4646  format = ent->format;
4647  }
4648 
4649  result = DCH_datetime_type(format);
4650 
4651  if (!incache)
4652  pfree(format);
4653 
4654  return result & DCH_ZONED;
4655 }
4656 
4657 /*
4658  * do_to_timestamp: shared code for to_timestamp and to_date
4659  *
4660  * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm,
4661  * fractional seconds, struct fmt_tz, and fractional precision.
4662  *
4663  * 'collid' identifies the collation to use, if needed.
4664  * 'std' specifies standard parsing mode.
4665  *
4666  * Bit mask of date/time/zone components found in 'fmt' is returned in 'flags',
4667  * if that is not NULL.
4668  *
4669  * Returns true on success, false on failure (if escontext points to an
4670  * ErrorSaveContext; otherwise errors are thrown). Note that currently,
4671  * soft-error behavior is provided for bad data but not bad format.
4672  *
4673  * We parse 'fmt' into a list of FormatNodes, which is then passed to
4674  * DCH_from_char to populate a TmFromChar with the parsed contents of
4675  * 'date_txt'.
4676  *
4677  * The TmFromChar is then analysed and converted into the final results in
4678  * struct 'tm', 'fsec', struct 'tz', and 'fprec'.
4679  */
4680 static bool
4681 do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
4682  struct pg_tm *tm, fsec_t *fsec, struct fmt_tz *tz,
4683  int *fprec, uint32 *flags, Node *escontext)
4684 {
4685  FormatNode *format = NULL;
4686  TmFromChar tmfc;
4687  int fmt_len;
4688  char *date_str;
4689  int fmask;
4690  bool incache = false;
4691 
4692  Assert(tm != NULL);
4693  Assert(fsec != NULL);
4694 
4695  date_str = text_to_cstring(date_txt);
4696 
4697  ZERO_tmfc(&tmfc);
4698  ZERO_tm(tm);
4699  *fsec = 0;
4700  tz->has_tz = false;
4701  if (fprec)
4702  *fprec = 0;
4703  if (flags)
4704  *flags = 0;
4705  fmask = 0; /* bit mask for ValidateDate() */
4706 
4707  fmt_len = VARSIZE_ANY_EXHDR(fmt);
4708 
4709  if (fmt_len)
4710  {
4711  char *fmt_str;
4712 
4713  fmt_str = text_to_cstring(fmt);
4714 
4715  if (fmt_len > DCH_CACHE_SIZE)
4716  {
4717  /*
4718  * Allocate new memory if format picture is bigger than static
4719  * cache and do not use cache (call parser always)
4720  */
4721  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4722 
4724  DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4725  }
4726  else
4727  {
4728  /*
4729  * Use cache buffers
4730  */
4731  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std);
4732 
4733  incache = true;
4734  format = ent->format;
4735  }
4736 
4737 #ifdef DEBUG_TO_FROM_CHAR
4738  /* dump_node(format, fmt_len); */
4739  /* dump_index(DCH_keywords, DCH_index); */
4740 #endif
4741 
4742  DCH_from_char(format, date_str, &tmfc, collid, std, escontext);
4743  pfree(fmt_str);
4744  if (SOFT_ERROR_OCCURRED(escontext))
4745  goto fail;
4746 
4747  if (flags)
4748  *flags = DCH_datetime_type(format);
4749 
4750  if (!incache)
4751  {
4752  pfree(format);
4753  format = NULL;
4754  }
4755  }
4756 
4757  DEBUG_TMFC(&tmfc);
4758 
4759  /*
4760  * Convert to_date/to_timestamp input fields to standard 'tm'
4761  */
4762  if (tmfc.ssss)
4763  {
4764  int x = tmfc.ssss;
4765 
4766  tm->tm_hour = x / SECS_PER_HOUR;
4767  x %= SECS_PER_HOUR;
4768  tm->tm_min = x / SECS_PER_MINUTE;
4769  x %= SECS_PER_MINUTE;
4770  tm->tm_sec = x;
4771  }
4772 
4773  if (tmfc.ss)
4774  tm->tm_sec = tmfc.ss;
4775  if (tmfc.mi)
4776  tm->tm_min = tmfc.mi;
4777  if (tmfc.hh)
4778  tm->tm_hour = tmfc.hh;
4779 
4780  if (tmfc.clock == CLOCK_12_HOUR)
4781  {
4782  if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
4783  {
4784  errsave(escontext,
4785  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4786  errmsg("hour \"%d\" is invalid for the 12-hour clock",
4787  tm->tm_hour),
4788  errhint("Use the 24-hour clock, or give an hour between 1 and 12.")));
4789  goto fail;
4790  }
4791 
4792  if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
4793  tm->tm_hour += HOURS_PER_DAY / 2;
4794  else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
4795  tm->tm_hour = 0;
4796  }
4797 
4798  if (tmfc.year)
4799  {
4800  /*
4801  * If CC and YY (or Y) are provided, use YY as 2 low-order digits for
4802  * the year in the given century. Keep in mind that the 21st century
4803  * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
4804  * 600BC to 501BC.
4805  */
4806  if (tmfc.cc && tmfc.yysz <= 2)
4807  {
4808  if (tmfc.bc)
4809  tmfc.cc = -tmfc.cc;
4810  tm->tm_year = tmfc.year % 100;
4811  if (tm->tm_year)
4812  {
4813  if (tmfc.cc >= 0)
4814  tm->tm_year += (tmfc.cc - 1) * 100;
4815  else
4816  tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
4817  }
4818  else
4819  {
4820  /* find century year for dates ending in "00" */
4821  tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
4822  }
4823  }
4824  else
4825  {
4826  /* If a 4-digit year is provided, we use that and ignore CC. */
4827  tm->tm_year = tmfc.year;
4828  if (tmfc.bc)
4829  tm->tm_year = -tm->tm_year;
4830  /* correct for our representation of BC years */
4831  if (tm->tm_year < 0)
4832  tm->tm_year++;
4833  }
4834  fmask |= DTK_M(YEAR);
4835  }
4836  else if (tmfc.cc)
4837  {
4838  /* use first year of century */
4839  if (tmfc.bc)
4840  tmfc.cc = -tmfc.cc;
4841  if (tmfc.cc >= 0)
4842  /* +1 because 21st century started in 2001 */
4843  tm->tm_year = (tmfc.cc - 1) * 100 + 1;
4844  else
4845  /* +1 because year == -599 is 600 BC */
4846  tm->tm_year = tmfc.cc * 100 + 1;
4847  fmask |= DTK_M(YEAR);
4848  }
4849 
4850  if (tmfc.j)
4851  {
4852  j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4853  fmask |= DTK_DATE_M;
4854  }
4855 
4856  if (tmfc.ww)
4857  {
4858  if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4859  {
4860  /*
4861  * If tmfc.d is not set, then the date is left at the beginning of
4862  * the ISO week (Monday).
4863  */
4864  if (tmfc.d)
4865  isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4866  else
4867  isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4868  fmask |= DTK_DATE_M;
4869  }
4870  else
4871  tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
4872  }
4873 
4874  if (tmfc.w)
4875  tmfc.dd = (tmfc.w - 1) * 7 + 1;
4876  if (tmfc.dd)
4877  {
4878  tm->tm_mday = tmfc.dd;
4879  fmask |= DTK_M(