PostgreSQL Source Code  git master
formatting.c
Go to the documentation of this file.
1 /* -----------------------------------------------------------------------
2  * formatting.c
3  *
4  * src/backend/utils/adt/formatting.c
5  *
6  *
7  * Portions Copyright (c) 1999-2024, PostgreSQL Global Development Group
8  *
9  *
10  * TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
11  *
12  * The PostgreSQL routines for a timestamp/int/float/numeric formatting,
13  * inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
14  *
15  *
16  * Cache & Memory:
17  * The routines use their own internal cache for format pictures.
18  *
19  * The cache uses a static buffer and is persistent across transactions. If
20  * a format picture is bigger than the cache buffer, the parser is called
21  * every time.
22  *
23  * NOTE for Number version:
24  * In the number version everything is implemented as keywords (suffixes
25  * are not used), because a format picture describes only *one* item
26  * (a number). This differs from the timestamp version, where each
27  * keyword can have a suffix.
28  *
29  * NOTE for Timestamp routines:
30  * This module does *not* use the POSIX 'struct tm' type, but rather the
31  * PgSQL type, in which tm_mon is one-based (*not* zero-based) and the
32  * year is *not* an offset from 1900 but the full year number.
33  * Module supports AD / BC / AM / PM.
34  *
35  * Supported types for to_char():
36  *
37  * Timestamp, Numeric, int4, int8, float4, float8
38  *
39  * Supported types for reverse conversion:
40  *
41  * Timestamp - to_timestamp()
42  * Date - to_date()
43  * Numeric - to_number()
44  *
45  *
46  * Karel Zak
47  *
48  * TODO
49  * - better number building (formatting) / parsing, now it isn't
50  * ideal code
51  * - use Assert()
52  * - add support for roman number to standard number conversion
53  * - add support for number spelling
54  * - add support for string to string formatting (we must be better
55  * than Oracle :-),
56  * to_char('Hello', 'X X X X X') -> 'H e l l o'
57  *
58  * -----------------------------------------------------------------------
59  */
60 
61 #ifdef DEBUG_TO_FROM_CHAR
62 #define DEBUG_elog_output DEBUG3
63 #endif
64 
65 #include "postgres.h"
66 
67 #include <ctype.h>
68 #include <unistd.h>
69 #include <math.h>
70 #include <float.h>
71 #include <limits.h>
72 #include <wctype.h>
73 
74 #ifdef USE_ICU
75 #include <unicode/ustring.h>
76 #endif
77 
78 #include "catalog/pg_collation.h"
79 #include "catalog/pg_type.h"
80 #include "mb/pg_wchar.h"
81 #include "nodes/miscnodes.h"
82 #include "parser/scansup.h"
83 #include "utils/builtins.h"
84 #include "utils/date.h"
85 #include "utils/datetime.h"
86 #include "utils/float.h"
87 #include "utils/formatting.h"
88 #include "utils/memutils.h"
89 #include "utils/numeric.h"
90 #include "utils/pg_locale.h"
91 #include "varatt.h"
92 
93 
94 /* ----------
95  * Routines flags
96  * ----------
97  */
98 #define DCH_FLAG 0x1 /* DATE-TIME flag */
99 #define NUM_FLAG 0x2 /* NUMBER flag */
100 #define STD_FLAG 0x4 /* STANDARD flag */
101 
/* ----------
 * KeyWord index
 *
 * The index arrays have one slot per ASCII code from 32 (' ') up to, but
 * not including, 126 ('~'); a character c maps to slot (c - ' ').
 * KeyWord_INDEX_FILTER() yields 1 only for characters strictly between
 * ' ' and '~', i.e. the ones that may be used to look up the index, and
 * 0 for everything else.
 * ----------
 */
#define KeyWord_INDEX_SIZE		('~' - ' ')
#define KeyWord_INDEX_FILTER(_c)	((_c) > ' ' && (_c) < '~' ? 1 : 0)
108 
109 /* ----------
110  * Maximal length of one node
111  * ----------
112  */
113 #define DCH_MAX_ITEM_SIZ 12 /* max localized day name */
114 #define NUM_MAX_ITEM_SIZ 8 /* roman number (RN has 15 chars) */
115 
116 
/* ----------
 * Format parser structs
 * ----------
 */

/* One entry of a keyword prefix/suffix table, as scanned by suff_search() */
typedef struct
{
	const char *name;			/* suffix string */
	int			len;			/* suffix length */
	int			id;				/* used in node->suffix */
	int			type;			/* prefix / postfix */
} KeySuffix;
128 
129 /* ----------
130  * FromCharDateMode
131  * ----------
132  *
133  * This value is used to nominate one of several distinct (and mutually
134  * exclusive) date conventions that a keyword can belong to.
135  */
136 typedef enum
137 {
138  FROM_CHAR_DATE_NONE = 0, /* Value does not affect date mode. */
139  FROM_CHAR_DATE_GREGORIAN, /* Gregorian (day, month, year) style date */
140  FROM_CHAR_DATE_ISOWEEK, /* ISO 8601 week date */
142 
143 typedef struct
144 {
145  const char *name;
146  int len;
147  int id;
148  bool is_digit;
150 } KeyWord;
151 
152 typedef struct
153 {
154  uint8 type; /* NODE_TYPE_XXX, see below */
155  char character[MAX_MULTIBYTE_CHAR_LEN + 1]; /* if type is CHAR */
156  uint8 suffix; /* keyword prefix/suffix code, if any */
157  const KeyWord *key; /* if type is ACTION */
158 } FormatNode;
159 
160 #define NODE_TYPE_END 1
161 #define NODE_TYPE_ACTION 2
162 #define NODE_TYPE_CHAR 3
163 #define NODE_TYPE_SEPARATOR 4
164 #define NODE_TYPE_SPACE 5
165 
166 #define SUFFTYPE_PREFIX 1
167 #define SUFFTYPE_POSTFIX 2
168 
169 #define CLOCK_24_HOUR 0
170 #define CLOCK_12_HOUR 1
171 
172 
173 /* ----------
174  * Full months
175  * ----------
176  */
177 static const char *const months_full[] = {
178  "January", "February", "March", "April", "May", "June", "July",
179  "August", "September", "October", "November", "December", NULL
180 };
181 
182 static const char *const days_short[] = {
183  "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
184 };
185 
/* ----------
 * AD / BC
 * ----------
 * There is no year zero in AD/BC notation: 1 BC is followed directly by
 * 1 AD.  A non-positive year is therefore turned into a positive year
 * number (0 becomes 1, -1 becomes 2, and so on), while positive years are
 * returned unchanged.  For interval years, we just return the year.
 */
#define ADJUST_YEAR(year, is_interval) \
	((is_interval) ? (year) : ((year) > 0 ? (year) : 1 - (year)))
194 
195 #define A_D_STR "A.D."
196 #define a_d_STR "a.d."
197 #define AD_STR "AD"
198 #define ad_STR "ad"
199 
200 #define B_C_STR "B.C."
201 #define b_c_STR "b.c."
202 #define BC_STR "BC"
203 #define bc_STR "bc"
204 
205 /*
206  * AD / BC strings for seq_search.
207  *
208  * These are given in two variants, a long form with periods and a standard
209  * form without.
210  *
211  * The array is laid out such that matches for AD have an even index, and
212  * matches for BC have an odd index. So the boolean value for BC is given by
213  * taking the array index of the match, modulo 2.
214  */
215 static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR, NULL};
216 static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR, NULL};
217 
218 /* ----------
219  * AM / PM
220  * ----------
221  */
222 #define A_M_STR "A.M."
223 #define a_m_STR "a.m."
224 #define AM_STR "AM"
225 #define am_STR "am"
226 
227 #define P_M_STR "P.M."
228 #define p_m_STR "p.m."
229 #define PM_STR "PM"
230 #define pm_STR "pm"
231 
232 /*
233  * AM / PM strings for seq_search.
234  *
235  * These are given in two variants, a long form with periods and a standard
236  * form without.
237  *
238  * The array is laid out such that matches for AM have an even index, and
239  * matches for PM have an odd index. So the boolean value for PM is given by
240  * taking the array index of the match, modulo 2.
241  */
242 static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR, NULL};
243 static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR, NULL};
244 
245 /* ----------
246  * Months in roman-numeral
247  * (Must be in reverse order for seq_search (in FROM_CHAR), because
248  * 'VIII' must have higher precedence than 'V')
249  * ----------
250  */
251 static const char *const rm_months_upper[] =
252 {"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I", NULL};
253 
254 static const char *const rm_months_lower[] =
255 {"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL};
256 
257 /* ----------
258  * Roman numbers
259  * ----------
260  */
261 static const char *const rm1[] = {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", NULL};
262 static const char *const rm10[] = {"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC", NULL};
263 static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", NULL};
264 
265 /* ----------
266  * Ordinal postfixes
267  * ----------
268  */
269 static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL};
270 static const char *const numth[] = {"st", "nd", "rd", "th", NULL};
271 
272 /* ----------
273  * Flags & Options:
274  * ----------
275  */
276 #define TH_UPPER 1
277 #define TH_LOWER 2
278 
279 /* ----------
280  * Number description struct
281  * ----------
282  */
283 typedef struct
284 {
285  int pre, /* (count) numbers before decimal */
286  post, /* (count) numbers after decimal */
287  lsign, /* want locales sign */
288  flag, /* number parameters */
289  pre_lsign_num, /* tmp value for lsign */
290  multi, /* multiplier for 'V' */
291  zero_start, /* position of first zero */
292  zero_end, /* position of last zero */
293  need_locale; /* needs it locale */
294 } NUMDesc;
295 
296 /* ----------
297  * Flags for NUMBER version
298  * ----------
299  */
300 #define NUM_F_DECIMAL (1 << 1)
301 #define NUM_F_LDECIMAL (1 << 2)
302 #define NUM_F_ZERO (1 << 3)
303 #define NUM_F_BLANK (1 << 4)
304 #define NUM_F_FILLMODE (1 << 5)
305 #define NUM_F_LSIGN (1 << 6)
306 #define NUM_F_BRACKET (1 << 7)
307 #define NUM_F_MINUS (1 << 8)
308 #define NUM_F_PLUS (1 << 9)
309 #define NUM_F_ROMAN (1 << 10)
310 #define NUM_F_MULTI (1 << 11)
311 #define NUM_F_PLUS_POST (1 << 12)
312 #define NUM_F_MINUS_POST (1 << 13)
313 #define NUM_F_EEEE (1 << 14)
314 
315 #define NUM_LSIGN_PRE (-1)
316 #define NUM_LSIGN_POST 1
317 #define NUM_LSIGN_NONE 0
318 
319 /* ----------
320  * Tests
321  * ----------
322  */
323 #define IS_DECIMAL(_f) ((_f)->flag & NUM_F_DECIMAL)
324 #define IS_LDECIMAL(_f) ((_f)->flag & NUM_F_LDECIMAL)
325 #define IS_ZERO(_f) ((_f)->flag & NUM_F_ZERO)
326 #define IS_BLANK(_f) ((_f)->flag & NUM_F_BLANK)
327 #define IS_FILLMODE(_f) ((_f)->flag & NUM_F_FILLMODE)
328 #define IS_BRACKET(_f) ((_f)->flag & NUM_F_BRACKET)
329 #define IS_MINUS(_f) ((_f)->flag & NUM_F_MINUS)
330 #define IS_LSIGN(_f) ((_f)->flag & NUM_F_LSIGN)
331 #define IS_PLUS(_f) ((_f)->flag & NUM_F_PLUS)
332 #define IS_ROMAN(_f) ((_f)->flag & NUM_F_ROMAN)
333 #define IS_MULTI(_f) ((_f)->flag & NUM_F_MULTI)
334 #define IS_EEEE(_f) ((_f)->flag & NUM_F_EEEE)
335 
336 /* ----------
337  * Format picture cache
338  *
339  * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long;
340  * likewise number format pictures up to NUM_CACHE_SIZE bytes long.
341  *
342  * For simplicity, the cache entries are fixed-size, so they allow for the
343  * worst case of a FormatNode for each byte in the picture string.
344  *
345  * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and
346  * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that
347  * we don't waste too much space by palloc'ing them individually. Be sure
348  * to adjust those macros if you add fields to those structs.
349  *
350  * The max number of entries in each cache is DCH_CACHE_ENTRIES
351  * resp. NUM_CACHE_ENTRIES.
352  * ----------
353  */
354 #define DCH_CACHE_OVERHEAD \
355  MAXALIGN(sizeof(bool) + sizeof(int))
356 #define NUM_CACHE_OVERHEAD \
357  MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc))
358 
359 #define DCH_CACHE_SIZE \
360  ((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
361 #define NUM_CACHE_SIZE \
362  ((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
363 
364 #define DCH_CACHE_ENTRIES 20
365 #define NUM_CACHE_ENTRIES 20
366 
367 typedef struct
368 {
370  char str[DCH_CACHE_SIZE + 1];
371  bool std;
372  bool valid;
373  int age;
374 } DCHCacheEntry;
375 
376 typedef struct
377 {
379  char str[NUM_CACHE_SIZE + 1];
380  bool valid;
381  int age;
383 } NUMCacheEntry;
384 
385 /* global cache for date/time format pictures */
387 static int n_DCHCache = 0; /* current number of entries */
388 static int DCHCounter = 0; /* aging-event counter */
389 
390 /* global cache for number format pictures */
392 static int n_NUMCache = 0; /* current number of entries */
393 static int NUMCounter = 0; /* aging-event counter */
394 
395 /* ----------
396  * For char->date/time conversion
397  * ----------
398  */
399 typedef struct
400 {
402  int hh,
403  pm,
404  mi,
405  ss,
407  d, /* stored as 1-7, Sunday = 1, 0 means missing */
408  dd,
410  mm,
411  ms,
413  bc,
414  ww,
415  w,
416  cc,
417  j,
418  us,
419  yysz, /* is it YY or YYYY ? */
420  clock, /* 12 or 24 hour clock? */
421  tzsign, /* +1, -1, or 0 if no TZH/TZM fields */
424  ff; /* fractional precision */
425  bool has_tz; /* was there a TZ field? */
426  int gmtoffset; /* GMT offset of fixed-offset zone abbrev */
427  pg_tz *tzp; /* pg_tz for dynamic abbrev */
428  char *abbrev; /* dynamic abbrev */
429 } TmFromChar;
430 
431 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
432 
433 struct fmt_tz /* do_to_timestamp's timezone info output */
434 {
435  bool has_tz; /* was there any TZ/TZH/TZM field? */
436  int gmtoffset; /* GMT offset in seconds */
437 };
438 
/* ----------
 * Debug
 *
 * When DEBUG_TO_FROM_CHAR is defined, DEBUG_TMFC() and DEBUG_TM() dump the
 * listed fields of the struct they are given through elog() at level
 * DEBUG_elog_output; otherwise both expand to nothing.
 *
 * NOTE(review): every field is printed with %d, so these macros presumably
 * expect structs whose dumped members are all plain int -- confirm against
 * the callers before enabling the debug build.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR
#define DEBUG_TMFC(_X) \
		elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d", \
			(_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
			(_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
			(_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
			(_X)->yysz, (_X)->clock)
/* The "isdst" label used to be misspelled as "nisdst" in the output. */
#define DEBUG_TM(_X) \
		elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nisdst %d\nmon %d\n",\
			(_X)->tm_sec, (_X)->tm_year,\
			(_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\
			(_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
#else
#define DEBUG_TMFC(_X)
#define DEBUG_TM(_X)
#endif
459 
460 /* ----------
461  * Datetime to char conversion
462  *
463  * To support intervals as well as timestamps, we use a custom "tm" struct
464  * that is almost like struct pg_tm, but has a 64-bit tm_hour field.
465  * We omit the tm_isdst and tm_zone fields, which are not used here.
466  * ----------
467  */
468 struct fmt_tm
469 {
470  int tm_sec;
471  int tm_min;
472  int64 tm_hour;
473  int tm_mday;
474  int tm_mon;
475  int tm_year;
476  int tm_wday;
477  int tm_yday;
478  long int tm_gmtoff;
479 };
480 
481 typedef struct TmToChar
482 {
483  struct fmt_tm tm; /* almost the classic 'tm' struct */
484  fsec_t fsec; /* fractional seconds */
485  const char *tzn; /* timezone */
487 
488 #define tmtcTm(_X) (&(_X)->tm)
489 #define tmtcTzn(_X) ((_X)->tzn)
490 #define tmtcFsec(_X) ((_X)->fsec)
491 
492 /* Note: this is used to copy pg_tm to fmt_tm, so not quite a bitwise copy */
493 #define COPY_tm(_DST, _SRC) \
494 do { \
495  (_DST)->tm_sec = (_SRC)->tm_sec; \
496  (_DST)->tm_min = (_SRC)->tm_min; \
497  (_DST)->tm_hour = (_SRC)->tm_hour; \
498  (_DST)->tm_mday = (_SRC)->tm_mday; \
499  (_DST)->tm_mon = (_SRC)->tm_mon; \
500  (_DST)->tm_year = (_SRC)->tm_year; \
501  (_DST)->tm_wday = (_SRC)->tm_wday; \
502  (_DST)->tm_yday = (_SRC)->tm_yday; \
503  (_DST)->tm_gmtoff = (_SRC)->tm_gmtoff; \
504 } while(0)
505 
506 /* Caution: this is used to zero both pg_tm and fmt_tm structs */
507 #define ZERO_tm(_X) \
508 do { \
509  memset(_X, 0, sizeof(*(_X))); \
510  (_X)->tm_mday = (_X)->tm_mon = 1; \
511 } while(0)
512 
513 #define ZERO_tmtc(_X) \
514 do { \
515  ZERO_tm( tmtcTm(_X) ); \
516  tmtcFsec(_X) = 0; \
517  tmtcTzn(_X) = NULL; \
518 } while(0)
519 
520 /*
521  * to_char(time) appears to to_char() as an interval, so this check
522  * is really for interval and time data types.
523  */
524 #define INVALID_FOR_INTERVAL \
525 do { \
526  if (is_interval) \
527  ereport(ERROR, \
528  (errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
529  errmsg("invalid format specification for an interval value"), \
530  errhint("Intervals are not tied to specific calendar dates."))); \
531 } while(0)
532 
533 /*****************************************************************************
534  * KeyWord definitions
535  *****************************************************************************/
536 
/* ----------
 * Suffixes (FormatNode.suffix is an OR of these codes)
 * ----------
 */
#define DCH_S_FM	0x01
#define DCH_S_TH	0x02
#define DCH_S_th	0x04
#define DCH_S_SP	0x08
#define DCH_S_TM	0x10

/* ----------
 * Suffix tests
 *
 * Each test takes a suffix bitmask and yields 1 if the corresponding
 * suffix code is set and 0 otherwise; S_THth() is true if either DCH_S_TH
 * or DCH_S_th is set.  S_TH_TYPE() instead yields TH_UPPER when DCH_S_TH
 * is set and TH_LOWER otherwise.
 *
 * Every macro evaluates its argument exactly once, so an argument with
 * side effects is safe.
 * ----------
 */
#define S_THth(_s)	(((_s) & (DCH_S_TH | DCH_S_th)) ? 1 : 0)
#define S_TH(_s)	(((_s) & DCH_S_TH) ? 1 : 0)
#define S_th(_s)	(((_s) & DCH_S_th) ? 1 : 0)
#define S_TH_TYPE(_s)	(((_s) & DCH_S_TH) ? TH_UPPER : TH_LOWER)

/* Oracle toggles FM behavior, we don't; see docs. */
#define S_FM(_s)	(((_s) & DCH_S_FM) ? 1 : 0)
#define S_SP(_s)	(((_s) & DCH_S_SP) ? 1 : 0)
#define S_TM(_s)	(((_s) & DCH_S_TM) ? 1 : 0)
560 
561 /* ----------
562  * Suffixes definition for DATE-TIME TO/FROM CHAR
563  * ----------
564  */
565 #define TM_SUFFIX_LEN 2
566 
567 static const KeySuffix DCH_suff[] = {
568  {"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
569  {"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
571  {"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
572  {"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
573  {"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
574  {"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
575  /* last */
576  {NULL, 0, 0, 0}
577 };
578 
579 
580 /* ----------
581  * Format-pictures (KeyWord).
582  *
583  * The KeyWord array is sorted alphabetically, *BUT* a longer keyword
584  * always precedes any shorter keyword that is a prefix of it:
585  *
586  * (example: "DDD","DD","Day","D" )
587  *
588  * (This ordering is required by the sequential search, because the input
589  * string has no exact end. Which keyword is in "HH12blabla": "HH" or
590  * "HH12"? "HH12" must be tried first, because "HH" also matches the
591  * string but is the wrong answer.)
592  *
593  * (!)
594  * - A keyword's position in the array must match its position in the enum DCH/NUM_poz.
595  * (!)
596  *
597  * For fast search the 'int index[]' array is used. It is an ASCII table
598  * covering positions 32 (' ') to 126 (~) that holds, for each ASCII
599  * character, a DCH_ / NUM_ enum value, or -1 if no keyword starts with that
600  * character. Search example for the string "MM":
601  * 1) look up index['M' - 32],
602  * 2) take the keyword position (enum DCH_MI) from the index,
603  * 3) run a sequential search in keywords[] from this position.
604  *
605  * ----------
606  */
607 
608 typedef enum
609 {
630  DCH_FX, /* global suffix */
722 
723  /* last */
724  _DCH_last_
726 
727 typedef enum
728 {
765 
766  /* last */
767  _NUM_last_
769 
770 /* ----------
771  * KeyWords for DATE-TIME version
772  * ----------
773  */
774 static const KeyWord DCH_keywords[] = {
775 /* name, len, id, is_digit, date_mode */
776  {"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE}, /* A */
777  {"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
778  {"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
779  {"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
780  {"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE}, /* B */
781  {"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
782  {"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* C */
783  {"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE}, /* D */
784  {"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
785  {"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
786  {"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
787  {"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
788  {"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
789  {"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
790  {"FF1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* F */
791  {"FF2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
792  {"FF3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
793  {"FF4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
794  {"FF5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
795  {"FF6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
796  {"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
797  {"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* H */
798  {"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
799  {"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
800  {"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* I */
801  {"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
802  {"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
803  {"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
804  {"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
805  {"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
806  {"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
807  {"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
808  {"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* M */
809  {"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
810  {"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
811  {"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
812  {"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
813  {"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
814  {"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
815  {"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* O */
816  {"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE}, /* P */
817  {"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
818  {"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
819  {"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
820  {"SSSSS", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* S */
821  {"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
822  {"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
823  {"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* T */
824  {"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
825  {"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
826  {"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* U */
827  {"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* W */
828  {"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
829  {"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* Y */
830  {"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
831  {"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
832  {"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
833  {"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
834  {"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE}, /* a */
835  {"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
836  {"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
837  {"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
838  {"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE}, /* b */
839  {"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
840  {"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* c */
841  {"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE}, /* d */
842  {"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
843  {"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
844  {"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
845  {"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
846  {"ff1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* f */
847  {"ff2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
848  {"ff3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
849  {"ff4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
850  {"ff5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
851  {"ff6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
852  {"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
853  {"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* h */
854  {"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
855  {"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
856  {"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* i */
857  {"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
858  {"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
859  {"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
860  {"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
861  {"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
862  {"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
863  {"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
864  {"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* m */
865  {"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
866  {"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
867  {"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
868  {"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
869  {"of", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* o */
870  {"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE}, /* p */
871  {"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
872  {"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
873  {"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
874  {"sssss", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* s */
875  {"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
876  {"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
877  {"tzh", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* t */
878  {"tzm", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
879  {"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE},
880  {"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* u */
881  {"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* w */
882  {"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
883  {"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* y */
884  {"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
885  {"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
886  {"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
887  {"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
888 
889  /* last */
890  {NULL, 0, 0, 0, 0}
891 };
892 
893 /* ----------
894  * KeyWords for NUMBER version
895  *
896  * The is_digit and date_mode fields are not relevant here.
897  * ----------
898  */
899 static const KeyWord NUM_keywords[] = {
900 /* name, len, id is in Index */
901  {",", 1, NUM_COMMA}, /* , */
902  {".", 1, NUM_DEC}, /* . */
903  {"0", 1, NUM_0}, /* 0 */
904  {"9", 1, NUM_9}, /* 9 */
905  {"B", 1, NUM_B}, /* B */
906  {"C", 1, NUM_C}, /* C */
907  {"D", 1, NUM_D}, /* D */
908  {"EEEE", 4, NUM_E}, /* E */
909  {"FM", 2, NUM_FM}, /* F */
910  {"G", 1, NUM_G}, /* G */
911  {"L", 1, NUM_L}, /* L */
912  {"MI", 2, NUM_MI}, /* M */
913  {"PL", 2, NUM_PL}, /* P */
914  {"PR", 2, NUM_PR},
915  {"RN", 2, NUM_RN}, /* R */
916  {"SG", 2, NUM_SG}, /* S */
917  {"SP", 2, NUM_SP},
918  {"S", 1, NUM_S},
919  {"TH", 2, NUM_TH}, /* T */
920  {"V", 1, NUM_V}, /* V */
921  {"b", 1, NUM_B}, /* b */
922  {"c", 1, NUM_C}, /* c */
923  {"d", 1, NUM_D}, /* d */
924  {"eeee", 4, NUM_E}, /* e */
925  {"fm", 2, NUM_FM}, /* f */
926  {"g", 1, NUM_G}, /* g */
927  {"l", 1, NUM_L}, /* l */
928  {"mi", 2, NUM_MI}, /* m */
929  {"pl", 2, NUM_PL}, /* p */
930  {"pr", 2, NUM_PR},
931  {"rn", 2, NUM_rn}, /* r */
932  {"sg", 2, NUM_SG}, /* s */
933  {"sp", 2, NUM_SP},
934  {"s", 1, NUM_S},
935  {"th", 2, NUM_th}, /* t */
936  {"v", 1, NUM_V}, /* v */
937 
938  /* last */
939  {NULL, 0, 0}
940 };
941 
942 
943 /* ----------
944  * KeyWords index for DATE-TIME version
945  * ----------
946  */
947 static const int DCH_index[KeyWord_INDEX_SIZE] = {
948 /*
949 0 1 2 3 4 5 6 7 8 9
950 */
951  /*---- first 0..31 chars are skipped ----*/
952 
953  -1, -1, -1, -1, -1, -1, -1, -1,
954  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
955  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
956  -1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,
957  DCH_FF1, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF,
959  -1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,
960  DCH_day, -1, DCH_ff1, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,
962  -1, DCH_y_yyy, -1, -1, -1, -1
963 
964  /*---- chars over 126 are skipped ----*/
965 };
966 
967 /* ----------
968  * KeyWords index for NUMBER version
969  * ----------
970  */
971 static const int NUM_index[KeyWord_INDEX_SIZE] = {
972 /*
973 0 1 2 3 4 5 6 7 8 9
974 */
975  /*---- first 0..31 chars are skipped ----*/
976 
977  -1, -1, -1, -1, -1, -1, -1, -1,
978  -1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,
979  -1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,
980  -1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E,
981  NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,
982  NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,
983  -1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,
984  NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi,
985  -1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,
986  -1, -1, -1, -1, -1, -1
987 
988  /*---- chars over 126 are skipped ----*/
989 };
990 
991 /* ----------
992  * Number processor struct
993  * ----------
994  */
995 typedef struct NUMProc
996 {
998  NUMDesc *Num; /* number description */
999 
1000  int sign, /* '-' or '+' */
1001  sign_wrote, /* was sign write */
1002  num_count, /* number of write digits */
1003  num_in, /* is inside number */
1004  num_curr, /* current position in number */
1005  out_pre_spaces, /* spaces before first digit */
1006 
1007  read_dec, /* to_number - was read dec. point */
1008  read_post, /* to_number - number of dec. digit */
1009  read_pre; /* to_number - number non-dec. digit */
1010 
1011  char *number, /* string with number */
1012  *number_p, /* pointer to current number position */
1013  *inout, /* in / out buffer */
1014  *inout_p, /* pointer to current inout position */
1015  *last_relevant, /* last relevant number after decimal point */
1016 
1017  *L_negative_sign, /* Locale */
1023 
1024 /* Return flags for DCH_from_char() */
1025 #define DCH_DATED 0x01
1026 #define DCH_TIMED 0x02
1027 #define DCH_ZONED 0x04
1028 
1029 /* ----------
1030  * Functions
1031  * ----------
1032  */
1033 static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
1034  const int *index);
1035 static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
1036 static bool is_separator_char(const char *str);
1037 static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
1038 static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1039  const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num);
1040 
1041 static void DCH_to_char(FormatNode *node, bool is_interval,
1042  TmToChar *in, char *out, Oid collid);
1043 static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
1044  Oid collid, bool std, Node *escontext);
1045 
1046 #ifdef DEBUG_TO_FROM_CHAR
1047 static void dump_index(const KeyWord *k, const int *index);
1048 static void dump_node(FormatNode *node, int max);
1049 #endif
1050 
1051 static const char *get_th(char *num, int type);
1052 static char *str_numth(char *dest, char *num, int type);
1053 static int adjust_partial_year_to_2020(int year);
1054 static int strspace_len(const char *str);
1055 static bool from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode,
1056  Node *escontext);
1057 static bool from_char_set_int(int *dest, const int value, const FormatNode *node,
1058  Node *escontext);
1059 static int from_char_parse_int_len(int *dest, const char **src, const int len,
1060  FormatNode *node, Node *escontext);
1061 static int from_char_parse_int(int *dest, const char **src, FormatNode *node,
1062  Node *escontext);
1063 static int seq_search_ascii(const char *name, const char *const *array, int *len);
1064 static int seq_search_localized(const char *name, char **array, int *len,
1065  Oid collid);
1066 static bool from_char_seq_search(int *dest, const char **src,
1067  const char *const *array,
1068  char **localized_array, Oid collid,
1069  FormatNode *node, Node *escontext);
1070 static bool do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
1071  struct pg_tm *tm, fsec_t *fsec, struct fmt_tz *tz,
1072  int *fprec, uint32 *flags, Node *escontext);
1073 static char *fill_str(char *str, int c, int max);
1074 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
1075 static char *int_to_roman(int number);
1076 static void NUM_prepare_locale(NUMProc *Np);
1077 static char *get_last_relevant_decnum(char *num);
1078 static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
1079 static void NUM_numpart_to_char(NUMProc *Np, int id);
1080 static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
1081  char *number, int input_len, int to_char_out_pre_spaces,
1082  int sign, bool is_to_char, Oid collid);
1083 static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std);
1084 static DCHCacheEntry *DCH_cache_search(const char *str, bool std);
1085 static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std);
1086 static NUMCacheEntry *NUM_cache_getnew(const char *str);
1087 static NUMCacheEntry *NUM_cache_search(const char *str);
1088 static NUMCacheEntry *NUM_cache_fetch(const char *str);
1089 
1090 
1091 /* ----------
1092  * Fast sequential search, use index for data selection which
1093  * go to seq. cycle (it is very fast for unwanted strings)
1094  * (can't be used binary search in format parsing)
1095  * ----------
1096  */
1097 static const KeyWord *
1098 index_seq_search(const char *str, const KeyWord *kw, const int *index)
1099 {
1100  int poz;
1101 
1102  if (!KeyWord_INDEX_FILTER(*str))
1103  return NULL;
1104 
1105  if ((poz = *(index + (*str - ' '))) > -1)
1106  {
1107  const KeyWord *k = kw + poz;
1108 
1109  do
1110  {
1111  if (strncmp(str, k->name, k->len) == 0)
1112  return k;
1113  k++;
1114  if (!k->name)
1115  return NULL;
1116  } while (*str == *k->name);
1117  }
1118  return NULL;
1119 }
1120 
1121 static const KeySuffix *
1122 suff_search(const char *str, const KeySuffix *suf, int type)
1123 {
1124  const KeySuffix *s;
1125 
1126  for (s = suf; s->name != NULL; s++)
1127  {
1128  if (s->type != type)
1129  continue;
1130 
1131  if (strncmp(str, s->name, s->len) == 0)
1132  return s;
1133  }
1134  return NULL;
1135 }
1136 
/*
 * Report whether the first byte of 'str' is a separator character, i.e. a
 * printable ASCII character (0x21 .. 0x7E) that is neither a letter nor a
 * digit.  Only the first byte is examined.
 */
static bool
is_separator_char(const char *str)
{
	/* ASCII printable character, but not letter or digit */
	return (*str > 0x20 && *str < 0x7F &&
			!(*str >= 'A' && *str <= 'Z') &&
			!(*str >= 'a' && *str <= 'z') &&
			!(*str >= '0' && *str <= '9'));
}
1146 
1147 /* ----------
1148  * Prepare NUMDesc (number description struct) via FormatNode struct
1149  * ----------
1150  */
1151 static void
1153 {
1154  if (n->type != NODE_TYPE_ACTION)
1155  return;
1156 
1157  if (IS_EEEE(num) && n->key->id != NUM_E)
1158  ereport(ERROR,
1159  (errcode(ERRCODE_SYNTAX_ERROR),
1160  errmsg("\"EEEE\" must be the last pattern used")));
1161 
1162  switch (n->key->id)
1163  {
1164  case NUM_9:
1165  if (IS_BRACKET(num))
1166  ereport(ERROR,
1167  (errcode(ERRCODE_SYNTAX_ERROR),
1168  errmsg("\"9\" must be ahead of \"PR\"")));
1169  if (IS_MULTI(num))
1170  {
1171  ++num->multi;
1172  break;
1173  }
1174  if (IS_DECIMAL(num))
1175  ++num->post;
1176  else
1177  ++num->pre;
1178  break;
1179 
1180  case NUM_0:
1181  if (IS_BRACKET(num))
1182  ereport(ERROR,
1183  (errcode(ERRCODE_SYNTAX_ERROR),
1184  errmsg("\"0\" must be ahead of \"PR\"")));
1185  if (!IS_ZERO(num) && !IS_DECIMAL(num))
1186  {
1187  num->flag |= NUM_F_ZERO;
1188  num->zero_start = num->pre + 1;
1189  }
1190  if (!IS_DECIMAL(num))
1191  ++num->pre;
1192  else
1193  ++num->post;
1194 
1195  num->zero_end = num->pre + num->post;
1196  break;
1197 
1198  case NUM_B:
1199  if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
1200  num->flag |= NUM_F_BLANK;
1201  break;
1202 
1203  case NUM_D:
1204  num->flag |= NUM_F_LDECIMAL;
1205  num->need_locale = true;
1206  /* FALLTHROUGH */
1207  case NUM_DEC:
1208  if (IS_DECIMAL(num))
1209  ereport(ERROR,
1210  (errcode(ERRCODE_SYNTAX_ERROR),
1211  errmsg("multiple decimal points")));
1212  if (IS_MULTI(num))
1213  ereport(ERROR,
1214  (errcode(ERRCODE_SYNTAX_ERROR),
1215  errmsg("cannot use \"V\" and decimal point together")));
1216  num->flag |= NUM_F_DECIMAL;
1217  break;
1218 
1219  case NUM_FM:
1220  num->flag |= NUM_F_FILLMODE;
1221  break;
1222 
1223  case NUM_S:
1224  if (IS_LSIGN(num))
1225  ereport(ERROR,
1226  (errcode(ERRCODE_SYNTAX_ERROR),
1227  errmsg("cannot use \"S\" twice")));
1228  if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
1229  ereport(ERROR,
1230  (errcode(ERRCODE_SYNTAX_ERROR),
1231  errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
1232  if (!IS_DECIMAL(num))
1233  {
1234  num->lsign = NUM_LSIGN_PRE;
1235  num->pre_lsign_num = num->pre;
1236  num->need_locale = true;
1237  num->flag |= NUM_F_LSIGN;
1238  }
1239  else if (num->lsign == NUM_LSIGN_NONE)
1240  {
1241  num->lsign = NUM_LSIGN_POST;
1242  num->need_locale = true;
1243  num->flag |= NUM_F_LSIGN;
1244  }
1245  break;
1246 
1247  case NUM_MI:
1248  if (IS_LSIGN(num))
1249  ereport(ERROR,
1250  (errcode(ERRCODE_SYNTAX_ERROR),
1251  errmsg("cannot use \"S\" and \"MI\" together")));
1252  num->flag |= NUM_F_MINUS;
1253  if (IS_DECIMAL(num))
1254  num->flag |= NUM_F_MINUS_POST;
1255  break;
1256 
1257  case NUM_PL:
1258  if (IS_LSIGN(num))
1259  ereport(ERROR,
1260  (errcode(ERRCODE_SYNTAX_ERROR),
1261  errmsg("cannot use \"S\" and \"PL\" together")));
1262  num->flag |= NUM_F_PLUS;
1263  if (IS_DECIMAL(num))
1264  num->flag |= NUM_F_PLUS_POST;
1265  break;
1266 
1267  case NUM_SG:
1268  if (IS_LSIGN(num))
1269  ereport(ERROR,
1270  (errcode(ERRCODE_SYNTAX_ERROR),
1271  errmsg("cannot use \"S\" and \"SG\" together")));
1272  num->flag |= NUM_F_MINUS;
1273  num->flag |= NUM_F_PLUS;
1274  break;
1275 
1276  case NUM_PR:
1277  if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
1278  ereport(ERROR,
1279  (errcode(ERRCODE_SYNTAX_ERROR),
1280  errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
1281  num->flag |= NUM_F_BRACKET;
1282  break;
1283 
1284  case NUM_rn:
1285  case NUM_RN:
1286  num->flag |= NUM_F_ROMAN;
1287  break;
1288 
1289  case NUM_L:
1290  case NUM_G:
1291  num->need_locale = true;
1292  break;
1293 
1294  case NUM_V:
1295  if (IS_DECIMAL(num))
1296  ereport(ERROR,
1297  (errcode(ERRCODE_SYNTAX_ERROR),
1298  errmsg("cannot use \"V\" and decimal point together")));
1299  num->flag |= NUM_F_MULTI;
1300  break;
1301 
1302  case NUM_E:
1303  if (IS_EEEE(num))
1304  ereport(ERROR,
1305  (errcode(ERRCODE_SYNTAX_ERROR),
1306  errmsg("cannot use \"EEEE\" twice")));
1307  if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
1308  IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
1309  IS_ROMAN(num) || IS_MULTI(num))
1310  ereport(ERROR,
1311  (errcode(ERRCODE_SYNTAX_ERROR),
1312  errmsg("\"EEEE\" is incompatible with other formats"),
1313  errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
1314  num->flag |= NUM_F_EEEE;
1315  break;
1316  }
1317 }
1318 
1319 /* ----------
1320  * Format parser, search small keywords and keyword's suffixes, and make
1321  * format-node tree.
1322  *
1323  * for DATE-TIME & NUMBER version
1324  * ----------
1325  */
1326 static void
1327 parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1328  const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num)
1329 {
1330  FormatNode *n;
1331 
1332 #ifdef DEBUG_TO_FROM_CHAR
1333  elog(DEBUG_elog_output, "to_char/number(): run parser");
1334 #endif
1335 
1336  n = node;
1337 
1338  while (*str)
1339  {
1340  int suffix = 0;
1341  const KeySuffix *s;
1342 
1343  /*
1344  * Prefix
1345  */
1346  if ((flags & DCH_FLAG) &&
1347  (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
1348  {
1349  suffix |= s->id;
1350  if (s->len)
1351  str += s->len;
1352  }
1353 
1354  /*
1355  * Keyword
1356  */
1357  if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
1358  {
1359  n->type = NODE_TYPE_ACTION;
1360  n->suffix = suffix;
1361  if (n->key->len)
1362  str += n->key->len;
1363 
1364  /*
1365  * NUM version: Prepare global NUMDesc struct
1366  */
1367  if (flags & NUM_FLAG)
1368  NUMDesc_prepare(Num, n);
1369 
1370  /*
1371  * Postfix
1372  */
1373  if ((flags & DCH_FLAG) && *str &&
1374  (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
1375  {
1376  n->suffix |= s->id;
1377  if (s->len)
1378  str += s->len;
1379  }
1380 
1381  n++;
1382  }
1383  else if (*str)
1384  {
1385  int chlen;
1386 
1387  if ((flags & STD_FLAG) && *str != '"')
1388  {
1389  /*
1390  * Standard mode, allow only following separators: "-./,':; ".
1391  * However, we support double quotes even in standard mode
1392  * (see below). This is our extension of standard mode.
1393  */
1394  if (strchr("-./,':; ", *str) == NULL)
1395  ereport(ERROR,
1396  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
1397  errmsg("invalid datetime format separator: \"%s\"",
1398  pnstrdup(str, pg_mblen(str)))));
1399 
1400  if (*str == ' ')
1401  n->type = NODE_TYPE_SPACE;
1402  else
1404 
1405  n->character[0] = *str;
1406  n->character[1] = '\0';
1407  n->key = NULL;
1408  n->suffix = 0;
1409  n++;
1410  str++;
1411  }
1412  else if (*str == '"')
1413  {
1414  /*
1415  * Process double-quoted literal string, if any
1416  */
1417  str++;
1418  while (*str)
1419  {
1420  if (*str == '"')
1421  {
1422  str++;
1423  break;
1424  }
1425  /* backslash quotes the next character, if any */
1426  if (*str == '\\' && *(str + 1))
1427  str++;
1428  chlen = pg_mblen(str);
1429  n->type = NODE_TYPE_CHAR;
1430  memcpy(n->character, str, chlen);
1431  n->character[chlen] = '\0';
1432  n->key = NULL;
1433  n->suffix = 0;
1434  n++;
1435  str += chlen;
1436  }
1437  }
1438  else
1439  {
1440  /*
1441  * Outside double-quoted strings, backslash is only special if
1442  * it immediately precedes a double quote.
1443  */
1444  if (*str == '\\' && *(str + 1) == '"')
1445  str++;
1446  chlen = pg_mblen(str);
1447 
1448  if ((flags & DCH_FLAG) && is_separator_char(str))
1450  else if (isspace((unsigned char) *str))
1451  n->type = NODE_TYPE_SPACE;
1452  else
1453  n->type = NODE_TYPE_CHAR;
1454 
1455  memcpy(n->character, str, chlen);
1456  n->character[chlen] = '\0';
1457  n->key = NULL;
1458  n->suffix = 0;
1459  n++;
1460  str += chlen;
1461  }
1462  }
1463  }
1464 
1465  n->type = NODE_TYPE_END;
1466  n->suffix = 0;
1467 }
1468 
/* ----------
 * DEBUG: Dump the FormatNode Tree (debug)
 *
 * Only compiled when DEBUG_TO_FROM_CHAR is defined.  Emits one elog() line
 * per node, for at most max + 1 nodes, and stops early at NODE_TYPE_END.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR

#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
#define DUMP_FM(_suf)	(S_FM(_suf) ? "FM" : " ")

static void
dump_node(FormatNode *node, int max)
{
	FormatNode *n;
	int			a;

	elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");

	for (a = 0, n = node; a <= max; n++, a++)
	{
		if (n->type == NODE_TYPE_ACTION)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
				 a, n->key->name, DUMP_THth(n->suffix), DUMP_FM(n->suffix));
		else if (n->type == NODE_TYPE_CHAR)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
				 a, n->character);
		/* the parser also emits separator and space nodes; name them too */
		else if (n->type == NODE_TYPE_SEPARATOR)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_SEPARATOR '%s'",
				 a, n->character);
		else if (n->type == NODE_TYPE_SPACE)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_SPACE '%s'",
				 a, n->character);
		else if (n->type == NODE_TYPE_END)
		{
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", a);
			return;
		}
		else
			elog(DEBUG_elog_output, "%d:\t unknown NODE!", a);
	}
}
#endif							/* DEBUG */
1504 
1505 /*****************************************************************************
1506  * Private utils
1507  *****************************************************************************/
1508 
1509 /* ----------
1510  * Return ST/ND/RD/TH for simple (1..9) numbers
1511  * type --> 0 upper, 1 lower
1512  * ----------
1513  */
1514 static const char *
1515 get_th(char *num, int type)
1516 {
1517  int len = strlen(num),
1518  last;
1519 
1520  last = *(num + (len - 1));
1521  if (!isdigit((unsigned char) last))
1522  ereport(ERROR,
1523  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1524  errmsg("\"%s\" is not a number", num)));
1525 
1526  /*
1527  * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
1528  * 'ST/st', 'ND/nd', 'RD/rd', respectively
1529  */
1530  if ((len > 1) && (num[len - 2] == '1'))
1531  last = 0;
1532 
1533  switch (last)
1534  {
1535  case '1':
1536  if (type == TH_UPPER)
1537  return numTH[0];
1538  return numth[0];
1539  case '2':
1540  if (type == TH_UPPER)
1541  return numTH[1];
1542  return numth[1];
1543  case '3':
1544  if (type == TH_UPPER)
1545  return numTH[2];
1546  return numth[2];
1547  default:
1548  if (type == TH_UPPER)
1549  return numTH[3];
1550  return numth[3];
1551  }
1552 }
1553 
/* ----------
 * Convert string-number to ordinal string-number
 *
 * Copies 'num' into 'dest' (unless both are the same buffer) and appends the
 * ordinal suffix that get_th() selects for 'num' and 'type'.  'dest' must
 * have room for the suffix.  Returns 'dest'.
 * ----------
 */
static char *
str_numth(char *dest, char *num, int type)
{
	const char *suffix;

	/* in-place use (dest == num) needs no copy */
	if (num != dest)
		strcpy(dest, num);

	suffix = get_th(num, type);
	strcat(dest, suffix);

	return dest;
}
1567 
1568 /*****************************************************************************
1569  * upper/lower/initcap functions
1570  *****************************************************************************/
1571 
1572 #ifdef USE_ICU
1573 
1574 typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
1575  const UChar *src, int32_t srcLength,
1576  const char *locale,
1577  UErrorCode *pErrorCode);
1578 
1579 static int32_t
1580 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
1581  UChar **buff_dest, UChar *buff_source, int32_t len_source)
1582 {
1583  UErrorCode status;
1584  int32_t len_dest;
1585 
1586  len_dest = len_source; /* try first with same length */
1587  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1588  status = U_ZERO_ERROR;
1589  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1590  mylocale->info.icu.locale, &status);
1591  if (status == U_BUFFER_OVERFLOW_ERROR)
1592  {
1593  /* try again with adjusted length */
1594  pfree(*buff_dest);
1595  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1596  status = U_ZERO_ERROR;
1597  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1598  mylocale->info.icu.locale, &status);
1599  }
1600  if (U_FAILURE(status))
1601  ereport(ERROR,
1602  (errmsg("case conversion failed: %s", u_errorName(status))));
1603  return len_dest;
1604 }
1605 
1606 static int32_t
1607 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
1608  const UChar *src, int32_t srcLength,
1609  const char *locale,
1610  UErrorCode *pErrorCode)
1611 {
1612  return u_strToTitle(dest, destCapacity, src, srcLength,
1613  NULL, locale, pErrorCode);
1614 }
1615 
1616 #endif /* USE_ICU */
1617 
1618 /*
1619  * If the system provides the needed functions for wide-character manipulation
1620  * (which are all standardized by C99), then we implement upper/lower/initcap
1621  * using wide-character functions, if necessary. Otherwise we use the
1622  * traditional <ctype.h> functions, which of course will not work as desired
1623  * in multibyte character sets. Note that in either case we are effectively
1624  * assuming that the database character encoding matches the encoding implied
1625  * by LC_CTYPE.
1626  */
1627 
1628 /*
1629  * collation-aware, wide-character-aware lower function
1630  *
1631  * We pass the number of bytes so we can pass varlena and char*
1632  * to this function. The result is a palloc'd, null-terminated string.
1633  */
1634 char *
1635 str_tolower(const char *buff, size_t nbytes, Oid collid)
1636 {
1637  char *result;
1638 
1639  if (!buff)
1640  return NULL;
1641 
1642  if (!OidIsValid(collid))
1643  {
1644  /*
1645  * This typically means that the parser could not resolve a conflict
1646  * of implicit collations, so report it that way.
1647  */
1648  ereport(ERROR,
1649  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1650  errmsg("could not determine which collation to use for %s function",
1651  "lower()"),
1652  errhint("Use the COLLATE clause to set the collation explicitly.")));
1653  }
1654 
1655  /* C/POSIX collations use this path regardless of database encoding */
1656  if (lc_ctype_is_c(collid))
1657  {
1658  result = asc_tolower(buff, nbytes);
1659  }
1660  else
1661  {
1662  pg_locale_t mylocale;
1663 
1664  mylocale = pg_newlocale_from_collation(collid);
1665 
1666 #ifdef USE_ICU
1667  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1668  {
1669  int32_t len_uchar;
1670  int32_t len_conv;
1671  UChar *buff_uchar;
1672  UChar *buff_conv;
1673 
1674  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1675  len_conv = icu_convert_case(u_strToLower, mylocale,
1676  &buff_conv, buff_uchar, len_uchar);
1677  icu_from_uchar(&result, buff_conv, len_conv);
1678  pfree(buff_uchar);
1679  pfree(buff_conv);
1680  }
1681  else
1682 #endif
1683  {
1685  {
1686  wchar_t *workspace;
1687  size_t curr_char;
1688  size_t result_size;
1689 
1690  /* Overflow paranoia */
1691  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1692  ereport(ERROR,
1693  (errcode(ERRCODE_OUT_OF_MEMORY),
1694  errmsg("out of memory")));
1695 
1696  /* Output workspace cannot have more codes than input bytes */
1697  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1698 
1699  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1700 
1701  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1702  {
1703  if (mylocale)
1704  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1705  else
1706  workspace[curr_char] = towlower(workspace[curr_char]);
1707  }
1708 
1709  /*
1710  * Make result large enough; case change might change number
1711  * of bytes
1712  */
1713  result_size = curr_char * pg_database_encoding_max_length() + 1;
1714  result = palloc(result_size);
1715 
1716  wchar2char(result, workspace, result_size, mylocale);
1717  pfree(workspace);
1718  }
1719  else
1720  {
1721  char *p;
1722 
1723  result = pnstrdup(buff, nbytes);
1724 
1725  /*
1726  * Note: we assume that tolower_l() will not be so broken as
1727  * to need an isupper_l() guard test. When using the default
1728  * collation, we apply the traditional Postgres behavior that
1729  * forces ASCII-style treatment of I/i, but in non-default
1730  * collations you get exactly what the collation says.
1731  */
1732  for (p = result; *p; p++)
1733  {
1734  if (mylocale)
1735  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1736  else
1737  *p = pg_tolower((unsigned char) *p);
1738  }
1739  }
1740  }
1741  }
1742 
1743  return result;
1744 }
1745 
1746 /*
1747  * collation-aware, wide-character-aware upper function
1748  *
1749  * We pass the number of bytes so we can pass varlena and char*
1750  * to this function. The result is a palloc'd, null-terminated string.
1751  */
1752 char *
1753 str_toupper(const char *buff, size_t nbytes, Oid collid)
1754 {
1755  char *result;
1756 
1757  if (!buff)
1758  return NULL;
1759 
1760  if (!OidIsValid(collid))
1761  {
1762  /*
1763  * This typically means that the parser could not resolve a conflict
1764  * of implicit collations, so report it that way.
1765  */
1766  ereport(ERROR,
1767  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1768  errmsg("could not determine which collation to use for %s function",
1769  "upper()"),
1770  errhint("Use the COLLATE clause to set the collation explicitly.")));
1771  }
1772 
1773  /* C/POSIX collations use this path regardless of database encoding */
1774  if (lc_ctype_is_c(collid))
1775  {
1776  result = asc_toupper(buff, nbytes);
1777  }
1778  else
1779  {
1780  pg_locale_t mylocale;
1781 
1782  mylocale = pg_newlocale_from_collation(collid);
1783 
1784 #ifdef USE_ICU
1785  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1786  {
1787  int32_t len_uchar,
1788  len_conv;
1789  UChar *buff_uchar;
1790  UChar *buff_conv;
1791 
1792  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1793  len_conv = icu_convert_case(u_strToUpper, mylocale,
1794  &buff_conv, buff_uchar, len_uchar);
1795  icu_from_uchar(&result, buff_conv, len_conv);
1796  pfree(buff_uchar);
1797  pfree(buff_conv);
1798  }
1799  else
1800 #endif
1801  {
1803  {
1804  wchar_t *workspace;
1805  size_t curr_char;
1806  size_t result_size;
1807 
1808  /* Overflow paranoia */
1809  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1810  ereport(ERROR,
1811  (errcode(ERRCODE_OUT_OF_MEMORY),
1812  errmsg("out of memory")));
1813 
1814  /* Output workspace cannot have more codes than input bytes */
1815  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1816 
1817  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1818 
1819  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1820  {
1821  if (mylocale)
1822  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1823  else
1824  workspace[curr_char] = towupper(workspace[curr_char]);
1825  }
1826 
1827  /*
1828  * Make result large enough; case change might change number
1829  * of bytes
1830  */
1831  result_size = curr_char * pg_database_encoding_max_length() + 1;
1832  result = palloc(result_size);
1833 
1834  wchar2char(result, workspace, result_size, mylocale);
1835  pfree(workspace);
1836  }
1837  else
1838  {
1839  char *p;
1840 
1841  result = pnstrdup(buff, nbytes);
1842 
1843  /*
1844  * Note: we assume that toupper_l() will not be so broken as
1845  * to need an islower_l() guard test. When using the default
1846  * collation, we apply the traditional Postgres behavior that
1847  * forces ASCII-style treatment of I/i, but in non-default
1848  * collations you get exactly what the collation says.
1849  */
1850  for (p = result; *p; p++)
1851  {
1852  if (mylocale)
1853  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1854  else
1855  *p = pg_toupper((unsigned char) *p);
1856  }
1857  }
1858  }
1859  }
1860 
1861  return result;
1862 }
1863 
1864 /*
1865  * collation-aware, wide-character-aware initcap function
1866  *
1867  * We pass the number of bytes so we can pass varlena and char*
1868  * to this function. The result is a palloc'd, null-terminated string.
1869  */
1870 char *
1871 str_initcap(const char *buff, size_t nbytes, Oid collid)
1872 {
1873  char *result;
1874  int wasalnum = false;
1875 
1876  if (!buff)
1877  return NULL;
1878 
1879  if (!OidIsValid(collid))
1880  {
1881  /*
1882  * This typically means that the parser could not resolve a conflict
1883  * of implicit collations, so report it that way.
1884  */
1885  ereport(ERROR,
1886  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1887  errmsg("could not determine which collation to use for %s function",
1888  "initcap()"),
1889  errhint("Use the COLLATE clause to set the collation explicitly.")));
1890  }
1891 
1892  /* C/POSIX collations use this path regardless of database encoding */
1893  if (lc_ctype_is_c(collid))
1894  {
1895  result = asc_initcap(buff, nbytes);
1896  }
1897  else
1898  {
1899  pg_locale_t mylocale;
1900 
1901  mylocale = pg_newlocale_from_collation(collid);
1902 
1903 #ifdef USE_ICU
1904  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1905  {
1906  int32_t len_uchar,
1907  len_conv;
1908  UChar *buff_uchar;
1909  UChar *buff_conv;
1910 
1911  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1912  len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
1913  &buff_conv, buff_uchar, len_uchar);
1914  icu_from_uchar(&result, buff_conv, len_conv);
1915  pfree(buff_uchar);
1916  pfree(buff_conv);
1917  }
1918  else
1919 #endif
1920  {
1922  {
1923  wchar_t *workspace;
1924  size_t curr_char;
1925  size_t result_size;
1926 
1927  /* Overflow paranoia */
1928  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1929  ereport(ERROR,
1930  (errcode(ERRCODE_OUT_OF_MEMORY),
1931  errmsg("out of memory")));
1932 
1933  /* Output workspace cannot have more codes than input bytes */
1934  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1935 
1936  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1937 
1938  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1939  {
1940  if (mylocale)
1941  {
1942  if (wasalnum)
1943  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1944  else
1945  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1946  wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
1947  }
1948  else
1949  {
1950  if (wasalnum)
1951  workspace[curr_char] = towlower(workspace[curr_char]);
1952  else
1953  workspace[curr_char] = towupper(workspace[curr_char]);
1954  wasalnum = iswalnum(workspace[curr_char]);
1955  }
1956  }
1957 
1958  /*
1959  * Make result large enough; case change might change number
1960  * of bytes
1961  */
1962  result_size = curr_char * pg_database_encoding_max_length() + 1;
1963  result = palloc(result_size);
1964 
1965  wchar2char(result, workspace, result_size, mylocale);
1966  pfree(workspace);
1967  }
1968  else
1969  {
1970  char *p;
1971 
1972  result = pnstrdup(buff, nbytes);
1973 
1974  /*
1975  * Note: we assume that toupper_l()/tolower_l() will not be so
1976  * broken as to need guard tests. When using the default
1977  * collation, we apply the traditional Postgres behavior that
1978  * forces ASCII-style treatment of I/i, but in non-default
1979  * collations you get exactly what the collation says.
1980  */
1981  for (p = result; *p; p++)
1982  {
1983  if (mylocale)
1984  {
1985  if (wasalnum)
1986  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1987  else
1988  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1989  wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
1990  }
1991  else
1992  {
1993  if (wasalnum)
1994  *p = pg_tolower((unsigned char) *p);
1995  else
1996  *p = pg_toupper((unsigned char) *p);
1997  wasalnum = isalnum((unsigned char) *p);
1998  }
1999  }
2000  }
2001  }
2002  }
2003 
2004  return result;
2005 }
2006 
/*
 * ASCII-only lower function
 *
 * The byte count is passed explicitly so that both varlena contents and
 * char* strings can be handed in.  Returns a palloc'd, null-terminated copy
 * in which every byte has been run through pg_ascii_tolower(), or NULL if
 * 'buff' is NULL.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *lowered;
	size_t		i;

	if (buff == NULL)
		return NULL;

	lowered = pnstrdup(buff, nbytes);

	for (i = 0; lowered[i] != '\0'; i++)
		lowered[i] = pg_ascii_tolower((unsigned char) lowered[i]);

	return lowered;
}
2029 
/*
 * ASCII-only upper function
 *
 * The byte count is passed explicitly so that both varlena contents and
 * char* strings can be handed in.  Returns a palloc'd, null-terminated copy
 * in which every byte has been run through pg_ascii_toupper(), or NULL if
 * 'buff' is NULL.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *raised;
	size_t		i;

	if (buff == NULL)
		return NULL;

	raised = pnstrdup(buff, nbytes);

	for (i = 0; raised[i] != '\0'; i++)
		raised[i] = pg_ascii_toupper((unsigned char) raised[i]);

	return raised;
}
2052 
/*
 * ASCII-only initcap function
 *
 * The byte count is passed explicitly so that both varlena contents and
 * char* strings can be handed in.  Returns a palloc'd, null-terminated copy,
 * or NULL if 'buff' is NULL.
 *
 * A byte is upper-cased when the preceding converted byte was not an ASCII
 * letter or digit (or when it is the first byte), and lower-cased otherwise.
 */
char *
asc_initcap(const char *buff, size_t nbytes)
{
	char	   *copy;
	size_t		i;
	bool		in_word = false;

	if (buff == NULL)
		return NULL;

	copy = pnstrdup(buff, nbytes);

	for (i = 0; copy[i] != '\0'; i++)
	{
		unsigned char in = (unsigned char) copy[i];
		char		c;

		if (in_word)
			c = pg_ascii_tolower(in);
		else
			c = pg_ascii_toupper(in);
		copy[i] = c;

		/* we don't trust isalnum() here */
		in_word = ((c >= '0' && c <= '9') ||
				   (c >= 'A' && c <= 'Z') ||
				   (c >= 'a' && c <= 'z'));
	}

	return copy;
}
2087 
2088 /* convenience routines for when the input is null-terminated */
2089 
2090 static char *
2091 str_tolower_z(const char *buff, Oid collid)
2092 {
2093  return str_tolower(buff, strlen(buff), collid);
2094 }
2095 
2096 static char *
2097 str_toupper_z(const char *buff, Oid collid)
2098 {
2099  return str_toupper(buff, strlen(buff), collid);
2100 }
2101 
2102 static char *
2103 str_initcap_z(const char *buff, Oid collid)
2104 {
2105  return str_initcap(buff, strlen(buff), collid);
2106 }
2107 
/* asc_tolower() for a null-terminated input string */
static char *
asc_tolower_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_tolower(buff, nbytes);
}
2113 
/* asc_toupper() for a null-terminated input string */
static char *
asc_toupper_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_toupper(buff, nbytes);
}
2119 
2120 /* asc_initcap_z is not currently needed */
2121 
2122 
/* ----------
 * Skip TH / th in FROM_CHAR
 *
 * If S_THth(_suf) is on, advance 'ptr' past up to two characters, using
 * pg_mblen() for the width of each; a step is skipped once the end of the
 * string has been reached.  Nothing happens when S_THth(_suf) is off.
 *
 * 'ptr' is evaluated several times and is assigned to, so it must be a
 * modifiable lvalue without side effects.
 * ----------
 */
#define SKIP_THth(ptr, _suf) \
	do { \
		if (S_THth(_suf)) \
		{ \
			if (*(ptr)) (ptr) += pg_mblen(ptr); \
			if (*(ptr)) (ptr) += pg_mblen(ptr); \
		} \
	} while (0)
2137 
2138 
#ifdef DEBUG_TO_FROM_CHAR
/* -----------
 * DEBUG: Dump a keyword index, for debugging and index checking.
 *
 * For every index position, log the ASCII character it stands for together
 * with the keyword it points to, or the raw index value if the position is
 * unused; finish with the totals of used and free positions.
 * ----------
 */
static void
dump_index(const KeyWord *k, const int *index)
{
	int			used = 0;
	int			unused = 0;
	int			pos;

	elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");

	for (pos = 0; pos < KeyWord_INDEX_SIZE; pos++)
	{
		if (index[pos] == -1)
		{
			/* no keyword starts with this character */
			unused++;
			elog(DEBUG_elog_output, "\t(%d) %c %d", pos, pos + 32, index[pos]);
			continue;
		}

		elog(DEBUG_elog_output, "\t%c: %s, ", pos + 32, k[index[pos]].name);
		used++;
	}

	elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
		 used, unused);
}
#endif							/* DEBUG */
2171 
2172 /* ----------
2173  * Return true if next format picture is not digit value
2174  * ----------
2175  */
2176 static bool
2178 {
2179  if (n->type == NODE_TYPE_END)
2180  return false;
2181 
2182  if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
2183  return true;
2184 
2185  /*
2186  * Next node
2187  */
2188  n++;
2189 
2190  /* end of format string is treated like a non-digit separator */
2191  if (n->type == NODE_TYPE_END)
2192  return true;
2193 
2194  if (n->type == NODE_TYPE_ACTION)
2195  {
2196  if (n->key->is_digit)
2197  return false;
2198 
2199  return true;
2200  }
2201  else if (n->character[1] == '\0' &&
2202  isdigit((unsigned char) n->character[0]))
2203  return false;
2204 
2205  return true; /* some non-digit input (separator) */
2206 }
2207 
2208 
/*
 * Expand a partial (one- to three-digit) year into a full year, choosing the
 * century or millennium that lands closest to 2020.  Values of 1000 and above
 * are returned unchanged.
 */
static int
adjust_partial_year_to_2020(int year)
{
	/*
	 * Adjust all dates toward 2020; this is effectively what happens when we
	 * assume '70' is 1970 and '69' is 2069.
	 */
	/* Force 0-69 into the 2000's */
	if (year < 70)
		return year + 2000;
	/* Force 70-99 into the 1900's */
	else if (year < 100)
		return year + 1900;
	/* Force 100-519 into the 2000's */
	else if (year < 520)
		return year + 2000;
	/* Force 520-999 into the 1000's */
	else if (year < 1000)
		return year + 1000;
	else
		return year;
}
2231 
2232 
/*
 * Count the whitespace characters (per isspace()) at the start of 'str'.
 */
static int
strspace_len(const char *str)
{
	int			n;

	for (n = 0; str[n] != '\0' && isspace((unsigned char) str[n]); n++)
		continue;

	return n;
}
2245 
2246 /*
2247  * Set the date mode of a from-char conversion.
2248  *
2249  * Puke if the date mode has already been set, and the caller attempts to set
2250  * it to a conflicting mode.
2251  *
2252  * Returns true on success, false on failure (if escontext points to an
2253  * ErrorSaveContext; otherwise errors are thrown).
2254  */
2255 static bool
2257  Node *escontext)
2258 {
2259  if (mode != FROM_CHAR_DATE_NONE)
2260  {
2261  if (tmfc->mode == FROM_CHAR_DATE_NONE)
2262  tmfc->mode = mode;
2263  else if (tmfc->mode != mode)
2264  ereturn(escontext, false,
2265  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2266  errmsg("invalid combination of date conventions"),
2267  errhint("Do not mix Gregorian and ISO week date "
2268  "conventions in a formatting template.")));
2269  }
2270  return true;
2271 }
2272 
2273 /*
2274  * Set the integer pointed to by 'dest' to the given value.
2275  *
2276  * Puke if the destination integer has previously been set to some other
2277  * non-zero value.
2278  *
2279  * Returns true on success, false on failure (if escontext points to an
2280  * ErrorSaveContext; otherwise errors are thrown).
2281  */
2282 static bool
2283 from_char_set_int(int *dest, const int value, const FormatNode *node,
2284  Node *escontext)
2285 {
2286  if (*dest != 0 && *dest != value)
2287  ereturn(escontext, false,
2288  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2289  errmsg("conflicting values for \"%s\" field in formatting string",
2290  node->key->name),
2291  errdetail("This value contradicts a previous setting "
2292  "for the same field type.")));
2293  *dest = value;
2294  return true;
2295 }
2296 
2297 /*
2298  * Read a single integer from the source string, into the int pointed to by
2299  * 'dest'. If 'dest' is NULL, the result is discarded.
2300  *
2301  * In fixed-width mode (the node does not have the FM suffix), consume at most
2302  * 'len' characters. However, any leading whitespace isn't counted in 'len'.
2303  *
2304  * We use strtol() to recover the integer value from the source string, in
2305  * accordance with the given FormatNode.
2306  *
2307  * If the conversion completes successfully, src will have been advanced to
2308  * point at the character immediately following the last character used in the
2309  * conversion.
2310  *
2311  * Returns the number of characters consumed, or -1 on error (if escontext
2312  * points to an ErrorSaveContext; otherwise errors are thrown).
2313  *
2314  * Note that from_char_parse_int() provides a more convenient wrapper where
2315  * the length of the field is the same as the length of the format keyword (as
2316  * with DD and MI).
2317  */
2318 static int
2319 from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node,
2320  Node *escontext)
2321 {
2322  long result;
2323  char copy[DCH_MAX_ITEM_SIZ + 1];
2324  const char *init = *src;
2325  int used;
2326 
2327  /*
2328  * Skip any whitespace before parsing the integer.
2329  */
2330  *src += strspace_len(*src);
2331 
2333  used = (int) strlcpy(copy, *src, len + 1);
2334 
2335  if (S_FM(node->suffix) || is_next_separator(node))
2336  {
2337  /*
2338  * This node is in Fill Mode, or the next node is known to be a
2339  * non-digit value, so we just slurp as many characters as we can get.
2340  */
2341  char *endptr;
2342 
2343  errno = 0;
2344  result = strtol(init, &endptr, 10);
2345  *src = endptr;
2346  }
2347  else
2348  {
2349  /*
2350  * We need to pull exactly the number of characters given in 'len' out
2351  * of the string, and convert those.
2352  */
2353  char *last;
2354 
2355  if (used < len)
2356  ereturn(escontext, -1,
2357  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2358  errmsg("source string too short for \"%s\" formatting field",
2359  node->key->name),
2360  errdetail("Field requires %d characters, but only %d remain.",
2361  len, used),
2362  errhint("If your source string is not fixed-width, "
2363  "try using the \"FM\" modifier.")));
2364 
2365  errno = 0;
2366  result = strtol(copy, &last, 10);
2367  used = last - copy;
2368 
2369  if (used > 0 && used < len)
2370  ereturn(escontext, -1,
2371  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2372  errmsg("invalid value \"%s\" for \"%s\"",
2373  copy, node->key->name),
2374  errdetail("Field requires %d characters, but only %d could be parsed.",
2375  len, used),
2376  errhint("If your source string is not fixed-width, "
2377  "try using the \"FM\" modifier.")));
2378 
2379  *src += used;
2380  }
2381 
2382  if (*src == init)
2383  ereturn(escontext, -1,
2384  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2385  errmsg("invalid value \"%s\" for \"%s\"",
2386  copy, node->key->name),
2387  errdetail("Value must be an integer.")));
2388 
2389  if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
2390  ereturn(escontext, -1,
2391  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2392  errmsg("value for \"%s\" in source string is out of range",
2393  node->key->name),
2394  errdetail("Value must be in the range %d to %d.",
2395  INT_MIN, INT_MAX)));
2396 
2397  if (dest != NULL)
2398  {
2399  if (!from_char_set_int(dest, (int) result, node, escontext))
2400  return -1;
2401  }
2402 
2403  return *src - init;
2404 }
2405 
2406 /*
2407  * Call from_char_parse_int_len(), using the length of the format keyword as
2408  * the expected length of the field.
2409  *
2410  * Don't call this function if the field differs in length from the format
2411  * keyword (as with HH24; the keyword length is 4, but the field length is 2).
2412  * In such cases, call from_char_parse_int_len() instead to specify the
2413  * required length explicitly.
2414  */
2415 static int
2416 from_char_parse_int(int *dest, const char **src, FormatNode *node,
2417  Node *escontext)
2418 {
2419  return from_char_parse_int_len(dest, src, node->key->len, node, escontext);
2420 }
2421 
2422 /*
2423  * Sequentially search null-terminated "array" for a case-insensitive match
2424  * to the initial character(s) of "name".
2425  *
2426  * Returns array index of match, or -1 for no match.
2427  *
2428  * *len is set to the length of the match, or 0 for no match.
2429  *
2430  * Case-insensitivity is defined per pg_ascii_tolower, so this is only
2431  * suitable for comparisons to ASCII strings.
2432  */
2433 static int
2434 seq_search_ascii(const char *name, const char *const *array, int *len)
2435 {
2436  unsigned char firstc;
2437  const char *const *a;
2438 
2439  *len = 0;
2440 
2441  /* empty string can't match anything */
2442  if (!*name)
2443  return -1;
2444 
2445  /* we handle first char specially to gain some speed */
2446  firstc = pg_ascii_tolower((unsigned char) *name);
2447 
2448  for (a = array; *a != NULL; a++)
2449  {
2450  const char *p;
2451  const char *n;
2452 
2453  /* compare first chars */
2454  if (pg_ascii_tolower((unsigned char) **a) != firstc)
2455  continue;
2456 
2457  /* compare rest of string */
2458  for (p = *a + 1, n = name + 1;; p++, n++)
2459  {
2460  /* return success if we matched whole array entry */
2461  if (*p == '\0')
2462  {
2463  *len = n - name;
2464  return a - array;
2465  }
2466  /* else, must have another character in "name" ... */
2467  if (*n == '\0')
2468  break;
2469  /* ... and it must match */
2470  if (pg_ascii_tolower((unsigned char) *p) !=
2471  pg_ascii_tolower((unsigned char) *n))
2472  break;
2473  }
2474  }
2475 
2476  return -1;
2477 }
2478 
2479 /*
2480  * Sequentially search an array of possibly non-English words for
2481  * a case-insensitive match to the initial character(s) of "name".
2482  *
2483  * This has the same API as seq_search_ascii(), but we use a more general
2484  * case-folding transformation to achieve case-insensitivity. Case folding
2485  * is done per the rules of the collation identified by "collid".
2486  *
2487  * The array is treated as const, but we don't declare it that way because
2488  * the arrays exported by pg_locale.c aren't const.
2489  */
2490 static int
2491 seq_search_localized(const char *name, char **array, int *len, Oid collid)
2492 {
2493  char **a;
2494  char *upper_name;
2495  char *lower_name;
2496 
2497  *len = 0;
2498 
2499  /* empty string can't match anything */
2500  if (!*name)
2501  return -1;
2502 
2503  /*
2504  * The case-folding processing done below is fairly expensive, so before
2505  * doing that, make a quick pass to see if there is an exact match.
2506  */
2507  for (a = array; *a != NULL; a++)
2508  {
2509  int element_len = strlen(*a);
2510 
2511  if (strncmp(name, *a, element_len) == 0)
2512  {
2513  *len = element_len;
2514  return a - array;
2515  }
2516  }
2517 
2518  /*
2519  * Fold to upper case, then to lower case, so that we can match reliably
2520  * even in languages in which case conversions are not injective.
2521  */
2522  upper_name = str_toupper(unconstify(char *, name), strlen(name), collid);
2523  lower_name = str_tolower(upper_name, strlen(upper_name), collid);
2524  pfree(upper_name);
2525 
2526  for (a = array; *a != NULL; a++)
2527  {
2528  char *upper_element;
2529  char *lower_element;
2530  int element_len;
2531 
2532  /* Likewise upper/lower-case array element */
2533  upper_element = str_toupper(*a, strlen(*a), collid);
2534  lower_element = str_tolower(upper_element, strlen(upper_element),
2535  collid);
2536  pfree(upper_element);
2537  element_len = strlen(lower_element);
2538 
2539  /* Match? */
2540  if (strncmp(lower_name, lower_element, element_len) == 0)
2541  {
2542  *len = element_len;
2543  pfree(lower_element);
2544  pfree(lower_name);
2545  return a - array;
2546  }
2547  pfree(lower_element);
2548  }
2549 
2550  pfree(lower_name);
2551  return -1;
2552 }
2553 
2554 /*
2555  * Perform a sequential search in 'array' (or 'localized_array', if that's
2556  * not NULL) for an entry matching the first character(s) of the 'src'
2557  * string case-insensitively.
2558  *
2559  * The 'array' is presumed to be English words (all-ASCII), but
2560  * if 'localized_array' is supplied, that might be non-English
2561  * so we need a more expensive case-folding transformation
2562  * (which will follow the rules of the collation 'collid').
2563  *
2564  * If a match is found, copy the array index of the match into the integer
2565  * pointed to by 'dest' and advance 'src' to the end of the part of the string
2566  * which matched.
2567  *
2568  * Returns true on match, false on failure (if escontext points to an
2569  * ErrorSaveContext; otherwise errors are thrown).
2570  *
2571  * 'node' is used only for error reports: node->key->name identifies the
2572  * field type we were searching for.
2573  */
2574 static bool
2575 from_char_seq_search(int *dest, const char **src, const char *const *array,
2576  char **localized_array, Oid collid,
2577  FormatNode *node, Node *escontext)
2578 {
2579  int len;
2580 
2581  if (localized_array == NULL)
2582  *dest = seq_search_ascii(*src, array, &len);
2583  else
2584  *dest = seq_search_localized(*src, localized_array, &len, collid);
2585 
2586  if (len <= 0)
2587  {
2588  /*
2589  * In the error report, truncate the string at the next whitespace (if
2590  * any) to avoid including irrelevant data.
2591  */
2592  char *copy = pstrdup(*src);
2593  char *c;
2594 
2595  for (c = copy; *c; c++)
2596  {
2597  if (scanner_isspace(*c))
2598  {
2599  *c = '\0';
2600  break;
2601  }
2602  }
2603 
2604  ereturn(escontext, false,
2605  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2606  errmsg("invalid value \"%s\" for \"%s\"",
2607  copy, node->key->name),
2608  errdetail("The given value did not match any of "
2609  "the allowed values for this field.")));
2610  }
2611  *src += len;
2612  return true;
2613 }
2614 
2615 /* ----------
2616  * Process a TmToChar struct as denoted by a list of FormatNodes.
2617  * The formatted data is written to the string pointed to by 'out'.
2618  * ----------
2619  */
2620 static void
2621 DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
2622 {
2623  FormatNode *n;
2624  char *s;
2625  struct fmt_tm *tm = &in->tm;
2626  int i;
2627 
2628  /* cache localized days and months */
2630 
2631  s = out;
2632  for (n = node; n->type != NODE_TYPE_END; n++)
2633  {
2634  if (n->type != NODE_TYPE_ACTION)
2635  {
2636  strcpy(s, n->character);
2637  s += strlen(s);
2638  continue;
2639  }
2640 
2641  switch (n->key->id)
2642  {
2643  case DCH_A_M:
2644  case DCH_P_M:
2645  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2646  ? P_M_STR : A_M_STR);
2647  s += strlen(s);
2648  break;
2649  case DCH_AM:
2650  case DCH_PM:
2651  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2652  ? PM_STR : AM_STR);
2653  s += strlen(s);
2654  break;
2655  case DCH_a_m:
2656  case DCH_p_m:
2657  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2658  ? p_m_STR : a_m_STR);
2659  s += strlen(s);
2660  break;
2661  case DCH_am:
2662  case DCH_pm:
2663  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2664  ? pm_STR : am_STR);
2665  s += strlen(s);
2666  break;
2667  case DCH_HH:
2668  case DCH_HH12:
2669 
2670  /*
2671  * display time as shown on a 12-hour clock, even for
2672  * intervals
2673  */
2674  sprintf(s, "%0*lld", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2675  tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ?
2676  (long long) (HOURS_PER_DAY / 2) :
2677  (long long) (tm->tm_hour % (HOURS_PER_DAY / 2)));
2678  if (S_THth(n->suffix))
2679  str_numth(s, s, S_TH_TYPE(n->suffix));
2680  s += strlen(s);
2681  break;
2682  case DCH_HH24:
2683  sprintf(s, "%0*lld", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2684  (long long) tm->tm_hour);
2685  if (S_THth(n->suffix))
2686  str_numth(s, s, S_TH_TYPE(n->suffix));
2687  s += strlen(s);
2688  break;
2689  case DCH_MI:
2690  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
2691  tm->tm_min);
2692  if (S_THth(n->suffix))
2693  str_numth(s, s, S_TH_TYPE(n->suffix));
2694  s += strlen(s);
2695  break;
2696  case DCH_SS:
2697  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
2698  tm->tm_sec);
2699  if (S_THth(n->suffix))
2700  str_numth(s, s, S_TH_TYPE(n->suffix));
2701  s += strlen(s);
2702  break;
2703 
2704 #define DCH_to_char_fsec(frac_fmt, frac_val) \
2705  sprintf(s, frac_fmt, (int) (frac_val)); \
2706  if (S_THth(n->suffix)) \
2707  str_numth(s, s, S_TH_TYPE(n->suffix)); \
2708  s += strlen(s)
2709 
2710  case DCH_FF1: /* tenth of second */
2711  DCH_to_char_fsec("%01d", in->fsec / 100000);
2712  break;
2713  case DCH_FF2: /* hundredth of second */
2714  DCH_to_char_fsec("%02d", in->fsec / 10000);
2715  break;
2716  case DCH_FF3:
2717  case DCH_MS: /* millisecond */
2718  DCH_to_char_fsec("%03d", in->fsec / 1000);
2719  break;
2720  case DCH_FF4: /* tenth of a millisecond */
2721  DCH_to_char_fsec("%04d", in->fsec / 100);
2722  break;
2723  case DCH_FF5: /* hundredth of a millisecond */
2724  DCH_to_char_fsec("%05d", in->fsec / 10);
2725  break;
2726  case DCH_FF6:
2727  case DCH_US: /* microsecond */
2728  DCH_to_char_fsec("%06d", in->fsec);
2729  break;
2730 #undef DCH_to_char_fsec
2731  case DCH_SSSS:
2732  sprintf(s, "%lld",
2733  (long long) (tm->tm_hour * SECS_PER_HOUR +
2735  tm->tm_sec));
2736  if (S_THth(n->suffix))
2737  str_numth(s, s, S_TH_TYPE(n->suffix));
2738  s += strlen(s);
2739  break;
2740  case DCH_tz:
2742  if (tmtcTzn(in))
2743  {
2744  /* We assume here that timezone names aren't localized */
2745  char *p = asc_tolower_z(tmtcTzn(in));
2746 
2747  strcpy(s, p);
2748  pfree(p);
2749  s += strlen(s);
2750  }
2751  break;
2752  case DCH_TZ:
2754  if (tmtcTzn(in))
2755  {
2756  strcpy(s, tmtcTzn(in));
2757  s += strlen(s);
2758  }
2759  break;
2760  case DCH_TZH:
2762  sprintf(s, "%c%02d",
2763  (tm->tm_gmtoff >= 0) ? '+' : '-',
2764  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2765  s += strlen(s);
2766  break;
2767  case DCH_TZM:
2769  sprintf(s, "%02d",
2770  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2771  s += strlen(s);
2772  break;
2773  case DCH_OF:
2775  sprintf(s, "%c%0*d",
2776  (tm->tm_gmtoff >= 0) ? '+' : '-',
2777  S_FM(n->suffix) ? 0 : 2,
2778  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2779  s += strlen(s);
2780  if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
2781  {
2782  sprintf(s, ":%02d",
2783  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2784  s += strlen(s);
2785  }
2786  break;
2787  case DCH_A_D:
2788  case DCH_B_C:
2790  strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
2791  s += strlen(s);
2792  break;
2793  case DCH_AD:
2794  case DCH_BC:
2796  strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
2797  s += strlen(s);
2798  break;
2799  case DCH_a_d:
2800  case DCH_b_c:
2802  strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
2803  s += strlen(s);
2804  break;
2805  case DCH_ad:
2806  case DCH_bc:
2808  strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
2809  s += strlen(s);
2810  break;
2811  case DCH_MONTH:
2813  if (!tm->tm_mon)
2814  break;
2815  if (S_TM(n->suffix))
2816  {
2818 
2819  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2820  strcpy(s, str);
2821  else
2822  ereport(ERROR,
2823  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2824  errmsg("localized string format value too long")));
2825  }
2826  else
2827  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2829  s += strlen(s);
2830  break;
2831  case DCH_Month:
2833  if (!tm->tm_mon)
2834  break;
2835  if (S_TM(n->suffix))
2836  {
2838 
2839  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2840  strcpy(s, str);
2841  else
2842  ereport(ERROR,
2843  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2844  errmsg("localized string format value too long")));
2845  }
2846  else
2847  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2848  months_full[tm->tm_mon - 1]);
2849  s += strlen(s);
2850  break;
2851  case DCH_month:
2853  if (!tm->tm_mon)
2854  break;
2855  if (S_TM(n->suffix))
2856  {
2858 
2859  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2860  strcpy(s, str);
2861  else
2862  ereport(ERROR,
2863  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2864  errmsg("localized string format value too long")));
2865  }
2866  else
2867  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2869  s += strlen(s);
2870  break;
2871  case DCH_MON:
2873  if (!tm->tm_mon)
2874  break;
2875  if (S_TM(n->suffix))
2876  {
2878 
2879  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2880  strcpy(s, str);
2881  else
2882  ereport(ERROR,
2883  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2884  errmsg("localized string format value too long")));
2885  }
2886  else
2887  strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
2888  s += strlen(s);
2889  break;
2890  case DCH_Mon:
2892  if (!tm->tm_mon)
2893  break;
2894  if (S_TM(n->suffix))
2895  {
2897 
2898  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2899  strcpy(s, str);
2900  else
2901  ereport(ERROR,
2902  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2903  errmsg("localized string format value too long")));
2904  }
2905  else
2906  strcpy(s, months[tm->tm_mon - 1]);
2907  s += strlen(s);
2908  break;
2909  case DCH_mon:
2911  if (!tm->tm_mon)
2912  break;
2913  if (S_TM(n->suffix))
2914  {
2916 
2917  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2918  strcpy(s, str);
2919  else
2920  ereport(ERROR,
2921  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2922  errmsg("localized string format value too long")));
2923  }
2924  else
2925  strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
2926  s += strlen(s);
2927  break;
2928  case DCH_MM:
2929  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
2930  tm->tm_mon);
2931  if (S_THth(n->suffix))
2932  str_numth(s, s, S_TH_TYPE(n->suffix));
2933  s += strlen(s);
2934  break;
2935  case DCH_DAY:
2937  if (S_TM(n->suffix))
2938  {
2940 
2941  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2942  strcpy(s, str);
2943  else
2944  ereport(ERROR,
2945  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2946  errmsg("localized string format value too long")));
2947  }
2948  else
2949  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2951  s += strlen(s);
2952  break;
2953  case DCH_Day:
2955  if (S_TM(n->suffix))
2956  {
2958 
2959  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2960  strcpy(s, str);
2961  else
2962  ereport(ERROR,
2963  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2964  errmsg("localized string format value too long")));
2965  }
2966  else
2967  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2968  days[tm->tm_wday]);
2969  s += strlen(s);
2970  break;
2971  case DCH_day:
2973  if (S_TM(n->suffix))
2974  {
2976 
2977  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2978  strcpy(s, str);
2979  else
2980  ereport(ERROR,
2981  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2982  errmsg("localized string format value too long")));
2983  }
2984  else
2985  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2987  s += strlen(s);
2988  break;
2989  case DCH_DY:
2991  if (S_TM(n->suffix))
2992  {
2994 
2995  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2996  strcpy(s, str);
2997  else
2998  ereport(ERROR,
2999  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3000  errmsg("localized string format value too long")));
3001  }
3002  else
3003  strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
3004  s += strlen(s);
3005  break;
3006  case DCH_Dy:
3008  if (S_TM(n->suffix))
3009  {
3011 
3012  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3013  strcpy(s, str);
3014  else
3015  ereport(ERROR,
3016  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3017  errmsg("localized string format value too long")));
3018  }
3019  else
3020  strcpy(s, days_short[tm->tm_wday]);
3021  s += strlen(s);
3022  break;
3023  case DCH_dy:
3025  if (S_TM(n->suffix))
3026  {
3028 
3029  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3030  strcpy(s, str);
3031  else
3032  ereport(ERROR,
3033  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3034  errmsg("localized string format value too long")));
3035  }
3036  else
3037  strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
3038  s += strlen(s);
3039  break;
3040  case DCH_DDD:
3041  case DCH_IDDD:
3042  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
3043  (n->key->id == DCH_DDD) ?
3044  tm->tm_yday :
3046  if (S_THth(n->suffix))
3047  str_numth(s, s, S_TH_TYPE(n->suffix));
3048  s += strlen(s);
3049  break;
3050  case DCH_DD:
3051  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
3052  if (S_THth(n->suffix))
3053  str_numth(s, s, S_TH_TYPE(n->suffix));
3054  s += strlen(s);
3055  break;
3056  case DCH_D:
3058  sprintf(s, "%d", tm->tm_wday + 1);
3059  if (S_THth(n->suffix))
3060  str_numth(s, s, S_TH_TYPE(n->suffix));
3061  s += strlen(s);
3062  break;
3063  case DCH_ID:
3065  sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
3066  if (S_THth(n->suffix))
3067  str_numth(s, s, S_TH_TYPE(n->suffix));
3068  s += strlen(s);
3069  break;
3070  case DCH_WW:
3071  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3072  (tm->tm_yday - 1) / 7 + 1);
3073  if (S_THth(n->suffix))
3074  str_numth(s, s, S_TH_TYPE(n->suffix));
3075  s += strlen(s);
3076  break;
3077  case DCH_IW:
3078  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3080  if (S_THth(n->suffix))
3081  str_numth(s, s, S_TH_TYPE(n->suffix));
3082  s += strlen(s);
3083  break;
3084  case DCH_Q:
3085  if (!tm->tm_mon)
3086  break;
3087  sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
3088  if (S_THth(n->suffix))
3089  str_numth(s, s, S_TH_TYPE(n->suffix));
3090  s += strlen(s);
3091  break;
3092  case DCH_CC:
3093  if (is_interval) /* straight calculation */
3094  i = tm->tm_year / 100;
3095  else
3096  {
3097  if (tm->tm_year > 0)
3098  /* Century 20 == 1901 - 2000 */
3099  i = (tm->tm_year - 1) / 100 + 1;
3100  else
3101  /* Century 6BC == 600BC - 501BC */
3102  i = tm->tm_year / 100 - 1;
3103  }
3104  if (i <= 99 && i >= -99)
3105  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
3106  else
3107  sprintf(s, "%d", i);
3108  if (S_THth(n->suffix))
3109  str_numth(s, s, S_TH_TYPE(n->suffix));
3110  s += strlen(s);
3111  break;
3112  case DCH_Y_YYY:
3113  i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
3114  sprintf(s, "%d,%03d", i,
3115  ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
3116  if (S_THth(n->suffix))
3117  str_numth(s, s, S_TH_TYPE(n->suffix));
3118  s += strlen(s);
3119  break;
3120  case DCH_YYYY:
3121  case DCH_IYYY:
3122  sprintf(s, "%0*d",
3123  S_FM(n->suffix) ? 0 :
3124  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
3125  (n->key->id == DCH_YYYY ?
3126  ADJUST_YEAR(tm->tm_year, is_interval) :
3128  tm->tm_mon,
3129  tm->tm_mday),
3130  is_interval)));
3131  if (S_THth(n->suffix))
3132  str_numth(s, s, S_TH_TYPE(n->suffix));
3133  s += strlen(s);
3134  break;
3135  case DCH_YYY:
3136  case DCH_IYY:
3137  sprintf(s, "%0*d",
3138  S_FM(n->suffix) ? 0 :
3139  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
3140  (n->key->id == DCH_YYY ?
3141  ADJUST_YEAR(tm->tm_year, is_interval) :
3143  tm->tm_mon,
3144  tm->tm_mday),
3145  is_interval)) % 1000);
3146  if (S_THth(n->suffix))
3147  str_numth(s, s, S_TH_TYPE(n->suffix));
3148  s += strlen(s);
3149  break;
3150  case DCH_YY:
3151  case DCH_IY:
3152  sprintf(s, "%0*d",
3153  S_FM(n->suffix) ? 0 :
3154  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
3155  (n->key->id == DCH_YY ?
3156  ADJUST_YEAR(tm->tm_year, is_interval) :
3158  tm->tm_mon,
3159  tm->tm_mday),
3160  is_interval)) % 100);
3161  if (S_THth(n->suffix))
3162  str_numth(s, s, S_TH_TYPE(n->suffix));
3163  s += strlen(s);
3164  break;
3165  case DCH_Y:
3166  case DCH_I:
3167  sprintf(s, "%1d",
3168  (n->key->id == DCH_Y ?
3169  ADJUST_YEAR(tm->tm_year, is_interval) :
3171  tm->tm_mon,
3172  tm->tm_mday),
3173  is_interval)) % 10);
3174  if (S_THth(n->suffix))
3175  str_numth(s, s, S_TH_TYPE(n->suffix));
3176  s += strlen(s);
3177  break;
3178  case DCH_RM:
3179  /* FALLTHROUGH */
3180  case DCH_rm:
3181 
3182  /*
3183  * For intervals, values like '12 month' will be reduced to 0
3184  * month and some years. These should be processed.
3185  */
3186  if (!tm->tm_mon && !tm->tm_year)
3187  break;
3188  else
3189  {
3190  int mon = 0;
3191  const char *const *months;
3192 
3193  if (n->key->id == DCH_RM)
3195  else
3197 
3198  /*
3199  * Compute the position in the roman-numeral array. Note
3200  * that the contents of the array are reversed, December
3201  * being first and January last.
3202  */
3203  if (tm->tm_mon == 0)
3204  {
3205  /*
3206  * This case is special, and tracks the case of full
3207  * interval years.
3208  */
3209  mon = tm->tm_year >= 0 ? 0 : MONTHS_PER_YEAR - 1;
3210  }
3211  else if (tm->tm_mon < 0)
3212  {
3213  /*
3214  * Negative case. In this case, the calculation is
3215  * reversed, where -1 means December, -2 November,
3216  * etc.
3217  */
3218  mon = -1 * (tm->tm_mon + 1);
3219  }
3220  else
3221  {
3222  /*
3223  * Common case, with a strictly positive value. The
3224  * position in the array matches with the value of
3225  * tm_mon.
3226  */
3227  mon = MONTHS_PER_YEAR - tm->tm_mon;
3228  }
3229 
3230  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
3231  months[mon]);
3232  s += strlen(s);
3233  }
3234  break;
3235  case DCH_W:
3236  sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
3237  if (S_THth(n->suffix))
3238  str_numth(s, s, S_TH_TYPE(n->suffix));
3239  s += strlen(s);
3240  break;
3241  case DCH_J:
3242  sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
3243  if (S_THth(n->suffix))
3244  str_numth(s, s, S_TH_TYPE(n->suffix));
3245  s += strlen(s);
3246  break;
3247  }
3248  }
3249 
3250  *s = '\0';
3251 }
3252 
3253 /*
3254  * Process the string 'in' as denoted by the array of FormatNodes 'node[]'.
3255  * The TmFromChar struct pointed to by 'out' is populated with the results.
3256  *
3257  * 'collid' identifies the collation to use, if needed.
3258  * 'std' specifies standard parsing mode.
3259  *
3260  * If escontext points to an ErrorSaveContext, data errors will be reported
3261  * by filling that struct; the caller must test SOFT_ERROR_OCCURRED() to see
3262  * whether an error occurred. Otherwise, errors are thrown.
3263  *
3264  * Note: we currently don't have any to_interval() function, so there
3265  * is no need here for INVALID_FOR_INTERVAL checks.
3266  */
3267 static void
3268 DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
3269  Oid collid, bool std, Node *escontext)
3270 {
3271  FormatNode *n;
3272  const char *s;
3273  int len,
3274  value;
3275  bool fx_mode = std;
3276 
3277  /* number of extra skipped characters (more than given in format string) */
3278  int extra_skip = 0;
3279 
3280  /* cache localized days and months */
3282 
3283  for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
3284  {
3285  /*
3286  * Ignore spaces at the beginning of the string and before fields when
3287  * not in FX (fixed width) mode.
3288  */
3289  if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) &&
3290  (n->type == NODE_TYPE_ACTION || n == node))
3291  {
3292  while (*s != '\0' && isspace((unsigned char) *s))
3293  {
3294  s++;
3295  extra_skip++;
3296  }
3297  }
3298 
3299  if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
3300  {
3301  if (std)
3302  {
3303  /*
3304  * Standard mode requires strict matching between format
3305  * string separators/spaces and input string.
3306  */
3307  Assert(n->character[0] && !n->character[1]);
3308 
3309  if (*s == n->character[0])
3310  s++;
3311  else
3312  ereturn(escontext,,
3313  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3314  errmsg("unmatched format separator \"%c\"",
3315  n->character[0])));
3316  }
3317  else if (!fx_mode)
3318  {
3319  /*
3320  * In non FX (fixed format) mode one format string space or
3321  * separator match to one space or separator in input string.
3322  * Or match nothing if there is no space or separator in the
3323  * current position of input string.
3324  */
3325  extra_skip--;
3326  if (isspace((unsigned char) *s) || is_separator_char(s))
3327  {
3328  s++;
3329  extra_skip++;
3330  }
3331  }
3332  else
3333  {
3334  /*
3335  * In FX mode, on format string space or separator we consume
3336  * exactly one character from input string. Notice we don't
3337  * insist that the consumed character match the format's
3338  * character.
3339  */
3340  s += pg_mblen(s);
3341  }
3342  continue;
3343  }
3344  else if (n->type != NODE_TYPE_ACTION)
3345  {
3346  /*
3347  * Text character, so consume one character from input string.
3348  * Notice we don't insist that the consumed character match the
3349  * format's character.
3350  */
3351  if (!fx_mode)
3352  {
3353  /*
3354  * In non FX mode we might have skipped some extra characters
3355  * (more than specified in format string) before. In this
3356  * case we don't skip input string character, because it might
3357  * be part of field.
3358  */
3359  if (extra_skip > 0)
3360  extra_skip--;
3361  else
3362  s += pg_mblen(s);
3363  }
3364  else
3365  {
3366  int chlen = pg_mblen(s);
3367 
3368  /*
3369  * Standard mode requires strict match of format characters.
3370  */
3371  if (std && n->type == NODE_TYPE_CHAR &&
3372  strncmp(s, n->character, chlen) != 0)
3373  ereturn(escontext,,
3374  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3375  errmsg("unmatched format character \"%s\"",
3376  n->character)));
3377 
3378  s += chlen;
3379  }
3380  continue;
3381  }
3382 
3383  if (!from_char_set_mode(out, n->key->date_mode, escontext))
3384  return;
3385 
3386  switch (n->key->id)
3387  {
3388  case DCH_FX:
3389  fx_mode = true;
3390  break;
3391  case DCH_A_M:
3392  case DCH_P_M:
3393  case DCH_a_m:
3394  case DCH_p_m:
3396  NULL, InvalidOid,
3397  n, escontext))
3398  return;
3399  if (!from_char_set_int(&out->pm, value % 2, n, escontext))
3400  return;
3401  out->clock = CLOCK_12_HOUR;
3402  break;
3403  case DCH_AM:
3404  case DCH_PM:
3405  case DCH_am:
3406  case DCH_pm:
3408  NULL, InvalidOid,
3409  n, escontext))
3410  return;
3411  if (!from_char_set_int(&out->pm, value % 2, n, escontext))
3412  return;
3413  out->clock = CLOCK_12_HOUR;
3414  break;
3415  case DCH_HH:
3416  case DCH_HH12:
3417  if (from_char_parse_int_len(&out->hh, &s, 2, n, escontext) < 0)
3418  return;
3419  out->clock = CLOCK_12_HOUR;
3420  SKIP_THth(s, n->suffix);
3421  break;
3422  case DCH_HH24:
3423  if (from_char_parse_int_len(&out->hh, &s, 2, n, escontext) < 0)
3424  return;
3425  SKIP_THth(s, n->suffix);
3426  break;
3427  case DCH_MI:
3428  if (from_char_parse_int(&out->mi, &s, n, escontext) < 0)
3429  return;
3430  SKIP_THth(s, n->suffix);
3431  break;
3432  case DCH_SS:
3433  if (from_char_parse_int(&out->ss, &s, n, escontext) < 0)
3434  return;
3435  SKIP_THth(s, n->suffix);
3436  break;
3437  case DCH_MS: /* millisecond */
3438  len = from_char_parse_int_len(&out->ms, &s, 3, n, escontext);
3439  if (len < 0)
3440  return;
3441 
3442  /*
3443  * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
3444  */
3445  out->ms *= len == 1 ? 100 :
3446  len == 2 ? 10 : 1;
3447 
3448  SKIP_THth(s, n->suffix);
3449  break;
3450  case DCH_FF1:
3451  case DCH_FF2:
3452  case DCH_FF3:
3453  case DCH_FF4:
3454  case DCH_FF5:
3455  case DCH_FF6:
3456  out->ff = n->key->id - DCH_FF1 + 1;
3457  /* FALLTHROUGH */
3458  case DCH_US: /* microsecond */
3459  len = from_char_parse_int_len(&out->us, &s,
3460  n->key->id == DCH_US ? 6 :
3461  out->ff, n, escontext);
3462  if (len < 0)
3463  return;
3464 
3465  out->us *= len == 1 ? 100000 :
3466  len == 2 ? 10000 :
3467  len == 3 ? 1000 :
3468  len == 4 ? 100 :
3469  len == 5 ? 10 : 1;
3470 
3471  SKIP_THth(s, n->suffix);
3472  break;
3473  case DCH_SSSS:
3474  if (from_char_parse_int(&out->ssss, &s, n, escontext) < 0)
3475  return;
3476  SKIP_THth(s, n->suffix);
3477  break;
3478  case DCH_tz:
3479  case DCH_TZ:
3480  {
3481  int tzlen;
3482 
3483  tzlen = DecodeTimezoneAbbrevPrefix(s,
3484  &out->gmtoffset,
3485  &out->tzp);
3486  if (tzlen > 0)
3487  {
3488  out->has_tz = true;
3489  /* we only need the zone abbrev for DYNTZ case */
3490  if (out->tzp)
3491  out->abbrev = pnstrdup(s, tzlen);
3492  out->tzsign = 0; /* drop any earlier TZH/TZM info */
3493  s += tzlen;
3494  break;
3495  }
3496  else if (isalpha((unsigned char) *s))
3497  {
3498  /*
3499  * It doesn't match any abbreviation, but it starts
3500  * with a letter. OF format certainly won't succeed;
3501  * assume it's a misspelled abbreviation and complain
3502  * accordingly.
3503  */
3504  ereturn(escontext,,
3505  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3506  errmsg("invalid value \"%s\" for \"%s\"",
3507  s, n->key->name),
3508  errdetail("Time zone abbreviation is not recognized.")));
3509  }
3510  /* otherwise parse it like OF */
3511  }
3512  /* FALLTHROUGH */
3513  case DCH_OF:
3514  /* OF is equivalent to TZH or TZH:TZM */
3515  /* see TZH comments below */
3516  if (*s == '+' || *s == '-' || *s == ' ')
3517  {
3518  out->tzsign = *s == '-' ? -1 : +1;
3519  s++;
3520  }
3521  else
3522  {
3523  if (extra_skip > 0 && *(s - 1) == '-')
3524  out->tzsign = -1;
3525  else
3526  out->tzsign = +1;
3527  }
3528  if (from_char_parse_int_len(&out->tzh, &s, 2, n, escontext) < 0)
3529  return;
3530  if (*s == ':')
3531  {
3532  s++;
3533  if (from_char_parse_int_len(&out->tzm, &s, 2, n,
3534  escontext) < 0)
3535  return;
3536  }
3537  break;
3538  case DCH_TZH:
3539 
3540  /*
3541  * Value of TZH might be negative. And the issue is that we
3542  * might swallow minus sign as the separator. So, if we have
3543  * skipped more characters than specified in the format
3544  * string, then we consider prepending last skipped minus to
3545  * TZH.
3546  */
3547  if (*s == '+' || *s == '-' || *s == ' ')
3548  {
3549  out->tzsign = *s == '-' ? -1 : +1;
3550  s++;
3551  }
3552  else
3553  {
3554  if (extra_skip > 0 && *(s - 1) == '-')
3555  out->tzsign = -1;
3556  else
3557  out->tzsign = +1;
3558  }
3559 
3560  if (from_char_parse_int_len(&out->tzh, &s, 2, n, escontext) < 0)
3561  return;
3562  break;
3563  case DCH_TZM:
3564  /* assign positive timezone sign if TZH was not seen before */
3565  if (!out->tzsign)
3566  out->tzsign = +1;
3567  if (from_char_parse_int_len(&out->tzm, &s, 2, n, escontext) < 0)
3568  return;
3569  break;
3570  case DCH_A_D:
3571  case DCH_B_C:
3572  case DCH_a_d:
3573  case DCH_b_c:
3575  NULL, InvalidOid,
3576  n, escontext))
3577  return;
3578  if (!from_char_set_int(&out->bc, value % 2, n, escontext))
3579  return;
3580  break;
3581  case DCH_AD:
3582  case DCH_BC:
3583  case DCH_ad:
3584  case DCH_bc:
3586  NULL, InvalidOid,
3587  n, escontext))
3588  return;
3589  if (!from_char_set_int(&out->bc, value % 2, n, escontext))
3590  return;
3591  break;
3592  case DCH_MONTH:
3593  case DCH_Month:
3594  case DCH_month:
3596  S_TM(n->suffix) ? localized_full_months : NULL,
3597  collid,
3598  n, escontext))
3599  return;
3600  if (!from_char_set_int(&out->mm, value + 1, n, escontext))
3601  return;
3602  break;
3603  case DCH_MON:
3604  case DCH_Mon:
3605  case DCH_mon:
3606  if (!from_char_seq_search(&value, &s, months,
3607  S_TM(n->suffix) ? localized_abbrev_months : NULL,
3608  collid,
3609  n, escontext))
3610  return;
3611  if (!from_char_set_int(&out->mm, value + 1, n, escontext))
3612  return;
3613  break;
3614  case DCH_MM:
3615  if (from_char_parse_int(&out->mm, &s, n, escontext) < 0)
3616  return;
3617  SKIP_THth(s, n->suffix);
3618  break;
3619  case DCH_DAY:
3620  case DCH_Day:
3621  case DCH_day:
3622  if (!from_char_seq_search(&value, &s, days,
3623  S_TM(n->suffix) ? localized_full_days : NULL,
3624  collid,
3625  n, escontext))
3626  return;
3627  if (!from_char_set_int(&out->d, value, n, escontext))
3628  return;
3629  out->d++;
3630  break;
3631  case DCH_DY:
3632  case DCH_Dy:
3633  case DCH_dy:
3635  S_TM(n->suffix) ? localized_abbrev_days : NULL,
3636  collid,
3637  n, escontext))
3638  return;
3639  if (!from_char_set_int(&out->d, value, n, escontext))
3640  return;
3641  out->d++;
3642  break;
3643  case DCH_DDD:
3644  if (from_char_parse_int(&out->ddd, &s, n, escontext) < 0)
3645  return;
3646  SKIP_THth(s, n->suffix);
3647  break;
3648  case DCH_IDDD:
3649  if (from_char_parse_int_len(&out->ddd, &s, 3, n, escontext) < 0)
3650  return;
3651  SKIP_THth(s, n->suffix);
3652  break;
3653  case DCH_DD:
3654  if (from_char_parse_int(&out->dd, &s, n, escontext) < 0)
3655  return;
3656  SKIP_THth(s, n->suffix);
3657  break;
3658  case DCH_D:
3659  if (from_char_parse_int(&out->d, &s, n, escontext) < 0)
3660  return;
3661  SKIP_THth(s, n->suffix);
3662  break;
3663  case DCH_ID:
3664  if (from_char_parse_int_len(&out->d, &s, 1, n, escontext) < 0)
3665  return;
3666  /* Shift numbering to match Gregorian where Sunday = 1 */
3667  if (++out->d > 7)
3668  out->d = 1;
3669  SKIP_THth(s, n->suffix);
3670  break;
3671  case DCH_WW:
3672  case DCH_IW:
3673  if (from_char_parse_int(&out->ww, &s, n, escontext) < 0)
3674  return;
3675  SKIP_THth(s, n->suffix);
3676  break;
3677  case DCH_Q:
3678 
3679  /*
3680  * We ignore 'Q' when converting to date because it is unclear
3681  * which date in the quarter to use, and some people specify
3682  * both quarter and month, so if it was honored it might
3683  * conflict with the supplied month. That is also why we don't
3684  * throw an error.
3685  *
3686  * We still parse the source string for an integer, but it
3687  * isn't stored anywhere in 'out'.
3688  */
3689  if (from_char_parse_int((int *) NULL, &s, n, escontext) < 0)
3690  return;
3691  SKIP_THth(s, n->suffix);
3692  break;
3693  case DCH_CC:
3694  if (from_char_parse_int(&out->cc, &s, n, escontext) < 0)
3695  return;
3696  SKIP_THth(s, n->suffix);
3697  break;
3698  case DCH_Y_YYY:
3699  {
3700  int matched,
3701  years,
3702  millennia,
3703  nch;
3704 
3705  matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
3706  if (matched < 2)
3707  ereturn(escontext,,
3708  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3709  errmsg("invalid input string for \"Y,YYY\"")));
3710  years += (millennia * 1000);
3711  if (!from_char_set_int(&out->year, years, n, escontext))
3712  return;
3713  out->yysz = 4;
3714  s += nch;
3715  SKIP_THth(s, n->suffix);
3716  }
3717  break;
3718  case DCH_YYYY:
3719  case DCH_IYYY:
3720  if (from_char_parse_int(&out->year, &s, n, escontext) < 0)
3721  return;
3722  out->yysz = 4;
3723  SKIP_THth(s, n->suffix);
3724  break;
3725  case DCH_YYY:
3726  case DCH_IYY:
3727  len = from_char_parse_int(&out->year, &s, n, escontext);
3728  if (len < 0)
3729  return;
3730  if (len < 4)
3731  out->year = adjust_partial_year_to_2020(out->year);
3732  out->yysz = 3;
3733  SKIP_THth(s, n->suffix);
3734  break;
3735  case DCH_YY:
3736  case DCH_IY:
3737  len = from_char_parse_int(&out->year, &s, n, escontext);
3738  if (len < 0)
3739  return;
3740  if (len < 4)
3741  out->year = adjust_partial_year_to_2020(out->year);
3742  out->yysz = 2;
3743  SKIP_THth(s, n->suffix);
3744  break;
3745  case DCH_Y:
3746  case DCH_I:
3747  len = from_char_parse_int(&out->year, &s, n, escontext);
3748  if (len < 0)
3749  return;
3750  if (len < 4)
3751  out->year = adjust_partial_year_to_2020(out->year);
3752  out->yysz = 1;
3753  SKIP_THth(s, n->suffix);
3754  break;
3755  case DCH_RM:
3756  case DCH_rm:
3758  NULL, InvalidOid,
3759  n, escontext))
3760  return;
3761  if (!from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, n,
3762  escontext))
3763  return;
3764  break;
3765  case DCH_W:
3766  if (from_char_parse_int(&out->w, &s, n, escontext) < 0)
3767  return;
3768  SKIP_THth(s, n->suffix);
3769  break;
3770  case DCH_J:
3771  if (from_char_parse_int(&out->j, &s, n, escontext) < 0)
3772  return;
3773  SKIP_THth(s, n->suffix);
3774  break;
3775  }
3776 
3777  /* Ignore all spaces after fields */
3778  if (!fx_mode)
3779  {
3780  extra_skip = 0;
3781  while (*s != '\0' && isspace((unsigned char) *s))
3782  {
3783  s++;
3784  extra_skip++;
3785  }
3786  }
3787  }
3788 
3789  /*
3790  * Standard parsing mode doesn't allow unmatched format patterns or
3791  * trailing characters in the input string.
3792  */
3793  if (std)
3794  {
3795  if (n->type != NODE_TYPE_END)
3796  ereturn(escontext,,
3797  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3798  errmsg("input string is too short for datetime format")));
3799 
3800  while (*s != '\0' && isspace((unsigned char) *s))
3801  s++;
3802 
3803  if (*s != '\0')
3804  ereturn(escontext,,
3805  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3806  errmsg("trailing characters remain in input string after datetime format")));
3807  }
3808 }
3809 
3810 /*
3811  * The invariant for DCH cache entry management is that DCHCounter is equal
3812  * to the maximum age value among the existing entries, and we increment it
3813  * whenever an access occurs. If we approach overflow, deal with that by
3814  * halving all the age values, so that we retain a fairly accurate idea of
3815  * which entries are oldest.
3816  */
     /*
      * This helper does nothing unless DCHCounter has reached INT_MAX - 1.
      * In that case the age of each of the n_DCHCache entries is shifted
      * right by one bit and DCHCounter is shifted the same way, which leaves
      * room for the "++DCHCounter" done by the cache lookup/insert code.
      *
      * NOTE(review): the line carrying this function's name is not visible
      * in this listing.
      */
3817 static inline void
3819 {
      /* one increment away from INT_MAX: rescale before anyone increments */
3820  if (DCHCounter >= (INT_MAX - 1))
3821  {
3822  for (int i = 0; i < n_DCHCache; i++)
3823  DCHCache[i]->age >>= 1;
      /* halve the counter as well so it stays in step with the halved ages */
3824  DCHCounter >>= 1;
3825  }
3826 }
3827 
3828 /*
3829  * Get mask of date/time/zone components present in format nodes.
3830  */
     /*
      * Walks the node array up to its NODE_TYPE_END terminator and ORs
      * DCH_TIMED, DCH_ZONED and/or DCH_DATED into the result, depending on
      * which keyword ids occur.  Nodes that are not NODE_TYPE_ACTION never
      * contribute, and DCH_FX contributes nothing.  The returned value is 0
      * when no listed keyword is present.
      *
      * NOTE(review): the line carrying this function's name and parameter
      * list is not visible in this listing; it is presumably the
      * DCH_datetime_type() that do_to_timestamp() calls to fill *flags --
      * confirm.
      *
      * NOTE(review): DCH_from_char() parses DCH_IW together with DCH_WW, but
      * DCH_IW has no case label below and the switch has no default, so an
      * IW field on its own sets no flag.  Confirm whether that is intended.
      */
3831 static int
3833 {
3834  FormatNode *n;
3835  int flags = 0;
3836 
3837  for (n = node; n->type != NODE_TYPE_END; n++)
3838  {
      /* only keyword (action) nodes describe a date/time component */
3839  if (n->type != NODE_TYPE_ACTION)
3840  continue;
3841 
3842  switch (n->key->id)
3843  {
      /* FX is a parsing-mode switch, not a component */
3844  case DCH_FX:
3845  break;
      /* meridiem, hour, minute, second and sub-second fields */
3846  case DCH_A_M:
3847  case DCH_P_M:
3848  case DCH_a_m:
3849  case DCH_p_m:
3850  case DCH_AM:
3851  case DCH_PM:
3852  case DCH_am:
3853  case DCH_pm:
3854  case DCH_HH:
3855  case DCH_HH12:
3856  case DCH_HH24:
3857  case DCH_MI:
3858  case DCH_SS:
3859  case DCH_MS: /* millisecond */
3860  case DCH_US: /* microsecond */
3861  case DCH_FF1:
3862  case DCH_FF2:
3863  case DCH_FF3:
3864  case DCH_FF4:
3865  case DCH_FF5:
3866  case DCH_FF6:
3867  case DCH_SSSS:
3868  flags |= DCH_TIMED;
3869  break;
      /* time zone abbreviation / offset fields */
3870  case DCH_tz:
3871  case DCH_TZ:
3872  case DCH_OF:
3873  case DCH_TZH:
3874  case DCH_TZM:
3875  flags |= DCH_ZONED;
3876  break;
      /* era, month, day, week, quarter, century, year and Julian-day fields */
3877  case DCH_A_D:
3878  case DCH_B_C:
3879  case DCH_a_d:
3880  case DCH_b_c:
3881  case DCH_AD:
3882  case DCH_BC:
3883  case DCH_ad:
3884  case DCH_bc:
3885  case DCH_MONTH:
3886  case DCH_Month:
3887  case DCH_month:
3888  case DCH_MON:
3889  case DCH_Mon:
3890  case DCH_mon:
3891  case DCH_MM:
3892  case DCH_DAY:
3893  case DCH_Day:
3894  case DCH_day:
3895  case DCH_DY:
3896  case DCH_Dy:
3897  case DCH_dy:
3898  case DCH_DDD:
3899  case DCH_IDDD:
3900  case DCH_DD:
3901  case DCH_D:
3902  case DCH_ID:
3903  case DCH_WW:
3904  case DCH_Q:
3905  case DCH_CC:
3906  case DCH_Y_YYY:
3907  case DCH_YYYY:
3908  case DCH_IYYY:
3909  case DCH_YYY:
3910  case DCH_IYY:
3911  case DCH_YY:
3912  case DCH_IY:
3913  case DCH_Y:
3914  case DCH_I:
3915  case DCH_RM:
3916  case DCH_rm:
3917  case DCH_W:
3918  case DCH_J:
3919  flags |= DCH_DATED;
3920  break;
3921  }
3922  }
3923 
3924  return flags;
3925 }
3926 
3927 /* select a DCHCacheEntry to hold the given format picture */
     /*
      * Returns an entry whose 'str' holds a copy of the picture (copied with
      * a DCH_CACHE_SIZE + 1 byte limit), whose 'age' is the freshly
      * incremented DCHCounter and whose 'valid' flag is false.  The caller
      * must fill in the parsed format and only then set 'valid'.
      *
      * Two paths exist: when the cache is full an existing slot is recycled,
      * otherwise a new entry is stored at DCHCache[n_DCHCache] and
      * n_DCHCache is incremented.
      *
      * NOTE(review): the recycling path does not assign old->std, whereas
      * the new-entry path sets ent->std = std and DCH_cache_search() compares
      * ent->std with the requested value.  A recycled entry therefore keeps
      * the 'std' of the picture it previously held.  This looks like an
      * omission (expected: old->std = std) -- confirm and fix.
      *
      * NOTE(review): a few statement lines of this function (after the
      * first two comments below, and the allocation call in the else
      * branch) are not visible in this listing.
      */
3928 static DCHCacheEntry *
3929 DCH_cache_getnew(const char *str, bool std)
3930 {
3931  DCHCacheEntry *ent;
3932 
3933  /* Ensure we can advance DCHCounter below */
3935 
3936  /*
3937  * If cache is full, remove oldest entry (or recycle first not-valid one)
3938  */
3940  {
      /* slot 0 is the initial victim candidate */
3941  DCHCacheEntry *old = DCHCache[0];
3942 
3943 #ifdef DEBUG_TO_FROM_CHAR
3944  elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
3945 #endif
      /* if slot 0 is itself not valid it is reused without scanning */
3946  if (old->valid)
3947  {
3948  for (int i = 1; i < DCH_CACHE_ENTRIES; i++)
3949  {
3950  ent = DCHCache[i];
      /* a not-valid slot wins immediately */
3951  if (!ent->valid)
3952  {
3953  old = ent;
3954  break;
3955  }
      /* otherwise remember the entry with the smallest age */
3956  if (ent->age < old->age)
3957  old = ent;
3958  }
3959  }
3960 #ifdef DEBUG_TO_FROM_CHAR
3961  elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
3962 #endif
      /* invalidate first, so the slot cannot match while being refilled */
3963  old->valid = false;
3964  strlcpy(old->str, str, DCH_CACHE_SIZE + 1);
3965  old->age = (++DCHCounter);
3966  /* caller is expected to fill format, then set valid */
3967  return old;
3968  }
3969  else
3970  {
3971 #ifdef DEBUG_TO_FROM_CHAR
3972  elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
3973 #endif
      /* the next free slot must still be empty */
3974  Assert(DCHCache[n_DCHCache] == NULL);
3975  DCHCache[n_DCHCache] = ent = (DCHCacheEntry *)
3977  ent->valid = false;
3978  strlcpy(ent->str, str, DCH_CACHE_SIZE + 1);
3979  ent->std = std;
3980  ent->age = (++DCHCounter);
3981  /* caller is expected to fill format, then set valid */
3982  ++n_DCHCache;
3983  return ent;
3984  }
3985 }
3986 
3987 /* look for an existing DCHCacheEntry matching the given format picture */
     /*
      * Linear scan over the first n_DCHCache slots.  An entry matches only
      * if it is valid, its stored string equals 'str' and its 'std' flag
      * equals the requested one.  On a hit the entry's age is set to the
      * incremented DCHCounter and the entry is returned; on a miss the
      * result is NULL and DCHCounter is not incremented by the loop.
      *
      * NOTE(review): the statement that follows the first comment in the
      * body is not visible in this listing.
      */
3988 static DCHCacheEntry *
3989 DCH_cache_search(const char *str, bool std)
3990 {
3991  /* Ensure we can advance DCHCounter below */
3993 
3994  for (int i = 0; i < n_DCHCache; i++)
3995  {
3996  DCHCacheEntry *ent = DCHCache[i];
3997 
      /* not-valid entries are skipped, so a half-built entry never matches */
3998  if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std)
3999  {
      /* mark as most recently used */
4000  ent->age = (++DCHCounter);
4001  return ent;
4002  }
4003  }
4004 
4005  return NULL;
4006 }
4007 
4008 /* Find or create a DCHCacheEntry for the given format picture */
     /*
      * Returns the cached entry for (str, std) if DCH_cache_search() finds
      * one.  Otherwise obtains a slot from DCH_cache_getnew(), has the
      * picture parsed into it, and sets 'valid' afterwards.  The parse flags
      * are DCH_FLAG, plus STD_FLAG when 'std' is true.
      *
      * NOTE(review): the first line of the parse call is not visible in
      * this listing; only its trailing arguments are.
      */
4009 static DCHCacheEntry *
4010 DCH_cache_fetch(const char *str, bool std)
4011 {
4012  DCHCacheEntry *ent;
4013 
4014  if ((ent = DCH_cache_search(str, std)) == NULL)
4015  {
4016  /*
4017  * Not in the cache, must run parser and save a new format-picture to
4018  * the cache. Do not mark the cache entry valid until parsing
4019  * succeeds.
4020  */
4021  ent = DCH_cache_getnew(str, std);
4022 
4024  DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4025 
      /* reached only after parsing; the entry may now be found by searches */
4026  ent->valid = true;
4027  }
4028  return ent;
4029 }
4030 
4031 /*
4032  * Format a date/time or interval into a string according to fmt.
4033  * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char
4034  * for formatting.
4035  */
4036 static text *
4037 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
4038 {
4039  FormatNode *format;
4040  char *fmt_str,
4041  *result;
4042  bool incache;
4043  int fmt_len;
4044  text *res;
4045 
4046  /*
4047  * Convert fmt to C string
4048  */
4049  fmt_str = text_to_cstring(fmt);
4050  fmt_len = strlen(fmt_str);
4051 
4052  /*
4053  * Allocate workspace for result as C string
4054  */
4055  result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
4056  *result = '\0';
4057 
4058  if (fmt_len > DCH_CACHE_SIZE)
4059  {
4060  /*
4061  * Allocate new memory if format picture is bigger than static cache
4062  * and do not use cache (call parser always)
4063  */
4064  incache = false;
4065 
4066  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4067 
4068  parse_format(format, fmt_str, DCH_keywords,
4069  DCH_suff, DCH_index, DCH_FLAG, NULL);
4070  }
4071  else
4072  {
4073  /*
4074  * Use cache buffers
4075  */
4076  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
4077 
4078  incache = true;
4079  format = ent->format;
4080  }
4081 
4082  /* The real work is here */
4083  DCH_to_char(format, is_interval, tmtc, result, collid);
4084 
4085  if (!incache)
4086  pfree(format);
4087 
4088  pfree(fmt_str);
4089 
4090  /* convert C-string result to TEXT format */
4091  res = cstring_to_text(result);
4092 
4093  pfree(result);
4094  return res;
4095 }
4096 
4097 /****************************************************************************
4098  * Public routines
4099  ***************************************************************************/
4100 
4101 /* -------------------
4102  * TIMESTAMP to_char()
4103  * -------------------
4104  */
     /*
      * Argument 1 is the format picture.  Yields SQL NULL when the picture
      * is empty or the timestamp 'dt' is not finite; raises "timestamp out
      * of range" when timestamp2tm() fails.  Otherwise the value is broken
      * down, day-of-week and day-of-year are filled in, and the fields are
      * formatted by datetime_to_char_body() with is_interval = false and the
      * call's collation.
      *
      * NOTE(review): the function-name line, the declaration of 'dt' and
      * the final return statement are not visible in this listing.
      */
4105 Datum
4107 {
4109  text *fmt = PG_GETARG_TEXT_PP(1),
4110  *res;
4111  TmToChar tmtc;
4112  struct pg_tm tt;
4113  struct fmt_tm *tm;
4114  int thisdate;
4115 
      /* nothing to format: empty picture or non-finite value */
4116  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4117  PG_RETURN_NULL();
4118 
4119  ZERO_tmtc(&tmtc);
4120  tm = tmtcTm(&tmtc);
4121 
      /* break dt down into tt and fractional seconds; no zone outputs requested */
4122  if (timestamp2tm(dt, NULL, &tt, &tmtcFsec(&tmtc), NULL, NULL) != 0)
4123  ereport(ERROR,
4124  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4125  errmsg("timestamp out of range")));
4126 
4127  /* calculate wday and yday, because timestamp2tm doesn't */
4128  thisdate = date2j(tt.tm_year, tt.tm_mon, tt.tm_mday);
4129  tt.tm_wday = (thisdate + 1) % 7;
      /* 1-based: January 1 gives tm_yday = 1 */
4130  tt.tm_yday = thisdate - date2j(tt.tm_year, 1, 1) + 1;
4131 
      /* transfer the broken-down fields into the formatter's struct */
4132  COPY_tm(tm, &tt);
4133 
4134  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4135  PG_RETURN_NULL();
4136 
4138 }
4139 
     /*
      * to_char() variant that also asks timestamp2tm() for zone information:
      * the zone offset goes into the local 'tz' and the zone name into the
      * tmtc slot addressed by tmtcTzn().  Apart from that the steps match
      * the function above: NULL for an empty picture or non-finite 'dt',
      * "timestamp out of range" on conversion failure, then wday/yday are
      * computed and datetime_to_char_body() is called with
      * is_interval = false.
      *
      * NOTE(review): the function-name line, the declaration of 'dt' and
      * the final return statement are not visible in this listing; judging
      * by the zone handling this is presumably the timestamptz variant --
      * confirm.
      */
4140 Datum
4142 {
4144  text *fmt = PG_GETARG_TEXT_PP(1),
4145  *res;
4146  TmToChar tmtc;
4147  int tz;
4148  struct pg_tm tt;
4149  struct fmt_tm *tm;
4150  int thisdate;
4151 
      /* nothing to format: empty picture or non-finite value */
4152  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4153  PG_RETURN_NULL();
4154 
4155  ZERO_tmtc(&tmtc);
4156  tm = tmtcTm(&tmtc);
4157 
      /* break dt down, also fetching the zone offset and zone name */
4158  if (timestamp2tm(dt, &tz, &tt, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
4159  ereport(ERROR,
4160  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4161  errmsg("timestamp out of range")));
4162 
4163  /* calculate wday and yday, because timestamp2tm doesn't */
4164  thisdate = date2j(tt.tm_year, tt.tm_mon, tt.tm_mday);
4165  tt.tm_wday = (thisdate + 1) % 7;
      /* 1-based: January 1 gives tm_yday = 1 */
4166  tt.tm_yday = thisdate - date2j(tt.tm_year, 1, 1) + 1;
4167 
      /* transfer the broken-down fields into the formatter's struct */
4168  COPY_tm(tm, &tt);
4169 
4170  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4171  PG_RETURN_NULL();
4172 
4174 }
4175 
4176 
4177 /* -------------------
4178  * INTERVAL to_char()
4179  * -------------------
4180  */
     /*
      * Argument 0 is the interval, argument 1 the format picture.  Yields
      * SQL NULL when the picture is empty or the interval is not finite.
      * Otherwise the interval is decomposed with interval2itm(), its fields
      * are copied into the formatter's struct, and datetime_to_char_body()
      * is called with is_interval = true and the call's collation.
      *
      * NOTE(review): the function-name line, the statement after the
      * "wday is meaningless" comment and the final return statement are
      * not visible in this listing.
      */
4181 Datum
4183 {
4184  Interval *it = PG_GETARG_INTERVAL_P(0);
4185  text *fmt = PG_GETARG_TEXT_PP(1),
4186  *res;
4187  TmToChar tmtc;
4188  struct fmt_tm *tm;
4189  struct pg_itm tt,
4190  *itm = &tt;
4191 
      /* nothing to format: empty picture or non-finite interval */
4192  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || INTERVAL_NOT_FINITE(it))
4193  PG_RETURN_NULL();
4194 
4195  ZERO_tmtc(&tmtc);
4196  tm = tmtcTm(&tmtc);
4197 
      /* decompose the interval; microseconds go to fsec, the rest to tm */
4198  interval2itm(*it, itm);
4199  tmtc.fsec = itm->tm_usec;
4200  tm->tm_sec = itm->tm_sec;
4201  tm->tm_min = itm->tm_min;
4202  tm->tm_hour = itm->tm_hour;
4203  tm->tm_mday = itm->tm_mday;
4204  tm->tm_mon = itm->tm_mon;
4205  tm->tm_year = itm->tm_year;
4206 
4207  /* wday is meaningless, yday approximates the total span in days */
4209 
4210  if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
4211  PG_RETURN_NULL();
4212 
4214 }
4215 
4216 /* ---------------------
4217  * TO_TIMESTAMP()
4218  *
4219  * Make Timestamp from date_str which is formatted at argument 'fmt'
4220  * ( to_timestamp is reverse to_char() )
4221  * ---------------------
4222  */
     /*
      * Argument 0 is the input string, argument 1 the format picture.
      * Parsing is delegated to do_to_timestamp() in non-standard mode
      * (std = false) with no ErrorSaveContext, so failures there are thrown
      * rather than returned.  The parsed fields are converted with
      * tm2timestamp(); failure raises "timestamp out of range".  A nonzero
      * fractional precision reported by the parser is applied through
      * AdjustTimestampForTypmod().
      *
      * NOTE(review): the function-name line, the declaration of 'collid'
      * and the statement of the "else" branch that sets 'tz' when the input
      * carried no zone are not visible in this listing.
      */
4223 Datum
4225 {
4226  text *date_txt = PG_GETARG_TEXT_PP(0);
4227  text *fmt = PG_GETARG_TEXT_PP(1);
4229  Timestamp result;
4230  int tz;
4231  struct pg_tm tm;
4232  struct fmt_tz ftz;
4233  fsec_t fsec;
4234  int fprec;
4235 
      /* flags output not needed (NULL); errors are thrown (escontext NULL) */
4236  do_to_timestamp(date_txt, fmt, collid, false,
4237  &tm, &fsec, &ftz, &fprec, NULL, NULL);
4238 
4239  /* Use the specified time zone, if any. */
4240  if (ftz.has_tz)
4241  tz = ftz.gmtoffset;
4242  else
4244 
4245  if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
4246  ereport(ERROR,
4247  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4248  errmsg("timestamp out of range")));
4249 
4250  /* Use the specified fractional precision, if any. */
4251  if (fprec)
4252  AdjustTimestampForTypmod(&result, fprec, NULL);
4253 
4254  PG_RETURN_TIMESTAMP(result);
4255 }
4256 
4257 /* ----------
4258  * TO_DATE
4259  * Make Date from date_str which is formatted at argument 'fmt'
4260  * ----------
4261  */
     /*
      * Argument 0 is the input string, argument 1 the format picture.
      * Parsing is delegated to do_to_timestamp() in non-standard mode with
      * no ErrorSaveContext; neither fractional precision nor component
      * flags are requested (both NULL).  The parsed fields are range-checked
      * twice, each failure raising "date out of range" with the original
      * input text in the message.
      *
      * NOTE(review): the function-name line, the declaration of 'collid',
      * the condition guarding the first ereport() and the statement that
      * computes 'result' are not visible in this listing.  'fsec' and 'ftz'
      * are filled by the parser but not otherwise used in the visible code.
      */
4262 Datum
4264 {
4265  text *date_txt = PG_GETARG_TEXT_PP(0);
4266  text *fmt = PG_GETARG_TEXT_PP(1);
4268  DateADT result;
4269  struct pg_tm tm;
4270  struct fmt_tz ftz;
4271  fsec_t fsec;
4272 
4273  do_to_timestamp(date_txt, fmt, collid, false,
4274  &tm, &fsec, &ftz, NULL, NULL, NULL);
4275 
4276  /* Prevent overflow in Julian-day routines */
4278  ereport(ERROR,
4279  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4280  errmsg("date out of range: \"%s\"",
4281  text_to_cstring(date_txt))));
4282 
4284 
4285  /* Now check for just-out-of-range dates */
4286  if (!IS_VALID_DATE(result))
4287  ereport(ERROR,
4288  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4289  errmsg("date out of range: \"%s\"",
4290  text_to_cstring(date_txt))));
4291 
4292  PG_RETURN_DATEADT(result);
4293 }
4294 
4295 /*
4296  * Convert the 'date_txt' input to a datetime type using argument 'fmt'
4297  * as a format string. The collation 'collid' may be used for case-folding
4298  * rules in some cases. 'strict' specifies standard parsing mode.
4299  *
4300  * The actual data type (returned in 'typid', 'typmod') is determined by
4301  * the presence of date/time/zone components in the format string.
4302  *
4303  * When a timezone component is present, the corresponding offset is
4304  * returned in '*tz'.
4305  *
4306  * If escontext points to an ErrorSaveContext, data errors will be reported
4307  * by filling that struct; the caller must test SOFT_ERROR_OCCURRED() to see
4308  * whether an error occurred. Otherwise, errors are thrown.
4309  */
     /*
      * Result type by flag combination, as implemented below:
      *
      *   DATED + TIMED + ZONED   -> TIMESTAMPTZOID
      *   DATED + TIMED           -> TIMESTAMPOID
      *   DATED + ZONED (no time) -> error "datetime format is zoned but not timed"
      *   DATED                   -> DATEOID
      *   TIMED + ZONED           -> TIMETZOID
      *   TIMED                   -> TIMEOID
      *   none                    -> error "datetime format is not dated and not timed"
      *
      * *typmod is set to the parsed fractional precision, or -1 when that
      * precision is 0.  *typid is assigned only on the success paths.  Every
      * error exit made through ereturn() uses (Datum) 0 as the return value.
      *
      * NOTE(review): the return values of AdjustTimestampForTypmod() are not
      * checked here; presumably a soft error raised there is left for the
      * caller's SOFT_ERROR_OCCURRED() test -- confirm.
      */
4310 Datum
4311 parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
4312  Oid *typid, int32 *typmod, int *tz,
4313  Node *escontext)
4314 {
4315  struct pg_tm tm;
4316  struct fmt_tz ftz;
4317  fsec_t fsec;
4318  int fprec;
4319  uint32 flags;
4320 
      /* parse the input; a false result means a soft error was recorded */
4321  if (!do_to_timestamp(date_txt, fmt, collid, strict,
4322  &tm, &fsec, &ftz, &fprec, &flags, escontext))
4323  return (Datum) 0;
4324 
4325  *typmod = fprec ? fprec : -1; /* fractional part precision */
4326 
4327  if (flags & DCH_DATED)
4328  {
4329  if (flags & DCH_TIMED)
4330  {
4331  if (flags & DCH_ZONED)
4332  {
      /* date + time + zone: timestamptz */
4333  TimestampTz result;
4334 
4335  if (ftz.has_tz)
4336  {
4337  *tz = ftz.gmtoffset;
4338  }
4339  else
4340  {
4341  /*
4342  * Time zone is present in format string, but not in input
4343  * string. Assuming do_to_timestamp() triggers no error
4344  * this should be possible only in non-strict case.
4345  */
4346  Assert(!strict);
4347 
4348  ereturn(escontext, (Datum) 0,
4349  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4350  errmsg("missing time zone in input string for type timestamptz")));
4351  }
4352 
4353  if (tm2timestamp(&tm, fsec, tz, &result) != 0)
4354  ereturn(escontext, (Datum) 0,
4355  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4356  errmsg("timestamptz out of range")));
4357 
4358  AdjustTimestampForTypmod(&result, *typmod, escontext);
4359 
4360  *typid = TIMESTAMPTZOID;
4361  return TimestampTzGetDatum(result);
4362  }
4363  else
4364  {
      /* date + time, no zone: timestamp (NULL zone pointer) */
4365  Timestamp result;
4366 
4367  if (tm2timestamp(&tm, fsec, NULL, &result) != 0)
4368  ereturn(escontext, (Datum) 0,
4369  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4370  errmsg("timestamp out of range")));
4371 
4372  AdjustTimestampForTypmod(&result, *typmod, escontext);
4373 
4374  *typid = TIMESTAMPOID;
4375  return TimestampGetDatum(result);
4376  }
4377  }
4378  else
4379  {
4380  if (flags & DCH_ZONED)
4381  {
      /* a zone without any time-of-day component is rejected */
4382  ereturn(escontext, (Datum) 0,
4383  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4384  errmsg("datetime format is zoned but not timed")));
4385  }
4386  else
4387  {
      /*
       * date only
       *
       * NOTE(review): the condition guarding the first ereturn() below
       * and the second line of the 'result' computation are not visible
       * in this listing.
       */
4388  DateADT result;
4389 
4390  /* Prevent overflow in Julian-day routines */
4392  ereturn(escontext, (Datum) 0,
4393  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4394  errmsg("date out of range: \"%s\"",
4395  text_to_cstring(date_txt))));
4396 
4397  result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) -
4399 
4400  /* Now check for just-out-of-range dates */
4401  if (!IS_VALID_DATE(result))
4402  ereturn(escontext, (Datum) 0,
4403  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4404  errmsg("date out of range: \"%s\"",
4405  text_to_cstring(date_txt))));
4406 
4407  *typid = DATEOID;
4408  return DateADTGetDatum(result);
4409  }
4410  }
4411  }
4412  else if (flags & DCH_TIMED)
4413  {
4414  if (flags & DCH_ZONED)
4415  {
      /*
       * time + zone: timetz.  The result struct is palloc'd up front and
       * is not freed on the error exits below.
       */
4416  TimeTzADT *result = palloc(sizeof(TimeTzADT));
4417 
4418  if (ftz.has_tz)
4419  {
4420  *tz = ftz.gmtoffset;
4421  }
4422  else
4423  {
4424  /*
4425  * Time zone is present in format string, but not in input
4426  * string. Assuming do_to_timestamp() triggers no error this
4427  * should be possible only in non-strict case.
4428  */
4429  Assert(!strict);
4430 
4431  ereturn(escontext, (Datum) 0,
4432  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4433  errmsg("missing time zone in input string for type timetz")));
4434  }
4435 
4436  if (tm2timetz(&tm, fsec, *tz, result) != 0)
4437  ereturn(escontext, (Datum) 0,
4438  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4439  errmsg("timetz out of range")));
4440 
4441  AdjustTimeForTypmod(&result->time, *typmod);
4442 
4443  *typid = TIMETZOID;
4444  return TimeTzADTPGetDatum(result);
4445  }
4446  else
4447  {
      /* time only */
4448  TimeADT result;
4449 
4450  if (tm2time(&tm, fsec, &result) != 0)
4451  ereturn(escontext, (Datum) 0,
4452  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4453  errmsg("time out of range")));
4454 
4455  AdjustTimeForTypmod(&result, *typmod);
4456 
4457  *typid = TIMEOID;
4458  return TimeADTGetDatum(result);
4459  }
4460  }
4461  else
4462  {
      /* the format names neither a date nor a time component */
4463  ereturn(escontext, (Datum) 0,
4464  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4465  errmsg("datetime format is not dated and not timed")));
4466  }
4467 }
4468 
4469 /*
4470  * do_to_timestamp: shared code for to_timestamp and to_date
4471  *
4472  * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm,
4473  * fractional seconds, struct fmt_tz, and fractional precision.
4474  *
4475  * 'collid' identifies the collation to use, if needed.
4476  * 'std' specifies standard parsing mode.
4477  *
4478  * Bit mask of date/time/zone components found in 'fmt' is returned in 'flags',
4479  * if that is not NULL.
4480  *
4481  * Returns true on success, false on failure (if escontext points to an
4482  * ErrorSaveContext; otherwise errors are thrown). Note that currently,
4483  * soft-error behavior is provided for bad data but not bad format.
4484  *
4485  * We parse 'fmt' into a list of FormatNodes, which is then passed to
4486  * DCH_from_char to populate a TmFromChar with the parsed contents of
4487  * 'date_txt'.
4488  *
4489  * The TmFromChar is then analysed and converted into the final results in
4490  * struct 'tm', 'fsec', struct 'tz', and 'fprec'.
4491  */
4492 static bool
4493 do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
4494  struct pg_tm *tm, fsec_t *fsec, struct fmt_tz *tz,
4495  int *fprec, uint32 *flags, Node *escontext)
4496 {
4497  FormatNode *format = NULL;
4498  TmFromChar tmfc;
4499  int fmt_len;
4500  char *date_str;
4501  int fmask;
4502  bool incache = false;
4503 
4504  Assert(tm != NULL);
4505  Assert(fsec != NULL);
4506 
4507  date_str = text_to_cstring(date_txt);
4508 
4509  ZERO_tmfc(&tmfc);
4510  ZERO_tm(tm);
4511  *fsec = 0;
4512  tz->has_tz = false;
4513  if (fprec)
4514  *fprec = 0;
4515  if (flags)
4516  *flags = 0;
4517  fmask = 0; /* bit mask for ValidateDate() */
4518 
4519  fmt_len = VARSIZE_ANY_EXHDR(fmt);
4520 
4521  if (fmt_len)
4522  {
4523  char *fmt_str;
4524 
4525  fmt_str = text_to_cstring(fmt);
4526 
4527  if (fmt_len > DCH_CACHE_SIZE)
4528  {
4529  /*
4530  * Allocate new memory if format picture is bigger than static
4531  * cache and do not use cache (call parser always)
4532  */
4533  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4534 
4536  DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4537  }
4538  else
4539  {
4540  /*
4541  * Use cache buffers
4542  */
4543  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std);
4544 
4545  incache = true;
4546  format = ent->format;
4547  }
4548 
4549 #ifdef DEBUG_TO_FROM_CHAR
4550  /* dump_node(format, fmt_len); */
4551  /* dump_index(DCH_keywords, DCH_index); */
4552 #endif
4553 
4554  DCH_from_char(format, date_str, &tmfc, collid, std, escontext);
4555  pfree(fmt_str);
4556  if (SOFT_ERROR_OCCURRED(escontext))
4557  goto fail;
4558 
4559  if (flags)
4560  *flags = DCH_datetime_type(format);
4561 
4562  if (!incache)
4563  {
4564  pfree(format);
4565  format = NULL;
4566  }
4567  }
4568 
4569  DEBUG_TMFC(&tmfc);
4570 
4571  /*
4572  * Convert to_date/to_timestamp input fields to standard 'tm'
4573  */
4574  if (tmfc.ssss)
4575  {
4576  int x = tmfc.ssss;
4577 
4578  tm->tm_hour = x / SECS_PER_HOUR;
4579  x %= SECS_PER_HOUR;
4580  tm->tm_min = x / SECS_PER_MINUTE;
4581  x %= SECS_PER_MINUTE;
4582  tm->tm_sec = x;
4583  }
4584 
4585  if (tmfc.ss)
4586  tm->tm_sec = tmfc.ss;
4587  if (tmfc.mi)
4588  tm->tm_min = tmfc.mi;
4589  if (tmfc.hh)
4590  tm->tm_hour = tmfc.hh;
4591 
4592  if (tmfc.clock == CLOCK_12_HOUR)
4593  {
4594  if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
4595  {
4596  errsave(escontext,
4597  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4598  errmsg("hour \"%d\" is invalid for the 12-hour clock",
4599  tm->tm_hour),
4600  errhint("Use the 24-hour clock, or give an hour between 1 and 12.")));
4601  goto fail;
4602  }
4603 
4604  if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
4605  tm->tm_hour += HOURS_PER_DAY / 2;
4606  else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
4607  tm->tm_hour = 0;
4608  }
4609 
4610  if (tmfc.year)
4611  {
4612  /*
4613  * If CC and YY (or Y) are provided, use YY as 2 low-order digits for
4614  * the year in the given century. Keep in mind that the 21st century
4615  * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
4616  * 600BC to 501BC.
4617  */
4618  if (tmfc.cc && tmfc.yysz <= 2)
4619  {
4620  if (tmfc.bc)
4621  tmfc.cc = -tmfc.cc;
4622  tm->tm_year = tmfc.year % 100;
4623  if (tm->tm_year)
4624  {
4625  if (tmfc.cc >= 0)
4626  tm->tm_year += (tmfc.cc - 1) * 100;
4627  else
4628  tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
4629  }
4630  else
4631  {
4632  /* find century year for dates ending in "00" */
4633  tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
4634  }
4635  }
4636  else
4637  {
4638  /* If a 4-digit year is provided, we use that and ignore CC. */
4639  tm->tm_year = tmfc.year;
4640  if (tmfc.bc)
4641  tm->tm_year = -tm->tm_year;
4642  /* correct for our representation of BC years */
4643  if (tm->tm_year < 0)
4644  tm->tm_year++;
4645  }
4646  fmask |= DTK_M(YEAR);
4647  }
4648  else if (tmfc.cc)
4649  {
4650  /* use first year of century */
4651  if (tmfc.bc)
4652  tmfc.cc = -tmfc.cc;
4653  if (tmfc.cc >= 0)
4654  /* +1 because 21st century started in 2001 */
4655  tm->tm_year = (tmfc.cc - 1) * 100 + 1;
4656  else
4657  /* +1 because year == 599 is 600 BC */
4658  tm->tm_year = tmfc.cc * 100 + 1;
4659  fmask |= DTK_M(YEAR);
4660  }
4661 
4662  if (tmfc.j)
4663  {
4664  j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4665  fmask |= DTK_DATE_M;
4666  }
4667 
4668  if (tmfc.ww)
4669  {
4670  if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4671  {
4672  /*
4673  * If tmfc.d is not set, then the date is left at the beginning of
4674  * the ISO week (Monday).
4675  */
4676  if (tmfc.d)
4677  isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4678  else
4679  isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4680  fmask |= DTK_DATE_M;
4681  }
4682  else
4683  tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
4684  }
4685 
4686  if (tmfc.w)
4687  tmfc.dd = (tmfc.w - 1) * 7 + 1;
4688  if (tmfc.dd)
4689  {
4690  tm->tm_mday = tmfc.dd;
4691  fmask |= DTK_M(DAY);
4692  }
4693  if (tmfc.mm)
4694  {
4695  tm->tm_mon = tmfc.mm;
4696  fmask |= DTK_M(MONTH);
4697  }
4698 
4699  if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
4700  {
4701  /*
4702  * The month and day field have not been set, so we use the
4703  * day-of-year field to populate them. Depending on the date mode,
4704  * this field may be interpreted as a Gregorian day-of-year, or an ISO
4705  * week date day-of-year.
4706  */
4707 
4708  if (!tm->tm_year && !tmfc.bc)
4709  {
4710  errsave(escontext,
4711  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4712  errmsg("cannot calculate day of year without year information")));
4713  goto fail;
4714  }
4715 
4716  if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4717  {
4718  int j0; /* zeroth day of the ISO year, in Julian */
4719 
4720  j0 = isoweek2j(tm->tm_year, 1) - 1;
4721 
4722  j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4723  fmask |= DTK_DATE_M;
4724  }
4725  else
4726  {
4727  const int *y;
4728  int i;
4729 
4730  static const int ysum[2][13] = {
4731  {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
4732  {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}};
4733 
4734  y = ysum[isleap(tm->tm_year)];
4735 
4736  for (i = 1; i <= MONTHS_PER_YEAR; i++)
4737  {
4738  if (tmfc.ddd <= y[i])
4739  break;
4740  }
4741  if (tm->tm_mon <= 1)
4742  tm->tm_mon = i;
4743 
4744  if (tm->tm_mday <= 1)
4745  tm->tm_mday = tmfc.ddd - y[i - 1];
4746 
4747  fmask |= DTK_M(MONTH) | DTK_M(DAY);
4748  }
4749  }
4750 
4751  if (tmfc.ms)
4752  *fsec += tmfc.ms * 1000;
4753  if (tmfc.us)
4754  *fsec += tmfc.us;
4755  if (fprec)
4756  *fprec = tmfc.ff; /* fractional precision, if specified */
4757 
4758  /* Range-check date fields according to bit mask computed above */
4759  if (fmask != 0)
4760  {
4761  /* We already dealt with AD/BC, so pass isjulian = true */
4762  int dterr = ValidateDate(fmask, true, false, false, tm);
4763 
4764  if (dterr != 0)
4765  {
4766  /*
4767  * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate
4768  * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an
4769  * irrelevant hint about datestyle.
4770  */
4772  date_str, "timestamp", escontext);
4773  goto fail;
4774  }
4775  }
4776 
4777  /* Range-check time fields too */
4778  if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY ||
4779  tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR ||
4780  tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE ||
4781  *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC)
4782  {
4784  date_str, "timestamp", escontext);
4785  goto fail;
4786  }
4787 
4788  /*
4789  * If timezone info was present, reduce it to a GMT offset. (We cannot do
4790  * this until we've filled all of the tm struct, since the zone's offset
4791  * might be time-varying.)
4792  */
4793  if (tmfc.tzsign)
4794  {
4795  /* TZH and/or TZM fields */
4796  if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR ||
4797  tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR)
4798  {
4800  date_str, "timestamp", escontext);
4801  goto fail;
4802  }
4803 
4804  tz->has_tz = true;
4805  tz->gmtoffset = (tmfc.tzh * MINS_PER_HOUR + tmfc.tzm) * SECS_PER_MINUTE;
4806  /* note we are flipping the sign convention here */
4807  if (tmfc.tzsign > 0)
4808  tz->gmtoffset = -tz->gmtoffset;
4809  }
4810  else if (tmfc.has_tz)
4811  {
4812  /* TZ field */
4813  tz->has_tz = true;
4814  if (tmfc.tzp == NULL)
4815  {
4816  /* fixed-offset abbreviation; flip the sign convention */
4817  tz->gmtoffset = -tmfc.gmtoffset;
4818  }
4819  else
4820  {
4821  /* dynamic-offset abbreviation, resolve using specified time */
4823  tmfc.tzp);
4824  }
4825  }
4826 
4827  DEBUG_TM(tm);
4828 
4829  if (format && !incache)
4830  pfree(format);
4831  pfree(date_str);
4832 
4833  return true;
4834 
4835 fail:
4836  if (format && !incache)
4837  pfree(format);
4838  pfree(date_str);
4839 
4840  return false;
4841 }
4842 
4843 
4844 /**********************************************************************
4845  * the NUMBER version part
4846  *********************************************************************/
4847 
4848