PostgreSQL Source Code  git master
formatting.c
Go to the documentation of this file.
1 /* -----------------------------------------------------------------------
2  * formatting.c
3  *
4  * src/backend/utils/adt/formatting.c
5  *
6  *
7  * Portions Copyright (c) 1999-2024, PostgreSQL Global Development Group
8  *
9  *
10  * TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
11  *
12  * The PostgreSQL routines for a timestamp/int/float/numeric formatting,
13  * inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
14  *
15  *
16  * Cache & Memory:
17  * The routines use their own internal cache for format pictures.
18  *
19  * The cache uses a static buffer and is persistent across transactions. If
20  * the format picture is bigger than the cache buffer, the parser is called
21  * every time.
22  *
23  * NOTE for Number version:
24  * Everything in this version is implemented as keywords ( => suffixes
25  * are not used), because a format picture is for *one* item (number)
26  * only. This differs from the timestamp version, where each keyword
27  * can have a suffix.
28  *
29  * NOTE for Timestamp routines:
30  * In this module the POSIX 'struct tm' type is *not* used, but rather the
31  * PgSQL type, which has tm_mon based on one (*not* zero) and a year that
32  * is *not* based on 1900; the full year number is used.
33  * Module supports AD / BC / AM / PM.
34  *
35  * Supported types for to_char():
36  *
37  * Timestamp, Numeric, int4, int8, float4, float8
38  *
39  * Supported types for reverse conversion:
40  *
41  * Timestamp - to_timestamp()
42  * Date - to_date()
43  * Numeric - to_number()
44  *
45  *
46  * Karel Zak
47  *
48  * TODO
49  * - better number building (formatting) / parsing, now it isn't
50  * ideal code
51  * - use Assert()
52  * - add support for roman number to standard number conversion
53  * - add support for number spelling
54  * - add support for string to string formatting (we must be better
55  * than Oracle :-),
56  * to_char('Hello', 'X X X X X') -> 'H e l l o'
57  *
58  * -----------------------------------------------------------------------
59  */
60 
61 #ifdef DEBUG_TO_FROM_CHAR
62 #define DEBUG_elog_output DEBUG3
63 #endif
64 
65 #include "postgres.h"
66 
67 #include <ctype.h>
68 #include <unistd.h>
69 #include <math.h>
70 #include <float.h>
71 #include <limits.h>
72 #include <wctype.h>
73 
74 #ifdef USE_ICU
75 #include <unicode/ustring.h>
76 #endif
77 
78 #include "catalog/pg_collation.h"
79 #include "catalog/pg_type.h"
80 #include "common/unicode_case.h"
82 #include "mb/pg_wchar.h"
83 #include "nodes/miscnodes.h"
84 #include "parser/scansup.h"
85 #include "utils/builtins.h"
86 #include "utils/date.h"
87 #include "utils/datetime.h"
88 #include "utils/formatting.h"
89 #include "utils/memutils.h"
90 #include "utils/numeric.h"
91 #include "utils/pg_locale.h"
92 #include "varatt.h"
93 
94 
95 /* ----------
96  * Routines flags
97  * ----------
98  */
99 #define DCH_FLAG 0x1 /* DATE-TIME flag */
100 #define NUM_FLAG 0x2 /* NUMBER flag */
101 #define STD_FLAG 0x4 /* STANDARD flag */
102 
/* ----------
 * KeyWord index
 *
 * An index table has one slot per character offset from ' ', for
 * '~' - ' ' slots in total.  KeyWord_INDEX_FILTER() yields 1 only for
 * characters strictly between ' ' and '~', and 0 for everything else.
 * ----------
 */
#define KeyWord_INDEX_SIZE		('~' - ' ')
#define KeyWord_INDEX_FILTER(_c)	(((_c) > ' ' && (_c) < '~') ? 1 : 0)
109 
110 /* ----------
111  * Maximal length of one node
112  * ----------
113  */
114 #define DCH_MAX_ITEM_SIZ 12 /* max localized day name */
115 #define NUM_MAX_ITEM_SIZ 8 /* roman number (RN has 15 chars) */
116 
117 
118 /* ----------
119  * Format parser structs
120  * ----------
121  */
/* One entry of a keyword prefix/suffix table. */
typedef struct
{
	const char *name;			/* suffix string */
	int			len;			/* suffix length */
	int			id;				/* used in node->suffix */
	int			type;			/* prefix / postfix */
} KeySuffix;
129 
130 /* ----------
131  * FromCharDateMode
132  * ----------
133  *
134  * This value is used to nominate one of several distinct (and mutually
135  * exclusive) date conventions that a keyword can belong to.
136  */
137 typedef enum
138 {
139  FROM_CHAR_DATE_NONE = 0, /* Value does not affect date mode. */
140  FROM_CHAR_DATE_GREGORIAN, /* Gregorian (day, month, year) style date */
141  FROM_CHAR_DATE_ISOWEEK, /* ISO 8601 week date */
143 
144 typedef struct
145 {
146  const char *name;
147  int len;
148  int id;
149  bool is_digit;
151 } KeyWord;
152 
153 typedef struct
154 {
155  uint8 type; /* NODE_TYPE_XXX, see below */
156  char character[MAX_MULTIBYTE_CHAR_LEN + 1]; /* if type is CHAR */
157  uint8 suffix; /* keyword prefix/suffix code, if any */
158  const KeyWord *key; /* if type is ACTION */
159 } FormatNode;
160 
161 #define NODE_TYPE_END 1
162 #define NODE_TYPE_ACTION 2
163 #define NODE_TYPE_CHAR 3
164 #define NODE_TYPE_SEPARATOR 4
165 #define NODE_TYPE_SPACE 5
166 
167 #define SUFFTYPE_PREFIX 1
168 #define SUFFTYPE_POSTFIX 2
169 
170 #define CLOCK_24_HOUR 0
171 #define CLOCK_12_HOUR 1
172 
173 
174 /* ----------
175  * Full months
176  * ----------
177  */
178 static const char *const months_full[] = {
179  "January", "February", "March", "April", "May", "June", "July",
180  "August", "September", "October", "November", "December", NULL
181 };
182 
183 static const char *const days_short[] = {
184  "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
185 };
186 
/* ----------
 * AD / BC
 * ----------
 * There is no 0 AD; years go from 1 BC to 1 AD.  ADJUST_YEAR() therefore
 * returns a positive year unchanged and turns a year <= 0 into the positive
 * number 1 - year (0 becomes 1, -1 becomes 2, and so on).  For interval
 * years, the year is returned as is.
 */
#define ADJUST_YEAR(year, is_interval) \
	(((is_interval) || (year) > 0) ? (year) : 1 - (year))
195 
196 #define A_D_STR "A.D."
197 #define a_d_STR "a.d."
198 #define AD_STR "AD"
199 #define ad_STR "ad"
200 
201 #define B_C_STR "B.C."
202 #define b_c_STR "b.c."
203 #define BC_STR "BC"
204 #define bc_STR "bc"
205 
206 /*
207  * AD / BC strings for seq_search.
208  *
209  * These are given in two variants, a long form with periods and a standard
210  * form without.
211  *
212  * The array is laid out such that matches for AD have an even index, and
213  * matches for BC have an odd index. So the boolean value for BC is given by
214  * taking the array index of the match, modulo 2.
215  */
216 static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR, NULL};
217 static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR, NULL};
218 
219 /* ----------
220  * AM / PM
221  * ----------
222  */
223 #define A_M_STR "A.M."
224 #define a_m_STR "a.m."
225 #define AM_STR "AM"
226 #define am_STR "am"
227 
228 #define P_M_STR "P.M."
229 #define p_m_STR "p.m."
230 #define PM_STR "PM"
231 #define pm_STR "pm"
232 
233 /*
234  * AM / PM strings for seq_search.
235  *
236  * These are given in two variants, a long form with periods and a standard
237  * form without.
238  *
239  * The array is laid out such that matches for AM have an even index, and
240  * matches for PM have an odd index. So the boolean value for PM is given by
241  * taking the array index of the match, modulo 2.
242  */
243 static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR, NULL};
244 static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR, NULL};
245 
246 /* ----------
247  * Months in roman-numeral
248  * (Must be in reverse order for seq_search (in FROM_CHAR), because
249  * 'VIII' must have higher precedence than 'V')
250  * ----------
251  */
252 static const char *const rm_months_upper[] =
253 {"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I", NULL};
254 
255 static const char *const rm_months_lower[] =
256 {"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL};
257 
258 /* ----------
259  * Roman numbers
260  * ----------
261  */
262 static const char *const rm1[] = {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", NULL};
263 static const char *const rm10[] = {"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC", NULL};
264 static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", NULL};
265 
266 /* ----------
267  * Ordinal postfixes
268  * ----------
269  */
270 static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL};
271 static const char *const numth[] = {"st", "nd", "rd", "th", NULL};
272 
273 /* ----------
274  * Flags & Options:
275  * ----------
276  */
277 #define TH_UPPER 1
278 #define TH_LOWER 2
279 
280 /* ----------
281  * Number description struct
282  * ----------
283  */
/* Description of a number format picture. */
typedef struct
{
	int			pre;			/* (count) numbers before decimal */
	int			post;			/* (count) numbers after decimal */
	int			lsign;			/* want locales sign */
	int			flag;			/* number parameters */
	int			pre_lsign_num;	/* tmp value for lsign */
	int			multi;			/* multiplier for 'V' */
	int			zero_start;		/* position of first zero */
	int			zero_end;		/* position of last zero */
	int			need_locale;	/* needs it locale */
} NUMDesc;
296 
/* ----------
 * Flags for NUMBER version
 *
 * Each flag is a single bit; the IS_xxx() macros below test them against
 * the ->flag member of the struct they are given.
 * ----------
 */
#define NUM_F_DECIMAL		0x0002
#define NUM_F_LDECIMAL		0x0004
#define NUM_F_ZERO			0x0008
#define NUM_F_BLANK			0x0010
#define NUM_F_FILLMODE		0x0020
#define NUM_F_LSIGN			0x0040
#define NUM_F_BRACKET		0x0080
#define NUM_F_MINUS			0x0100
#define NUM_F_PLUS			0x0200
#define NUM_F_ROMAN			0x0400
#define NUM_F_MULTI			0x0800
#define NUM_F_PLUS_POST		0x1000
#define NUM_F_MINUS_POST	0x2000
#define NUM_F_EEEE			0x4000

#define NUM_LSIGN_PRE		(-1)
#define NUM_LSIGN_POST		1
#define NUM_LSIGN_NONE		0

/* ----------
 * Tests
 *
 * Each macro evaluates to the masked flag bit itself (non-zero when the flag
 * is set, zero otherwise), not to a normalized 0/1 value.
 * ----------
 */
#define IS_DECIMAL(_f)		((_f)->flag & NUM_F_DECIMAL)
#define IS_LDECIMAL(_f)		((_f)->flag & NUM_F_LDECIMAL)
#define IS_ZERO(_f)			((_f)->flag & NUM_F_ZERO)
#define IS_BLANK(_f)		((_f)->flag & NUM_F_BLANK)
#define IS_FILLMODE(_f)		((_f)->flag & NUM_F_FILLMODE)
#define IS_BRACKET(_f)		((_f)->flag & NUM_F_BRACKET)
#define IS_MINUS(_f)		((_f)->flag & NUM_F_MINUS)
#define IS_LSIGN(_f)		((_f)->flag & NUM_F_LSIGN)
#define IS_PLUS(_f)			((_f)->flag & NUM_F_PLUS)
#define IS_ROMAN(_f)		((_f)->flag & NUM_F_ROMAN)
#define IS_MULTI(_f)		((_f)->flag & NUM_F_MULTI)
#define IS_EEEE(_f)			((_f)->flag & NUM_F_EEEE)
336 
337 /* ----------
338  * Format picture cache
339  *
340  * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long;
341  * likewise number format pictures up to NUM_CACHE_SIZE bytes long.
342  *
343  * For simplicity, the cache entries are fixed-size, so they allow for the
344  * worst case of a FormatNode for each byte in the picture string.
345  *
346  * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and
347  * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that
348  * we don't waste too much space by palloc'ing them individually. Be sure
349  * to adjust those macros if you add fields to those structs.
350  *
351  * The max number of entries in each cache is DCH_CACHE_ENTRIES
352  * resp. NUM_CACHE_ENTRIES.
353  * ----------
354  */
355 #define DCH_CACHE_OVERHEAD \
356  MAXALIGN(sizeof(bool) + sizeof(int))
357 #define NUM_CACHE_OVERHEAD \
358  MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc))
359 
360 #define DCH_CACHE_SIZE \
361  ((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
362 #define NUM_CACHE_SIZE \
363  ((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
364 
365 #define DCH_CACHE_ENTRIES 20
366 #define NUM_CACHE_ENTRIES 20
367 
368 typedef struct
369 {
371  char str[DCH_CACHE_SIZE + 1];
372  bool std;
373  bool valid;
374  int age;
375 } DCHCacheEntry;
376 
377 typedef struct
378 {
380  char str[NUM_CACHE_SIZE + 1];
381  bool valid;
382  int age;
384 } NUMCacheEntry;
385 
386 /* global cache for date/time format pictures */
388 static int n_DCHCache = 0; /* current number of entries */
389 static int DCHCounter = 0; /* aging-event counter */
390 
391 /* global cache for number format pictures */
393 static int n_NUMCache = 0; /* current number of entries */
394 static int NUMCounter = 0; /* aging-event counter */
395 
396 /* ----------
397  * For char->date/time conversion
398  * ----------
399  */
400 typedef struct
401 {
403  int hh,
404  pm,
405  mi,
406  ss,
408  d, /* stored as 1-7, Sunday = 1, 0 means missing */
409  dd,
411  mm,
412  ms,
414  bc,
415  ww,
416  w,
417  cc,
418  j,
419  us,
420  yysz, /* is it YY or YYYY ? */
421  clock, /* 12 or 24 hour clock? */
422  tzsign, /* +1, -1, or 0 if no TZH/TZM fields */
425  ff; /* fractional precision */
426  bool has_tz; /* was there a TZ field? */
427  int gmtoffset; /* GMT offset of fixed-offset zone abbrev */
428  pg_tz *tzp; /* pg_tz for dynamic abbrev */
429  char *abbrev; /* dynamic abbrev */
430 } TmFromChar;
431 
432 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
433 
434 struct fmt_tz /* do_to_timestamp's timezone info output */
435 {
436  bool has_tz; /* was there any TZ/TZH/TZM field? */
437  int gmtoffset; /* GMT offset in seconds */
438 };
439 
440 /* ----------
441  * Debug
442  * ----------
443  */
#ifdef DEBUG_TO_FROM_CHAR
/*
 * Log the fields of the struct pointed to by _X (presumably a TmFromChar)
 * at level DEBUG_elog_output.
 */
#define DEBUG_TMFC(_X) \
	elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d", \
		(_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
		(_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
		(_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
		(_X)->yysz, (_X)->clock)
/*
 * Log the tm_xxx fields of the struct pointed to by _X at level
 * DEBUG_elog_output.  The struct must have a tm_isdst member.
 */
#define DEBUG_TM(_X) \
	elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nisdst %d\nmon %d\n",\
		(_X)->tm_sec, (_X)->tm_year,\
		(_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\
		(_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
#else
/* Debug output is compiled out: both macros expand to nothing. */
#define DEBUG_TMFC(_X)
#define DEBUG_TM(_X)
#endif
460 
461 /* ----------
462  * Datetime to char conversion
463  *
464  * To support intervals as well as timestamps, we use a custom "tm" struct
465  * that is almost like struct pg_tm, but has a 64-bit tm_hour field.
466  * We omit the tm_isdst and tm_zone fields, which are not used here.
467  * ----------
468  */
469 struct fmt_tm
470 {
471  int tm_sec;
472  int tm_min;
473  int64 tm_hour;
474  int tm_mday;
475  int tm_mon;
476  int tm_year;
477  int tm_wday;
478  int tm_yday;
479  long int tm_gmtoff;
480 };
481 
482 typedef struct TmToChar
483 {
484  struct fmt_tm tm; /* almost the classic 'tm' struct */
485  fsec_t fsec; /* fractional seconds */
486  const char *tzn; /* timezone */
488 
489 #define tmtcTm(_X) (&(_X)->tm)
490 #define tmtcTzn(_X) ((_X)->tzn)
491 #define tmtcFsec(_X) ((_X)->fsec)
492 
493 /* Note: this is used to copy pg_tm to fmt_tm, so not quite a bitwise copy */
494 #define COPY_tm(_DST, _SRC) \
495 do { \
496  (_DST)->tm_sec = (_SRC)->tm_sec; \
497  (_DST)->tm_min = (_SRC)->tm_min; \
498  (_DST)->tm_hour = (_SRC)->tm_hour; \
499  (_DST)->tm_mday = (_SRC)->tm_mday; \
500  (_DST)->tm_mon = (_SRC)->tm_mon; \
501  (_DST)->tm_year = (_SRC)->tm_year; \
502  (_DST)->tm_wday = (_SRC)->tm_wday; \
503  (_DST)->tm_yday = (_SRC)->tm_yday; \
504  (_DST)->tm_gmtoff = (_SRC)->tm_gmtoff; \
505 } while(0)
506 
/*
 * Clear every byte of the struct pointed to by _X, then set its tm_mday and
 * tm_mon members to 1.
 *
 * Caution: this is used to zero both pg_tm and fmt_tm structs.
 */
#define ZERO_tm(_X) \
do { \
	memset((_X), 0, sizeof(*(_X))); \
	(_X)->tm_mon = 1; \
	(_X)->tm_mday = 1; \
} while(0)
513 
514 #define ZERO_tmtc(_X) \
515 do { \
516  ZERO_tm( tmtcTm(_X) ); \
517  tmtcFsec(_X) = 0; \
518  tmtcTzn(_X) = NULL; \
519 } while(0)
520 
521 /*
522  * to_char(time) appears to to_char() as an interval, so this check
523  * is really for interval and time data types.
524  */
525 #define INVALID_FOR_INTERVAL \
526 do { \
527  if (is_interval) \
528  ereport(ERROR, \
529  (errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
530  errmsg("invalid format specification for an interval value"), \
531  errhint("Intervals are not tied to specific calendar dates."))); \
532 } while(0)
533 
534 /*****************************************************************************
535  * KeyWord definitions
536  *****************************************************************************/
537 
/* ----------
 * Suffixes (FormatNode.suffix is an OR of these codes)
 * ----------
 */
#define DCH_S_FM	0x01
#define DCH_S_TH	0x02
#define DCH_S_th	0x04
#define DCH_S_SP	0x08
#define DCH_S_TM	0x10

/* ----------
 * Suffix tests
 *
 * Except for S_TH_TYPE(), each test yields 1 when the suffix code is present
 * in _s and 0 when it is not.  S_TH_TYPE() yields TH_UPPER when DCH_S_TH is
 * present and TH_LOWER otherwise.
 * ----------
 */
#define S_THth(_s)	(((_s) & (DCH_S_TH | DCH_S_th)) != 0)
#define S_TH(_s)	(((_s) & DCH_S_TH) != 0)
#define S_th(_s)	(((_s) & DCH_S_th) != 0)
#define S_TH_TYPE(_s)	(S_TH(_s) ? TH_UPPER : TH_LOWER)

/* Oracle toggles FM behavior, we don't; see docs. */
#define S_FM(_s)	(((_s) & DCH_S_FM) != 0)
#define S_SP(_s)	(((_s) & DCH_S_SP) != 0)
#define S_TM(_s)	(((_s) & DCH_S_TM) != 0)
561 
562 /* ----------
563  * Suffixes definition for DATE-TIME TO/FROM CHAR
564  * ----------
565  */
566 #define TM_SUFFIX_LEN 2
567 
568 static const KeySuffix DCH_suff[] = {
569  {"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
570  {"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
572  {"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
573  {"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
574  {"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
575  {"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
576  /* last */
577  {NULL, 0, 0, 0}
578 };
579 
580 
581 /* ----------
582  * Format-pictures (KeyWord).
583  *
584  * The KeyWord tables are sorted alphabetically, *BUT* similar strings are
585  * sorted from complicated to easy:
586  *
587  * (example: "DDD","DD","Day","D" )
588  *
589  * (This specific order is needed by the sequential search, because the
590  * strings being searched have no exact end. Which keyword is in
591  * "HH12blabla": "HH" or "HH12"? "HH12" must be tried first, because "HH"
592  * also matches the string but is not the right answer.)
593  *
594  * (!)
595  * - The position of a keyword in the table matches its position in the enum DCH/NUM_poz.
596  * (!)
597  *
598  * For fast search is used the 'int index[]', index is ascii table from position
599  * 32 (' ') to 126 (~), in this index is DCH_ / NUM_ enums for each ASCII
600  * position or -1 if char is not used in the KeyWord. Search example for
601  * string "MM":
602  * 1) see in index to index['M' - 32],
603  * 2) take keywords position (enum DCH_MI) from index
604  * 3) run sequential search in keywords[] from this position
605  *
606  * ----------
607  */
608 
609 typedef enum
610 {
631  DCH_FX, /* global suffix */
723 
724  /* last */
725  _DCH_last_
727 
728 typedef enum
729 {
766 
767  /* last */
768  _NUM_last_
770 
771 /* ----------
772  * KeyWords for DATE-TIME version
773  * ----------
774  */
775 static const KeyWord DCH_keywords[] = {
776 /* name, len, id, is_digit, date_mode */
777  {"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE}, /* A */
778  {"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
779  {"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
780  {"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
781  {"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE}, /* B */
782  {"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
783  {"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* C */
784  {"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE}, /* D */
785  {"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
786  {"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
787  {"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
788  {"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
789  {"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
790  {"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
791  {"FF1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* F */
792  {"FF2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
793  {"FF3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
794  {"FF4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
795  {"FF5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
796  {"FF6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
797  {"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
798  {"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* H */
799  {"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
800  {"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
801  {"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* I */
802  {"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
803  {"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
804  {"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
805  {"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
806  {"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
807  {"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
808  {"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
809  {"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* M */
810  {"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
811  {"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
812  {"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
813  {"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
814  {"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
815  {"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
816  {"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* O */
817  {"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE}, /* P */
818  {"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
819  {"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
820  {"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
821  {"SSSSS", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* S */
822  {"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
823  {"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
824  {"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* T */
825  {"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
826  {"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
827  {"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* U */
828  {"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* W */
829  {"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
830  {"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* Y */
831  {"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
832  {"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
833  {"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
834  {"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
835  {"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE}, /* a */
836  {"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
837  {"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
838  {"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
839  {"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE}, /* b */
840  {"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
841  {"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* c */
842  {"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE}, /* d */
843  {"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
844  {"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
845  {"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
846  {"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
847  {"ff1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* f */
848  {"ff2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
849  {"ff3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
850  {"ff4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
851  {"ff5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
852  {"ff6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
853  {"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
854  {"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* h */
855  {"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
856  {"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
857  {"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* i */
858  {"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
859  {"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
860  {"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
861  {"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
862  {"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
863  {"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
864  {"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
865  {"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* m */
866  {"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
867  {"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
868  {"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
869  {"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
870  {"of", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* o */
871  {"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE}, /* p */
872  {"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
873  {"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
874  {"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
875  {"sssss", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* s */
876  {"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
877  {"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
878  {"tzh", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* t */
879  {"tzm", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
880  {"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE},
881  {"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* u */
882  {"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* w */
883  {"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
884  {"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* y */
885  {"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
886  {"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
887  {"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
888  {"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
889 
890  /* last */
891  {NULL, 0, 0, 0, 0}
892 };
893 
894 /* ----------
895  * KeyWords for NUMBER version
896  *
897  * The is_digit and date_mode fields are not relevant here.
898  * ----------
899  */
900 static const KeyWord NUM_keywords[] = {
901 /* name, len, id is in Index */
902  {",", 1, NUM_COMMA}, /* , */
903  {".", 1, NUM_DEC}, /* . */
904  {"0", 1, NUM_0}, /* 0 */
905  {"9", 1, NUM_9}, /* 9 */
906  {"B", 1, NUM_B}, /* B */
907  {"C", 1, NUM_C}, /* C */
908  {"D", 1, NUM_D}, /* D */
909  {"EEEE", 4, NUM_E}, /* E */
910  {"FM", 2, NUM_FM}, /* F */
911  {"G", 1, NUM_G}, /* G */
912  {"L", 1, NUM_L}, /* L */
913  {"MI", 2, NUM_MI}, /* M */
914  {"PL", 2, NUM_PL}, /* P */
915  {"PR", 2, NUM_PR},
916  {"RN", 2, NUM_RN}, /* R */
917  {"SG", 2, NUM_SG}, /* S */
918  {"SP", 2, NUM_SP},
919  {"S", 1, NUM_S},
920  {"TH", 2, NUM_TH}, /* T */
921  {"V", 1, NUM_V}, /* V */
922  {"b", 1, NUM_B}, /* b */
923  {"c", 1, NUM_C}, /* c */
924  {"d", 1, NUM_D}, /* d */
925  {"eeee", 4, NUM_E}, /* e */
926  {"fm", 2, NUM_FM}, /* f */
927  {"g", 1, NUM_G}, /* g */
928  {"l", 1, NUM_L}, /* l */
929  {"mi", 2, NUM_MI}, /* m */
930  {"pl", 2, NUM_PL}, /* p */
931  {"pr", 2, NUM_PR},
932  {"rn", 2, NUM_rn}, /* r */
933  {"sg", 2, NUM_SG}, /* s */
934  {"sp", 2, NUM_SP},
935  {"s", 1, NUM_S},
936  {"th", 2, NUM_th}, /* t */
937  {"v", 1, NUM_V}, /* v */
938 
939  /* last */
940  {NULL, 0, 0}
941 };
942 
943 
944 /* ----------
945  * KeyWords index for DATE-TIME version
946  * ----------
947  */
948 static const int DCH_index[KeyWord_INDEX_SIZE] = {
949 /*
950 0 1 2 3 4 5 6 7 8 9
951 */
952  /*---- first 0..31 chars are skipped ----*/
953 
954  -1, -1, -1, -1, -1, -1, -1, -1,
955  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
956  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
957  -1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,
958  DCH_FF1, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF,
960  -1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,
961  DCH_day, -1, DCH_ff1, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,
963  -1, DCH_y_yyy, -1, -1, -1, -1
964 
965  /*---- chars over 126 are skipped ----*/
966 };
967 
968 /* ----------
969  * KeyWords index for NUMBER version
970  * ----------
971  */
972 static const int NUM_index[KeyWord_INDEX_SIZE] = {
973 /*
974 0 1 2 3 4 5 6 7 8 9
975 */
976  /*---- first 0..31 chars are skipped ----*/
977 
978  -1, -1, -1, -1, -1, -1, -1, -1,
979  -1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,
980  -1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,
981  -1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E,
982  NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,
983  NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,
984  -1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,
985  NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi,
986  -1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,
987  -1, -1, -1, -1, -1, -1
988 
989  /*---- chars over 126 are skipped ----*/
990 };
991 
992 /* ----------
993  * Number processor struct
994  * ----------
995  */
996 typedef struct NUMProc
997 {
999  NUMDesc *Num; /* number description */
1000 
1001  int sign, /* '-' or '+' */
1002  sign_wrote, /* was sign write */
1003  num_count, /* number of write digits */
1004  num_in, /* is inside number */
1005  num_curr, /* current position in number */
1006  out_pre_spaces, /* spaces before first digit */
1007 
1008  read_dec, /* to_number - was read dec. point */
1009  read_post, /* to_number - number of dec. digit */
1010  read_pre; /* to_number - number non-dec. digit */
1011 
1012  char *number, /* string with number */
1013  *number_p, /* pointer to current number position */
1014  *inout, /* in / out buffer */
1015  *inout_p, /* pointer to current inout position */
1016  *last_relevant, /* last relevant number after decimal point */
1017 
1018  *L_negative_sign, /* Locale */
1024 
1025 /* Return flags for DCH_from_char() */
1026 #define DCH_DATED 0x01
1027 #define DCH_TIMED 0x02
1028 #define DCH_ZONED 0x04
1029 
1030 /* ----------
1031  * Functions
1032  * ----------
1033  */
1034 static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
1035  const int *index);
1036 static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
1037 static bool is_separator_char(const char *str);
1038 static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
1039 static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1040  const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num);
1041 
1042 static void DCH_to_char(FormatNode *node, bool is_interval,
1043  TmToChar *in, char *out, Oid collid);
1044 static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
1045  Oid collid, bool std, Node *escontext);
1046 
1047 #ifdef DEBUG_TO_FROM_CHAR
1048 static void dump_index(const KeyWord *k, const int *index);
1049 static void dump_node(FormatNode *node, int max);
1050 #endif
1051 
1052 static const char *get_th(char *num, int type);
1053 static char *str_numth(char *dest, char *num, int type);
1054 static int adjust_partial_year_to_2020(int year);
1055 static int strspace_len(const char *str);
1056 static bool from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode,
1057  Node *escontext);
1058 static bool from_char_set_int(int *dest, const int value, const FormatNode *node,
1059  Node *escontext);
1060 static int from_char_parse_int_len(int *dest, const char **src, const int len,
1061  FormatNode *node, Node *escontext);
1062 static int from_char_parse_int(int *dest, const char **src, FormatNode *node,
1063  Node *escontext);
1064 static int seq_search_ascii(const char *name, const char *const *array, int *len);
1065 static int seq_search_localized(const char *name, char **array, int *len,
1066  Oid collid);
1067 static bool from_char_seq_search(int *dest, const char **src,
1068  const char *const *array,
1069  char **localized_array, Oid collid,
1070  FormatNode *node, Node *escontext);
1071 static bool do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
1072  struct pg_tm *tm, fsec_t *fsec, struct fmt_tz *tz,
1073  int *fprec, uint32 *flags, Node *escontext);
1074 static char *fill_str(char *str, int c, int max);
1075 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
1076 static char *int_to_roman(int number);
1077 static void NUM_prepare_locale(NUMProc *Np);
1078 static char *get_last_relevant_decnum(char *num);
1079 static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
1080 static void NUM_numpart_to_char(NUMProc *Np, int id);
1081 static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
1082  char *number, int input_len, int to_char_out_pre_spaces,
1083  int sign, bool is_to_char, Oid collid);
1084 static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std);
1085 static DCHCacheEntry *DCH_cache_search(const char *str, bool std);
1086 static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std);
1087 static NUMCacheEntry *NUM_cache_getnew(const char *str);
1088 static NUMCacheEntry *NUM_cache_search(const char *str);
1089 static NUMCacheEntry *NUM_cache_fetch(const char *str);
1090 
1091 
1092 /* ----------
1093  * Fast sequential search, use index for data selection which
1094  * go to seq. cycle (it is very fast for unwanted strings)
1095  * (can't be used binary search in format parsing)
1096  * ----------
1097  */
1098 static const KeyWord *
1099 index_seq_search(const char *str, const KeyWord *kw, const int *index)
1100 {
1101  int poz;
1102 
1103  if (!KeyWord_INDEX_FILTER(*str))
1104  return NULL;
1105 
1106  if ((poz = *(index + (*str - ' '))) > -1)
1107  {
1108  const KeyWord *k = kw + poz;
1109 
1110  do
1111  {
1112  if (strncmp(str, k->name, k->len) == 0)
1113  return k;
1114  k++;
1115  if (!k->name)
1116  return NULL;
1117  } while (*str == *k->name);
1118  }
1119  return NULL;
1120 }
1121 
1122 static const KeySuffix *
1123 suff_search(const char *str, const KeySuffix *suf, int type)
1124 {
1125  const KeySuffix *s;
1126 
1127  for (s = suf; s->name != NULL; s++)
1128  {
1129  if (s->type != type)
1130  continue;
1131 
1132  if (strncmp(str, s->name, s->len) == 0)
1133  return s;
1134  }
1135  return NULL;
1136 }
1137 
/*
 * Report whether the first byte of str is a separator character, i.e. an
 * ASCII printable character (0x21..0x7E) that is neither a letter nor a
 * digit.  Only *str is examined; the terminating NUL and any byte outside
 * that range yield false.
 */
static bool
is_separator_char(const char *str)
{
	/* ASCII printable character, but not letter or digit */
	return (*str > 0x20 && *str < 0x7F &&
			!(*str >= 'A' && *str <= 'Z') &&
			!(*str >= 'a' && *str <= 'z') &&
			!(*str >= '0' && *str <= '9'));
}
1147 
1148 /* ----------
1149  * Prepare NUMDesc (number description struct) via FormatNode struct
1150  * ----------
1151  */
1152 static void
1154 {
1155  if (n->type != NODE_TYPE_ACTION)
1156  return;
1157 
1158  if (IS_EEEE(num) && n->key->id != NUM_E)
1159  ereport(ERROR,
1160  (errcode(ERRCODE_SYNTAX_ERROR),
1161  errmsg("\"EEEE\" must be the last pattern used")));
1162 
1163  switch (n->key->id)
1164  {
1165  case NUM_9:
1166  if (IS_BRACKET(num))
1167  ereport(ERROR,
1168  (errcode(ERRCODE_SYNTAX_ERROR),
1169  errmsg("\"9\" must be ahead of \"PR\"")));
1170  if (IS_MULTI(num))
1171  {
1172  ++num->multi;
1173  break;
1174  }
1175  if (IS_DECIMAL(num))
1176  ++num->post;
1177  else
1178  ++num->pre;
1179  break;
1180 
1181  case NUM_0:
1182  if (IS_BRACKET(num))
1183  ereport(ERROR,
1184  (errcode(ERRCODE_SYNTAX_ERROR),
1185  errmsg("\"0\" must be ahead of \"PR\"")));
1186  if (!IS_ZERO(num) && !IS_DECIMAL(num))
1187  {
1188  num->flag |= NUM_F_ZERO;
1189  num->zero_start = num->pre + 1;
1190  }
1191  if (!IS_DECIMAL(num))
1192  ++num->pre;
1193  else
1194  ++num->post;
1195 
1196  num->zero_end = num->pre + num->post;
1197  break;
1198 
1199  case NUM_B:
1200  if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
1201  num->flag |= NUM_F_BLANK;
1202  break;
1203 
1204  case NUM_D:
1205  num->flag |= NUM_F_LDECIMAL;
1206  num->need_locale = true;
1207  /* FALLTHROUGH */
1208  case NUM_DEC:
1209  if (IS_DECIMAL(num))
1210  ereport(ERROR,
1211  (errcode(ERRCODE_SYNTAX_ERROR),
1212  errmsg("multiple decimal points")));
1213  if (IS_MULTI(num))
1214  ereport(ERROR,
1215  (errcode(ERRCODE_SYNTAX_ERROR),
1216  errmsg("cannot use \"V\" and decimal point together")));
1217  num->flag |= NUM_F_DECIMAL;
1218  break;
1219 
1220  case NUM_FM:
1221  num->flag |= NUM_F_FILLMODE;
1222  break;
1223 
1224  case NUM_S:
1225  if (IS_LSIGN(num))
1226  ereport(ERROR,
1227  (errcode(ERRCODE_SYNTAX_ERROR),
1228  errmsg("cannot use \"S\" twice")));
1229  if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
1230  ereport(ERROR,
1231  (errcode(ERRCODE_SYNTAX_ERROR),
1232  errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
1233  if (!IS_DECIMAL(num))
1234  {
1235  num->lsign = NUM_LSIGN_PRE;
1236  num->pre_lsign_num = num->pre;
1237  num->need_locale = true;
1238  num->flag |= NUM_F_LSIGN;
1239  }
1240  else if (num->lsign == NUM_LSIGN_NONE)
1241  {
1242  num->lsign = NUM_LSIGN_POST;
1243  num->need_locale = true;
1244  num->flag |= NUM_F_LSIGN;
1245  }
1246  break;
1247 
1248  case NUM_MI:
1249  if (IS_LSIGN(num))
1250  ereport(ERROR,
1251  (errcode(ERRCODE_SYNTAX_ERROR),
1252  errmsg("cannot use \"S\" and \"MI\" together")));
1253  num->flag |= NUM_F_MINUS;
1254  if (IS_DECIMAL(num))
1255  num->flag |= NUM_F_MINUS_POST;
1256  break;
1257 
1258  case NUM_PL:
1259  if (IS_LSIGN(num))
1260  ereport(ERROR,
1261  (errcode(ERRCODE_SYNTAX_ERROR),
1262  errmsg("cannot use \"S\" and \"PL\" together")));
1263  num->flag |= NUM_F_PLUS;
1264  if (IS_DECIMAL(num))
1265  num->flag |= NUM_F_PLUS_POST;
1266  break;
1267 
1268  case NUM_SG:
1269  if (IS_LSIGN(num))
1270  ereport(ERROR,
1271  (errcode(ERRCODE_SYNTAX_ERROR),
1272  errmsg("cannot use \"S\" and \"SG\" together")));
1273  num->flag |= NUM_F_MINUS;
1274  num->flag |= NUM_F_PLUS;
1275  break;
1276 
1277  case NUM_PR:
1278  if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
1279  ereport(ERROR,
1280  (errcode(ERRCODE_SYNTAX_ERROR),
1281  errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
1282  num->flag |= NUM_F_BRACKET;
1283  break;
1284 
1285  case NUM_rn:
1286  case NUM_RN:
1287  num->flag |= NUM_F_ROMAN;
1288  break;
1289 
1290  case NUM_L:
1291  case NUM_G:
1292  num->need_locale = true;
1293  break;
1294 
1295  case NUM_V:
1296  if (IS_DECIMAL(num))
1297  ereport(ERROR,
1298  (errcode(ERRCODE_SYNTAX_ERROR),
1299  errmsg("cannot use \"V\" and decimal point together")));
1300  num->flag |= NUM_F_MULTI;
1301  break;
1302 
1303  case NUM_E:
1304  if (IS_EEEE(num))
1305  ereport(ERROR,
1306  (errcode(ERRCODE_SYNTAX_ERROR),
1307  errmsg("cannot use \"EEEE\" twice")));
1308  if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
1309  IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
1310  IS_ROMAN(num) || IS_MULTI(num))
1311  ereport(ERROR,
1312  (errcode(ERRCODE_SYNTAX_ERROR),
1313  errmsg("\"EEEE\" is incompatible with other formats"),
1314  errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
1315  num->flag |= NUM_F_EEEE;
1316  break;
1317  }
1318 }
1319 
1320 /* ----------
1321  * Format parser, search small keywords and keyword's suffixes, and make
1322  * format-node tree.
1323  *
1324  * for DATE-TIME & NUMBER version
1325  * ----------
1326  */
1327 static void
1328 parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1329  const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num)
1330 {
1331  FormatNode *n;
1332 
1333 #ifdef DEBUG_TO_FROM_CHAR
1334  elog(DEBUG_elog_output, "to_char/number(): run parser");
1335 #endif
1336 
1337  n = node;
1338 
1339  while (*str)
1340  {
1341  int suffix = 0;
1342  const KeySuffix *s;
1343 
1344  /*
1345  * Prefix
1346  */
1347  if ((flags & DCH_FLAG) &&
1348  (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
1349  {
1350  suffix |= s->id;
1351  if (s->len)
1352  str += s->len;
1353  }
1354 
1355  /*
1356  * Keyword
1357  */
1358  if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
1359  {
1360  n->type = NODE_TYPE_ACTION;
1361  n->suffix = suffix;
1362  if (n->key->len)
1363  str += n->key->len;
1364 
1365  /*
1366  * NUM version: Prepare global NUMDesc struct
1367  */
1368  if (flags & NUM_FLAG)
1369  NUMDesc_prepare(Num, n);
1370 
1371  /*
1372  * Postfix
1373  */
1374  if ((flags & DCH_FLAG) && *str &&
1375  (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
1376  {
1377  n->suffix |= s->id;
1378  if (s->len)
1379  str += s->len;
1380  }
1381 
1382  n++;
1383  }
1384  else if (*str)
1385  {
1386  int chlen;
1387 
1388  if ((flags & STD_FLAG) && *str != '"')
1389  {
1390  /*
1391  * Standard mode, allow only following separators: "-./,':; ".
1392  * However, we support double quotes even in standard mode
1393  * (see below). This is our extension of standard mode.
1394  */
1395  if (strchr("-./,':; ", *str) == NULL)
1396  ereport(ERROR,
1397  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
1398  errmsg("invalid datetime format separator: \"%s\"",
1399  pnstrdup(str, pg_mblen(str)))));
1400 
1401  if (*str == ' ')
1402  n->type = NODE_TYPE_SPACE;
1403  else
1405 
1406  n->character[0] = *str;
1407  n->character[1] = '\0';
1408  n->key = NULL;
1409  n->suffix = 0;
1410  n++;
1411  str++;
1412  }
1413  else if (*str == '"')
1414  {
1415  /*
1416  * Process double-quoted literal string, if any
1417  */
1418  str++;
1419  while (*str)
1420  {
1421  if (*str == '"')
1422  {
1423  str++;
1424  break;
1425  }
1426  /* backslash quotes the next character, if any */
1427  if (*str == '\\' && *(str + 1))
1428  str++;
1429  chlen = pg_mblen(str);
1430  n->type = NODE_TYPE_CHAR;
1431  memcpy(n->character, str, chlen);
1432  n->character[chlen] = '\0';
1433  n->key = NULL;
1434  n->suffix = 0;
1435  n++;
1436  str += chlen;
1437  }
1438  }
1439  else
1440  {
1441  /*
1442  * Outside double-quoted strings, backslash is only special if
1443  * it immediately precedes a double quote.
1444  */
1445  if (*str == '\\' && *(str + 1) == '"')
1446  str++;
1447  chlen = pg_mblen(str);
1448 
1449  if ((flags & DCH_FLAG) && is_separator_char(str))
1451  else if (isspace((unsigned char) *str))
1452  n->type = NODE_TYPE_SPACE;
1453  else
1454  n->type = NODE_TYPE_CHAR;
1455 
1456  memcpy(n->character, str, chlen);
1457  n->character[chlen] = '\0';
1458  n->key = NULL;
1459  n->suffix = 0;
1460  n++;
1461  str += chlen;
1462  }
1463  }
1464  }
1465 
1466  n->type = NODE_TYPE_END;
1467  n->suffix = 0;
1468 }
1469 
1470 /* ----------
1471  * DEBUG: Dump the FormatNode Tree (debug)
1472  * ----------
1473  */
1474 #ifdef DEBUG_TO_FROM_CHAR
1475 
1476 #define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
1477 #define DUMP_FM(_suf) (S_FM(_suf) ? "FM" : " ")
1478 
1479 static void
1480 dump_node(FormatNode *node, int max)
1481 {
1482  FormatNode *n;
1483  int a;
1484 
1485  elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");
1486 
1487  for (a = 0, n = node; a <= max; n++, a++)
1488  {
1489  if (n->type == NODE_TYPE_ACTION)
1490  elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
1491  a, n->key->name, DUMP_THth(n->suffix), DUMP_FM(n->suffix));
1492  else if (n->type == NODE_TYPE_CHAR)
1493  elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
1494  a, n->character);
1495  else if (n->type == NODE_TYPE_END)
1496  {
1497  elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", a);
1498  return;
1499  }
1500  else
1501  elog(DEBUG_elog_output, "%d:\t unknown NODE!", a);
1502  }
1503 }
1504 #endif /* DEBUG */
1505 
1506 /*****************************************************************************
1507  * Private utils
1508  *****************************************************************************/
1509 
1510 /* ----------
1511  * Return ST/ND/RD/TH for simple (1..9) numbers
1512  * type --> 0 upper, 1 lower
1513  * ----------
1514  */
1515 static const char *
1516 get_th(char *num, int type)
1517 {
1518  int len = strlen(num),
1519  last;
1520 
1521  last = *(num + (len - 1));
1522  if (!isdigit((unsigned char) last))
1523  ereport(ERROR,
1524  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1525  errmsg("\"%s\" is not a number", num)));
1526 
1527  /*
1528  * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
1529  * 'ST/st', 'ND/nd', 'RD/rd', respectively
1530  */
1531  if ((len > 1) && (num[len - 2] == '1'))
1532  last = 0;
1533 
1534  switch (last)
1535  {
1536  case '1':
1537  if (type == TH_UPPER)
1538  return numTH[0];
1539  return numth[0];
1540  case '2':
1541  if (type == TH_UPPER)
1542  return numTH[1];
1543  return numth[1];
1544  case '3':
1545  if (type == TH_UPPER)
1546  return numTH[2];
1547  return numth[2];
1548  default:
1549  if (type == TH_UPPER)
1550  return numTH[3];
1551  return numth[3];
1552  }
1553 }
1554 
/* ----------
 * Turn a string-number into an ordinal string-number by appending the
 * suffix chosen by get_th(num, type).
 *
 * The number is first copied into dest unless dest and num are the same
 * buffer.  Nothing here checks the size of dest: the caller must make sure
 * it can hold the number, the suffix and the terminating NUL.
 *
 * Returns dest.
 * ----------
 */
static char *
str_numth(char *dest, char *num, int type)
{
	const char *suffix;

	if (num != dest)
		strcpy(dest, num);

	suffix = get_th(num, type);
	strcat(dest, suffix);

	return dest;
}
1568 
1569 /*****************************************************************************
1570  * upper/lower/initcap functions
1571  *****************************************************************************/
1572 
1573 #ifdef USE_ICU
1574 
1575 typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
1576  const UChar *src, int32_t srcLength,
1577  const char *locale,
1578  UErrorCode *pErrorCode);
1579 
1580 static int32_t
1581 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
1582  UChar **buff_dest, UChar *buff_source, int32_t len_source)
1583 {
1584  UErrorCode status;
1585  int32_t len_dest;
1586 
1587  len_dest = len_source; /* try first with same length */
1588  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1589  status = U_ZERO_ERROR;
1590  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1591  mylocale->info.icu.locale, &status);
1592  if (status == U_BUFFER_OVERFLOW_ERROR)
1593  {
1594  /* try again with adjusted length */
1595  pfree(*buff_dest);
1596  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1597  status = U_ZERO_ERROR;
1598  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1599  mylocale->info.icu.locale, &status);
1600  }
1601  if (U_FAILURE(status))
1602  ereport(ERROR,
1603  (errmsg("case conversion failed: %s", u_errorName(status))));
1604  return len_dest;
1605 }
1606 
1607 static int32_t
1608 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
1609  const UChar *src, int32_t srcLength,
1610  const char *locale,
1611  UErrorCode *pErrorCode)
1612 {
1613  return u_strToTitle(dest, destCapacity, src, srcLength,
1614  NULL, locale, pErrorCode);
1615 }
1616 
1617 #endif /* USE_ICU */
1618 
1619 /*
1620  * If the system provides the needed functions for wide-character manipulation
1621  * (which are all standardized by C99), then we implement upper/lower/initcap
1622  * using wide-character functions, if necessary. Otherwise we use the
1623  * traditional <ctype.h> functions, which of course will not work as desired
1624  * in multibyte character sets. Note that in either case we are effectively
1625  * assuming that the database character encoding matches the encoding implied
1626  * by LC_CTYPE.
1627  */
1628 
1629 /*
1630  * collation-aware, wide-character-aware lower function
1631  *
1632  * We pass the number of bytes so we can pass varlena and char*
1633  * to this function. The result is a palloc'd, null-terminated string.
1634  */
1635 char *
1636 str_tolower(const char *buff, size_t nbytes, Oid collid)
1637 {
1638  char *result;
1639  pg_locale_t mylocale;
1640 
1641  if (!buff)
1642  return NULL;
1643 
1644  if (!OidIsValid(collid))
1645  {
1646  /*
1647  * This typically means that the parser could not resolve a conflict
1648  * of implicit collations, so report it that way.
1649  */
1650  ereport(ERROR,
1651  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1652  errmsg("could not determine which collation to use for %s function",
1653  "lower()"),
1654  errhint("Use the COLLATE clause to set the collation explicitly.")));
1655  }
1656 
1657  mylocale = pg_newlocale_from_collation(collid);
1658 
1659  /* C/POSIX collations use this path regardless of database encoding */
1660  if (mylocale->ctype_is_c)
1661  {
1662  result = asc_tolower(buff, nbytes);
1663  }
1664  else
1665  {
1666 #ifdef USE_ICU
1667  if (mylocale->provider == COLLPROVIDER_ICU)
1668  {
1669  int32_t len_uchar;
1670  int32_t len_conv;
1671  UChar *buff_uchar;
1672  UChar *buff_conv;
1673 
1674  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1675  len_conv = icu_convert_case(u_strToLower, mylocale,
1676  &buff_conv, buff_uchar, len_uchar);
1677  icu_from_uchar(&result, buff_conv, len_conv);
1678  pfree(buff_uchar);
1679  pfree(buff_conv);
1680  }
1681  else
1682 #endif
1683  if (mylocale->provider == COLLPROVIDER_BUILTIN)
1684  {
1685  const char *src = buff;
1686  size_t srclen = nbytes;
1687  size_t dstsize;
1688  char *dst;
1689  size_t needed;
1690 
1692 
1693  /* first try buffer of equal size plus terminating NUL */
1694  dstsize = srclen + 1;
1695  dst = palloc(dstsize);
1696 
1697  needed = unicode_strlower(dst, dstsize, src, srclen);
1698  if (needed + 1 > dstsize)
1699  {
1700  /* grow buffer if needed and retry */
1701  dstsize = needed + 1;
1702  dst = repalloc(dst, dstsize);
1703  needed = unicode_strlower(dst, dstsize, src, srclen);
1704  Assert(needed + 1 == dstsize);
1705  }
1706 
1707  Assert(dst[needed] == '\0');
1708  result = dst;
1709  }
1710  else
1711  {
1712  Assert(mylocale->provider == COLLPROVIDER_LIBC);
1713 
1715  {
1716  wchar_t *workspace;
1717  size_t curr_char;
1718  size_t result_size;
1719 
1720  /* Overflow paranoia */
1721  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1722  ereport(ERROR,
1723  (errcode(ERRCODE_OUT_OF_MEMORY),
1724  errmsg("out of memory")));
1725 
1726  /* Output workspace cannot have more codes than input bytes */
1727  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1728 
1729  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1730 
1731  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1732  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1733 
1734  /*
1735  * Make result large enough; case change might change number
1736  * of bytes
1737  */
1738  result_size = curr_char * pg_database_encoding_max_length() + 1;
1739  result = palloc(result_size);
1740 
1741  wchar2char(result, workspace, result_size, mylocale);
1742  pfree(workspace);
1743  }
1744  else
1745  {
1746  char *p;
1747 
1748  result = pnstrdup(buff, nbytes);
1749 
1750  /*
1751  * Note: we assume that tolower_l() will not be so broken as
1752  * to need an isupper_l() guard test. When using the default
1753  * collation, we apply the traditional Postgres behavior that
1754  * forces ASCII-style treatment of I/i, but in non-default
1755  * collations you get exactly what the collation says.
1756  */
1757  for (p = result; *p; p++)
1758  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1759  }
1760  }
1761  }
1762 
1763  return result;
1764 }
1765 
1766 /*
1767  * collation-aware, wide-character-aware upper function
1768  *
1769  * We pass the number of bytes so we can pass varlena and char*
1770  * to this function. The result is a palloc'd, null-terminated string.
1771  */
1772 char *
1773 str_toupper(const char *buff, size_t nbytes, Oid collid)
1774 {
1775  char *result;
1776  pg_locale_t mylocale;
1777 
1778  if (!buff)
1779  return NULL;
1780 
1781  if (!OidIsValid(collid))
1782  {
1783  /*
1784  * This typically means that the parser could not resolve a conflict
1785  * of implicit collations, so report it that way.
1786  */
1787  ereport(ERROR,
1788  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1789  errmsg("could not determine which collation to use for %s function",
1790  "upper()"),
1791  errhint("Use the COLLATE clause to set the collation explicitly.")));
1792  }
1793 
1794  mylocale = pg_newlocale_from_collation(collid);
1795 
1796  /* C/POSIX collations use this path regardless of database encoding */
1797  if (mylocale->ctype_is_c)
1798  {
1799  result = asc_toupper(buff, nbytes);
1800  }
1801  else
1802  {
1803 #ifdef USE_ICU
1804  if (mylocale->provider == COLLPROVIDER_ICU)
1805  {
1806  int32_t len_uchar,
1807  len_conv;
1808  UChar *buff_uchar;
1809  UChar *buff_conv;
1810 
1811  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1812  len_conv = icu_convert_case(u_strToUpper, mylocale,
1813  &buff_conv, buff_uchar, len_uchar);
1814  icu_from_uchar(&result, buff_conv, len_conv);
1815  pfree(buff_uchar);
1816  pfree(buff_conv);
1817  }
1818  else
1819 #endif
1820  if (mylocale->provider == COLLPROVIDER_BUILTIN)
1821  {
1822  const char *src = buff;
1823  size_t srclen = nbytes;
1824  size_t dstsize;
1825  char *dst;
1826  size_t needed;
1827 
1829 
1830  /* first try buffer of equal size plus terminating NUL */
1831  dstsize = srclen + 1;
1832  dst = palloc(dstsize);
1833 
1834  needed = unicode_strupper(dst, dstsize, src, srclen);
1835  if (needed + 1 > dstsize)
1836  {
1837  /* grow buffer if needed and retry */
1838  dstsize = needed + 1;
1839  dst = repalloc(dst, dstsize);
1840  needed = unicode_strupper(dst, dstsize, src, srclen);
1841  Assert(needed + 1 == dstsize);
1842  }
1843 
1844  Assert(dst[needed] == '\0');
1845  result = dst;
1846  }
1847  else
1848  {
1849  Assert(mylocale->provider == COLLPROVIDER_LIBC);
1850 
1852  {
1853  wchar_t *workspace;
1854  size_t curr_char;
1855  size_t result_size;
1856 
1857  /* Overflow paranoia */
1858  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1859  ereport(ERROR,
1860  (errcode(ERRCODE_OUT_OF_MEMORY),
1861  errmsg("out of memory")));
1862 
1863  /* Output workspace cannot have more codes than input bytes */
1864  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1865 
1866  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1867 
1868  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1869  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1870 
1871  /*
1872  * Make result large enough; case change might change number
1873  * of bytes
1874  */
1875  result_size = curr_char * pg_database_encoding_max_length() + 1;
1876  result = palloc(result_size);
1877 
1878  wchar2char(result, workspace, result_size, mylocale);
1879  pfree(workspace);
1880  }
1881  else
1882  {
1883  char *p;
1884 
1885  result = pnstrdup(buff, nbytes);
1886 
1887  /*
1888  * Note: we assume that toupper_l() will not be so broken as
1889  * to need an islower_l() guard test. When using the default
1890  * collation, we apply the traditional Postgres behavior that
1891  * forces ASCII-style treatment of I/i, but in non-default
1892  * collations you get exactly what the collation says.
1893  */
1894  for (p = result; *p; p++)
1895  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1896  }
1897  }
1898  }
1899 
1900  return result;
1901 }
1902 
/*
 * Iteration state for initcap_wbnext(): the string being scanned and how far
 * the scan has progressed.
 */
struct WordBoundaryState
{
	const char *str;			/* text being scanned */
	size_t		len;			/* length of str in bytes */
	size_t		offset;			/* byte offset of the next character to read */
	bool		init;			/* false until the first character was seen */
	bool		prev_alnum;		/* alnum status recorded at the last boundary */
};
1911 
1912 /*
1913  * Simple word boundary iterator that draws boundaries each time the result of
1914  * pg_u_isalnum() changes.
1915  */
1916 static size_t
1918 {
1919  struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
1920 
1921  while (wbstate->offset < wbstate->len &&
1922  wbstate->str[wbstate->offset] != '\0')
1923  {
1924  pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str +
1925  wbstate->offset);
1926  bool curr_alnum = pg_u_isalnum(u, true);
1927 
1928  if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
1929  {
1930  size_t prev_offset = wbstate->offset;
1931 
1932  wbstate->init = true;
1933  wbstate->offset += unicode_utf8len(u);
1934  wbstate->prev_alnum = curr_alnum;
1935  return prev_offset;
1936  }
1937 
1938  wbstate->offset += unicode_utf8len(u);
1939  }
1940 
1941  return wbstate->len;
1942 }
1943 
1944 /*
1945  * collation-aware, wide-character-aware initcap function
1946  *
1947  * We pass the number of bytes so we can pass varlena and char*
1948  * to this function. The result is a palloc'd, null-terminated string.
1949  */
1950 char *
1951 str_initcap(const char *buff, size_t nbytes, Oid collid)
1952 {
1953  char *result;
1954  int wasalnum = false;
1955  pg_locale_t mylocale;
1956 
1957  if (!buff)
1958  return NULL;
1959 
1960  if (!OidIsValid(collid))
1961  {
1962  /*
1963  * This typically means that the parser could not resolve a conflict
1964  * of implicit collations, so report it that way.
1965  */
1966  ereport(ERROR,
1967  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1968  errmsg("could not determine which collation to use for %s function",
1969  "initcap()"),
1970  errhint("Use the COLLATE clause to set the collation explicitly.")));
1971  }
1972 
1973  mylocale = pg_newlocale_from_collation(collid);
1974 
1975  /* C/POSIX collations use this path regardless of database encoding */
1976  if (mylocale->ctype_is_c)
1977  {
1978  result = asc_initcap(buff, nbytes);
1979  }
1980  else
1981  {
1982 #ifdef USE_ICU
1983  if (mylocale->provider == COLLPROVIDER_ICU)
1984  {
1985  int32_t len_uchar,
1986  len_conv;
1987  UChar *buff_uchar;
1988  UChar *buff_conv;
1989 
1990  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1991  len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
1992  &buff_conv, buff_uchar, len_uchar);
1993  icu_from_uchar(&result, buff_conv, len_conv);
1994  pfree(buff_uchar);
1995  pfree(buff_conv);
1996  }
1997  else
1998 #endif
1999  if (mylocale->provider == COLLPROVIDER_BUILTIN)
2000  {
2001  const char *src = buff;
2002  size_t srclen = nbytes;
2003  size_t dstsize;
2004  char *dst;
2005  size_t needed;
2006  struct WordBoundaryState wbstate = {
2007  .str = src,
2008  .len = srclen,
2009  .offset = 0,
2010  .init = false,
2011  .prev_alnum = false,
2012  };
2013 
2015 
2016  /* first try buffer of equal size plus terminating NUL */
2017  dstsize = srclen + 1;
2018  dst = palloc(dstsize);
2019 
2020  needed = unicode_strtitle(dst, dstsize, src, srclen,
2021  initcap_wbnext, &wbstate);
2022  if (needed + 1 > dstsize)
2023  {
2024  /* reset iterator */
2025  wbstate.offset = 0;
2026  wbstate.init = false;
2027 
2028  /* grow buffer if needed and retry */
2029  dstsize = needed + 1;
2030  dst = repalloc(dst, dstsize);
2031  needed = unicode_strtitle(dst, dstsize, src, srclen,
2032  initcap_wbnext, &wbstate);
2033  Assert(needed + 1 == dstsize);
2034  }
2035 
2036  result = dst;
2037  }
2038  else
2039  {
2040  Assert(mylocale->provider == COLLPROVIDER_LIBC);
2041 
2043  {
2044  wchar_t *workspace;
2045  size_t curr_char;
2046  size_t result_size;
2047 
2048  /* Overflow paranoia */
2049  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
2050  ereport(ERROR,
2051  (errcode(ERRCODE_OUT_OF_MEMORY),
2052  errmsg("out of memory")));
2053 
2054  /* Output workspace cannot have more codes than input bytes */
2055  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
2056 
2057  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
2058 
2059  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
2060  {
2061  if (wasalnum)
2062  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
2063  else
2064  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
2065  wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
2066  }
2067 
2068  /*
2069  * Make result large enough; case change might change number
2070  * of bytes
2071  */
2072  result_size = curr_char * pg_database_encoding_max_length() + 1;
2073  result = palloc(result_size);
2074 
2075  wchar2char(result, workspace, result_size, mylocale);
2076  pfree(workspace);
2077  }
2078  else
2079  {
2080  char *p;
2081 
2082  result = pnstrdup(buff, nbytes);
2083 
2084  /*
2085  * Note: we assume that toupper_l()/tolower_l() will not be so
2086  * broken as to need guard tests. When using the default
2087  * collation, we apply the traditional Postgres behavior that
2088  * forces ASCII-style treatment of I/i, but in non-default
2089  * collations you get exactly what the collation says.
2090  */
2091  for (p = result; *p; p++)
2092  {
2093  if (wasalnum)
2094  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
2095  else
2096  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
2097  wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
2098  }
2099  }
2100  }
2101  }
2102 
2103  return result;
2104 }
2105 
/*
 * ASCII-only lower function
 *
 * Takes a byte count rather than relying on a terminator, so both varlena
 * data and C strings can be passed.  Returns a palloc'd, null-terminated
 * copy in which each byte has been passed through pg_ascii_tolower(), or
 * NULL when buff is NULL.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *copy;
	size_t		i;

	if (buff == NULL)
		return NULL;

	copy = pnstrdup(buff, nbytes);

	for (i = 0; copy[i] != '\0'; i++)
		copy[i] = pg_ascii_tolower((unsigned char) copy[i]);

	return copy;
}
2128 
/*
 * ASCII-only upper function
 *
 * Takes a byte count rather than relying on a terminator, so both varlena
 * data and C strings can be passed.  Returns a palloc'd, null-terminated
 * copy in which each byte has been passed through pg_ascii_toupper(), or
 * NULL when buff is NULL.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *copy;
	size_t		i;

	if (buff == NULL)
		return NULL;

	copy = pnstrdup(buff, nbytes);

	for (i = 0; copy[i] != '\0'; i++)
		copy[i] = pg_ascii_toupper((unsigned char) copy[i]);

	return copy;
}
2151 
/*
 * ASCII-only initcap function
 *
 * Takes a byte count rather than relying on a terminator, so both varlena
 * data and C strings can be passed.  Returns a palloc'd, null-terminated
 * copy (NULL when buff is NULL) in which a byte is upper-cased when the
 * previous byte was not an ASCII letter or digit (or there is no previous
 * byte), and lower-cased otherwise.
 */
char *
asc_initcap(const char *buff, size_t nbytes)
{
	char	   *copy;
	size_t		i;
	bool		in_word = false;

	if (buff == NULL)
		return NULL;

	copy = pnstrdup(buff, nbytes);

	for (i = 0; copy[i] != '\0'; i++)
	{
		char		c;

		c = in_word ?
			pg_ascii_tolower((unsigned char) copy[i]) :
			pg_ascii_toupper((unsigned char) copy[i]);
		copy[i] = c;

		/* we don't trust isalnum() here */
		in_word = ((c >= '0' && c <= '9') ||
				   (c >= 'A' && c <= 'Z') ||
				   (c >= 'a' && c <= 'z'));
	}

	return copy;
}
2186 
2187 /* convenience routines for when the input is null-terminated */
2188 
2189 static char *
2190 str_tolower_z(const char *buff, Oid collid)
2191 {
2192  return str_tolower(buff, strlen(buff), collid);
2193 }
2194 
2195 static char *
2196 str_toupper_z(const char *buff, Oid collid)
2197 {
2198  return str_toupper(buff, strlen(buff), collid);
2199 }
2200 
2201 static char *
2202 str_initcap_z(const char *buff, Oid collid)
2203 {
2204  return str_initcap(buff, strlen(buff), collid);
2205 }
2206 
/* asc_tolower() for a null-terminated input: the length comes from strlen() */
static char *
asc_tolower_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_tolower(buff, nbytes);
}
2212 
/* asc_toupper() for a null-terminated input: the length comes from strlen() */
static char *
asc_toupper_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_toupper(buff, nbytes);
}
2218 
2219 /* asc_initcap_z is not currently needed */
2220 
2221 
/* ----------
 * Skip TH / th in FROM_CHAR
 *
 * If S_THth is on for the suffix, advance ptr over up to two characters
 * (each step sized by pg_mblen), stopping early at the end of the string.
 * Note that ptr is evaluated more than once.
 * ----------
 */
#define SKIP_THth(ptr, _suf) \
	do { \
		if (S_THth(_suf)) \
		{ \
			int		_skip; \
			for (_skip = 0; _skip < 2 && *(ptr); _skip++) \
				(ptr) += pg_mblen(ptr); \
		} \
	} while (0)
2236 
2237 
2238 #ifdef DEBUG_TO_FROM_CHAR
2239 /* -----------
2240  * DEBUG: Call for debug and for index checking; (Show ASCII char
2241  * and defined keyword for each used position
2242  * ----------
2243  */
2244 static void
2245 dump_index(const KeyWord *k, const int *index)
2246 {
2247  int i,
2248  count = 0,
2249  free_i = 0;
2250 
2251  elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");
2252 
2253  for (i = 0; i < KeyWord_INDEX_SIZE; i++)
2254  {
2255  if (index[i] != -1)
2256  {
2257  elog(DEBUG_elog_output, "\t%c: %s, ", i + 32, k[index[i]].name);
2258  count++;
2259  }
2260  else
2261  {
2262  free_i++;
2263  elog(DEBUG_elog_output, "\t(%d) %c %d", i, i + 32, index[i]);
2264  }
2265  }
2266  elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
2267  count, free_i);
2268 }
2269 #endif /* DEBUG */
2270 
2271 /* ----------
2272  * Return true if next format picture is not digit value
2273  * ----------
2274  */
2275 static bool
2277 {
2278  if (n->type == NODE_TYPE_END)
2279  return false;
2280 
2281  if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
2282  return true;
2283 
2284  /*
2285  * Next node
2286  */
2287  n++;
2288 
2289  /* end of format string is treated like a non-digit separator */
2290  if (n->type == NODE_TYPE_END)
2291  return true;
2292 
2293  if (n->type == NODE_TYPE_ACTION)
2294  {
2295  if (n->key->is_digit)
2296  return false;
2297 
2298  return true;
2299  }
2300  else if (n->character[1] == '\0' &&
2301  isdigit((unsigned char) n->character[0]))
2302  return false;
2303 
2304  return true; /* some non-digit input (separator) */
2305 }
2306 
2307 
/*
 * Expand a partially-specified (1 to 3 digit) year into a full year.
 *
 * Years are mapped to the value nearest 2020: two-digit years 70-99 become
 * 19xx and 0-69 become 20xx; three-digit years 100-519 become 2xxx and
 * 520-999 become 1xxx.  Values of 1000 or more are returned unchanged.
 */
static int
adjust_partial_year_to_2020(int year)
{
	/* Force 0-69 into the 2000's */
	if (year < 70)
		return year + 2000;

	/* Force 70-99 into the 1900's */
	if (year < 100)
		return year + 1900;

	/* Force 100-519 into the 2000's */
	if (year < 520)
		return year + 2000;

	/* Force 520-999 into the 1000's */
	if (year < 1000)
		return year + 1000;

	return year;
}
2330 
2331 
/*
 * Count the whitespace characters (per isspace()) at the start of 'str'.
 */
static int
strspace_len(const char *str)
{
	const char *p;

	for (p = str; *p != '\0' && isspace((unsigned char) *p); p++)
		;

	return (int) (p - str);
}
2344 
2345 /*
2346  * Set the date mode of a from-char conversion.
2347  *
2348  * Puke if the date mode has already been set, and the caller attempts to set
2349  * it to a conflicting mode.
2350  *
2351  * Returns true on success, false on failure (if escontext points to an
2352  * ErrorSaveContext; otherwise errors are thrown).
2353  */
2354 static bool
2356  Node *escontext)
2357 {
2358  if (mode != FROM_CHAR_DATE_NONE)
2359  {
2360  if (tmfc->mode == FROM_CHAR_DATE_NONE)
2361  tmfc->mode = mode;
2362  else if (tmfc->mode != mode)
2363  ereturn(escontext, false,
2364  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2365  errmsg("invalid combination of date conventions"),
2366  errhint("Do not mix Gregorian and ISO week date "
2367  "conventions in a formatting template.")));
2368  }
2369  return true;
2370 }
2371 
2372 /*
2373  * Set the integer pointed to by 'dest' to the given value.
2374  *
2375  * Puke if the destination integer has previously been set to some other
2376  * non-zero value.
2377  *
2378  * Returns true on success, false on failure (if escontext points to an
2379  * ErrorSaveContext; otherwise errors are thrown).
2380  */
2381 static bool
2382 from_char_set_int(int *dest, const int value, const FormatNode *node,
2383  Node *escontext)
2384 {
2385  if (*dest != 0 && *dest != value)
2386  ereturn(escontext, false,
2387  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2388  errmsg("conflicting values for \"%s\" field in formatting string",
2389  node->key->name),
2390  errdetail("This value contradicts a previous setting "
2391  "for the same field type.")));
2392  *dest = value;
2393  return true;
2394 }
2395 
2396 /*
2397  * Read a single integer from the source string, into the int pointed to by
2398  * 'dest'. If 'dest' is NULL, the result is discarded.
2399  *
2400  * In fixed-width mode (the node does not have the FM suffix), consume at most
2401  * 'len' characters. However, any leading whitespace isn't counted in 'len'.
2402  *
2403  * We use strtol() to recover the integer value from the source string, in
2404  * accordance with the given FormatNode.
2405  *
2406  * If the conversion completes successfully, src will have been advanced to
2407  * point at the character immediately following the last character used in the
2408  * conversion.
2409  *
2410  * Returns the number of characters consumed, or -1 on error (if escontext
2411  * points to an ErrorSaveContext; otherwise errors are thrown).
2412  *
2413  * Note that from_char_parse_int() provides a more convenient wrapper where
2414  * the length of the field is the same as the length of the format keyword (as
2415  * with DD and MI).
2416  */
2417 static int
2418 from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node,
2419  Node *escontext)
2420 {
2421  long result;
2422  char copy[DCH_MAX_ITEM_SIZ + 1];
2423  const char *init = *src;
2424  int used;
2425 
2426  /*
2427  * Skip any whitespace before parsing the integer.
2428  */
2429  *src += strspace_len(*src);
2430 
2432  used = (int) strlcpy(copy, *src, len + 1);
2433 
2434  if (S_FM(node->suffix) || is_next_separator(node))
2435  {
2436  /*
2437  * This node is in Fill Mode, or the next node is known to be a
2438  * non-digit value, so we just slurp as many characters as we can get.
2439  */
2440  char *endptr;
2441 
2442  errno = 0;
2443  result = strtol(init, &endptr, 10);
2444  *src = endptr;
2445  }
2446  else
2447  {
2448  /*
2449  * We need to pull exactly the number of characters given in 'len' out
2450  * of the string, and convert those.
2451  */
2452  char *last;
2453 
2454  if (used < len)
2455  ereturn(escontext, -1,
2456  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2457  errmsg("source string too short for \"%s\" formatting field",
2458  node->key->name),
2459  errdetail("Field requires %d characters, but only %d remain.",
2460  len, used),
2461  errhint("If your source string is not fixed-width, "
2462  "try using the \"FM\" modifier.")));
2463 
2464  errno = 0;
2465  result = strtol(copy, &last, 10);
2466  used = last - copy;
2467 
2468  if (used > 0 && used < len)
2469  ereturn(escontext, -1,
2470  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2471  errmsg("invalid value \"%s\" for \"%s\"",
2472  copy, node->key->name),
2473  errdetail("Field requires %d characters, but only %d could be parsed.",
2474  len, used),
2475  errhint("If your source string is not fixed-width, "
2476  "try using the \"FM\" modifier.")));
2477 
2478  *src += used;
2479  }
2480 
2481  if (*src == init)
2482  ereturn(escontext, -1,
2483  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2484  errmsg("invalid value \"%s\" for \"%s\"",
2485  copy, node->key->name),
2486  errdetail("Value must be an integer.")));
2487 
2488  if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
2489  ereturn(escontext, -1,
2490  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2491  errmsg("value for \"%s\" in source string is out of range",
2492  node->key->name),
2493  errdetail("Value must be in the range %d to %d.",
2494  INT_MIN, INT_MAX)));
2495 
2496  if (dest != NULL)
2497  {
2498  if (!from_char_set_int(dest, (int) result, node, escontext))
2499  return -1;
2500  }
2501 
2502  return *src - init;
2503 }
2504 
2505 /*
2506  * Call from_char_parse_int_len(), using the length of the format keyword as
2507  * the expected length of the field.
2508  *
2509  * Don't call this function if the field differs in length from the format
2510  * keyword (as with HH24; the keyword length is 4, but the field length is 2).
2511  * In such cases, call from_char_parse_int_len() instead to specify the
2512  * required length explicitly.
2513  */
2514 static int
2515 from_char_parse_int(int *dest, const char **src, FormatNode *node,
2516  Node *escontext)
2517 {
2518  return from_char_parse_int_len(dest, src, node->key->len, node, escontext);
2519 }
2520 
/*
 * Walk the NULL-terminated "array" in order and return the index of the
 * first entry that is a case-insensitive prefix of "name", or -1 if there
 * is none.
 *
 * *len receives the length of the matched entry, or 0 when nothing matched.
 *
 * Characters are compared after pg_ascii_tolower(), so this is only
 * suitable for comparisons to ASCII strings.
 */
static int
seq_search_ascii(const char *name, const char *const *array, int *len)
{
	unsigned char head;
	int			idx;

	*len = 0;

	/* empty string can't match anything */
	if (*name == '\0')
		return -1;

	/* fold the first character once; it is tested against every entry */
	head = pg_ascii_tolower((unsigned char) *name);

	for (idx = 0; array[idx] != NULL; idx++)
	{
		const char *entry = array[idx];
		int			pos;

		/* cheap rejection on the first character */
		if (pg_ascii_tolower((unsigned char) entry[0]) != head)
			continue;

		/* advance while both strings continue and still agree */
		pos = 1;
		while (entry[pos] != '\0' &&
			   name[pos] != '\0' &&
			   pg_ascii_tolower((unsigned char) entry[pos]) ==
			   pg_ascii_tolower((unsigned char) name[pos]))
			pos++;

		/* success only if the whole array entry was consumed */
		if (entry[pos] == '\0')
		{
			*len = pos;
			return idx;
		}
	}

	return -1;
}
2577 
2578 /*
2579  * Sequentially search an array of possibly non-English words for
2580  * a case-insensitive match to the initial character(s) of "name".
2581  *
2582  * This has the same API as seq_search_ascii(), but we use a more general
2583  * case-folding transformation to achieve case-insensitivity. Case folding
2584  * is done per the rules of the collation identified by "collid".
2585  *
2586  * The array is treated as const, but we don't declare it that way because
2587  * the arrays exported by pg_locale.c aren't const.
2588  */
2589 static int
2590 seq_search_localized(const char *name, char **array, int *len, Oid collid)
2591 {
2592  char **a;
2593  char *upper_name;
2594  char *lower_name;
2595 
2596  *len = 0;
2597 
2598  /* empty string can't match anything */
2599  if (!*name)
2600  return -1;
2601 
2602  /*
2603  * The case-folding processing done below is fairly expensive, so before
2604  * doing that, make a quick pass to see if there is an exact match.
2605  */
2606  for (a = array; *a != NULL; a++)
2607  {
2608  int element_len = strlen(*a);
2609 
2610  if (strncmp(name, *a, element_len) == 0)
2611  {
2612  *len = element_len;
2613  return a - array;
2614  }
2615  }
2616 
2617  /*
2618  * Fold to upper case, then to lower case, so that we can match reliably
2619  * even in languages in which case conversions are not injective.
2620  */
2621  upper_name = str_toupper(name, strlen(name), collid);
2622  lower_name = str_tolower(upper_name, strlen(upper_name), collid);
2623  pfree(upper_name);
2624 
2625  for (a = array; *a != NULL; a++)
2626  {
2627  char *upper_element;
2628  char *lower_element;
2629  int element_len;
2630 
2631  /* Likewise upper/lower-case array element */
2632  upper_element = str_toupper(*a, strlen(*a), collid);
2633  lower_element = str_tolower(upper_element, strlen(upper_element),
2634  collid);
2635  pfree(upper_element);
2636  element_len = strlen(lower_element);
2637 
2638  /* Match? */
2639  if (strncmp(lower_name, lower_element, element_len) == 0)
2640  {
2641  *len = element_len;
2642  pfree(lower_element);
2643  pfree(lower_name);
2644  return a - array;
2645  }
2646  pfree(lower_element);
2647  }
2648 
2649  pfree(lower_name);
2650  return -1;
2651 }
2652 
2653 /*
2654  * Perform a sequential search in 'array' (or 'localized_array', if that's
2655  * not NULL) for an entry matching the first character(s) of the 'src'
2656  * string case-insensitively.
2657  *
2658  * The 'array' is presumed to be English words (all-ASCII), but
2659  * if 'localized_array' is supplied, that might be non-English
2660  * so we need a more expensive case-folding transformation
2661  * (which will follow the rules of the collation 'collid').
2662  *
2663  * If a match is found, copy the array index of the match into the integer
2664  * pointed to by 'dest' and advance 'src' to the end of the part of the string
2665  * which matched.
2666  *
2667  * Returns true on match, false on failure (if escontext points to an
2668  * ErrorSaveContext; otherwise errors are thrown).
2669  *
2670  * 'node' is used only for error reports: node->key->name identifies the
2671  * field type we were searching for.
2672  */
2673 static bool
2674 from_char_seq_search(int *dest, const char **src, const char *const *array,
2675  char **localized_array, Oid collid,
2676  FormatNode *node, Node *escontext)
2677 {
2678  int len;
2679 
2680  if (localized_array == NULL)
2681  *dest = seq_search_ascii(*src, array, &len);
2682  else
2683  *dest = seq_search_localized(*src, localized_array, &len, collid);
2684 
2685  if (len <= 0)
2686  {
2687  /*
2688  * In the error report, truncate the string at the next whitespace (if
2689  * any) to avoid including irrelevant data.
2690  */
2691  char *copy = pstrdup(*src);
2692  char *c;
2693 
2694  for (c = copy; *c; c++)
2695  {
2696  if (scanner_isspace(*c))
2697  {
2698  *c = '\0';
2699  break;
2700  }
2701  }
2702 
2703  ereturn(escontext, false,
2704  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2705  errmsg("invalid value \"%s\" for \"%s\"",
2706  copy, node->key->name),
2707  errdetail("The given value did not match any of "
2708  "the allowed values for this field.")));
2709  }
2710  *src += len;
2711  return true;
2712 }
2713 
2714 /* ----------
2715  * Process a TmToChar struct as denoted by a list of FormatNodes.
2716  * The formatted data is written to the string pointed to by 'out'.
2717  * ----------
2718  */
2719 static void
2720 DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
2721 {
2722  FormatNode *n;
2723  char *s;
2724  struct fmt_tm *tm = &in->tm;
2725  int i;
2726 
2727  /* cache localized days and months */
2729 
2730  s = out;
2731  for (n = node; n->type != NODE_TYPE_END; n++)
2732  {
2733  if (n->type != NODE_TYPE_ACTION)
2734  {
2735  strcpy(s, n->character);
2736  s += strlen(s);
2737  continue;
2738  }
2739 
2740  switch (n->key->id)
2741  {
2742  case DCH_A_M:
2743  case DCH_P_M:
2744  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2745  ? P_M_STR : A_M_STR);
2746  s += strlen(s);
2747  break;
2748  case DCH_AM:
2749  case DCH_PM:
2750  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2751  ? PM_STR : AM_STR);
2752  s += strlen(s);
2753  break;
2754  case DCH_a_m:
2755  case DCH_p_m:
2756  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2757  ? p_m_STR : a_m_STR);
2758  s += strlen(s);
2759  break;
2760  case DCH_am:
2761  case DCH_pm:
2762  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2763  ? pm_STR : am_STR);
2764  s += strlen(s);
2765  break;
2766  case DCH_HH:
2767  case DCH_HH12:
2768 
2769  /*
2770  * display time as shown on a 12-hour clock, even for
2771  * intervals
2772  */
2773  sprintf(s, "%0*lld", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2774  tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ?
2775  (long long) (HOURS_PER_DAY / 2) :
2776  (long long) (tm->tm_hour % (HOURS_PER_DAY / 2)));
2777  if (S_THth(n->suffix))
2778  str_numth(s, s, S_TH_TYPE(n->suffix));
2779  s += strlen(s);
2780  break;
2781  case DCH_HH24:
2782  sprintf(s, "%0*lld", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2783  (long long) tm->tm_hour);
2784  if (S_THth(n->suffix))
2785  str_numth(s, s, S_TH_TYPE(n->suffix));
2786  s += strlen(s);
2787  break;
2788  case DCH_MI:
2789  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
2790  tm->tm_min);
2791  if (S_THth(n->suffix))
2792  str_numth(s, s, S_TH_TYPE(n->suffix));
2793  s += strlen(s);
2794  break;
2795  case DCH_SS:
2796  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
2797  tm->tm_sec);
2798  if (S_THth(n->suffix))
2799  str_numth(s, s, S_TH_TYPE(n->suffix));
2800  s += strlen(s);
2801  break;
2802 
2803 #define DCH_to_char_fsec(frac_fmt, frac_val) \
2804  sprintf(s, frac_fmt, (int) (frac_val)); \
2805  if (S_THth(n->suffix)) \
2806  str_numth(s, s, S_TH_TYPE(n->suffix)); \
2807  s += strlen(s)
2808 
2809  case DCH_FF1: /* tenth of second */
2810  DCH_to_char_fsec("%01d", in->fsec / 100000);
2811  break;
2812  case DCH_FF2: /* hundredth of second */
2813  DCH_to_char_fsec("%02d", in->fsec / 10000);
2814  break;
2815  case DCH_FF3:
2816  case DCH_MS: /* millisecond */
2817  DCH_to_char_fsec("%03d", in->fsec / 1000);
2818  break;
2819  case DCH_FF4: /* tenth of a millisecond */
2820  DCH_to_char_fsec("%04d", in->fsec / 100);
2821  break;
2822  case DCH_FF5: /* hundredth of a millisecond */
2823  DCH_to_char_fsec("%05d", in->fsec / 10);
2824  break;
2825  case DCH_FF6:
2826  case DCH_US: /* microsecond */
2827  DCH_to_char_fsec("%06d", in->fsec);
2828  break;
2829 #undef DCH_to_char_fsec
2830  case DCH_SSSS:
2831  sprintf(s, "%lld",
2832  (long long) (tm->tm_hour * SECS_PER_HOUR +
2834  tm->tm_sec));
2835  if (S_THth(n->suffix))
2836  str_numth(s, s, S_TH_TYPE(n->suffix));
2837  s += strlen(s);
2838  break;
2839  case DCH_tz:
2841  if (tmtcTzn(in))
2842  {
2843  /* We assume here that timezone names aren't localized */
2844  char *p = asc_tolower_z(tmtcTzn(in));
2845 
2846  strcpy(s, p);
2847  pfree(p);
2848  s += strlen(s);
2849  }
2850  break;
2851  case DCH_TZ:
2853  if (tmtcTzn(in))
2854  {
2855  strcpy(s, tmtcTzn(in));
2856  s += strlen(s);
2857  }
2858  break;
2859  case DCH_TZH:
2861  sprintf(s, "%c%02d",
2862  (tm->tm_gmtoff >= 0) ? '+' : '-',
2863  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2864  s += strlen(s);
2865  break;
2866  case DCH_TZM:
2868  sprintf(s, "%02d",
2869  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2870  s += strlen(s);
2871  break;
2872  case DCH_OF:
2874  sprintf(s, "%c%0*d",
2875  (tm->tm_gmtoff >= 0) ? '+' : '-',
2876  S_FM(n->suffix) ? 0 : 2,
2877  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2878  s += strlen(s);
2879  if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
2880  {
2881  sprintf(s, ":%02d",
2882  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2883  s += strlen(s);
2884  }
2885  break;
2886  case DCH_A_D:
2887  case DCH_B_C:
2889  strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
2890  s += strlen(s);
2891  break;
2892  case DCH_AD:
2893  case DCH_BC:
2895  strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
2896  s += strlen(s);
2897  break;
2898  case DCH_a_d:
2899  case DCH_b_c:
2901  strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
2902  s += strlen(s);
2903  break;
2904  case DCH_ad:
2905  case DCH_bc:
2907  strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
2908  s += strlen(s);
2909  break;
2910  case DCH_MONTH:
2912  if (!tm->tm_mon)
2913  break;
2914  if (S_TM(n->suffix))
2915  {
2917 
2918  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2919  strcpy(s, str);
2920  else
2921  ereport(ERROR,
2922  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2923  errmsg("localized string format value too long")));
2924  }
2925  else
2926  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2928  s += strlen(s);
2929  break;
2930  case DCH_Month:
2932  if (!tm->tm_mon)
2933  break;
2934  if (S_TM(n->suffix))
2935  {
2937 
2938  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2939  strcpy(s, str);
2940  else
2941  ereport(ERROR,
2942  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2943  errmsg("localized string format value too long")));
2944  }
2945  else
2946  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2947  months_full[tm->tm_mon - 1]);
2948  s += strlen(s);
2949  break;
2950  case DCH_month:
2952  if (!tm->tm_mon)
2953  break;
2954  if (S_TM(n->suffix))
2955  {
2957 
2958  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2959  strcpy(s, str);
2960  else
2961  ereport(ERROR,
2962  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2963  errmsg("localized string format value too long")));
2964  }
2965  else
2966  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2968  s += strlen(s);
2969  break;
2970  case DCH_MON:
2972  if (!tm->tm_mon)
2973  break;
2974  if (S_TM(n->suffix))
2975  {
2977 
2978  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2979  strcpy(s, str);
2980  else
2981  ereport(ERROR,
2982  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2983  errmsg("localized string format value too long")));
2984  }
2985  else
2986  strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
2987  s += strlen(s);
2988  break;
2989  case DCH_Mon:
2991  if (!tm->tm_mon)
2992  break;
2993  if (S_TM(n->suffix))
2994  {
2996 
2997  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2998  strcpy(s, str);
2999  else
3000  ereport(ERROR,
3001  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3002  errmsg("localized string format value too long")));
3003  }
3004  else
3005  strcpy(s, months[tm->tm_mon - 1]);
3006  s += strlen(s);
3007  break;
3008  case DCH_mon:
3010  if (!tm->tm_mon)
3011  break;
3012  if (S_TM(n->suffix))
3013  {
3015 
3016  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3017  strcpy(s, str);
3018  else
3019  ereport(ERROR,
3020  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3021  errmsg("localized string format value too long")));
3022  }
3023  else
3024  strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
3025  s += strlen(s);
3026  break;
3027  case DCH_MM:
3028  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
3029  tm->tm_mon);
3030  if (S_THth(n->suffix))
3031  str_numth(s, s, S_TH_TYPE(n->suffix));
3032  s += strlen(s);
3033  break;
3034  case DCH_DAY:
3036  if (S_TM(n->suffix))
3037  {
3039 
3040  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3041  strcpy(s, str);
3042  else
3043  ereport(ERROR,
3044  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3045  errmsg("localized string format value too long")));
3046  }
3047  else
3048  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3050  s += strlen(s);
3051  break;
3052  case DCH_Day:
3054  if (S_TM(n->suffix))
3055  {
3057 
3058  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3059  strcpy(s, str);
3060  else
3061  ereport(ERROR,
3062  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3063  errmsg("localized string format value too long")));
3064  }
3065  else
3066  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3067  days[tm->tm_wday]);
3068  s += strlen(s);
3069  break;
3070  case DCH_day:
3072  if (S_TM(n->suffix))
3073  {
3075 
3076  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3077  strcpy(s, str);
3078  else
3079  ereport(ERROR,
3080  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3081  errmsg("localized string format value too long")));
3082  }
3083  else
3084  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3086  s += strlen(s);
3087  break;
3088  case DCH_DY:
3090  if (S_TM(n->suffix))
3091  {
3093 
3094  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3095  strcpy(s, str);
3096  else
3097  ereport(ERROR,
3098  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3099  errmsg("localized string format value too long")));
3100  }
3101  else
3102  strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
3103  s += strlen(s);
3104  break;
3105  case DCH_Dy:
3107  if (S_TM(n->suffix))
3108  {
3110 
3111  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3112  strcpy(s, str);
3113  else
3114  ereport(ERROR,
3115  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3116  errmsg("localized string format value too long")));
3117  }
3118  else
3119  strcpy(s, days_short[tm->tm_wday]);
3120  s += strlen(s);
3121  break;
3122  case DCH_dy:
3124  if (S_TM(n->suffix))
3125  {
3127 
3128  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3129  strcpy(s, str);
3130  else
3131  ereport(ERROR,
3132  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3133  errmsg("localized string format value too long")));
3134  }
3135  else
3136  strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
3137  s += strlen(s);
3138  break;
3139  case DCH_DDD:
3140  case DCH_IDDD:
3141  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
3142  (n->key->id == DCH_DDD) ?
3143  tm->tm_yday :
3145  if (S_THth(n->suffix))
3146  str_numth(s, s, S_TH_TYPE(n->suffix));
3147  s += strlen(s);
3148  break;
3149  case DCH_DD:
3150  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
3151  if (S_THth(n->suffix))
3152  str_numth(s, s, S_TH_TYPE(n->suffix));
3153  s += strlen(s);
3154  break;
3155  case DCH_D:
3157  sprintf(s, "%d", tm->tm_wday + 1);
3158  if (S_THth(n->suffix))
3159  str_numth(s, s, S_TH_TYPE(n->suffix));
3160  s += strlen(s);
3161  break;
3162  case DCH_ID:
3164  sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
3165  if (S_THth(n->suffix))
3166  str_numth(s, s, S_TH_TYPE(n->suffix));
3167  s += strlen(s);
3168  break;
3169  case DCH_WW:
3170  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3171  (tm->tm_yday - 1) / 7 + 1);
3172  if (S_THth(n->suffix))
3173  str_numth(s, s, S_TH_TYPE(n->suffix));
3174  s += strlen(s);
3175  break;
3176  case DCH_IW:
3177  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3179  if (S_THth(n->suffix))
3180  str_numth(s, s, S_TH_TYPE(n->suffix));
3181  s += strlen(s);
3182  break;
3183  case DCH_Q:
3184  if (!tm->tm_mon)
3185  break;
3186  sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
3187  if (S_THth(n->suffix))
3188  str_numth(s, s, S_TH_TYPE(n->suffix));
3189  s += strlen(s);
3190  break;
3191  case DCH_CC:
3192  if (is_interval) /* straight calculation */
3193  i = tm->tm_year / 100;
3194  else
3195  {
3196  if (tm->tm_year > 0)
3197  /* Century 20 == 1901 - 2000 */
3198  i = (tm->tm_year - 1) / 100 + 1;
3199  else
3200  /* Century 6BC == 600BC - 501BC */
3201  i = tm->tm_year / 100 - 1;
3202  }
3203  if (i <= 99 && i >= -99)
3204  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
3205  else
3206  sprintf(s, "%d", i);
3207  if (S_THth(n->suffix))
3208  str_numth(s, s, S_TH_TYPE(n->suffix));
3209  s += strlen(s);
3210  break;
3211  case DCH_Y_YYY:
3212  i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
3213  sprintf(s, "%d,%03d", i,
3214  ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
3215  if (S_THth(n->suffix))
3216  str_numth(s, s, S_TH_TYPE(n->suffix));
3217  s += strlen(s);
3218  break;
3219  case DCH_YYYY:
3220  case DCH_IYYY:
3221  sprintf(s, "%0*d",
3222  S_FM(n->suffix) ? 0 :
3223  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
3224  (n->key->id == DCH_YYYY ?
3225  ADJUST_YEAR(tm->tm_year, is_interval) :
3227  tm->tm_mon,
3228  tm->tm_mday),
3229  is_interval)));
3230  if (S_THth(n->suffix))
3231  str_numth(s, s, S_TH_TYPE(n->suffix));
3232  s += strlen(s);
3233  break;
3234  case DCH_YYY:
3235  case DCH_IYY:
3236  sprintf(s, "%0*d",
3237  S_FM(n->suffix) ? 0 :
3238  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
3239  (n->key->id == DCH_YYY ?
3240  ADJUST_YEAR(tm->tm_year, is_interval) :
3242  tm->tm_mon,
3243  tm->tm_mday),
3244  is_interval)) % 1000);
3245  if (S_THth(n->suffix))
3246  str_numth(s, s, S_TH_TYPE(n->suffix));
3247  s += strlen(s);
3248  break;
3249  case DCH_YY:
3250  case DCH_IY:
3251  sprintf(s, "%0*d",
3252  S_FM(n->suffix) ? 0 :
3253  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
3254  (n->key->id == DCH_YY ?
3255  ADJUST_YEAR(tm->tm_year, is_interval) :
3257  tm->tm_mon,
3258  tm->tm_mday),
3259  is_interval)) % 100);
3260  if (S_THth(n->suffix))
3261  str_numth(s, s, S_TH_TYPE(n->suffix));
3262  s += strlen(s);
3263  break;
3264  case DCH_Y:
3265  case DCH_I:
3266  sprintf(s, "%1d",
3267  (n->key->id == DCH_Y ?
3268  ADJUST_YEAR(tm->tm_year, is_interval) :
3270  tm->tm_mon,
3271  tm->tm_mday),
3272  is_interval)) % 10);
3273  if (S_THth(n->suffix))
3274  str_numth(s, s, S_TH_TYPE(n->suffix));
3275  s += strlen(s);
3276  break;
3277  case DCH_RM:
3278  /* FALLTHROUGH */
3279  case DCH_rm:
3280 
3281  /*
3282  * For intervals, values like '12 month' will be reduced to 0
3283  * month and some years. These should be processed.
3284  */
3285  if (!tm->tm_mon && !tm->tm_year)
3286  break;
3287  else
3288  {
3289  int mon = 0;
3290  const char *const *months;
3291 
3292  if (n->key->id == DCH_RM)
3294  else
3296 
3297  /*
3298  * Compute the position in the roman-numeral array. Note
3299  * that the contents of the array are reversed, December
3300  * being first and January last.
3301  */
3302  if (tm->tm_mon == 0)
3303  {
3304  /*
3305  * This case is special, and tracks the case of full
3306  * interval years.
3307  */
3308  mon = tm->tm_year >= 0 ? 0 : MONTHS_PER_YEAR - 1;
3309  }
3310  else if (tm->tm_mon < 0)
3311  {
3312  /*
3313  * Negative case. In this case, the calculation is
3314  * reversed, where -1 means December, -2 November,
3315  * etc.
3316  */
3317  mon = -1 * (tm->tm_mon + 1);
3318  }
3319  else
3320  {
3321  /*
3322  * Common case, with a strictly positive value. The
3323  * position in the array matches with the value of
3324  * tm_mon.
3325  */
3326  mon = MONTHS_PER_YEAR - tm->tm_mon;
3327  }
3328 
3329  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
3330  months[mon]);
3331  s += strlen(s);
3332  }
3333  break;
3334  case DCH_W:
3335  sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
3336  if (S_THth(n->suffix))
3337  str_numth(s, s, S_TH_TYPE(n->suffix));
3338  s += strlen(s);
3339  break;
3340  case DCH_J:
3341  sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
3342  if (S_THth(n->suffix))
3343  str_numth(s, s, S_TH_TYPE(n->suffix));
3344  s += strlen(s);
3345  break;
3346  }
3347  }
3348 
3349  *s = '\0';
3350 }
3351 
3352 /*
3353  * Process the string 'in' as denoted by the array of FormatNodes 'node[]'.
3354  * The TmFromChar struct pointed to by 'out' is populated with the results.
3355  *
3356  * 'collid' identifies the collation to use, if needed.
3357  * 'std' specifies standard parsing mode.
3358  *
3359  * If escontext points to an ErrorSaveContext, data errors will be reported
3360  * by filling that struct; the caller must test SOFT_ERROR_OCCURRED() to see
3361  * whether an error occurred. Otherwise, errors are thrown.
3362  *
3363  * Note: we currently don't have any to_interval() function, so there
3364  * is no need here for INVALID_FOR_INTERVAL checks.
3365  */
3366 static void
3367 DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
3368  Oid collid, bool std, Node *escontext)
3369 {
3370  FormatNode *n;
3371  const char *s;
3372  int len,
3373  value;
3374  bool fx_mode = std;
3375 
3376  /* number of extra skipped characters (more than given in format string) */
3377  int extra_skip = 0;
3378 
3379  /* cache localized days and months */
3381 
3382  for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
3383  {
3384  /*
3385  * Ignore spaces at the beginning of the string and before fields when
3386  * not in FX (fixed width) mode.
3387  */
3388  if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) &&
3389  (n->type == NODE_TYPE_ACTION || n == node))
3390  {
3391  while (*s != '\0' && isspace((unsigned char) *s))
3392  {
3393  s++;
3394  extra_skip++;
3395  }
3396  }
3397 
3398  if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
3399  {
3400  if (std)
3401  {
3402  /*
3403  * Standard mode requires strict matching between format
3404  * string separators/spaces and input string.
3405  */
3406  Assert(n->character[0] && !n->character[1]);
3407 
3408  if (*s == n->character[0])
3409  s++;
3410  else
3411  ereturn(escontext,,
3412  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3413  errmsg("unmatched format separator \"%c\"",
3414  n->character[0])));
3415  }
3416  else if (!fx_mode)
3417  {
3418  /*
3419  * In non FX (fixed format) mode one format string space or
3420  * separator match to one space or separator in input string.
3421  * Or match nothing if there is no space or separator in the
3422  * current position of input string.
3423  */
3424  extra_skip--;
3425  if (isspace((unsigned char) *s) || is_separator_char(s))
3426  {
3427  s++;
3428  extra_skip++;
3429  }
3430  }
3431  else
3432  {
3433  /*
3434  * In FX mode, on format string space or separator we consume
3435  * exactly one character from input string. Notice we don't
3436  * insist that the consumed character match the format's
3437  * character.
3438  */
3439  s += pg_mblen(s);
3440  }
3441  continue;
3442  }
3443  else if (n->type != NODE_TYPE_ACTION)
3444  {
3445  /*
3446  * Text character, so consume one character from input string.
3447  * Notice we don't insist that the consumed character match the
3448  * format's character.
3449  */
3450  if (!fx_mode)
3451  {
3452  /*
3453  * In non FX mode we might have skipped some extra characters
3454  * (more than specified in format string) before. In this
3455  * case we don't skip input string character, because it might
3456  * be part of field.
3457  */
3458  if (extra_skip > 0)
3459  extra_skip--;
3460  else
3461  s += pg_mblen(s);
3462  }
3463  else
3464  {
3465  int chlen = pg_mblen(s);
3466 
3467  /*
3468  * Standard mode requires strict match of format characters.
3469  */
3470  if (std && n->type == NODE_TYPE_CHAR &&
3471  strncmp(s, n->character, chlen) != 0)
3472  ereturn(escontext,,
3473  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3474  errmsg("unmatched format character \"%s\"",
3475  n->character)));
3476 
3477  s += chlen;
3478  }
3479  continue;
3480  }
3481 
3482  if (!from_char_set_mode(out, n->key->date_mode, escontext))
3483  return;
3484 
3485  switch (n->key->id)
3486  {
3487  case DCH_FX:
3488  fx_mode = true;
3489  break;
3490  case DCH_A_M:
3491  case DCH_P_M:
3492  case DCH_a_m:
3493  case DCH_p_m:
3495  NULL, InvalidOid,
3496  n, escontext))
3497  return;
3498  if (!from_char_set_int(&out->pm, value % 2, n, escontext))
3499  return;
3500  out->clock = CLOCK_12_HOUR;
3501  break;
3502  case DCH_AM:
3503  case DCH_PM:
3504  case DCH_am:
3505  case DCH_pm:
3507  NULL, InvalidOid,
3508  n, escontext))
3509  return;
3510  if (!from_char_set_int(&out->pm, value % 2, n, escontext))
3511  return;
3512  out->clock = CLOCK_12_HOUR;
3513  break;
3514  case DCH_HH:
3515  case DCH_HH12:
3516  if (from_char_parse_int_len(&out->hh, &s, 2, n, escontext) < 0)
3517  return;
3518  out->clock = CLOCK_12_HOUR;
3519  SKIP_THth(s, n->suffix);
3520  break;
3521  case DCH_HH24:
3522  if (from_char_parse_int_len(&out->hh, &s, 2, n, escontext) < 0)
3523  return;
3524  SKIP_THth(s, n->suffix);
3525  break;
3526  case DCH_MI:
3527  if (from_char_parse_int(&out->mi, &s, n, escontext) < 0)
3528  return;
3529  SKIP_THth(s, n->suffix);
3530  break;
3531  case DCH_SS:
3532  if (from_char_parse_int(&out->ss, &s, n, escontext) < 0)
3533  return;
3534  SKIP_THth(s, n->suffix);
3535  break;
3536  case DCH_MS: /* millisecond */
3537  len = from_char_parse_int_len(&out->ms, &s, 3, n, escontext);
3538  if (len < 0)
3539  return;
3540 
3541  /*
3542  * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
3543  */
3544  out->ms *= len == 1 ? 100 :
3545  len == 2 ? 10 : 1;
3546 
3547  SKIP_THth(s, n->suffix);
3548  break;
3549  case DCH_FF1:
3550  case DCH_FF2:
3551  case DCH_FF3:
3552  case DCH_FF4:
3553  case DCH_FF5:
3554  case DCH_FF6:
3555  out->ff = n->key->id - DCH_FF1 + 1;
3556  /* FALLTHROUGH */
3557  case DCH_US: /* microsecond */
3558  len = from_char_parse_int_len(&out->us, &s,
3559  n->key->id == DCH_US ? 6 :
3560  out->ff, n, escontext);
3561  if (len < 0)
3562  return;
3563 
3564  out->us *= len == 1 ? 100000 :
3565  len == 2 ? 10000 :
3566  len == 3 ? 1000 :
3567  len == 4 ? 100 :
3568  len == 5 ? 10 : 1;
3569 
3570  SKIP_THth(s, n->suffix);
3571  break;
3572  case DCH_SSSS:
3573  if (from_char_parse_int(&out->ssss, &s, n, escontext) < 0)
3574  return;
3575  SKIP_THth(s, n->suffix);
3576  break;
3577  case DCH_tz:
3578  case DCH_TZ:
3579  {
3580  int tzlen;
3581 
3582  tzlen = DecodeTimezoneAbbrevPrefix(s,
3583  &out->gmtoffset,
3584  &out->tzp);
3585  if (tzlen > 0)
3586  {
3587  out->has_tz = true;
3588  /* we only need the zone abbrev for DYNTZ case */
3589  if (out->tzp)
3590  out->abbrev = pnstrdup(s, tzlen);
3591  out->tzsign = 0; /* drop any earlier TZH/TZM info */
3592  s += tzlen;
3593  break;
3594  }
3595  else if (isalpha((unsigned char) *s))
3596  {
3597  /*
3598  * It doesn't match any abbreviation, but it starts
3599  * with a letter. OF format certainly won't succeed;
3600  * assume it's a misspelled abbreviation and complain
3601  * accordingly.
3602  */
3603  ereturn(escontext,,
3604  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3605  errmsg("invalid value \"%s\" for \"%s\"",
3606  s, n->key->name),
3607  errdetail("Time zone abbreviation is not recognized.")));
3608  }
3609  /* otherwise parse it like OF */
3610  }
3611  /* FALLTHROUGH */
3612  case DCH_OF:
3613  /* OF is equivalent to TZH or TZH:TZM */
3614  /* see TZH comments below */
3615  if (*s == '+' || *s == '-' || *s == ' ')
3616  {
3617  out->tzsign = *s == '-' ? -1 : +1;
3618  s++;
3619  }
3620  else
3621  {
3622  if (extra_skip > 0 && *(s - 1) == '-')
3623  out->tzsign = -1;
3624  else
3625  out->tzsign = +1;
3626  }
3627  if (from_char_parse_int_len(&out->tzh, &s, 2, n, escontext) < 0)
3628  return;
3629  if (*s == ':')
3630  {
3631  s++;
3632  if (from_char_parse_int_len(&out->tzm, &s, 2, n,
3633  escontext) < 0)
3634  return;
3635  }
3636  break;
3637  case DCH_TZH:
3638 
3639  /*
3640  * Value of TZH might be negative. And the issue is that we
3641  * might swallow minus sign as the separator. So, if we have
3642  * skipped more characters than specified in the format
3643  * string, then we consider prepending last skipped minus to
3644  * TZH.
3645  */
3646  if (*s == '+' || *s == '-' || *s == ' ')
3647  {
3648  out->tzsign = *s == '-' ? -1 : +1;
3649  s++;
3650  }
3651  else
3652  {
3653  if (extra_skip > 0 && *(s - 1) == '-')
3654  out->tzsign = -1;
3655  else
3656  out->tzsign = +1;
3657  }
3658 
3659  if (from_char_parse_int_len(&out->tzh, &s, 2, n, escontext) < 0)
3660  return;
3661  break;
3662  case DCH_TZM:
3663  /* assign positive timezone sign if TZH was not seen before */
3664  if (!out->tzsign)
3665  out->tzsign = +1;
3666  if (from_char_parse_int_len(&out->tzm, &s, 2, n, escontext) < 0)
3667  return;
3668  break;
3669  case DCH_A_D:
3670  case DCH_B_C:
3671  case DCH_a_d:
3672  case DCH_b_c:
3674  NULL, InvalidOid,
3675  n, escontext))
3676  return;
3677  if (!from_char_set_int(&out->bc, value % 2, n, escontext))
3678  return;
3679  break;
3680  case DCH_AD:
3681  case DCH_BC:
3682  case DCH_ad:
3683  case DCH_bc:
3685  NULL, InvalidOid,
3686  n, escontext))
3687  return;
3688  if (!from_char_set_int(&out->bc, value % 2, n, escontext))
3689  return;
3690  break;
3691  case DCH_MONTH:
3692  case DCH_Month:
3693  case DCH_month:
3695  S_TM(n->suffix) ? localized_full_months : NULL,
3696  collid,
3697  n, escontext))
3698  return;
3699  if (!from_char_set_int(&out->mm, value + 1, n, escontext))
3700  return;
3701  break;
3702  case DCH_MON:
3703  case DCH_Mon:
3704  case DCH_mon:
3705  if (!from_char_seq_search(&value, &s, months,
3706  S_TM(n->suffix) ? localized_abbrev_months : NULL,
3707  collid,
3708  n, escontext))
3709  return;
3710  if (!from_char_set_int(&out->mm, value + 1, n, escontext))
3711  return;
3712  break;
3713  case DCH_MM:
3714  if (from_char_parse_int(&out->mm, &s, n, escontext) < 0)
3715  return;
3716  SKIP_THth(s, n->suffix);
3717  break;
3718  case DCH_DAY:
3719  case DCH_Day:
3720  case DCH_day:
3721  if (!from_char_seq_search(&value, &s, days,
3722  S_TM(n->suffix) ? localized_full_days : NULL,
3723  collid,
3724  n, escontext))
3725  return;
3726  if (!from_char_set_int(&out->d, value, n, escontext))
3727  return;
3728  out->d++;
3729  break;
3730  case DCH_DY:
3731  case DCH_Dy:
3732  case DCH_dy:
3734  S_TM(n->suffix) ? localized_abbrev_days : NULL,
3735  collid,
3736  n, escontext))
3737  return;
3738  if (!from_char_set_int(&out->d, value, n, escontext))
3739  return;
3740  out->d++;
3741  break;
3742  case DCH_DDD:
3743  if (from_char_parse_int(&out->ddd, &s, n, escontext) < 0)
3744  return;
3745  SKIP_THth(s, n->suffix);
3746  break;
3747  case DCH_IDDD:
3748  if (from_char_parse_int_len(&out->ddd, &s, 3, n, escontext) < 0)
3749  return;
3750  SKIP_THth(s, n->suffix);
3751  break;
3752  case DCH_DD:
3753  if (from_char_parse_int(&out->dd, &s, n, escontext) < 0)
3754  return;
3755  SKIP_THth(s, n->suffix);
3756  break;
3757  case DCH_D:
3758  if (from_char_parse_int(&out->d, &s, n, escontext) < 0)
3759  return;
3760  SKIP_THth(s, n->suffix);
3761  break;
3762  case DCH_ID:
3763  if (from_char_parse_int_len(&out->d, &s, 1, n, escontext) < 0)
3764  return;
3765  /* Shift numbering to match Gregorian where Sunday = 1 */
3766  if (++out->d > 7)
3767  out->d = 1;
3768  SKIP_THth(s, n->suffix);
3769  break;
3770  case DCH_WW:
3771  case DCH_IW:
3772  if (from_char_parse_int(&out->ww, &s, n, escontext) < 0)
3773  return;
3774  SKIP_THth(s, n->suffix);
3775  break;
3776  case DCH_Q:
3777 
3778  /*
3779  * We ignore 'Q' when converting to date because it is unclear
3780  * which date in the quarter to use, and some people specify
3781  * both quarter and month, so if it was honored it might
3782  * conflict with the supplied month. That is also why we don't
3783  * throw an error.
3784  *
3785  * We still parse the source string for an integer, but it
3786  * isn't stored anywhere in 'out'.
3787  */
3788  if (from_char_parse_int((int *) NULL, &s, n, escontext) < 0)
3789  return;
3790  SKIP_THth(s, n->suffix);
3791  break;
3792  case DCH_CC:
3793  if (from_char_parse_int(&out->cc, &s, n, escontext) < 0)
3794  return;
3795  SKIP_THth(s, n->suffix);
3796  break;
3797  case DCH_Y_YYY:
3798  {
3799  int matched,
3800  years,
3801  millennia,
3802  nch;
3803 
3804  matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
3805  if (matched < 2)
3806  ereturn(escontext,,
3807  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3808  errmsg("invalid input string for \"Y,YYY\"")));
3809  years += (millennia * 1000);
3810  if (!from_char_set_int(&out->year, years, n, escontext))
3811  return;
3812  out->yysz = 4;
3813  s += nch;
3814  SKIP_THth(s, n->suffix);
3815  }
3816  break;
3817  case DCH_YYYY:
3818  case DCH_IYYY:
3819  if (from_char_parse_int(&out->year, &s, n, escontext) < 0)
3820  return;
3821  out->yysz = 4;
3822  SKIP_THth(s, n->suffix);
3823  break;
3824  case DCH_YYY:
3825  case DCH_IYY:
3826  len = from_char_parse_int(&out->year, &s, n, escontext);
3827  if (len < 0)
3828  return;
3829  if (len < 4)
3830  out->year = adjust_partial_year_to_2020(out->year);
3831  out->yysz = 3;
3832  SKIP_THth(s, n->suffix);
3833  break;
3834  case DCH_YY:
3835  case DCH_IY:
3836  len = from_char_parse_int(&out->year, &s, n, escontext);
3837  if (len < 0)
3838  return;
3839  if (len < 4)
3840  out->year = adjust_partial_year_to_2020(out->year);
3841  out->yysz = 2;
3842  SKIP_THth(s, n->suffix);
3843  break;
3844  case DCH_Y:
3845  case DCH_I:
3846  len = from_char_parse_int(&out->year, &s, n, escontext);
3847  if (len < 0)
3848  return;
3849  if (len < 4)
3850  out->year = adjust_partial_year_to_2020(out->year);
3851  out->yysz = 1;
3852  SKIP_THth(s, n->suffix);
3853  break;
3854  case DCH_RM:
3855  case DCH_rm:
3857  NULL, InvalidOid,
3858  n, escontext))
3859  return;
3860  if (!from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, n,
3861  escontext))
3862  return;
3863  break;
3864  case DCH_W:
3865  if (from_char_parse_int(&out->w, &s, n, escontext) < 0)
3866  return;
3867  SKIP_THth(s, n->suffix);
3868  break;
3869  case DCH_J:
3870  if (from_char_parse_int(&out->j, &s, n, escontext) < 0)
3871  return;
3872  SKIP_THth(s, n->suffix);
3873  break;
3874  }
3875 
3876  /* Ignore all spaces after fields */
3877  if (!fx_mode)
3878  {
3879  extra_skip = 0;
3880  while (*s != '\0' && isspace((unsigned char) *s))
3881  {
3882  s++;
3883  extra_skip++;
3884  }
3885  }
3886  }
3887 
3888  /*
3889  * Standard parsing mode doesn't allow unmatched format patterns or
3890  * trailing characters in the input string.
3891  */
3892  if (std)
3893  {
3894  if (n->type != NODE_TYPE_END)
3895  ereturn(escontext,,
3896  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3897  errmsg("input string is too short for datetime format")));
3898 
3899  while (*s != '\0' && isspace((unsigned char) *s))
3900  s++;
3901 
3902  if (*s != '\0')
3903  ereturn(escontext,,
3904  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3905  errmsg("trailing characters remain in input string after datetime format")));
3906  }
3907 }
3908 
3909 /*
3910  * The invariant for DCH cache entry management is that DCHCounter is equal
3911  * to the maximum age value among the existing entries, and we increment it
3912  * whenever an access occurs. If we approach overflow, deal with that by
3913  * halving all the age values, so that we retain a fairly accurate idea of
3914  * which entries are oldest.
3915  */
3916 static inline void
3918 {
3919  if (DCHCounter >= (INT_MAX - 1))
3920  {
3921  for (int i = 0; i < n_DCHCache; i++)
3922  DCHCache[i]->age >>= 1;
3923  DCHCounter >>= 1;
3924  }
3925 }
3926 
3927 /*
3928  * Get mask of date/time/zone components present in format nodes.
3929  */
3930 static int
3932 {
3933  FormatNode *n;
3934  int flags = 0;
3935 
3936  for (n = node; n->type != NODE_TYPE_END; n++)
3937  {
3938  if (n->type != NODE_TYPE_ACTION)
3939  continue;
3940 
3941  switch (n->key->id)
3942  {
3943  case DCH_FX:
3944  break;
3945  case DCH_A_M:
3946  case DCH_P_M:
3947  case DCH_a_m:
3948  case DCH_p_m:
3949  case DCH_AM:
3950  case DCH_PM:
3951  case DCH_am:
3952  case DCH_pm:
3953  case DCH_HH:
3954  case DCH_HH12:
3955  case DCH_HH24:
3956  case DCH_MI:
3957  case DCH_SS:
3958  case DCH_MS: /* millisecond */
3959  case DCH_US: /* microsecond */
3960  case DCH_FF1:
3961  case DCH_FF2:
3962  case DCH_FF3:
3963  case DCH_FF4:
3964  case DCH_FF5:
3965  case DCH_FF6:
3966  case DCH_SSSS:
3967  flags |= DCH_TIMED;
3968  break;
3969  case DCH_tz:
3970  case DCH_TZ:
3971  case DCH_OF:
3972  case DCH_TZH:
3973  case DCH_TZM:
3974  flags |= DCH_ZONED;
3975  break;
3976  case DCH_A_D:
3977  case DCH_B_C:
3978  case DCH_a_d:
3979  case DCH_b_c:
3980  case DCH_AD:
3981  case DCH_BC:
3982  case DCH_ad:
3983  case DCH_bc:
3984  case DCH_MONTH:
3985  case DCH_Month:
3986  case DCH_month:
3987  case DCH_MON:
3988  case DCH_Mon:
3989  case DCH_mon:
3990  case DCH_MM:
3991  case DCH_DAY:
3992  case DCH_Day:
3993  case DCH_day:
3994  case DCH_DY:
3995  case DCH_Dy:
3996  case DCH_dy:
3997  case DCH_DDD:
3998  case DCH_IDDD:
3999  case DCH_DD:
4000  case DCH_D:
4001  case DCH_ID:
4002  case DCH_WW:
4003  case DCH_Q:
4004  case DCH_CC:
4005  case DCH_Y_YYY:
4006  case DCH_YYYY:
4007  case DCH_IYYY:
4008  case DCH_YYY:
4009  case DCH_IYY:
4010  case DCH_YY:
4011  case DCH_IY:
4012  case DCH_Y:
4013  case DCH_I:
4014  case DCH_RM:
4015  case DCH_rm:
4016  case DCH_W:
4017  case DCH_J:
4018  flags |= DCH_DATED;
4019  break;
4020  }
4021  }
4022 
4023  return flags;
4024 }
4025 
4026 /* select a DCHCacheEntry to hold the given format picture */
4027 static DCHCacheEntry *
4028 DCH_cache_getnew(const char *str, bool std)
4029 {
4030  DCHCacheEntry *ent;
4031 
4032  /* Ensure we can advance DCHCounter below */
4034 
4035  /*
4036  * If cache is full, remove oldest entry (or recycle first not-valid one)
4037  */
4039  {
4040  DCHCacheEntry *old = DCHCache[0];
4041 
4042 #ifdef DEBUG_TO_FROM_CHAR
4043  elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
4044 #endif
4045  if (old->valid)
4046  {
4047  for (int i = 1; i < DCH_CACHE_ENTRIES; i++)
4048  {
4049  ent = DCHCache[i];
4050  if (!ent->valid)
4051  {
4052  old = ent;
4053  break;
4054  }
4055  if (ent->age < old->age)
4056  old = ent;
4057  }
4058  }
4059 #ifdef DEBUG_TO_FROM_CHAR
4060  elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
4061 #endif
4062  old->valid = false;
4063  strlcpy(old->str, str, DCH_CACHE_SIZE + 1);
4064  old->age = (++DCHCounter);
4065  /* caller is expected to fill format, then set valid */
4066  return old;
4067  }
4068  else
4069  {
4070 #ifdef DEBUG_TO_FROM_CHAR
4071  elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
4072 #endif
4073  Assert(DCHCache[n_DCHCache] == NULL);
4074  DCHCache[n_DCHCache] = ent = (DCHCacheEntry *)
4076  ent->valid = false;
4077  strlcpy(ent->str, str, DCH_CACHE_SIZE + 1);
4078  ent->std = std;
4079  ent->age = (++DCHCounter);
4080  /* caller is expected to fill format, then set valid */
4081  ++n_DCHCache;
4082  return ent;
4083  }
4084 }
4085 
4086 /* look for an existing DCHCacheEntry matching the given format picture */
4087 static DCHCacheEntry *
4088 DCH_cache_search(const char *str, bool std)
4089 {
4090  /* Ensure we can advance DCHCounter below */
4092 
4093  for (int i = 0; i < n_DCHCache; i++)
4094  {
4095  DCHCacheEntry *ent = DCHCache[i];
4096 
4097  if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std)
4098  {
4099  ent->age = (++DCHCounter);
4100  return ent;
4101  }
4102  }
4103 
4104  return NULL;
4105 }
4106 
4107 /* Find or create a DCHCacheEntry for the given format picture */
4108 static DCHCacheEntry *
4109 DCH_cache_fetch(const char *str, bool std)
4110 {
4111  DCHCacheEntry *ent;
4112 
4113  if ((ent = DCH_cache_search(str, std)) == NULL)
4114  {
4115  /*
4116  * Not in the cache, must run parser and save a new format-picture to
4117  * the cache. Do not mark the cache entry valid until parsing
4118  * succeeds.
4119  */
4120  ent = DCH_cache_getnew(str, std);
4121 
4123  DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4124 
4125  ent->valid = true;
4126  }
4127  return ent;
4128 }
4129 
4130 /*
4131  * Format a date/time or interval into a string according to fmt.
4132  * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char
4133  * for formatting.
4134  */
4135 static text *
4136 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
4137 {
4138  FormatNode *format;
4139  char *fmt_str,
4140  *result;
4141  bool incache;
4142  int fmt_len;
4143  text *res;
4144 
4145  /*
4146  * Convert fmt to C string
4147  */
4148  fmt_str = text_to_cstring(fmt);
4149  fmt_len = strlen(fmt_str);
4150 
4151  /*
4152  * Allocate workspace for result as C string
4153  */
4154  result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
4155  *result = '\0';
4156 
4157  if (fmt_len > DCH_CACHE_SIZE)
4158  {
4159  /*
4160  * Allocate new memory if format picture is bigger than static cache
4161  * and do not use cache (call parser always)
4162  */
4163  incache = false;
4164 
4165  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4166 
4167  parse_format(format, fmt_str, DCH_keywords,
4168  DCH_suff, DCH_index, DCH_FLAG, NULL);
4169  }
4170  else
4171  {
4172  /*
4173  * Use cache buffers
4174  */
4175  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
4176 
4177  incache = true;
4178  format = ent->format;
4179  }
4180 
4181  /* The real work is here */
4182  DCH_to_char(format, is_interval, tmtc, result, collid);
4183 
4184  if (!incache)
4185  pfree(format);
4186 
4187  pfree(fmt_str);
4188 
4189  /* convert C-string result to TEXT format */
4190  res = cstring_to_text(result);
4191 
4192  pfree(result);
4193  return res;
4194 }
4195 
4196 /****************************************************************************
4197  * Public routines
4198  ***************************************************************************/
4199 
4200 /* -------------------
4201  * TIMESTAMP to_char()
4202  * -------------------
4203  */
4204 Datum
4206 {
4208  text *fmt = PG_GETARG_TEXT_PP(1),
4209  *res;
4210  TmToChar tmtc;
4211  struct pg_tm tt;
4212  struct fmt_tm *tm;
4213  int thisdate;
4214 
4215  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4216  PG_RETURN_NULL();
4217 
4218  ZERO_tmtc(&tmtc);
4219  tm = tmtcTm(&tmtc);
4220 
4221  if (timestamp2tm(dt, NULL, &tt, &tmtcFsec(&tmtc), NULL, NULL) != 0)
4222  ereport(ERROR,
4223  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4224  errmsg("timestamp out of range")));
4225 
4226  /* calculate wday and yday, because timestamp2tm doesn't */
4227  thisdate = date2j(tt.tm_year, tt.tm_mon, tt.tm_mday);
4228  tt.tm_wday = (thisdate + 1) % 7;
4229  tt.tm_yday = thisdate - date2j(tt.tm_year, 1, 1) + 1;
4230 
4231  COPY_tm(tm, &tt);
4232 
4233  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4234  PG_RETURN_NULL();
4235 
4237 }
4238 
4239 Datum
4241 {
4243  text *fmt = PG_GETARG_TEXT_PP(1),
4244  *res;
4245  TmToChar tmtc;
4246  int tz;
4247  struct pg_tm tt;
4248  struct fmt_tm *tm;
4249  int thisdate;
4250 
4251  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4252  PG_RETURN_NULL();
4253 
4254  ZERO_tmtc(&tmtc);
4255  tm = tmtcTm(&tmtc);
4256 
4257  if (timestamp2tm(dt, &tz, &tt, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
4258  ereport(ERROR,
4259  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4260  errmsg("timestamp out of range")));
4261 
4262  /* calculate wday and yday, because timestamp2tm doesn't */
4263  thisdate = date2j(tt.tm_year, tt.tm_mon, tt.tm_mday);
4264  tt.tm_wday = (thisdate + 1) % 7;
4265  tt.tm_yday = thisdate - date2j(tt.tm_year, 1, 1) + 1;
4266 
4267  COPY_tm(tm, &tt);
4268 
4269  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4270  PG_RETURN_NULL();
4271 
4273 }
4274 
4275 
4276 /* -------------------
4277  * INTERVAL to_char()
4278  * -------------------
4279  */
4280 Datum
4282 {
4283  Interval *it = PG_GETARG_INTERVAL_P(0);
4284  text *fmt = PG_GETARG_TEXT_PP(1),
4285  *res;
4286  TmToChar tmtc;
4287  struct fmt_tm *tm;
4288  struct pg_itm tt,
4289  *itm = &tt;
4290 
4291  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || INTERVAL_NOT_FINITE(it))
4292  PG_RETURN_NULL();
4293 
4294  ZERO_tmtc(&tmtc);
4295  tm = tmtcTm(&tmtc);
4296 
4297  interval2itm(*it, itm);
4298  tmtc.fsec = itm->tm_usec;
4299  tm->tm_sec = itm->tm_sec;
4300  tm->tm_min = itm->tm_min;
4301  tm->tm_hour = itm->tm_hour;
4302  tm->tm_mday = itm->tm_mday;
4303  tm->tm_mon = itm->tm_mon;
4304  tm->tm_year = itm->tm_year;
4305 
4306  /* wday is meaningless, yday approximates the total span in days */
4308 
4309  if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
4310  PG_RETURN_NULL();
4311 
4313 }
4314 
4315 /* ---------------------
4316  * TO_TIMESTAMP()
4317  *
4318  * Make Timestamp from date_str which is formatted at argument 'fmt'
4319  * ( to_timestamp is reverse to_char() )
4320  * ---------------------
4321  */
4322 Datum
4324 {
4325  text *date_txt = PG_GETARG_TEXT_PP(0);
4326  text *fmt = PG_GETARG_TEXT_PP(1);
4328  Timestamp result;
4329  int tz;
4330  struct pg_tm tm;
4331  struct fmt_tz ftz;
4332  fsec_t fsec;
4333  int fprec;
4334 
4335  do_to_timestamp(date_txt, fmt, collid, false,
4336  &tm, &fsec, &ftz, &fprec, NULL, NULL);
4337 
4338  /* Use the specified time zone, if any. */
4339  if (ftz.has_tz)
4340  tz = ftz.gmtoffset;
4341  else
4343 
4344  if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
4345  ereport(ERROR,
4346  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4347  errmsg("timestamp out of range")));
4348 
4349  /* Use the specified fractional precision, if any. */
4350  if (fprec)
4351  AdjustTimestampForTypmod(&result, fprec, NULL);
4352 
4353  PG_RETURN_TIMESTAMP(result);
4354 }
4355 
4356 /* ----------
4357  * TO_DATE
4358  * Make Date from date_str which is formatted at argument 'fmt'
4359  * ----------
4360  */
4361 Datum
4363 {
4364  text *date_txt = PG_GETARG_TEXT_PP(0);
4365  text *fmt = PG_GETARG_TEXT_PP(1);
4367  DateADT result;
4368  struct pg_tm tm;
4369  struct fmt_tz ftz;
4370  fsec_t fsec;
4371 
4372  do_to_timestamp(date_txt, fmt, collid, false,
4373  &tm, &fsec, &ftz, NULL, NULL, NULL);
4374 
4375  /* Prevent overflow in Julian-day routines */
4377  ereport(ERROR,
4378  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4379  errmsg("date out of range: \"%s\"",
4380  text_to_cstring(date_txt))));
4381 
4383 
4384  /* Now check for just-out-of-range dates */
4385  if (!IS_VALID_DATE(result))
4386  ereport(ERROR,
4387  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4388  errmsg("date out of range: \"%s\"",
4389  text_to_cstring(date_txt))));
4390 
4391  PG_RETURN_DATEADT(result);
4392 }
4393 
4394 /*
4395  * Convert the 'date_txt' input to a datetime type using argument 'fmt'
4396  * as a format string. The collation 'collid' may be used for case-folding
4397  * rules in some cases. 'strict' specifies standard parsing mode.
4398  *
4399  * The actual data type (returned in 'typid', 'typmod') is determined by
4400  * the presence of date/time/zone components in the format string.
4401  *
4402  * When a timezone component is present, the corresponding offset is
4403  * returned in '*tz'.
4404  *
4405  * If escontext points to an ErrorSaveContext, data errors will be reported
4406  * by filling that struct; the caller must test SOFT_ERROR_OCCURRED() to see
4407  * whether an error occurred. Otherwise, errors are thrown.
4408  */
4409 Datum
4410 parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
4411  Oid *typid, int32 *typmod, int *tz,
4412  Node *escontext)
4413 {
4414  struct pg_tm tm;
4415  struct fmt_tz ftz;
4416  fsec_t fsec;
4417  int fprec;
4418  uint32 flags;
4419 
4420  if (!do_to_timestamp(date_txt, fmt, collid, strict,
4421  &tm, &fsec, &ftz, &fprec, &flags, escontext))
4422  return (Datum) 0;
4423 
4424  *typmod = fprec ? fprec : -1; /* fractional part precision */
4425 
4426  if (flags & DCH_DATED)
4427  {
4428  if (flags & DCH_TIMED)
4429  {
4430  if (flags & DCH_ZONED)
4431  {
4432  TimestampTz result;
4433 
4434  if (ftz.has_tz)
4435  {
4436  *tz = ftz.gmtoffset;
4437  }
4438  else
4439  {
4440  /*
4441  * Time zone is present in format string, but not in input
4442  * string. Assuming do_to_timestamp() triggers no error
4443  * this should be possible only in non-strict case.
4444  */
4445  Assert(!strict);
4446 
4447  ereturn(escontext, (Datum) 0,
4448  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4449  errmsg("missing time zone in input string for type timestamptz")));
4450  }
4451 
4452  if (tm2timestamp(&tm, fsec, tz, &result) != 0)
4453  ereturn(escontext, (Datum) 0,
4454  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4455  errmsg("timestamptz out of range")));
4456 
4457  AdjustTimestampForTypmod(&result, *typmod, escontext);
4458 
4459  *typid = TIMESTAMPTZOID;
4460  return TimestampTzGetDatum(result);
4461  }
4462  else
4463  {
4464  Timestamp result;
4465 
4466  if (tm2timestamp(&tm, fsec, NULL, &result) != 0)
4467  ereturn(escontext, (Datum) 0,
4468  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4469  errmsg("timestamp out of range")));
4470 
4471  AdjustTimestampForTypmod(&result, *typmod, escontext);
4472 
4473  *typid = TIMESTAMPOID;
4474  return TimestampGetDatum(result);
4475  }
4476  }
4477  else
4478  {
4479  if (flags & DCH_ZONED)
4480  {
4481  ereturn(escontext, (Datum) 0,
4482  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4483  errmsg("datetime format is zoned but not timed")));
4484  }
4485  else
4486  {
4487  DateADT result;
4488 
4489  /* Prevent overflow in Julian-day routines */
4491  ereturn(escontext, (Datum) 0,
4492  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4493  errmsg("date out of range: \"%s\"",
4494  text_to_cstring(date_txt))));
4495 
4496  result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) -
4498 
4499  /* Now check for just-out-of-range dates */
4500  if (!IS_VALID_DATE(result))
4501  ereturn(escontext, (Datum) 0,
4502  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4503  errmsg("date out of range: \"%s\"",
4504  text_to_cstring(date_txt))));
4505 
4506  *typid = DATEOID;
4507  return DateADTGetDatum(result);
4508  }
4509  }
4510  }
4511  else if (flags & DCH_TIMED)
4512  {
4513  if (flags & DCH_ZONED)
4514  {
4515  TimeTzADT *result = palloc(sizeof(TimeTzADT));
4516 
4517  if (ftz.has_tz)
4518  {
4519  *tz = ftz.gmtoffset;
4520  }
4521  else
4522  {
4523  /*
4524  * Time zone is present in format string, but not in input
4525  * string. Assuming do_to_timestamp() triggers no error this
4526  * should be possible only in non-strict case.
4527  */
4528  Assert(!strict);
4529 
4530  ereturn(escontext, (Datum) 0,
4531  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4532  errmsg("missing time zone in input string for type timetz")));
4533  }
4534 
4535  if (tm2timetz(&tm, fsec, *tz, result) != 0)
4536  ereturn(escontext, (Datum) 0,
4537  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4538  errmsg("timetz out of range")));
4539 
4540  AdjustTimeForTypmod(&result->time, *typmod);
4541 
4542  *typid = TIMETZOID;
4543  return TimeTzADTPGetDatum(result);
4544  }
4545  else
4546  {
4547  TimeADT result;
4548 
4549  if (tm2time(&tm, fsec, &result) != 0)
4550  ereturn(escontext, (Datum) 0,
4551  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4552  errmsg("time out of range")));
4553 
4554  AdjustTimeForTypmod(&result, *typmod);
4555 
4556  *typid = TIMEOID;
4557  return TimeADTGetDatum(result);
4558  }
4559  }
4560  else
4561  {
4562  ereturn(escontext, (Datum) 0,
4563  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4564  errmsg("datetime format is not dated and not timed")));
4565  }
4566 }
4567 
4568 /*
4569  * Parses the datetime format string in 'fmt_str' and returns true if it
4570  * contains a timezone specifier, false if not.
4571  */
4572 bool
4573 datetime_format_has_tz(const char *fmt_str)
4574 {
4575  bool incache;
4576  int fmt_len = strlen(fmt_str);
4577  int result;
4578  FormatNode *format;
4579 
4580  if (fmt_len > DCH_CACHE_SIZE)
4581  {
4582  /*
4583  * Allocate new memory if format picture is bigger than static cache
4584  * and do not use cache (call parser always)
4585  */
4586  incache = false;
4587 
4588  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4589 
4590  parse_format(format, fmt_str, DCH_keywords,
4591  DCH_suff, DCH_index, DCH_FLAG, NULL);
4592  }
4593  else
4594  {
4595  /*
4596  * Use cache buffers
4597  */
4598  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
4599 
4600  incache = true;
4601  format = ent->format;
4602  }
4603 
4604  result = DCH_datetime_type(format);
4605 
4606  if (!incache)
4607  pfree(format);
4608 
4609  return result & DCH_ZONED;
4610 }
4611 
4612 /*
4613  * do_to_timestamp: shared code for to_timestamp and to_date
4614  *
4615  * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm,
4616  * fractional seconds, struct fmt_tz, and fractional precision.
4617  *
4618  * 'collid' identifies the collation to use, if needed.
4619  * 'std' specifies standard parsing mode.
4620  *
4621  * Bit mask of date/time/zone components found in 'fmt' is returned in 'flags',
4622  * if that is not NULL.
4623  *
4624  * Returns true on success, false on failure (if escontext points to an
4625  * ErrorSaveContext; otherwise errors are thrown). Note that currently,
4626  * soft-error behavior is provided for bad data but not bad format.
4627  *
4628  * We parse 'fmt' into a list of FormatNodes, which is then passed to
4629  * DCH_from_char to populate a TmFromChar with the parsed contents of
4630  * 'date_txt'.
4631  *
4632  * The TmFromChar is then analysed and converted into the final results in
4633  * struct 'tm', 'fsec', struct 'tz', and 'fprec'.
4634  */
4635 static bool
4636 do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
4637  struct pg_tm *tm, fsec_t *fsec, struct fmt_tz *tz,
4638  int *fprec, uint32 *flags, Node *escontext)
4639 {
4640  FormatNode *format = NULL;
4641  TmFromChar tmfc;
4642  int fmt_len;
4643  char *date_str;
4644  int fmask;
4645  bool incache = false;
4646 
4647  Assert(tm != NULL);
4648  Assert(fsec != NULL);
4649 
4650  date_str = text_to_cstring(date_txt);
4651 
4652  ZERO_tmfc(&tmfc);
4653  ZERO_tm(tm);
4654  *fsec = 0;
4655  tz->has_tz = false;
4656  if (fprec)
4657  *fprec = 0;
4658  if (flags)
4659  *flags = 0;
4660  fmask = 0; /* bit mask for ValidateDate() */
4661 
4662  fmt_len = VARSIZE_ANY_EXHDR(fmt);
4663 
4664  if (fmt_len)
4665  {
4666  char *fmt_str;
4667 
4668  fmt_str = text_to_cstring(fmt);
4669 
4670  if (fmt_len > DCH_CACHE_SIZE)
4671  {
4672  /*
4673  * Allocate new memory if format picture is bigger than static
4674  * cache and do not use cache (call parser always)
4675  */
4676  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4677 
4679  DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4680  }
4681  else
4682  {
4683  /*
4684  * Use cache buffers
4685  */
4686  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std);
4687 
4688  incache = true;
4689  format = ent->format;
4690  }
4691 
4692 #ifdef DEBUG_TO_FROM_CHAR
4693  /* dump_node(format, fmt_len); */
4694  /* dump_index(DCH_keywords, DCH_index); */
4695 #endif
4696 
4697  DCH_from_char(format, date_str, &tmfc, collid, std, escontext);
4698  pfree(fmt_str);
4699  if (SOFT_ERROR_OCCURRED(escontext))
4700  goto fail;
4701 
4702  if (flags)
4703  *flags = DCH_datetime_type(format);
4704 
4705  if (!incache)
4706  {
4707  pfree(format);
4708  format = NULL;
4709  }
4710  }
4711 
4712  DEBUG_TMFC(&tmfc);
4713 
4714  /*
4715  * Convert to_date/to_timestamp input fields to standard 'tm'
4716  */
4717  if (tmfc.ssss)
4718  {
4719  int x = tmfc.ssss;
4720 
4721  tm->tm_hour = x / SECS_PER_HOUR;
4722  x %= SECS_PER_HOUR;
4723  tm->tm_min = x / SECS_PER_MINUTE;
4724  x %= SECS_PER_MINUTE;
4725  tm->tm_sec = x;
4726  }
4727 
4728  if (tmfc.ss)
4729  tm->tm_sec = tmfc.ss;
4730  if (tmfc.mi)
4731  tm->tm_min = tmfc.mi;
4732  if (tmfc.hh)
4733  tm->tm_hour = tmfc.hh;
4734 
4735  if (tmfc.clock == CLOCK_12_HOUR)
4736  {
4737  if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
4738  {
4739  errsave(escontext,
4740  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4741  errmsg("hour \"%d\" is invalid for the 12-hour clock",
4742  tm->tm_hour),
4743  errhint("Use the 24-hour clock, or give an hour between 1 and 12.")));
4744  goto fail;
4745  }
4746 
4747  if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
4748  tm->tm_hour += HOURS_PER_DAY / 2;
4749  else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
4750  tm->tm_hour = 0;
4751  }
4752 
4753  if (tmfc.year)
4754  {
4755  /*
4756  * If CC and YY (or Y) are provided, use YY as 2 low-order digits for
4757  * the year in the given century. Keep in mind that the 21st century
4758  * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
4759  * 600BC to 501BC.
4760  */
4761  if (tmfc.cc && tmfc.yysz <= 2)
4762  {
4763  if (tmfc.bc)
4764  tmfc.cc = -tmfc.cc;
4765  tm->tm_year = tmfc.year % 100;
4766  if (tm->tm_year)
4767  {
4768  if (tmfc.cc >= 0)
4769  tm->tm_year += (tmfc.cc - 1) * 100;
4770  else
4771  tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
4772  }
4773  else
4774  {
4775  /* find century year for dates ending in "00" */
4776  tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
4777  }
4778  }
4779  else
4780  {
4781  /* If a 4-digit year is provided, we use that and ignore CC. */
4782  tm->tm_year = tmfc.year;
4783  if (tmfc.bc)
4784  tm->tm_year = -tm->tm_year;
4785  /* correct for our representation of BC years */
4786  if (tm->tm_year < 0)
4787  tm->tm_year++;
4788  }
4789  fmask |= DTK_M(YEAR);
4790  }
4791  else if (tmfc.cc)
4792  {
4793  /* use first year of century */
4794  if (tmfc.bc)
4795  tmfc.cc = -tmfc.cc;
4796  if (tmfc.cc >= 0)
4797  /* +1 because 21st century started in 2001 */
4798  tm->tm_year = (tmfc.cc - 1) * 100 + 1;
4799  else
4800  /* +1 because year == 599 is 600 BC */
4801  tm->tm_year = tmfc.cc * 100 + 1;
4802  fmask |= DTK_M(YEAR);
4803  }
4804 
4805  if (tmfc.j)
4806  {
4807  j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4808  fmask |= DTK_DATE_M;
4809  }
4810 
4811  if (tmfc.ww)
4812  {
4813  if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4814  {
4815  /*
4816  * If tmfc.d is not set, then the date is left at the beginning of
4817  * the ISO week (Monday).
4818  */
4819  if (tmfc.d)
4820  isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4821  else
4822  isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4823  fmask |= DTK_DATE_M;
4824  }
4825  else
4826  tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
4827  }
4828 
4829  if (tmfc.w)
4830  tmfc.dd = (tmfc.w - 1) * 7 + 1;
4831  if (tmfc.dd)
4832  {
4833  tm->tm_mday = tmfc.dd;
4834  fmask |= DTK_M(DAY);
4835  }
4836  if (tmfc.mm)
4837  {
4838  tm->tm_mon = tmfc.mm;
4839  fmask |= DTK_M(MONTH);
4840  }
4841 
4842  if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
4843  {
4844  /*
4845  * The month and day field have not been set, so we use the
4846  * day-of-year field to populate them. Depending on the date mode,
4847  * this field may be interpreted as a Gregorian day-of-year, or an ISO
4848  * week date day-of-year.
4849  */
4850 
4851  if (!tm->tm_year && !tmfc.bc)
4852  {
4853  errsave(escontext,
4854  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4855  errmsg("cannot calculate day of year without year information")));
4856  goto fail;
4857  }
4858 
4859  if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4860  {
4861  int j0; /* zeroth day of the ISO year, in Julian */
4862 
4863  j0 = isoweek2j(tm->tm_year, 1) - 1;
4864 
4865  j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4866  fmask |= DTK_DATE_M;
4867  }