PostgreSQL Source Code  git master
formatting.c
Go to the documentation of this file.
1 /* -----------------------------------------------------------------------
2  * formatting.c
3  *
4  * src/backend/utils/adt/formatting.c
5  *
6  *
7  * Portions Copyright (c) 1999-2020, PostgreSQL Global Development Group
8  *
9  *
10  * TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
11  *
12  * The PostgreSQL routines for a timestamp/int/float/numeric formatting,
13  * inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
14  *
15  *
16  * Cache & Memory:
 * The routines use their own internal cache for format pictures.
18  *
19  * The cache uses a static buffer and is persistent across transactions. If
20  * the format-picture is bigger than the cache buffer, the parser is called
21  * always.
22  *
 * NOTE for Number version:
 * Everything in this version is implemented as keywords (suffixes are
 * not used), because a number format picture describes only *one* item
 * (a number). This differs from the timestamp version, where each
 * keyword can have a suffix.
28  *
 * NOTE for Timestamp routines:
 * This module does *not* use the POSIX 'struct tm' type, but rather the
 * PgSQL type, in which tm_mon is one-based (*not* zero-based) and the
 * year is the full year number, *not* an offset from 1900.
 * Module supports AD / BC / AM / PM.
34  *
35  * Supported types for to_char():
36  *
37  * Timestamp, Numeric, int4, int8, float4, float8
38  *
39  * Supported types for reverse conversion:
40  *
41  * Timestamp - to_timestamp()
42  * Date - to_date()
43  * Numeric - to_number()
44  *
45  *
46  * Karel Zak
47  *
48  * TODO
49  * - better number building (formatting) / parsing, now it isn't
50  * ideal code
51  * - use Assert()
52  * - add support for roman number to standard number conversion
53  * - add support for number spelling
54  * - add support for string to string formatting (we must be better
55  * than Oracle :-),
56  * to_char('Hello', 'X X X X X') -> 'H e l l o'
57  *
58  * -----------------------------------------------------------------------
59  */
60 
61 #ifdef DEBUG_TO_FROM_CHAR
62 #define DEBUG_elog_output DEBUG3
63 #endif
64 
65 #include "postgres.h"
66 
67 #include <ctype.h>
68 #include <unistd.h>
69 #include <math.h>
70 #include <float.h>
71 #include <limits.h>
72 
73 /*
74  * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
75  * declare them in <wchar.h>, so include that too.
76  */
77 #include <wchar.h>
78 #ifdef HAVE_WCTYPE_H
79 #include <wctype.h>
80 #endif
81 
82 #ifdef USE_ICU
83 #include <unicode/ustring.h>
84 #endif
85 
86 #include "catalog/pg_collation.h"
87 #include "catalog/pg_type.h"
88 #include "mb/pg_wchar.h"
89 #include "parser/scansup.h"
90 #include "utils/builtins.h"
91 #include "utils/date.h"
92 #include "utils/datetime.h"
93 #include "utils/float.h"
94 #include "utils/formatting.h"
95 #include "utils/int8.h"
96 #include "utils/memutils.h"
97 #include "utils/numeric.h"
98 #include "utils/pg_locale.h"
99 
100 /* ----------
101  * Convenience macros for error handling
102  * ----------
103  *
104  * Two macros below help to handle errors in functions that take
105  * 'bool *have_error' argument. When this argument is not NULL, it's expected
106  * that function will suppress ereports when possible. Instead it should
107  * return some default value and set *have_error flag.
108  *
 * The RETURN_ERROR() macro is intended to wrap ereport() calls. When the
 * have_error function argument is not NULL, then instead of ereport'ing we
 * set the *have_error flag and go to the on_error label. It's expected that
 * after the jump, resources will be freed and some 'default' value returned.
113  *
 * CHECK_ERROR() jumps to the on_error label when have_error is not NULL and
 * the *have_error flag is set. It's meant to be used for an immediate exit
 * from the function on error, right after calling another function that
 * takes a 'bool *have_error' argument.
117  */
/*
 * RETURN_ERROR(throw_error)
 *
 * Expects a 'bool *have_error' variable and an 'on_error' label to be
 * visible at the point of use.  If have_error is a null pointer, the
 * throw_error statement is executed.  Otherwise throw_error is not
 * executed; *have_error is set to true and control jumps to on_error.
 */
#define RETURN_ERROR(throw_error) \
do { \
    if (!have_error) \
    { \
        throw_error; \
    } \
    else \
    { \
        *have_error = true; \
        goto on_error; \
    } \
} while (0)
130 
/*
 * CHECK_ERROR
 *
 * Expects a 'bool *have_error' variable and an 'on_error' label to be
 * visible at the point of use.  Jumps to on_error only when have_error is
 * non-null and the flag it points to is set; otherwise does nothing.
 */
#define CHECK_ERROR \
do { \
    if (have_error) \
    { \
        if (*have_error) \
            goto on_error; \
    } \
} while (0)
136 
137 /* ----------
138  * Routines flags
139  * ----------
140  */
#define DCH_FLAG        (1 << 0)    /* DATE-TIME flag */
#define NUM_FLAG        (1 << 1)    /* NUMBER flag */
#define STD_FLAG        (1 << 2)    /* STANDARD flag */
144 
145 /* ----------
146  * KeyWord Index (ascii from position 32 (' ') to 126 (~))
147  * ----------
148  */
/* Number of slots in a keyword index array: one per character from ' ' up to (not including) '~'. */
#define KeyWord_INDEX_SIZE      ('~' - ' ')
/* 1 if _c lies strictly between ' ' and '~', else 0 (note: _c is evaluated twice). */
#define KeyWord_INDEX_FILTER(_c)    (((_c) > ' ' && (_c) < '~') ? 1 : 0)
151 
152 /* ----------
153  * Maximal length of one node
154  * ----------
155  */
156 #define DCH_MAX_ITEM_SIZ 12 /* max localized day name */
157 #define NUM_MAX_ITEM_SIZ 8 /* roman number (RN has 15 chars) */
158 
159 
160 /* ----------
161  * Format parser structs
162  * ----------
163  */
/* Description of one keyword prefix/suffix modifier. */
typedef struct
{
    const char *name;           /* suffix string */
    int         len;            /* suffix length */
    int         id;             /* used in node->suffix */
    int         type;           /* prefix / postfix */
} KeySuffix;
171 
172 /* ----------
173  * FromCharDateMode
174  * ----------
175  *
176  * This value is used to nominate one of several distinct (and mutually
177  * exclusive) date conventions that a keyword can belong to.
178  */
179 typedef enum
180 {
181  FROM_CHAR_DATE_NONE = 0, /* Value does not affect date mode. */
182  FROM_CHAR_DATE_GREGORIAN, /* Gregorian (day, month, year) style date */
183  FROM_CHAR_DATE_ISOWEEK /* ISO 8601 week date */
185 
186 typedef struct
187 {
188  const char *name;
189  int len;
190  int id;
191  bool is_digit;
193 } KeyWord;
194 
/*
 * One node of a parsed format picture.  Which of the members below is
 * meaningful depends on 'type'.
 */
typedef struct
{
    uint8       type;           /* NODE_TYPE_XXX, see below */
    char        character[MAX_MULTIBYTE_CHAR_LEN + 1];  /* if type is CHAR */
    uint8       suffix;         /* keyword prefix/suffix code, if any */
    const KeyWord *key;         /* if type is ACTION */
} FormatNode;
202 
203 #define NODE_TYPE_END 1
204 #define NODE_TYPE_ACTION 2
205 #define NODE_TYPE_CHAR 3
206 #define NODE_TYPE_SEPARATOR 4
207 #define NODE_TYPE_SPACE 5
208 
209 #define SUFFTYPE_PREFIX 1
210 #define SUFFTYPE_POSTFIX 2
211 
212 #define CLOCK_24_HOUR 0
213 #define CLOCK_12_HOUR 1
214 
215 
216 /* ----------
217  * Full months
218  * ----------
219  */
/* English month names, January first; the list ends with a NULL entry. */
static const char *const months_full[] = {
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
    NULL
};
224 
/* Abbreviated English day names, Sunday first; the list ends with a NULL entry. */
static const char *const days_short[] = {
    "Sun",
    "Mon",
    "Tue",
    "Wed",
    "Thu",
    "Fri",
    "Sat",
    NULL
};
228 
/* ----------
 * AD / BC
 * ----------
 * There is no 0 AD: years go straight from 1 BC to 1 AD.
 *
 * ADJUST_YEAR() yields a year number suitable for display:
 *  - for interval values the year is returned unchanged;
 *  - otherwise a positive year is returned as-is, and a year <= 0 is
 *    turned into the positive value (1 - year), i.e. 0 -> 1, -1 -> 2, ...
 *
 * Note that 'year' is evaluated more than once.
 */
#define ADJUST_YEAR(year, is_interval) \
    ((is_interval) ? (year) : (((year) > 0) ? (year) : (1 - (year))))
237 
/* Era markers: long form (with periods) and standard form, upper and lower case */
#define A_D_STR     "A.D."
#define a_d_STR     "a.d."
#define AD_STR      "AD"
#define ad_STR      "ad"

#define B_C_STR     "B.C."
#define b_c_STR     "b.c."
#define BC_STR      "BC"
#define bc_STR      "bc"

/*
 * AD / BC lookup lists for seq_search.
 *
 * There is one list for the standard form and one for the long form with
 * periods.  Both end with a NULL entry.
 *
 * AD spellings sit at even indexes and BC spellings at odd indexes, so
 * (index of the match) % 2 gives the "is BC" boolean.
 */
static const char *const adbc_strings[] = {
    ad_STR, bc_STR,
    AD_STR, BC_STR,
    NULL
};
static const char *const adbc_strings_long[] = {
    a_d_STR, b_c_STR,
    A_D_STR, B_C_STR,
    NULL
};
260 
261 /* ----------
262  * AM / PM
263  * ----------
264  */
/* Meridiem markers: long form (with periods) and standard form, upper and lower case */
#define A_M_STR     "A.M."
#define a_m_STR     "a.m."
#define AM_STR      "AM"
#define am_STR      "am"

#define P_M_STR     "P.M."
#define p_m_STR     "p.m."
#define PM_STR      "PM"
#define pm_STR      "pm"

/*
 * AM / PM lookup lists for seq_search.
 *
 * There is one list for the standard form and one for the long form with
 * periods.  Both end with a NULL entry.
 *
 * AM spellings sit at even indexes and PM spellings at odd indexes, so
 * (index of the match) % 2 gives the "is PM" boolean.
 */
static const char *const ampm_strings[] = {
    am_STR, pm_STR,
    AM_STR, PM_STR,
    NULL
};
static const char *const ampm_strings_long[] = {
    a_m_STR, p_m_STR,
    A_M_STR, P_M_STR,
    NULL
};
287 
288 /* ----------
289  * Months in roman-numeral
290  * (Must be in reverse order for seq_search (in FROM_CHAR), because
291  * 'VIII' must have higher precedence than 'V')
292  * ----------
293  */
/* Upper-case roman month numerals, December ("XII") first, NULL-terminated. */
static const char *const rm_months_upper[] = {
    "XII", "XI", "X",
    "IX", "VIII", "VII",
    "VI", "V", "IV",
    "III", "II", "I",
    NULL
};

/* Lower-case roman month numerals, in the same order as rm_months_upper. */
static const char *const rm_months_lower[] = {
    "xii", "xi", "x",
    "ix", "viii", "vii",
    "vi", "v", "iv",
    "iii", "ii", "i",
    NULL
};
299 
300 /* ----------
301  * Roman numbers
302  * ----------
303  */
/* Roman numerals for 1..9; entry [n - 1] is the numeral for n.  NULL-terminated. */
static const char *const rm1[] = {
    "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX",
    NULL
};

/* Roman numerals for 10..90; entry [n - 1] is the numeral for n * 10.  NULL-terminated. */
static const char *const rm10[] = {
    "X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC",
    NULL
};

/* Roman numerals for 100..900; entry [n - 1] is the numeral for n * 100.  NULL-terminated. */
static const char *const rm100[] = {
    "C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM",
    NULL
};
307 
308 /* ----------
309  * Ordinal postfixes
310  * ----------
311  */
/* Upper-case ordinal postfixes, NULL-terminated. */
static const char *const numTH[] = {
    "ST",
    "ND",
    "RD",
    "TH",
    NULL
};

/* Lower-case ordinal postfixes, in the same order as numTH. */
static const char *const numth[] = {
    "st",
    "nd",
    "rd",
    "th",
    NULL
};
314 
315 /* ----------
316  * Flags & Options:
317  * ----------
318  */
319 #define TH_UPPER 1
320 #define TH_LOWER 2
321 
322 /* ----------
323  * Number description struct
324  * ----------
325  */
/* Description of a number format picture. */
typedef struct
{
    int         pre;            /* (count) numbers before decimal */
    int         post;           /* (count) numbers after decimal */
    int         lsign;          /* want locale's sign */
    int         flag;           /* number parameters */
    int         pre_lsign_num;  /* tmp value for lsign */
    int         multi;          /* multiplier for 'V' */
    int         zero_start;     /* position of first zero */
    int         zero_end;       /* position of last zero */
    int         need_locale;    /* needs locale information */
} NUMDesc;
338 
339 /* ----------
340  * Flags for NUMBER version
341  * ----------
342  */
#define NUM_F_DECIMAL       0x0002
#define NUM_F_LDECIMAL      0x0004
#define NUM_F_ZERO          0x0008
#define NUM_F_BLANK         0x0010
#define NUM_F_FILLMODE      0x0020
#define NUM_F_LSIGN         0x0040
#define NUM_F_BRACKET       0x0080
#define NUM_F_MINUS         0x0100
#define NUM_F_PLUS          0x0200
#define NUM_F_ROMAN         0x0400
#define NUM_F_MULTI         0x0800
#define NUM_F_PLUS_POST     0x1000
#define NUM_F_MINUS_POST    0x2000
#define NUM_F_EEEE          0x4000

#define NUM_LSIGN_PRE       (-1)
#define NUM_LSIGN_POST      1
#define NUM_LSIGN_NONE      0

/* ----------
 * Tests
 *
 * Each IS_xxx(_f) takes a pointer to a struct with a 'flag' member and
 * yields the masked flag bit itself (zero, or the NUM_F_xxx value), not a
 * normalized 0/1.
 * ----------
 */
#define NUM_FLAG_TEST(_f, _bit) ((_f)->flag & (_bit))

#define IS_DECIMAL(_f)      NUM_FLAG_TEST(_f, NUM_F_DECIMAL)
#define IS_LDECIMAL(_f)     NUM_FLAG_TEST(_f, NUM_F_LDECIMAL)
#define IS_ZERO(_f)         NUM_FLAG_TEST(_f, NUM_F_ZERO)
#define IS_BLANK(_f)        NUM_FLAG_TEST(_f, NUM_F_BLANK)
#define IS_FILLMODE(_f)     NUM_FLAG_TEST(_f, NUM_F_FILLMODE)
#define IS_BRACKET(_f)      NUM_FLAG_TEST(_f, NUM_F_BRACKET)
#define IS_MINUS(_f)        NUM_FLAG_TEST(_f, NUM_F_MINUS)
#define IS_LSIGN(_f)        NUM_FLAG_TEST(_f, NUM_F_LSIGN)
#define IS_PLUS(_f)         NUM_FLAG_TEST(_f, NUM_F_PLUS)
#define IS_ROMAN(_f)        NUM_FLAG_TEST(_f, NUM_F_ROMAN)
#define IS_MULTI(_f)        NUM_FLAG_TEST(_f, NUM_F_MULTI)
#define IS_EEEE(_f)         NUM_FLAG_TEST(_f, NUM_F_EEEE)
378 
379 /* ----------
380  * Format picture cache
381  *
382  * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long;
383  * likewise number format pictures up to NUM_CACHE_SIZE bytes long.
384  *
385  * For simplicity, the cache entries are fixed-size, so they allow for the
386  * worst case of a FormatNode for each byte in the picture string.
387  *
388  * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and
389  * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that
390  * we don't waste too much space by palloc'ing them individually. Be sure
391  * to adjust those macros if you add fields to those structs.
392  *
393  * The max number of entries in each cache is DCH_CACHE_ENTRIES
394  * resp. NUM_CACHE_ENTRIES.
395  * ----------
396  */
397 #define DCH_CACHE_OVERHEAD \
398  MAXALIGN(sizeof(bool) + sizeof(int))
399 #define NUM_CACHE_OVERHEAD \
400  MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc))
401 
402 #define DCH_CACHE_SIZE \
403  ((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
404 #define NUM_CACHE_SIZE \
405  ((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
406 
407 #define DCH_CACHE_ENTRIES 20
408 #define NUM_CACHE_ENTRIES 20
409 
410 typedef struct
411 {
413  char str[DCH_CACHE_SIZE + 1];
414  bool std;
415  bool valid;
416  int age;
417 } DCHCacheEntry;
418 
419 typedef struct
420 {
422  char str[NUM_CACHE_SIZE + 1];
423  bool valid;
424  int age;
426 } NUMCacheEntry;
427 
428 /* global cache for date/time format pictures */
430 static int n_DCHCache = 0; /* current number of entries */
431 static int DCHCounter = 0; /* aging-event counter */
432 
433 /* global cache for number format pictures */
435 static int n_NUMCache = 0; /* current number of entries */
436 static int NUMCounter = 0; /* aging-event counter */
437 
438 /* ----------
439  * For char->date/time conversion
440  * ----------
441  */
442 typedef struct
443 {
445  int hh,
446  pm,
447  mi,
448  ss,
449  ssss,
450  d, /* stored as 1-7, Sunday = 1, 0 means missing */
451  dd,
452  ddd,
453  mm,
454  ms,
455  year,
456  bc,
457  ww,
458  w,
459  cc,
460  j,
461  us,
462  yysz, /* is it YY or YYYY ? */
463  clock, /* 12 or 24 hour clock? */
464  tzsign, /* +1, -1 or 0 if timezone info is absent */
465  tzh,
466  tzm,
467  ff; /* fractional precision */
468 } TmFromChar;
469 
470 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
471 
472 /* ----------
473  * Debug
474  * ----------
475  */
/*
 * DEBUG_TMFC(_X) and DEBUG_TM(_X) dump the members of the struct that _X
 * points to via elog() at level DEBUG_elog_output.  They are only active
 * when DEBUG_TO_FROM_CHAR is defined; otherwise both expand to nothing.
 *
 * DEBUG_TMFC reads the members mode, hh, pm, mi, ss, ssss, d, dd, ddd, mm,
 * ms, year, bc, ww, w, cc, j, us, yysz and clock.
 * DEBUG_TM reads tm_sec, tm_year, tm_min, tm_wday, tm_hour, tm_yday,
 * tm_mday, tm_isdst and tm_mon.
 */
#ifdef DEBUG_TO_FROM_CHAR
#define DEBUG_TMFC(_X) \
    elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d", \
        (_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
        (_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
        (_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
        (_X)->yysz, (_X)->clock)
#define DEBUG_TM(_X) \
    elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nisdst %d\nmon %d\n",\
        (_X)->tm_sec, (_X)->tm_year,\
        (_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\
        (_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
#else
#define DEBUG_TMFC(_X)
#define DEBUG_TM(_X)
#endif
492 
493 /* ----------
494  * Datetime to char conversion
495  * ----------
496  */
/* Input bundle handed to the datetime-to-char conversion (see DCH_to_char). */
typedef struct TmToChar
{
    struct pg_tm tm;            /* classic 'tm' struct */
    fsec_t      fsec;           /* fractional seconds */
    const char *tzn;            /* timezone */
} TmToChar;

/*
 * Member accessors; _X is a pointer to a TmToChar.
 * tmtcTm() yields a pointer to the embedded tm struct, while tmtcTzn() and
 * tmtcFsec() yield the members themselves (usable as assignment targets).
 */
#define tmtcTm(_X)  (&(_X)->tm)
#define tmtcTzn(_X) ((_X)->tzn)
#define tmtcFsec(_X) ((_X)->fsec)
507 
/*
 * ZERO_tm(_X)
 *
 * Reset the tm-style struct that _X points to: every member is cleared,
 * then tm_mday and tm_mon are set to 1 and tm_zone to NULL.
 *
 * The whole struct is cleared with memset() first so that members not
 * named here are zeroed too, instead of being left with whatever the
 * caller's storage happened to contain.
 * NOTE(review): for PostgreSQL's struct pg_tm this is presumably relevant
 * for tm_gmtoff -- confirm against pgtime.h.
 *
 * _X is evaluated several times, so it must not have side effects.
 */
#define ZERO_tm(_X) \
do { \
    memset((_X), 0, sizeof(*(_X))); \
    (_X)->tm_mday = (_X)->tm_mon = 1; \
    (_X)->tm_zone = NULL; \
} while(0)
515 
/*
 * ZERO_tmtc(_X)
 *
 * Reset the TmToChar that _X points to: the timezone pointer becomes NULL,
 * the fractional seconds become 0, and the embedded tm struct is reset
 * with ZERO_tm().
 */
#define ZERO_tmtc(_X) \
do { \
    tmtcTzn(_X) = NULL; \
    tmtcFsec(_X) = 0; \
    ZERO_tm(tmtcTm(_X)); \
} while(0)
522 
523 /*
524  * to_char(time) appears to to_char() as an interval, so this check
525  * is really for interval and time data types.
526  */
527 #define INVALID_FOR_INTERVAL \
528 do { \
529  if (is_interval) \
530  ereport(ERROR, \
531  (errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
532  errmsg("invalid format specification for an interval value"), \
533  errhint("Intervals are not tied to specific calendar dates."))); \
534 } while(0)
535 
536 /*****************************************************************************
537  * KeyWord definitions
538  *****************************************************************************/
539 
540 /* ----------
541  * Suffixes (FormatNode.suffix is an OR of these codes)
542  * ----------
543  */
#define DCH_S_FM    (1 << 0)
#define DCH_S_TH    (1 << 1)
#define DCH_S_th    (1 << 2)
#define DCH_S_SP    (1 << 3)
#define DCH_S_TM    (1 << 4)

/* ----------
 * Suffix tests
 *
 * Each test takes a suffix code (an OR of the DCH_S_xxx bits) and yields
 * 1 or 0, except S_TH_TYPE which yields TH_UPPER when the upper-case TH
 * bit is set and TH_LOWER otherwise.
 * ----------
 */
#define S_THth(_s)  (((_s) & (DCH_S_TH | DCH_S_th)) ? 1 : 0)
#define S_TH(_s)    (((_s) & DCH_S_TH) != 0)
#define S_th(_s)    (((_s) & DCH_S_th) != 0)
#define S_TH_TYPE(_s)   (S_TH(_s) ? TH_UPPER : TH_LOWER)

/* Oracle toggles FM behavior, we don't; see docs. */
#define S_FM(_s)    (((_s) & DCH_S_FM) != 0)
#define S_SP(_s)    (((_s) & DCH_S_SP) != 0)
#define S_TM(_s)    (((_s) & DCH_S_TM) != 0)
563 
564 /* ----------
565  * Suffixes definition for DATE-TIME TO/FROM CHAR
566  * ----------
567  */
568 #define TM_SUFFIX_LEN 2
569 
/*
 * Table of the keyword modifiers for the DATE-TIME version.
 *
 * Each entry gives the modifier text, its length, the DCH_S_xxx code and
 * whether it is written before (SUFFTYPE_PREFIX) or after
 * (SUFFTYPE_POSTFIX) a keyword.  The table ends with an entry whose name
 * is NULL, which is the end marker suff_search() stops at.
 */
static const KeySuffix DCH_suff[] = {
    {"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
    {"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
    {"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX},
    {"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
    {"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
    {"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
    {"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
    /* last */
    {NULL, 0, 0, 0}
};
581 
582 
583 /* ----------
584  * Format-pictures (KeyWord).
585  *
 * The KeyWord arrays are sorted alphabetically, *BUT* keywords sharing a
 * common prefix are sorted from the most complicated to the simplest:
 *
 * (example: "DDD","DD","Day","D" )
 *
 * (This ordering is needed by the sequential search, because the input
 * string has no explicit end of keyword. Which keyword is in "HH12blabla":
 * "HH" or "HH12"? "HH12" must be tried first, because "HH" would also
 * match the string but would be the wrong answer.)
595  *
596  * (!)
 * - The position of a keyword in its array corresponds to a position in the
 *   DCH_poz / NUM_poz enum (the index arrays use enum values as array offsets).
598  * (!)
599  *
600  * For fast search is used the 'int index[]', index is ascii table from position
601  * 32 (' ') to 126 (~), in this index is DCH_ / NUM_ enums for each ASCII
602  * position or -1 if char is not used in the KeyWord. Search example for
603  * string "MM":
604  * 1) see in index to index['M' - 32],
605  * 2) take keywords position (enum DCH_MI) from index
606  * 3) run sequential search in keywords[] from this position
607  *
608  * ----------
609  */
610 
611 typedef enum
612 {
633  DCH_FX, /* global suffix */
722 
723  /* last */
725 } DCH_poz;
726 
727 typedef enum
728 {
765 
766  /* last */
768 } NUM_poz;
769 
770 /* ----------
771  * KeyWords for DATE-TIME version
772  * ----------
773  */
/*
 * Keyword table for the DATE-TIME version.
 *
 * Entries are grouped by first character (marked by the trailing comments)
 * and, within a group, longer look-alike keywords come before shorter ones
 * so that a prefix match picks the longest keyword first.  The table is
 * terminated by an entry whose name is NULL.
 *
 * NOTE(review): the order of the entries must stay in step with the
 * DCH_poz enum and with DCH_index[]; do not insert, remove or reorder
 * entries without updating those as well.
 */
static const KeyWord DCH_keywords[] = {
/* name, len, id, is_digit, date_mode */
    {"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE},   /* A */
    {"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
    {"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
    {"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
    {"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE},   /* B */
    {"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
    {"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE},   /* C */
    {"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE},    /* D */
    {"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
    {"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
    {"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
    {"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
    {"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
    {"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
    {"FF1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE},    /* F */
    {"FF2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
    {"FF3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
    {"FF4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
    {"FF5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
    {"FF6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
    {"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
    {"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE},   /* H */
    {"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
    {"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
    {"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK},    /* I */
    {"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
    {"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
    {"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
    {"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
    {"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
    {"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
    {"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
    {"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE},   /* M */
    {"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
    {"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
    {"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
    {"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
    {"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
    {"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
    {"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE},  /* O */
    {"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE},   /* P */
    {"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
    {"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
    {"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
    {"SSSSS", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE},  /* S */
    {"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
    {"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
    {"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE},    /* T */
    {"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
    {"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
    {"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE},   /* U */
    {"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN},  /* W */
    {"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
    {"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN},    /* Y */
    {"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
    {"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
    {"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
    {"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
    {"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE},   /* a */
    {"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
    {"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
    {"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
    {"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE},   /* b */
    {"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
    {"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE},   /* c */
    {"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE},    /* d */
    {"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
    {"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
    {"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
    {"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
    {"ff1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE},    /* f */
    {"ff2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
    {"ff3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
    {"ff4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
    {"ff5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
    {"ff6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
    {"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
    {"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE},   /* h */
    {"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
    {"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
    {"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK},    /* i */
    {"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
    {"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
    {"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
    {"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
    {"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
    {"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
    {"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
    {"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE},   /* m */
    {"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
    {"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
    {"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
    {"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
    {"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE},   /* p */
    {"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
    {"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
    {"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
    {"sssss", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE},  /* s */
    {"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
    {"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
    {"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE},  /* t */
    {"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE},   /* u */
    {"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN},  /* w */
    {"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
    {"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN},    /* y */
    {"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
    {"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
    {"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
    {"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},

    /* last */
    {NULL, 0, 0, 0, 0}
};
889 
890 /* ----------
891  * KeyWords for NUMBER version
892  *
893  * The is_digit and date_mode fields are not relevant here.
894  * ----------
895  */
/*
 * Keyword table for the NUMBER version.
 *
 * Only name, length and id are given for each entry; the remaining KeyWord
 * members are left to their default (zero) initialization.  Lower-case
 * spellings mostly reuse the id of their upper-case counterpart.  The table
 * is terminated by an entry whose name is NULL.
 *
 * NOTE(review): the order of the entries must stay in step with the
 * NUM_poz enum and with NUM_index[]; do not insert, remove or reorder
 * entries without updating those as well.
 */
static const KeyWord NUM_keywords[] = {
/* name, len, id is in Index */
    {",", 1, NUM_COMMA},        /* , */
    {".", 1, NUM_DEC},          /* . */
    {"0", 1, NUM_0},            /* 0 */
    {"9", 1, NUM_9},            /* 9 */
    {"B", 1, NUM_B},            /* B */
    {"C", 1, NUM_C},            /* C */
    {"D", 1, NUM_D},            /* D */
    {"EEEE", 4, NUM_E},         /* E */
    {"FM", 2, NUM_FM},          /* F */
    {"G", 1, NUM_G},            /* G */
    {"L", 1, NUM_L},            /* L */
    {"MI", 2, NUM_MI},          /* M */
    {"PL", 2, NUM_PL},          /* P */
    {"PR", 2, NUM_PR},
    {"RN", 2, NUM_RN},          /* R */
    {"SG", 2, NUM_SG},          /* S */
    {"SP", 2, NUM_SP},
    {"S", 1, NUM_S},
    {"TH", 2, NUM_TH},          /* T */
    {"V", 1, NUM_V},            /* V */
    {"b", 1, NUM_B},            /* b */
    {"c", 1, NUM_C},            /* c */
    {"d", 1, NUM_D},            /* d */
    {"eeee", 4, NUM_E},         /* e */
    {"fm", 2, NUM_FM},          /* f */
    {"g", 1, NUM_G},            /* g */
    {"l", 1, NUM_L},            /* l */
    {"mi", 2, NUM_MI},          /* m */
    {"pl", 2, NUM_PL},          /* p */
    {"pr", 2, NUM_PR},
    {"rn", 2, NUM_rn},          /* r */
    {"sg", 2, NUM_SG},          /* s */
    {"sp", 2, NUM_SP},
    {"s", 1, NUM_S},
    {"th", 2, NUM_th},          /* t */
    {"v", 1, NUM_V},            /* v */

    /* last */
    {NULL, 0, 0}
};
938 
939 
940 /* ----------
941  * KeyWords index for DATE-TIME version
942  * ----------
943  */
944 static const int DCH_index[KeyWord_INDEX_SIZE] = {
945 /*
946 0 1 2 3 4 5 6 7 8 9
947 */
948  /*---- first 0..31 chars are skipped ----*/
949 
950  -1, -1, -1, -1, -1, -1, -1, -1,
951  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
952  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
953  -1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,
954  DCH_FF1, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF,
956  -1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,
957  DCH_day, -1, DCH_ff1, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,
958  -1, -1, DCH_p_m, DCH_q, DCH_rm, DCH_sssss, DCH_tz, DCH_us, -1, DCH_ww,
959  -1, DCH_y_yyy, -1, -1, -1, -1
960 
961  /*---- chars over 126 are skipped ----*/
962 };
963 
964 /* ----------
965  * KeyWords index for NUMBER version
966  * ----------
967  */
/*
 * First-character index for the NUMBER keywords.
 *
 * Slot i describes the character (' ' + i).  A slot holds either -1, when
 * no keyword starts with that character, or the NUM_poz value used as the
 * offset into the keyword array at which the sequential search for that
 * character starts (see index_seq_search()).
 */
static const int NUM_index[KeyWord_INDEX_SIZE] = {
/*
0 1 2 3 4 5 6 7 8 9
*/
    /*---- first 0..31 chars are skipped ----*/

    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,
    -1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,
    -1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E,
    NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,
    NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,
    NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi,
    -1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,
    -1, -1, -1, -1, -1, -1

    /*---- chars over 126 are skipped ----*/
};
987 
988 /* ----------
989  * Number processor struct
990  * ----------
991  */
992 typedef struct NUMProc
993 {
995  NUMDesc *Num; /* number description */
996 
997  int sign, /* '-' or '+' */
998  sign_wrote, /* was sign write */
999  num_count, /* number of write digits */
1000  num_in, /* is inside number */
1001  num_curr, /* current position in number */
1002  out_pre_spaces, /* spaces before first digit */
1003 
1004  read_dec, /* to_number - was read dec. point */
1005  read_post, /* to_number - number of dec. digit */
1006  read_pre; /* to_number - number non-dec. digit */
1007 
1008  char *number, /* string with number */
1009  *number_p, /* pointer to current number position */
1010  *inout, /* in / out buffer */
1011  *inout_p, /* pointer to current inout position */
1012  *last_relevant, /* last relevant number after decimal point */
1013 
1014  *L_negative_sign, /* Locale */
1015  *L_positive_sign,
1016  *decimal,
1017  *L_thousands_sep,
1018  *L_currency_symbol;
1019 } NUMProc;
1020 
1021 /* Return flags for DCH_from_char() */
1022 #define DCH_DATED 0x01
1023 #define DCH_TIMED 0x02
1024 #define DCH_ZONED 0x04
1025 
1026 /* ----------
1027  * Functions
1028  * ----------
1029  */
1030 static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
1031  const int *index);
1032 static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
1033 static bool is_separator_char(const char *str);
1034 static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
1035 static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1036  const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num);
1037 
1038 static void DCH_to_char(FormatNode *node, bool is_interval,
1039  TmToChar *in, char *out, Oid collid);
1040 static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
1041  Oid collid, bool std, bool *have_error);
1042 
1043 #ifdef DEBUG_TO_FROM_CHAR
1044 static void dump_index(const KeyWord *k, const int *index);
1045 static void dump_node(FormatNode *node, int max);
1046 #endif
1047 
1048 static const char *get_th(char *num, int type);
1049 static char *str_numth(char *dest, char *num, int type);
1050 static int adjust_partial_year_to_2020(int year);
1051 static int strspace_len(const char *str);
1052 static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode,
1053  bool *have_error);
1054 static void from_char_set_int(int *dest, const int value, const FormatNode *node,
1055  bool *have_error);
1056 static int from_char_parse_int_len(int *dest, const char **src, const int len,
1057  FormatNode *node, bool *have_error);
1058 static int from_char_parse_int(int *dest, const char **src, FormatNode *node,
1059  bool *have_error);
1060 static int seq_search_ascii(const char *name, const char *const *array, int *len);
1061 static int seq_search_localized(const char *name, char **array, int *len,
1062  Oid collid);
1063 static int from_char_seq_search(int *dest, const char **src,
1064  const char *const *array,
1065  char **localized_array, Oid collid,
1066  FormatNode *node, bool *have_error);
1067 static void do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
1068  struct pg_tm *tm, fsec_t *fsec, int *fprec,
1069  uint32 *flags, bool *have_error);
1070 static char *fill_str(char *str, int c, int max);
1071 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
1072 static char *int_to_roman(int number);
1073 static void NUM_prepare_locale(NUMProc *Np);
1074 static char *get_last_relevant_decnum(char *num);
1075 static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
1076 static void NUM_numpart_to_char(NUMProc *Np, int id);
1077 static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
1078  char *number, int input_len, int to_char_out_pre_spaces,
1079  int sign, bool is_to_char, Oid collid);
1080 static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std);
1081 static DCHCacheEntry *DCH_cache_search(const char *str, bool std);
1082 static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std);
1083 static NUMCacheEntry *NUM_cache_getnew(const char *str);
1084 static NUMCacheEntry *NUM_cache_search(const char *str);
1085 static NUMCacheEntry *NUM_cache_fetch(const char *str);
1086 
1087 
1088 /* ----------
1089  * Fast sequential search, use index for data selection which
1090  * go to seq. cycle (it is very fast for unwanted strings)
1091  * (can't be used binary search in format parsing)
1092  * ----------
1093  */
1094 static const KeyWord *
1095 index_seq_search(const char *str, const KeyWord *kw, const int *index)
1096 {
1097  int poz;
1098 
1099  if (!KeyWord_INDEX_FILTER(*str))
1100  return NULL;
1101 
1102  if ((poz = *(index + (*str - ' '))) > -1)
1103  {
1104  const KeyWord *k = kw + poz;
1105 
1106  do
1107  {
1108  if (strncmp(str, k->name, k->len) == 0)
1109  return k;
1110  k++;
1111  if (!k->name)
1112  return NULL;
1113  } while (*str == *k->name);
1114  }
1115  return NULL;
1116 }
1117 
1118 static const KeySuffix *
1119 suff_search(const char *str, const KeySuffix *suf, int type)
1120 {
1121  const KeySuffix *s;
1122 
1123  for (s = suf; s->name != NULL; s++)
1124  {
1125  if (s->type != type)
1126  continue;
1127 
1128  if (strncmp(str, s->name, s->len) == 0)
1129  return s;
1130  }
1131  return NULL;
1132 }
1133 
/*
 * Report whether the first byte of 'str' is a separator character, i.e. a
 * printable ASCII character that is neither a letter nor a digit.  Space,
 * control characters, the terminating NUL and non-ASCII bytes all yield
 * false.  Only the first byte is examined.
 */
static bool
is_separator_char(const char *str)
{
	/* ASCII printable character, but not letter or digit */
	return (*str > 0x20 && *str < 0x7F &&
			!(*str >= 'A' && *str <= 'Z') &&
			!(*str >= 'a' && *str <= 'z') &&
			!(*str >= '0' && *str <= '9'));
}
1143 
1144 /* ----------
1145  * Prepare NUMDesc (number description struct) via FormatNode struct
1146  * ----------
1147  */
1148 static void
1150 {
1151  if (n->type != NODE_TYPE_ACTION)
1152  return;
1153 
1154  if (IS_EEEE(num) && n->key->id != NUM_E)
1155  ereport(ERROR,
1156  (errcode(ERRCODE_SYNTAX_ERROR),
1157  errmsg("\"EEEE\" must be the last pattern used")));
1158 
1159  switch (n->key->id)
1160  {
1161  case NUM_9:
1162  if (IS_BRACKET(num))
1163  ereport(ERROR,
1164  (errcode(ERRCODE_SYNTAX_ERROR),
1165  errmsg("\"9\" must be ahead of \"PR\"")));
1166  if (IS_MULTI(num))
1167  {
1168  ++num->multi;
1169  break;
1170  }
1171  if (IS_DECIMAL(num))
1172  ++num->post;
1173  else
1174  ++num->pre;
1175  break;
1176 
1177  case NUM_0:
1178  if (IS_BRACKET(num))
1179  ereport(ERROR,
1180  (errcode(ERRCODE_SYNTAX_ERROR),
1181  errmsg("\"0\" must be ahead of \"PR\"")));
1182  if (!IS_ZERO(num) && !IS_DECIMAL(num))
1183  {
1184  num->flag |= NUM_F_ZERO;
1185  num->zero_start = num->pre + 1;
1186  }
1187  if (!IS_DECIMAL(num))
1188  ++num->pre;
1189  else
1190  ++num->post;
1191 
1192  num->zero_end = num->pre + num->post;
1193  break;
1194 
1195  case NUM_B:
1196  if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
1197  num->flag |= NUM_F_BLANK;
1198  break;
1199 
1200  case NUM_D:
1201  num->flag |= NUM_F_LDECIMAL;
1202  num->need_locale = true;
1203  /* FALLTHROUGH */
1204  case NUM_DEC:
1205  if (IS_DECIMAL(num))
1206  ereport(ERROR,
1207  (errcode(ERRCODE_SYNTAX_ERROR),
1208  errmsg("multiple decimal points")));
1209  if (IS_MULTI(num))
1210  ereport(ERROR,
1211  (errcode(ERRCODE_SYNTAX_ERROR),
1212  errmsg("cannot use \"V\" and decimal point together")));
1213  num->flag |= NUM_F_DECIMAL;
1214  break;
1215 
1216  case NUM_FM:
1217  num->flag |= NUM_F_FILLMODE;
1218  break;
1219 
1220  case NUM_S:
1221  if (IS_LSIGN(num))
1222  ereport(ERROR,
1223  (errcode(ERRCODE_SYNTAX_ERROR),
1224  errmsg("cannot use \"S\" twice")));
1225  if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
1226  ereport(ERROR,
1227  (errcode(ERRCODE_SYNTAX_ERROR),
1228  errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
1229  if (!IS_DECIMAL(num))
1230  {
1231  num->lsign = NUM_LSIGN_PRE;
1232  num->pre_lsign_num = num->pre;
1233  num->need_locale = true;
1234  num->flag |= NUM_F_LSIGN;
1235  }
1236  else if (num->lsign == NUM_LSIGN_NONE)
1237  {
1238  num->lsign = NUM_LSIGN_POST;
1239  num->need_locale = true;
1240  num->flag |= NUM_F_LSIGN;
1241  }
1242  break;
1243 
1244  case NUM_MI:
1245  if (IS_LSIGN(num))
1246  ereport(ERROR,
1247  (errcode(ERRCODE_SYNTAX_ERROR),
1248  errmsg("cannot use \"S\" and \"MI\" together")));
1249  num->flag |= NUM_F_MINUS;
1250  if (IS_DECIMAL(num))
1251  num->flag |= NUM_F_MINUS_POST;
1252  break;
1253 
1254  case NUM_PL:
1255  if (IS_LSIGN(num))
1256  ereport(ERROR,
1257  (errcode(ERRCODE_SYNTAX_ERROR),
1258  errmsg("cannot use \"S\" and \"PL\" together")));
1259  num->flag |= NUM_F_PLUS;
1260  if (IS_DECIMAL(num))
1261  num->flag |= NUM_F_PLUS_POST;
1262  break;
1263 
1264  case NUM_SG:
1265  if (IS_LSIGN(num))
1266  ereport(ERROR,
1267  (errcode(ERRCODE_SYNTAX_ERROR),
1268  errmsg("cannot use \"S\" and \"SG\" together")));
1269  num->flag |= NUM_F_MINUS;
1270  num->flag |= NUM_F_PLUS;
1271  break;
1272 
1273  case NUM_PR:
1274  if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
1275  ereport(ERROR,
1276  (errcode(ERRCODE_SYNTAX_ERROR),
1277  errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
1278  num->flag |= NUM_F_BRACKET;
1279  break;
1280 
1281  case NUM_rn:
1282  case NUM_RN:
1283  num->flag |= NUM_F_ROMAN;
1284  break;
1285 
1286  case NUM_L:
1287  case NUM_G:
1288  num->need_locale = true;
1289  break;
1290 
1291  case NUM_V:
1292  if (IS_DECIMAL(num))
1293  ereport(ERROR,
1294  (errcode(ERRCODE_SYNTAX_ERROR),
1295  errmsg("cannot use \"V\" and decimal point together")));
1296  num->flag |= NUM_F_MULTI;
1297  break;
1298 
1299  case NUM_E:
1300  if (IS_EEEE(num))
1301  ereport(ERROR,
1302  (errcode(ERRCODE_SYNTAX_ERROR),
1303  errmsg("cannot use \"EEEE\" twice")));
1304  if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
1305  IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
1306  IS_ROMAN(num) || IS_MULTI(num))
1307  ereport(ERROR,
1308  (errcode(ERRCODE_SYNTAX_ERROR),
1309  errmsg("\"EEEE\" is incompatible with other formats"),
1310  errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
1311  num->flag |= NUM_F_EEEE;
1312  break;
1313  }
1314 }
1315 
1316 /* ----------
1317  * Format parser, search small keywords and keyword's suffixes, and make
1318  * format-node tree.
1319  *
1320  * for DATE-TIME & NUMBER version
1321  * ----------
1322  */
1323 static void
1324 parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1325  const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num)
1326 {
1327  FormatNode *n;
1328 
1329 #ifdef DEBUG_TO_FROM_CHAR
1330  elog(DEBUG_elog_output, "to_char/number(): run parser");
1331 #endif
1332 
1333  n = node;
1334 
1335  while (*str)
1336  {
1337  int suffix = 0;
1338  const KeySuffix *s;
1339 
1340  /*
1341  * Prefix
1342  */
1343  if ((flags & DCH_FLAG) &&
1344  (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
1345  {
1346  suffix |= s->id;
1347  if (s->len)
1348  str += s->len;
1349  }
1350 
1351  /*
1352  * Keyword
1353  */
1354  if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
1355  {
1356  n->type = NODE_TYPE_ACTION;
1357  n->suffix = suffix;
1358  if (n->key->len)
1359  str += n->key->len;
1360 
1361  /*
1362  * NUM version: Prepare global NUMDesc struct
1363  */
1364  if (flags & NUM_FLAG)
1365  NUMDesc_prepare(Num, n);
1366 
1367  /*
1368  * Postfix
1369  */
1370  if ((flags & DCH_FLAG) && *str &&
1371  (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
1372  {
1373  n->suffix |= s->id;
1374  if (s->len)
1375  str += s->len;
1376  }
1377 
1378  n++;
1379  }
1380  else if (*str)
1381  {
1382  int chlen;
1383 
1384  if (flags & STD_FLAG)
1385  {
1386  /*
1387  * Standard mode, allow only following separators: "-./,':; "
1388  */
1389  if (strchr("-./,':; ", *str) == NULL)
1390  ereport(ERROR,
1391  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
1392  errmsg("invalid datetime format separator: \"%s\"",
1393  pnstrdup(str, pg_mblen(str)))));
1394 
1395  if (*str == ' ')
1396  n->type = NODE_TYPE_SPACE;
1397  else
1399 
1400  n->character[0] = *str;
1401  n->character[1] = '\0';
1402  n->key = NULL;
1403  n->suffix = 0;
1404  n++;
1405  str++;
1406  }
1407  else if (*str == '"')
1408  {
1409  /*
1410  * Process double-quoted literal string, if any
1411  */
1412  str++;
1413  while (*str)
1414  {
1415  if (*str == '"')
1416  {
1417  str++;
1418  break;
1419  }
1420  /* backslash quotes the next character, if any */
1421  if (*str == '\\' && *(str + 1))
1422  str++;
1423  chlen = pg_mblen(str);
1424  n->type = NODE_TYPE_CHAR;
1425  memcpy(n->character, str, chlen);
1426  n->character[chlen] = '\0';
1427  n->key = NULL;
1428  n->suffix = 0;
1429  n++;
1430  str += chlen;
1431  }
1432  }
1433  else
1434  {
1435  /*
1436  * Outside double-quoted strings, backslash is only special if
1437  * it immediately precedes a double quote.
1438  */
1439  if (*str == '\\' && *(str + 1) == '"')
1440  str++;
1441  chlen = pg_mblen(str);
1442 
1443  if ((flags & DCH_FLAG) && is_separator_char(str))
1445  else if (isspace((unsigned char) *str))
1446  n->type = NODE_TYPE_SPACE;
1447  else
1448  n->type = NODE_TYPE_CHAR;
1449 
1450  memcpy(n->character, str, chlen);
1451  n->character[chlen] = '\0';
1452  n->key = NULL;
1453  n->suffix = 0;
1454  n++;
1455  str += chlen;
1456  }
1457  }
1458  }
1459 
1460  n->type = NODE_TYPE_END;
1461  n->suffix = 0;
1462 }
1463 
/* ----------
 * DEBUG: print the FormatNode array, one elog line per node.
 *
 * Nodes node[0] .. node[max] are visited; printing stops early at the
 * first NODE_TYPE_END node.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR

#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
#define DUMP_FM(_suf)	(S_FM(_suf) ? "FM" : " ")

static void
dump_node(FormatNode *node, int max)
{
	int			pos;

	elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");

	for (pos = 0; pos <= max; pos++)
	{
		FormatNode *cur = &node[pos];

		switch (cur->type)
		{
			case NODE_TYPE_ACTION:
				elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
					 pos, cur->key->name,
					 DUMP_THth(cur->suffix), DUMP_FM(cur->suffix));
				break;

			case NODE_TYPE_CHAR:
				elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
					 pos, cur->character);
				break;

			case NODE_TYPE_END:
				elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", pos);
				return;

			default:
				elog(DEBUG_elog_output, "%d:\t unknown NODE!", pos);
				break;
		}
	}
}
#endif							/* DEBUG */
1499 
1500 /*****************************************************************************
1501  * Private utils
1502  *****************************************************************************/
1503 
1504 /* ----------
1505  * Return ST/ND/RD/TH for simple (1..9) numbers
1506  * type --> 0 upper, 1 lower
1507  * ----------
1508  */
1509 static const char *
1510 get_th(char *num, int type)
1511 {
1512  int len = strlen(num),
1513  last,
1514  seclast;
1515 
1516  last = *(num + (len - 1));
1517  if (!isdigit((unsigned char) last))
1518  ereport(ERROR,
1519  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1520  errmsg("\"%s\" is not a number", num)));
1521 
1522  /*
1523  * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
1524  * 'ST/st', 'ND/nd', 'RD/rd', respectively
1525  */
1526  if ((len > 1) && ((seclast = num[len - 2]) == '1'))
1527  last = 0;
1528 
1529  switch (last)
1530  {
1531  case '1':
1532  if (type == TH_UPPER)
1533  return numTH[0];
1534  return numth[0];
1535  case '2':
1536  if (type == TH_UPPER)
1537  return numTH[1];
1538  return numth[1];
1539  case '3':
1540  if (type == TH_UPPER)
1541  return numTH[2];
1542  return numth[2];
1543  default:
1544  if (type == TH_UPPER)
1545  return numTH[3];
1546  return numth[3];
1547  }
1548 }
1549 
/* ----------
 * Turn a number string into an ordinal number string.
 *
 * 'num' is copied to 'dest' (unless they are the same buffer) and the
 * suffix chosen by get_th(num, type) is appended.  'dest' must have room
 * for the suffix; it is also the return value.
 * ----------
 */
static char *
str_numth(char *dest, char *num, int type)
{
	const char *suffix;

	if (num != dest)
		strcpy(dest, num);

	suffix = get_th(num, type);
	strcat(dest, suffix);

	return dest;
}
1563 
1564 /*****************************************************************************
1565  * upper/lower/initcap functions
1566  *****************************************************************************/
1567 
1568 #ifdef USE_ICU
1569 
1570 typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
1571  const UChar *src, int32_t srcLength,
1572  const char *locale,
1573  UErrorCode *pErrorCode);
1574 
1575 static int32_t
1576 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
1577  UChar **buff_dest, UChar *buff_source, int32_t len_source)
1578 {
1579  UErrorCode status;
1580  int32_t len_dest;
1581 
1582  len_dest = len_source; /* try first with same length */
1583  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1584  status = U_ZERO_ERROR;
1585  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1586  mylocale->info.icu.locale, &status);
1587  if (status == U_BUFFER_OVERFLOW_ERROR)
1588  {
1589  /* try again with adjusted length */
1590  pfree(*buff_dest);
1591  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1592  status = U_ZERO_ERROR;
1593  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1594  mylocale->info.icu.locale, &status);
1595  }
1596  if (U_FAILURE(status))
1597  ereport(ERROR,
1598  (errmsg("case conversion failed: %s", u_errorName(status))));
1599  return len_dest;
1600 }
1601 
1602 static int32_t
1603 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
1604  const UChar *src, int32_t srcLength,
1605  const char *locale,
1606  UErrorCode *pErrorCode)
1607 {
1608  return u_strToTitle(dest, destCapacity, src, srcLength,
1609  NULL, locale, pErrorCode);
1610 }
1611 
1612 #endif /* USE_ICU */
1613 
1614 /*
1615  * If the system provides the needed functions for wide-character manipulation
1616  * (which are all standardized by C99), then we implement upper/lower/initcap
1617  * using wide-character functions, if necessary. Otherwise we use the
1618  * traditional <ctype.h> functions, which of course will not work as desired
1619  * in multibyte character sets. Note that in either case we are effectively
1620  * assuming that the database character encoding matches the encoding implied
1621  * by LC_CTYPE.
1622  *
1623  * If the system provides locale_t and associated functions (which are
1624  * standardized by Open Group's XBD), we can support collations that are
1625  * neither default nor C. The code is written to handle both combinations
1626  * of have-wide-characters and have-locale_t, though it's rather unlikely
1627  * a platform would have the latter without the former.
1628  */
1629 
1630 /*
1631  * collation-aware, wide-character-aware lower function
1632  *
1633  * We pass the number of bytes so we can pass varlena and char*
1634  * to this function. The result is a palloc'd, null-terminated string.
1635  */
1636 char *
1637 str_tolower(const char *buff, size_t nbytes, Oid collid)
1638 {
1639  char *result;
1640 
1641  if (!buff)
1642  return NULL;
1643 
1644  /* C/POSIX collations use this path regardless of database encoding */
1645  if (lc_ctype_is_c(collid))
1646  {
1647  result = asc_tolower(buff, nbytes);
1648  }
1649  else
1650  {
1651  pg_locale_t mylocale = 0;
1652 
1653  if (collid != DEFAULT_COLLATION_OID)
1654  {
1655  if (!OidIsValid(collid))
1656  {
1657  /*
1658  * This typically means that the parser could not resolve a
1659  * conflict of implicit collations, so report it that way.
1660  */
1661  ereport(ERROR,
1662  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1663  errmsg("could not determine which collation to use for %s function",
1664  "lower()"),
1665  errhint("Use the COLLATE clause to set the collation explicitly.")));
1666  }
1667  mylocale = pg_newlocale_from_collation(collid);
1668  }
1669 
1670 #ifdef USE_ICU
1671  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1672  {
1673  int32_t len_uchar;
1674  int32_t len_conv;
1675  UChar *buff_uchar;
1676  UChar *buff_conv;
1677 
1678  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1679  len_conv = icu_convert_case(u_strToLower, mylocale,
1680  &buff_conv, buff_uchar, len_uchar);
1681  icu_from_uchar(&result, buff_conv, len_conv);
1682  pfree(buff_uchar);
1683  pfree(buff_conv);
1684  }
1685  else
1686 #endif
1687  {
1689  {
1690  wchar_t *workspace;
1691  size_t curr_char;
1692  size_t result_size;
1693 
1694  /* Overflow paranoia */
1695  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1696  ereport(ERROR,
1697  (errcode(ERRCODE_OUT_OF_MEMORY),
1698  errmsg("out of memory")));
1699 
1700  /* Output workspace cannot have more codes than input bytes */
1701  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1702 
1703  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1704 
1705  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1706  {
1707 #ifdef HAVE_LOCALE_T
1708  if (mylocale)
1709  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1710  else
1711 #endif
1712  workspace[curr_char] = towlower(workspace[curr_char]);
1713  }
1714 
1715  /*
1716  * Make result large enough; case change might change number
1717  * of bytes
1718  */
1719  result_size = curr_char * pg_database_encoding_max_length() + 1;
1720  result = palloc(result_size);
1721 
1722  wchar2char(result, workspace, result_size, mylocale);
1723  pfree(workspace);
1724  }
1725  else
1726  {
1727  char *p;
1728 
1729  result = pnstrdup(buff, nbytes);
1730 
1731  /*
1732  * Note: we assume that tolower_l() will not be so broken as
1733  * to need an isupper_l() guard test. When using the default
1734  * collation, we apply the traditional Postgres behavior that
1735  * forces ASCII-style treatment of I/i, but in non-default
1736  * collations you get exactly what the collation says.
1737  */
1738  for (p = result; *p; p++)
1739  {
1740 #ifdef HAVE_LOCALE_T
1741  if (mylocale)
1742  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1743  else
1744 #endif
1745  *p = pg_tolower((unsigned char) *p);
1746  }
1747  }
1748  }
1749  }
1750 
1751  return result;
1752 }
1753 
1754 /*
1755  * collation-aware, wide-character-aware upper function
1756  *
1757  * We pass the number of bytes so we can pass varlena and char*
1758  * to this function. The result is a palloc'd, null-terminated string.
1759  */
1760 char *
1761 str_toupper(const char *buff, size_t nbytes, Oid collid)
1762 {
1763  char *result;
1764 
1765  if (!buff)
1766  return NULL;
1767 
1768  /* C/POSIX collations use this path regardless of database encoding */
1769  if (lc_ctype_is_c(collid))
1770  {
1771  result = asc_toupper(buff, nbytes);
1772  }
1773  else
1774  {
1775  pg_locale_t mylocale = 0;
1776 
1777  if (collid != DEFAULT_COLLATION_OID)
1778  {
1779  if (!OidIsValid(collid))
1780  {
1781  /*
1782  * This typically means that the parser could not resolve a
1783  * conflict of implicit collations, so report it that way.
1784  */
1785  ereport(ERROR,
1786  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1787  errmsg("could not determine which collation to use for %s function",
1788  "upper()"),
1789  errhint("Use the COLLATE clause to set the collation explicitly.")));
1790  }
1791  mylocale = pg_newlocale_from_collation(collid);
1792  }
1793 
1794 #ifdef USE_ICU
1795  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1796  {
1797  int32_t len_uchar,
1798  len_conv;
1799  UChar *buff_uchar;
1800  UChar *buff_conv;
1801 
1802  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1803  len_conv = icu_convert_case(u_strToUpper, mylocale,
1804  &buff_conv, buff_uchar, len_uchar);
1805  icu_from_uchar(&result, buff_conv, len_conv);
1806  pfree(buff_uchar);
1807  pfree(buff_conv);
1808  }
1809  else
1810 #endif
1811  {
1813  {
1814  wchar_t *workspace;
1815  size_t curr_char;
1816  size_t result_size;
1817 
1818  /* Overflow paranoia */
1819  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1820  ereport(ERROR,
1821  (errcode(ERRCODE_OUT_OF_MEMORY),
1822  errmsg("out of memory")));
1823 
1824  /* Output workspace cannot have more codes than input bytes */
1825  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1826 
1827  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1828 
1829  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1830  {
1831 #ifdef HAVE_LOCALE_T
1832  if (mylocale)
1833  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1834  else
1835 #endif
1836  workspace[curr_char] = towupper(workspace[curr_char]);
1837  }
1838 
1839  /*
1840  * Make result large enough; case change might change number
1841  * of bytes
1842  */
1843  result_size = curr_char * pg_database_encoding_max_length() + 1;
1844  result = palloc(result_size);
1845 
1846  wchar2char(result, workspace, result_size, mylocale);
1847  pfree(workspace);
1848  }
1849  else
1850  {
1851  char *p;
1852 
1853  result = pnstrdup(buff, nbytes);
1854 
1855  /*
1856  * Note: we assume that toupper_l() will not be so broken as
1857  * to need an islower_l() guard test. When using the default
1858  * collation, we apply the traditional Postgres behavior that
1859  * forces ASCII-style treatment of I/i, but in non-default
1860  * collations you get exactly what the collation says.
1861  */
1862  for (p = result; *p; p++)
1863  {
1864 #ifdef HAVE_LOCALE_T
1865  if (mylocale)
1866  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1867  else
1868 #endif
1869  *p = pg_toupper((unsigned char) *p);
1870  }
1871  }
1872  }
1873  }
1874 
1875  return result;
1876 }
1877 
1878 /*
1879  * collation-aware, wide-character-aware initcap function
1880  *
1881  * We pass the number of bytes so we can pass varlena and char*
1882  * to this function. The result is a palloc'd, null-terminated string.
1883  */
1884 char *
1885 str_initcap(const char *buff, size_t nbytes, Oid collid)
1886 {
1887  char *result;
1888  int wasalnum = false;
1889 
1890  if (!buff)
1891  return NULL;
1892 
1893  /* C/POSIX collations use this path regardless of database encoding */
1894  if (lc_ctype_is_c(collid))
1895  {
1896  result = asc_initcap(buff, nbytes);
1897  }
1898  else
1899  {
1900  pg_locale_t mylocale = 0;
1901 
1902  if (collid != DEFAULT_COLLATION_OID)
1903  {
1904  if (!OidIsValid(collid))
1905  {
1906  /*
1907  * This typically means that the parser could not resolve a
1908  * conflict of implicit collations, so report it that way.
1909  */
1910  ereport(ERROR,
1911  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1912  errmsg("could not determine which collation to use for %s function",
1913  "initcap()"),
1914  errhint("Use the COLLATE clause to set the collation explicitly.")));
1915  }
1916  mylocale = pg_newlocale_from_collation(collid);
1917  }
1918 
1919 #ifdef USE_ICU
1920  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1921  {
1922  int32_t len_uchar,
1923  len_conv;
1924  UChar *buff_uchar;
1925  UChar *buff_conv;
1926 
1927  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1928  len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
1929  &buff_conv, buff_uchar, len_uchar);
1930  icu_from_uchar(&result, buff_conv, len_conv);
1931  pfree(buff_uchar);
1932  pfree(buff_conv);
1933  }
1934  else
1935 #endif
1936  {
1938  {
1939  wchar_t *workspace;
1940  size_t curr_char;
1941  size_t result_size;
1942 
1943  /* Overflow paranoia */
1944  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1945  ereport(ERROR,
1946  (errcode(ERRCODE_OUT_OF_MEMORY),
1947  errmsg("out of memory")));
1948 
1949  /* Output workspace cannot have more codes than input bytes */
1950  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1951 
1952  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1953 
1954  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1955  {
1956 #ifdef HAVE_LOCALE_T
1957  if (mylocale)
1958  {
1959  if (wasalnum)
1960  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1961  else
1962  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1963  wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
1964  }
1965  else
1966 #endif
1967  {
1968  if (wasalnum)
1969  workspace[curr_char] = towlower(workspace[curr_char]);
1970  else
1971  workspace[curr_char] = towupper(workspace[curr_char]);
1972  wasalnum = iswalnum(workspace[curr_char]);
1973  }
1974  }
1975 
1976  /*
1977  * Make result large enough; case change might change number
1978  * of bytes
1979  */
1980  result_size = curr_char * pg_database_encoding_max_length() + 1;
1981  result = palloc(result_size);
1982 
1983  wchar2char(result, workspace, result_size, mylocale);
1984  pfree(workspace);
1985  }
1986  else
1987  {
1988  char *p;
1989 
1990  result = pnstrdup(buff, nbytes);
1991 
1992  /*
1993  * Note: we assume that toupper_l()/tolower_l() will not be so
1994  * broken as to need guard tests. When using the default
1995  * collation, we apply the traditional Postgres behavior that
1996  * forces ASCII-style treatment of I/i, but in non-default
1997  * collations you get exactly what the collation says.
1998  */
1999  for (p = result; *p; p++)
2000  {
2001 #ifdef HAVE_LOCALE_T
2002  if (mylocale)
2003  {
2004  if (wasalnum)
2005  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
2006  else
2007  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
2008  wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
2009  }
2010  else
2011 #endif
2012  {
2013  if (wasalnum)
2014  *p = pg_tolower((unsigned char) *p);
2015  else
2016  *p = pg_toupper((unsigned char) *p);
2017  wasalnum = isalnum((unsigned char) *p);
2018  }
2019  }
2020  }
2021  }
2022  }
2023 
2024  return result;
2025 }
2026 
/*
 * ASCII-only lower function
 *
 * Takes a byte count so that both varlena data and plain char* can be
 * passed.  Returns a palloc'd, null-terminated copy in which every byte
 * has been run through pg_ascii_tolower(); returns NULL for a NULL input.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *copy;
	size_t		i;

	if (buff == NULL)
		return NULL;

	copy = pnstrdup(buff, nbytes);

	for (i = 0; copy[i] != '\0'; i++)
		copy[i] = pg_ascii_tolower((unsigned char) copy[i]);

	return copy;
}
2049 
/*
 * ASCII-only upper function
 *
 * Takes a byte count so that both varlena data and plain char* can be
 * passed.  Returns a palloc'd, null-terminated copy in which every byte
 * has been run through pg_ascii_toupper(); returns NULL for a NULL input.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *copy;
	size_t		i;

	if (buff == NULL)
		return NULL;

	copy = pnstrdup(buff, nbytes);

	for (i = 0; copy[i] != '\0'; i++)
		copy[i] = pg_ascii_toupper((unsigned char) copy[i]);

	return copy;
}
2072 
/*
 * ASCII-only initcap function
 *
 * Takes a byte count so that both varlena data and plain char* can be
 * passed.  Returns a palloc'd, null-terminated copy; returns NULL for a
 * NULL input.  A byte is upper-cased when the byte before it (after
 * conversion) was not an ASCII letter or digit, and lower-cased otherwise.
 */
char *
asc_initcap(const char *buff, size_t nbytes)
{
	char	   *copy;
	char	   *cur;
	bool		in_word = false;

	if (buff == NULL)
		return NULL;

	copy = pnstrdup(buff, nbytes);

	for (cur = copy; *cur != '\0'; cur++)
	{
		char		c;

		c = in_word ? pg_ascii_tolower((unsigned char) *cur)
			: pg_ascii_toupper((unsigned char) *cur);
		*cur = c;

		/* we don't trust isalnum() here */
		in_word = ((c >= 'A' && c <= 'Z') ||
				   (c >= 'a' && c <= 'z') ||
				   (c >= '0' && c <= '9'));
	}

	return copy;
}
2107 
2108 /* convenience routines for when the input is null-terminated */
2109 
2110 static char *
2111 str_tolower_z(const char *buff, Oid collid)
2112 {
2113  return str_tolower(buff, strlen(buff), collid);
2114 }
2115 
2116 static char *
2117 str_toupper_z(const char *buff, Oid collid)
2118 {
2119  return str_toupper(buff, strlen(buff), collid);
2120 }
2121 
2122 static char *
2123 str_initcap_z(const char *buff, Oid collid)
2124 {
2125  return str_initcap(buff, strlen(buff), collid);
2126 }
2127 
/* asc_tolower() for a null-terminated input string */
static char *
asc_tolower_z(const char *buff)
{
	size_t		len = strlen(buff);

	return asc_tolower(buff, len);
}
2133 
/* asc_toupper() for a null-terminated input string */
static char *
asc_toupper_z(const char *buff)
{
	size_t		len = strlen(buff);

	return asc_toupper(buff, len);
}
2139 
2140 /* asc_initcap_z is not currently needed */
2141 
2142 
/* ----------
 * Skip a TH / th suffix in FROM_CHAR input
 *
 * When S_THth(_suf) is true, advance 'ptr' over up to two characters
 * (each measured with pg_mblen), stopping early at the end of the string.
 * ----------
 */
#define SKIP_THth(ptr, _suf) \
	do { \
		if (S_THth(_suf) && *(ptr)) \
		{ \
			(ptr) += pg_mblen(ptr); \
			if (*(ptr)) \
				(ptr) += pg_mblen(ptr); \
		} \
	} while (0)
2157 
2158 
#ifdef DEBUG_TO_FROM_CHAR
/* -----------
 * DEBUG: print the keyword index for checking.
 *
 * For every index slot, show the character it stands for and either the
 * keyword it points to or the fact that it is unused (-1), followed by
 * totals of used and free slots.
 * ----------
 */
static void
dump_index(const KeyWord *k, const int *index)
{
	int			slot;
	int			used = 0;
	int			unused = 0;

	elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");

	for (slot = 0; slot < KeyWord_INDEX_SIZE; slot++)
	{
		if (index[slot] == -1)
		{
			unused++;
			elog(DEBUG_elog_output, "\t(%d) %c %d", slot, slot + 32, index[slot]);
			continue;
		}

		elog(DEBUG_elog_output, "\t%c: %s, ", slot + 32, k[index[slot]].name);
		used++;
	}

	elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
		 used, unused);
}
#endif							/* DEBUG */
2191 
2192 /* ----------
2193  * Return true if next format picture is not digit value
2194  * ----------
2195  */
2196 static bool
2198 {
2199  if (n->type == NODE_TYPE_END)
2200  return false;
2201 
2202  if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
2203  return true;
2204 
2205  /*
2206  * Next node
2207  */
2208  n++;
2209 
2210  /* end of format string is treated like a non-digit separator */
2211  if (n->type == NODE_TYPE_END)
2212  return true;
2213 
2214  if (n->type == NODE_TYPE_ACTION)
2215  {
2216  if (n->key->is_digit)
2217  return false;
2218 
2219  return true;
2220  }
2221  else if (n->character[1] == '\0' &&
2222  isdigit((unsigned char) n->character[0]))
2223  return false;
2224 
2225  return true; /* some non-digit input (separator) */
2226 }
2227 
2228 
/*
 * Expand a year given with fewer than four digits to a full year.
 *
 * Values below 1000 are moved to the century that puts them closest to
 * 2020, as spelled out case by case below; values of 1000 and above are
 * returned unchanged.
 */
static int
adjust_partial_year_to_2020(int year)
{
	/*
	 * Adjust all dates toward 2020; this is effectively what happens when we
	 * assume '70' is 1970 and '69' is 2069.
	 */
	/* Force 0-69 into the 2000's */
	if (year < 70)
		return year + 2000;
	/* Force 70-99 into the 1900's */
	else if (year < 100)
		return year + 1900;
	/* Force 100-519 into the 2000's */
	else if (year < 520)
		return year + 2000;
	/* Force 520-999 into the 1000's */
	else if (year < 1000)
		return year + 1000;
	else
		return year;
}
2251 
2252 
/*
 * Count the whitespace characters (per isspace()) at the start of 'str'.
 */
static int
strspace_len(const char *str)
{
	int			n;

	for (n = 0; str[n] != '\0' && isspace((unsigned char) str[n]); n++)
		;

	return n;
}
2265 
2266 /*
2267  * Set the date mode of a from-char conversion.
2268  *
2269  * Puke if the date mode has already been set, and the caller attempts to set
2270  * it to a conflicting mode.
2271  *
2272  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
2273  */
2274 static void
2275 from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode, bool *have_error)
2276 {
2277  if (mode != FROM_CHAR_DATE_NONE)
2278  {
2279  if (tmfc->mode == FROM_CHAR_DATE_NONE)
2280  tmfc->mode = mode;
2281  else if (tmfc->mode != mode)
2283  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2284  errmsg("invalid combination of date conventions"),
2285  errhint("Do not mix Gregorian and ISO week date "
2286  "conventions in a formatting template."))));
2287  }
2288 
2289 on_error:
2290  return;
2291 }
2292 
2293 /*
2294  * Set the integer pointed to by 'dest' to the given value.
2295  *
2296  * Puke if the destination integer has previously been set to some other
2297  * non-zero value.
2298  *
2299  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
2300  */
2301 static void
2302 from_char_set_int(int *dest, const int value, const FormatNode *node,
2303  bool *have_error)
2304 {
2305  if (*dest != 0 && *dest != value)
2307  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2308  errmsg("conflicting values for \"%s\" field in "
2309  "formatting string",
2310  node->key->name),
2311  errdetail("This value contradicts a previous setting "
2312  "for the same field type."))));
2313  *dest = value;
2314 
2315 on_error:
2316  return;
2317 }
2318 
2319 /*
2320  * Read a single integer from the source string, into the int pointed to by
2321  * 'dest'. If 'dest' is NULL, the result is discarded.
2322  *
2323  * In fixed-width mode (the node does not have the FM suffix), consume at most
2324  * 'len' characters. However, any leading whitespace isn't counted in 'len'.
2325  *
2326  * We use strtol() to recover the integer value from the source string, in
2327  * accordance with the given FormatNode.
2328  *
2329  * If the conversion completes successfully, src will have been advanced to
2330  * point at the character immediately following the last character used in the
2331  * conversion.
2332  *
2333  * Return the number of characters consumed.
2334  *
2335  * Note that from_char_parse_int() provides a more convenient wrapper where
2336  * the length of the field is the same as the length of the format keyword (as
2337  * with DD and MI).
2338  *
2339  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
2340  * and -1 is returned.
2341  */
2342 static int
2343 from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node,
2344  bool *have_error)
2345 {
2346  long result;
2347  char copy[DCH_MAX_ITEM_SIZ + 1];
2348  const char *init = *src;
2349  int used;
2350 
2351  /*
2352  * Skip any whitespace before parsing the integer.
2353  */
2354  *src += strspace_len(*src);
2355 
2356  Assert(len <= DCH_MAX_ITEM_SIZ);
2357  used = (int) strlcpy(copy, *src, len + 1);
2358 
2359  if (S_FM(node->suffix) || is_next_separator(node))
2360  {
2361  /*
2362  * This node is in Fill Mode, or the next node is known to be a
2363  * non-digit value, so we just slurp as many characters as we can get.
2364  */
2365  char *endptr;
2366 
2367  errno = 0;
2368  result = strtol(init, &endptr, 10);
2369  *src = endptr;
2370  }
2371  else
2372  {
2373  /*
2374  * We need to pull exactly the number of characters given in 'len' out
2375  * of the string, and convert those.
2376  */
2377  char *last;
2378 
2379  if (used < len)
2381  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2382  errmsg("source string too short for \"%s\" "
2383  "formatting field",
2384  node->key->name),
2385  errdetail("Field requires %d characters, "
2386  "but only %d remain.",
2387  len, used),
2388  errhint("If your source string is not fixed-width, "
2389  "try using the \"FM\" modifier."))));
2390 
2391  errno = 0;
2392  result = strtol(copy, &last, 10);
2393  used = last - copy;
2394 
2395  if (used > 0 && used < len)
2397  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2398  errmsg("invalid value \"%s\" for \"%s\"",
2399  copy, node->key->name),
2400  errdetail("Field requires %d characters, "
2401  "but only %d could be parsed.",
2402  len, used),
2403  errhint("If your source string is not fixed-width, "
2404  "try using the \"FM\" modifier."))));
2405 
2406  *src += used;
2407  }
2408 
2409  if (*src == init)
2411  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2412  errmsg("invalid value \"%s\" for \"%s\"",
2413  copy, node->key->name),
2414  errdetail("Value must be an integer."))));
2415 
2416  if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
2418  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2419  errmsg("value for \"%s\" in source string is out of range",
2420  node->key->name),
2421  errdetail("Value must be in the range %d to %d.",
2422  INT_MIN, INT_MAX))));
2423 
2424  if (dest != NULL)
2425  {
2426  from_char_set_int(dest, (int) result, node, have_error);
2427  CHECK_ERROR;
2428  }
2429 
2430  return *src - init;
2431 
2432 on_error:
2433  return -1;
2434 }
2435 
2436 /*
2437  * Call from_char_parse_int_len(), using the length of the format keyword as
2438  * the expected length of the field.
2439  *
2440  * Don't call this function if the field differs in length from the format
2441  * keyword (as with HH24; the keyword length is 4, but the field length is 2).
2442  * In such cases, call from_char_parse_int_len() instead to specify the
2443  * required length explicitly.
2444  */
2445 static int
2446 from_char_parse_int(int *dest, const char **src, FormatNode *node, bool *have_error)
2447 {
2448  return from_char_parse_int_len(dest, src, node->key->len, node, have_error);
2449 }
2450 
2451 /*
2452  * Sequentially search null-terminated "array" for a case-insensitive match
2453  * to the initial character(s) of "name".
2454  *
2455  * Returns array index of match, or -1 for no match.
2456  *
2457  * *len is set to the length of the match, or 0 for no match.
2458  *
2459  * Case-insensitivity is defined per pg_ascii_tolower, so this is only
2460  * suitable for comparisons to ASCII strings.
2461  */
2462 static int
2463 seq_search_ascii(const char *name, const char *const *array, int *len)
2464 {
2465  unsigned char firstc;
2466  const char *const *a;
2467 
2468  *len = 0;
2469 
2470  /* empty string can't match anything */
2471  if (!*name)
2472  return -1;
2473 
2474  /* we handle first char specially to gain some speed */
2475  firstc = pg_ascii_tolower((unsigned char) *name);
2476 
2477  for (a = array; *a != NULL; a++)
2478  {
2479  const char *p;
2480  const char *n;
2481 
2482  /* compare first chars */
2483  if (pg_ascii_tolower((unsigned char) **a) != firstc)
2484  continue;
2485 
2486  /* compare rest of string */
2487  for (p = *a + 1, n = name + 1;; p++, n++)
2488  {
2489  /* return success if we matched whole array entry */
2490  if (*p == '\0')
2491  {
2492  *len = n - name;
2493  return a - array;
2494  }
2495  /* else, must have another character in "name" ... */
2496  if (*n == '\0')
2497  break;
2498  /* ... and it must match */
2499  if (pg_ascii_tolower((unsigned char) *p) !=
2500  pg_ascii_tolower((unsigned char) *n))
2501  break;
2502  }
2503  }
2504 
2505  return -1;
2506 }
2507 
2508 /*
2509  * Sequentially search an array of possibly non-English words for
2510  * a case-insensitive match to the initial character(s) of "name".
2511  *
2512  * This has the same API as seq_search_ascii(), but we use a more general
2513  * case-folding transformation to achieve case-insensitivity. Case folding
2514  * is done per the rules of the collation identified by "collid".
2515  *
2516  * The array is treated as const, but we don't declare it that way because
2517  * the arrays exported by pg_locale.c aren't const.
2518  */
2519 static int
2520 seq_search_localized(const char *name, char **array, int *len, Oid collid)
2521 {
2522  char **a;
2523  char *upper_name;
2524  char *lower_name;
2525 
2526  *len = 0;
2527 
2528  /* empty string can't match anything */
2529  if (!*name)
2530  return -1;
2531 
2532  /*
2533  * The case-folding processing done below is fairly expensive, so before
2534  * doing that, make a quick pass to see if there is an exact match.
2535  */
2536  for (a = array; *a != NULL; a++)
2537  {
2538  int element_len = strlen(*a);
2539 
2540  if (strncmp(name, *a, element_len) == 0)
2541  {
2542  *len = element_len;
2543  return a - array;
2544  }
2545  }
2546 
2547  /*
2548  * Fold to upper case, then to lower case, so that we can match reliably
2549  * even in languages in which case conversions are not injective.
2550  */
2551  upper_name = str_toupper(unconstify(char *, name), strlen(name), collid);
2552  lower_name = str_tolower(upper_name, strlen(upper_name), collid);
2553  pfree(upper_name);
2554 
2555  for (a = array; *a != NULL; a++)
2556  {
2557  char *upper_element;
2558  char *lower_element;
2559  int element_len;
2560 
2561  /* Likewise upper/lower-case array element */
2562  upper_element = str_toupper(*a, strlen(*a), collid);
2563  lower_element = str_tolower(upper_element, strlen(upper_element),
2564  collid);
2565  pfree(upper_element);
2566  element_len = strlen(lower_element);
2567 
2568  /* Match? */
2569  if (strncmp(lower_name, lower_element, element_len) == 0)
2570  {
2571  *len = element_len;
2572  pfree(lower_element);
2573  pfree(lower_name);
2574  return a - array;
2575  }
2576  pfree(lower_element);
2577  }
2578 
2579  pfree(lower_name);
2580  return -1;
2581 }
2582 
2583 /*
2584  * Perform a sequential search in 'array' (or 'localized_array', if that's
2585  * not NULL) for an entry matching the first character(s) of the 'src'
2586  * string case-insensitively.
2587  *
2588  * The 'array' is presumed to be English words (all-ASCII), but
2589  * if 'localized_array' is supplied, that might be non-English
2590  * so we need a more expensive case-folding transformation
2591  * (which will follow the rules of the collation 'collid').
2592  *
2593  * If a match is found, copy the array index of the match into the integer
2594  * pointed to by 'dest', advance 'src' to the end of the part of the string
2595  * which matched, and return the number of characters consumed.
2596  *
2597  * If the string doesn't match, throw an error if 'have_error' is NULL,
2598  * otherwise set '*have_error' and return -1.
2599  *
2600  * 'node' is used only for error reports: node->key->name identifies the
2601  * field type we were searching for.
2602  */
2603 static int
2604 from_char_seq_search(int *dest, const char **src, const char *const *array,
2605  char **localized_array, Oid collid,
2606  FormatNode *node, bool *have_error)
2607 {
2608  int len;
2609 
2610  if (localized_array == NULL)
2611  *dest = seq_search_ascii(*src, array, &len);
2612  else
2613  *dest = seq_search_localized(*src, localized_array, &len, collid);
2614 
2615  if (len <= 0)
2616  {
2617  /*
2618  * In the error report, truncate the string at the next whitespace (if
2619  * any) to avoid including irrelevant data.
2620  */
2621  char *copy = pstrdup(*src);
2622  char *c;
2623 
2624  for (c = copy; *c; c++)
2625  {
2626  if (scanner_isspace(*c))
2627  {
2628  *c = '\0';
2629  break;
2630  }
2631  }
2632 
2634  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2635  errmsg("invalid value \"%s\" for \"%s\"",
2636  copy, node->key->name),
2637  errdetail("The given value did not match any of "
2638  "the allowed values for this field."))));
2639  }
2640  *src += len;
2641  return len;
2642 
2643 on_error:
2644  return -1;
2645 }
2646 
2647 /* ----------
2648  * Process a TmToChar struct as denoted by a list of FormatNodes.
2649  * The formatted data is written to the string pointed to by 'out'.
2650  * ----------
2651  */
2652 static void
2653 DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
2654 {
2655  FormatNode *n;
2656  char *s;
2657  struct pg_tm *tm = &in->tm;
2658  int i;
2659 
2660  /* cache localized days and months */
2662 
2663  s = out;
2664  for (n = node; n->type != NODE_TYPE_END; n++)
2665  {
2666  if (n->type != NODE_TYPE_ACTION)
2667  {
2668  strcpy(s, n->character);
2669  s += strlen(s);
2670  continue;
2671  }
2672 
2673  switch (n->key->id)
2674  {
2675  case DCH_A_M:
2676  case DCH_P_M:
2677  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2678  ? P_M_STR : A_M_STR);
2679  s += strlen(s);
2680  break;
2681  case DCH_AM:
2682  case DCH_PM:
2683  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2684  ? PM_STR : AM_STR);
2685  s += strlen(s);
2686  break;
2687  case DCH_a_m:
2688  case DCH_p_m:
2689  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2690  ? p_m_STR : a_m_STR);
2691  s += strlen(s);
2692  break;
2693  case DCH_am:
2694  case DCH_pm:
2695  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2696  ? pm_STR : am_STR);
2697  s += strlen(s);
2698  break;
2699  case DCH_HH:
2700  case DCH_HH12:
2701 
2702  /*
2703  * display time as shown on a 12-hour clock, even for
2704  * intervals
2705  */
2706  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2707  tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ? HOURS_PER_DAY / 2 :
2708  tm->tm_hour % (HOURS_PER_DAY / 2));
2709  if (S_THth(n->suffix))
2710  str_numth(s, s, S_TH_TYPE(n->suffix));
2711  s += strlen(s);
2712  break;
2713  case DCH_HH24:
2714  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2715  tm->tm_hour);
2716  if (S_THth(n->suffix))
2717  str_numth(s, s, S_TH_TYPE(n->suffix));
2718  s += strlen(s);
2719  break;
2720  case DCH_MI:
2721  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
2722  tm->tm_min);
2723  if (S_THth(n->suffix))
2724  str_numth(s, s, S_TH_TYPE(n->suffix));
2725  s += strlen(s);
2726  break;
2727  case DCH_SS:
2728  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
2729  tm->tm_sec);
2730  if (S_THth(n->suffix))
2731  str_numth(s, s, S_TH_TYPE(n->suffix));
2732  s += strlen(s);
2733  break;
2734 
2735 #define DCH_to_char_fsec(frac_fmt, frac_val) \
2736  sprintf(s, frac_fmt, (int) (frac_val)); \
2737  if (S_THth(n->suffix)) \
2738  str_numth(s, s, S_TH_TYPE(n->suffix)); \
2739  s += strlen(s)
2740 
2741  case DCH_FF1: /* tenth of second */
2742  DCH_to_char_fsec("%01d", in->fsec / 100000);
2743  break;
2744  case DCH_FF2: /* hundredth of second */
2745  DCH_to_char_fsec("%02d", in->fsec / 10000);
2746  break;
2747  case DCH_FF3:
2748  case DCH_MS: /* millisecond */
2749  DCH_to_char_fsec("%03d", in->fsec / 1000);
2750  break;
2751  case DCH_FF4: /* tenth of a millisecond */
2752  DCH_to_char_fsec("%04d", in->fsec / 100);
2753  break;
2754  case DCH_FF5: /* hundredth of a millisecond */
2755  DCH_to_char_fsec("%05d", in->fsec / 10);
2756  break;
2757  case DCH_FF6:
2758  case DCH_US: /* microsecond */
2759  DCH_to_char_fsec("%06d", in->fsec);
2760  break;
2761 #undef DCH_to_char_fsec
2762  case DCH_SSSS:
2763  sprintf(s, "%d", tm->tm_hour * SECS_PER_HOUR +
2764  tm->tm_min * SECS_PER_MINUTE +
2765  tm->tm_sec);
2766  if (S_THth(n->suffix))
2767  str_numth(s, s, S_TH_TYPE(n->suffix));
2768  s += strlen(s);
2769  break;
2770  case DCH_tz:
2772  if (tmtcTzn(in))
2773  {
2774  /* We assume here that timezone names aren't localized */
2775  char *p = asc_tolower_z(tmtcTzn(in));
2776 
2777  strcpy(s, p);
2778  pfree(p);
2779  s += strlen(s);
2780  }
2781  break;
2782  case DCH_TZ:
2784  if (tmtcTzn(in))
2785  {
2786  strcpy(s, tmtcTzn(in));
2787  s += strlen(s);
2788  }
2789  break;
2790  case DCH_TZH:
2792  sprintf(s, "%c%02d",
2793  (tm->tm_gmtoff >= 0) ? '+' : '-',
2794  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2795  s += strlen(s);
2796  break;
2797  case DCH_TZM:
2799  sprintf(s, "%02d",
2800  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2801  s += strlen(s);
2802  break;
2803  case DCH_OF:
2805  sprintf(s, "%c%0*d",
2806  (tm->tm_gmtoff >= 0) ? '+' : '-',
2807  S_FM(n->suffix) ? 0 : 2,
2808  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2809  s += strlen(s);
2810  if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
2811  {
2812  sprintf(s, ":%02d",
2813  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2814  s += strlen(s);
2815  }
2816  break;
2817  case DCH_A_D:
2818  case DCH_B_C:
2820  strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
2821  s += strlen(s);
2822  break;
2823  case DCH_AD:
2824  case DCH_BC:
2826  strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
2827  s += strlen(s);
2828  break;
2829  case DCH_a_d:
2830  case DCH_b_c:
2832  strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
2833  s += strlen(s);
2834  break;
2835  case DCH_ad:
2836  case DCH_bc:
2838  strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
2839  s += strlen(s);
2840  break;
2841  case DCH_MONTH:
2843  if (!tm->tm_mon)
2844  break;
2845  if (S_TM(n->suffix))
2846  {
2847  char *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid);
2848 
2849  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2850  strcpy(s, str);
2851  else
2852  ereport(ERROR,
2853  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2854  errmsg("localized string format value too long")));
2855  }
2856  else
2857  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2858  asc_toupper_z(months_full[tm->tm_mon - 1]));
2859  s += strlen(s);
2860  break;
2861  case DCH_Month:
2863  if (!tm->tm_mon)
2864  break;
2865  if (S_TM(n->suffix))
2866  {
2867  char *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid);
2868 
2869  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2870  strcpy(s, str);
2871  else
2872  ereport(ERROR,
2873  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2874  errmsg("localized string format value too long")));
2875  }
2876  else
2877  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2878  months_full[tm->tm_mon - 1]);
2879  s += strlen(s);
2880  break;
2881  case DCH_month:
2883  if (!tm->tm_mon)
2884  break;
2885  if (S_TM(n->suffix))
2886  {
2887  char *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid);
2888 
2889  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2890  strcpy(s, str);
2891  else
2892  ereport(ERROR,
2893  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2894  errmsg("localized string format value too long")));
2895  }
2896  else
2897  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2898  asc_tolower_z(months_full[tm->tm_mon - 1]));
2899  s += strlen(s);
2900  break;
2901  case DCH_MON:
2903  if (!tm->tm_mon)
2904  break;
2905  if (S_TM(n->suffix))
2906  {
2907  char *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2908 
2909  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2910  strcpy(s, str);
2911  else
2912  ereport(ERROR,
2913  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2914  errmsg("localized string format value too long")));
2915  }
2916  else
2917  strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
2918  s += strlen(s);
2919  break;
2920  case DCH_Mon:
2922  if (!tm->tm_mon)
2923  break;
2924  if (S_TM(n->suffix))
2925  {
2926  char *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2927 
2928  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2929  strcpy(s, str);
2930  else
2931  ereport(ERROR,
2932  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2933  errmsg("localized string format value too long")));
2934  }
2935  else
2936  strcpy(s, months[tm->tm_mon - 1]);
2937  s += strlen(s);
2938  break;
2939  case DCH_mon:
2941  if (!tm->tm_mon)
2942  break;
2943  if (S_TM(n->suffix))
2944  {
2945  char *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2946 
2947  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2948  strcpy(s, str);
2949  else
2950  ereport(ERROR,
2951  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2952  errmsg("localized string format value too long")));
2953  }
2954  else
2955  strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
2956  s += strlen(s);
2957  break;
2958  case DCH_MM:
2959  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
2960  tm->tm_mon);
2961  if (S_THth(n->suffix))
2962  str_numth(s, s, S_TH_TYPE(n->suffix));
2963  s += strlen(s);
2964  break;
2965  case DCH_DAY:
2967  if (S_TM(n->suffix))
2968  {
2969  char *str = str_toupper_z(localized_full_days[tm->tm_wday], collid);
2970 
2971  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2972  strcpy(s, str);
2973  else
2974  ereport(ERROR,
2975  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2976  errmsg("localized string format value too long")));
2977  }
2978  else
2979  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2980  asc_toupper_z(days[tm->tm_wday]));
2981  s += strlen(s);
2982  break;
2983  case DCH_Day:
2985  if (S_TM(n->suffix))
2986  {
2987  char *str = str_initcap_z(localized_full_days[tm->tm_wday], collid);
2988 
2989  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2990  strcpy(s, str);
2991  else
2992  ereport(ERROR,
2993  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2994  errmsg("localized string format value too long")));
2995  }
2996  else
2997  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2998  days[tm->tm_wday]);
2999  s += strlen(s);
3000  break;
3001  case DCH_day:
3003  if (S_TM(n->suffix))
3004  {
3005  char *str = str_tolower_z(localized_full_days[tm->tm_wday], collid);
3006 
3007  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3008  strcpy(s, str);
3009  else
3010  ereport(ERROR,
3011  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3012  errmsg("localized string format value too long")));
3013  }
3014  else
3015  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3016  asc_tolower_z(days[tm->tm_wday]));
3017  s += strlen(s);
3018  break;
3019  case DCH_DY:
3021  if (S_TM(n->suffix))
3022  {
3023  char *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid);
3024 
3025  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3026  strcpy(s, str);
3027  else
3028  ereport(ERROR,
3029  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3030  errmsg("localized string format value too long")));
3031  }
3032  else
3033  strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
3034  s += strlen(s);
3035  break;
3036  case DCH_Dy:
3038  if (S_TM(n->suffix))
3039  {
3040  char *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid);
3041 
3042  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3043  strcpy(s, str);
3044  else
3045  ereport(ERROR,
3046  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3047  errmsg("localized string format value too long")));
3048  }
3049  else
3050  strcpy(s, days_short[tm->tm_wday]);
3051  s += strlen(s);
3052  break;
3053  case DCH_dy:
3055  if (S_TM(n->suffix))
3056  {
3057  char *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid);
3058 
3059  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3060  strcpy(s, str);
3061  else
3062  ereport(ERROR,
3063  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3064  errmsg("localized string format value too long")));
3065  }
3066  else
3067  strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
3068  s += strlen(s);
3069  break;
3070  case DCH_DDD:
3071  case DCH_IDDD:
3072  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
3073  (n->key->id == DCH_DDD) ?
3074  tm->tm_yday :
3075  date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday));
3076  if (S_THth(n->suffix))
3077  str_numth(s, s, S_TH_TYPE(n->suffix));
3078  s += strlen(s);
3079  break;
3080  case DCH_DD:
3081  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
3082  if (S_THth(n->suffix))
3083  str_numth(s, s, S_TH_TYPE(n->suffix));
3084  s += strlen(s);
3085  break;
3086  case DCH_D:
3088  sprintf(s, "%d", tm->tm_wday + 1);
3089  if (S_THth(n->suffix))
3090  str_numth(s, s, S_TH_TYPE(n->suffix));
3091  s += strlen(s);
3092  break;
3093  case DCH_ID:
3095  sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
3096  if (S_THth(n->suffix))
3097  str_numth(s, s, S_TH_TYPE(n->suffix));
3098  s += strlen(s);
3099  break;
3100  case DCH_WW:
3101  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3102  (tm->tm_yday - 1) / 7 + 1);
3103  if (S_THth(n->suffix))
3104  str_numth(s, s, S_TH_TYPE(n->suffix));
3105  s += strlen(s);
3106  break;
3107  case DCH_IW:
3108  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3109  date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday));
3110  if (S_THth(n->suffix))
3111  str_numth(s, s, S_TH_TYPE(n->suffix));
3112  s += strlen(s);
3113  break;
3114  case DCH_Q:
3115  if (!tm->tm_mon)
3116  break;
3117  sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
3118  if (S_THth(n->suffix))
3119  str_numth(s, s, S_TH_TYPE(n->suffix));
3120  s += strlen(s);
3121  break;
3122  case DCH_CC:
3123  if (is_interval) /* straight calculation */
3124  i = tm->tm_year / 100;
3125  else
3126  {
3127  if (tm->tm_year > 0)
3128  /* Century 20 == 1901 - 2000 */
3129  i = (tm->tm_year - 1) / 100 + 1;
3130  else
3131  /* Century 6BC == 600BC - 501BC */
3132  i = tm->tm_year / 100 - 1;
3133  }
3134  if (i <= 99 && i >= -99)
3135  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
3136  else
3137  sprintf(s, "%d", i);
3138  if (S_THth(n->suffix))
3139  str_numth(s, s, S_TH_TYPE(n->suffix));
3140  s += strlen(s);
3141  break;
3142  case DCH_Y_YYY:
3143  i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
3144  sprintf(s, "%d,%03d", i,
3145  ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
3146  if (S_THth(n->suffix))
3147  str_numth(s, s, S_TH_TYPE(n->suffix));
3148  s += strlen(s);
3149  break;
3150  case DCH_YYYY:
3151  case DCH_IYYY:
3152  sprintf(s, "%0*d",
3153  S_FM(n->suffix) ? 0 :
3154  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
3155  (n->key->id == DCH_YYYY ?
3156  ADJUST_YEAR(tm->tm_year, is_interval) :
3158  tm->tm_mon,
3159  tm->tm_mday),
3160  is_interval)));
3161  if (S_THth(n->suffix))
3162  str_numth(s, s, S_TH_TYPE(n->suffix));
3163  s += strlen(s);
3164  break;
3165  case DCH_YYY:
3166  case DCH_IYY:
3167  sprintf(s, "%0*d",
3168  S_FM(n->suffix) ? 0 :
3169  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
3170  (n->key->id == DCH_YYY ?
3171  ADJUST_YEAR(tm->tm_year, is_interval) :
3173  tm->tm_mon,
3174  tm->tm_mday),
3175  is_interval)) % 1000);
3176  if (S_THth(n->suffix))
3177  str_numth(s, s, S_TH_TYPE(n->suffix));
3178  s += strlen(s);
3179  break;
3180  case DCH_YY:
3181  case DCH_IY:
3182  sprintf(s, "%0*d",
3183  S_FM(n->suffix) ? 0 :
3184  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
3185  (n->key->id == DCH_YY ?
3186  ADJUST_YEAR(tm->tm_year, is_interval) :
3188  tm->tm_mon,
3189  tm->tm_mday),
3190  is_interval)) % 100);
3191  if (S_THth(n->suffix))
3192  str_numth(s, s, S_TH_TYPE(n->suffix));
3193  s += strlen(s);
3194  break;
3195  case DCH_Y:
3196  case DCH_I:
3197  sprintf(s, "%1d",
3198  (n->key->id == DCH_Y ?
3199  ADJUST_YEAR(tm->tm_year, is_interval) :
3201  tm->tm_mon,
3202  tm->tm_mday),
3203  is_interval)) % 10);
3204  if (S_THth(n->suffix))
3205  str_numth(s, s, S_TH_TYPE(n->suffix));
3206  s += strlen(s);
3207  break;
3208  case DCH_RM:
3209  if (!tm->tm_mon)
3210  break;
3211  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
3213  s += strlen(s);
3214  break;
3215  case DCH_rm:
3216  if (!tm->tm_mon)
3217  break;
3218  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
3220  s += strlen(s);
3221  break;
3222  case DCH_W:
3223  sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
3224  if (S_THth(n->suffix))
3225  str_numth(s, s, S_TH_TYPE(n->suffix));
3226  s += strlen(s);
3227  break;
3228  case DCH_J:
3229  sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
3230  if (S_THth(n->suffix))
3231  str_numth(s, s, S_TH_TYPE(n->suffix));
3232  s += strlen(s);
3233  break;
3234  }
3235  }
3236 
3237  *s = '\0';
3238 }
3239 
3240 /*
3241  * Process the string 'in' as denoted by the array of FormatNodes 'node[]'.
3242  * The TmFromChar struct pointed to by 'out' is populated with the results.
3243  *
3244  * 'collid' identifies the collation to use, if needed.
3245  * 'std' specifies standard parsing mode.
3246  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
3247  *
3248  * Note: we currently don't have any to_interval() function, so there
3249  * is no need here for INVALID_FOR_INTERVAL checks.
3250  */
3251 static void
3252 DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
3253  Oid collid, bool std, bool *have_error)
3254 {
3255  FormatNode *n;
3256  const char *s;
3257  int len,
3258  value;
3259  bool fx_mode = std;
3260 
3261  /* number of extra skipped characters (more than given in format string) */
3262  int extra_skip = 0;
3263 
3264  /* cache localized days and months */
3266 
3267  for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
3268  {
3269  /*
3270  * Ignore spaces at the beginning of the string and before fields when
3271  * not in FX (fixed width) mode.
3272  */
3273  if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) &&
3274  (n->type == NODE_TYPE_ACTION || n == node))
3275  {
3276  while (*s != '\0' && isspace((unsigned char) *s))
3277  {
3278  s++;
3279  extra_skip++;
3280  }
3281  }
3282 
3283  if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
3284  {
3285  if (std)
3286  {
3287  /*
3288  * Standard mode requires strict matching between format
3289  * string separators/spaces and input string.
3290  */
3291  Assert(n->character[0] && !n->character[1]);
3292 
3293  if (*s == n->character[0])
3294  s++;
3295  else
3297  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3298  errmsg("unmatched format separator \"%c\"",
3299  n->character[0]))));
3300  }
3301  else if (!fx_mode)
3302  {
3303  /*
3304  * In non FX (fixed format) mode one format string space or
3305  * separator match to one space or separator in input string.
3306  * Or match nothing if there is no space or separator in the
3307  * current position of input string.
3308  */
3309  extra_skip--;
3310  if (isspace((unsigned char) *s) || is_separator_char(s))
3311  {
3312  s++;
3313  extra_skip++;
3314  }
3315  }
3316  else
3317  {
3318  /*
3319  * In FX mode, on format string space or separator we consume
3320  * exactly one character from input string. Notice we don't
3321  * insist that the consumed character match the format's
3322  * character.
3323  */
3324  s += pg_mblen(s);
3325  }
3326  continue;
3327  }
3328  else if (n->type != NODE_TYPE_ACTION)
3329  {
3330  /*
3331  * Text character, so consume one character from input string.
3332  * Notice we don't insist that the consumed character match the
3333  * format's character.
3334  */
3335  if (!fx_mode)
3336  {
3337  /*
3338  * In non FX mode we might have skipped some extra characters
3339  * (more than specified in format string) before. In this
3340  * case we don't skip input string character, because it might
3341  * be part of field.
3342  */
3343  if (extra_skip > 0)
3344  extra_skip--;
3345  else
3346  s += pg_mblen(s);
3347  }
3348  else
3349  {
3350  s += pg_mblen(s);
3351  }
3352  continue;
3353  }
3354 
3355  from_char_set_mode(out, n->key->date_mode, have_error);
3356  CHECK_ERROR;
3357 
3358  switch (n->key->id)
3359  {
3360  case DCH_FX:
3361  fx_mode = true;
3362  break;
3363  case DCH_A_M:
3364  case DCH_P_M:
3365  case DCH_a_m:
3366  case DCH_p_m:
3368  NULL, InvalidOid,
3369  n, have_error);
3370  CHECK_ERROR;
3371  from_char_set_int(&out->pm, value % 2, n, have_error);
3372  CHECK_ERROR;
3373  out->clock = CLOCK_12_HOUR;
3374  break;
3375  case DCH_AM:
3376  case DCH_PM:
3377  case DCH_am:
3378  case DCH_pm:
3379  from_char_seq_search(&value, &s, ampm_strings,
3380  NULL, InvalidOid,
3381  n, have_error);
3382  CHECK_ERROR;
3383  from_char_set_int(&out->pm, value % 2, n, have_error);
3384  CHECK_ERROR;
3385  out->clock = CLOCK_12_HOUR;
3386  break;
3387  case DCH_HH:
3388  case DCH_HH12:
3389  from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
3390  CHECK_ERROR;
3391  out->clock = CLOCK_12_HOUR;
3392  SKIP_THth(s, n->suffix);
3393  break;
3394  case DCH_HH24:
3395  from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
3396  CHECK_ERROR;
3397  SKIP_THth(s, n->suffix);
3398  break;
3399  case DCH_MI:
3400  from_char_parse_int(&out->mi, &s, n, have_error);
3401  CHECK_ERROR;
3402  SKIP_THth(s, n->suffix);
3403  break;
3404  case DCH_SS:
3405  from_char_parse_int(&out->ss, &s, n, have_error);
3406  CHECK_ERROR;
3407  SKIP_THth(s, n->suffix);
3408  break;
3409  case DCH_MS: /* millisecond */
3410  len = from_char_parse_int_len(&out->ms, &s, 3, n, have_error);
3411  CHECK_ERROR;
3412 
3413  /*
3414  * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
3415  */
3416  out->ms *= len == 1 ? 100 :
3417  len == 2 ? 10 : 1;
3418 
3419  SKIP_THth(s, n->suffix);
3420  break;
3421  case DCH_FF1:
3422  case DCH_FF2:
3423  case DCH_FF3:
3424  case DCH_FF4:
3425  case DCH_FF5:
3426  case DCH_FF6:
3427  out->ff = n->key->id - DCH_FF1 + 1;
3428  /* fall through */
3429  case DCH_US: /* microsecond */
3430  len = from_char_parse_int_len(&out->us, &s,
3431  n->key->id == DCH_US ? 6 :
3432  out->ff, n, have_error);
3433  CHECK_ERROR;
3434 
3435  out->us *= len == 1 ? 100000 :
3436  len == 2 ? 10000 :
3437  len == 3 ? 1000 :
3438  len == 4 ? 100 :
3439  len == 5 ? 10 : 1;
3440 
3441  SKIP_THth(s, n->suffix);
3442  break;
3443  case DCH_SSSS:
3444  from_char_parse_int(&out->ssss, &s, n, have_error);
3445  CHECK_ERROR;
3446  SKIP_THth(s, n->suffix);
3447  break;
3448  case DCH_tz:
3449  case DCH_TZ:
3450  case DCH_OF:
3452  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3453  errmsg("formatting field \"%s\" is only supported in to_char",
3454  n->key->name))));
3455  CHECK_ERROR;
3456  break;
3457  case DCH_TZH:
3458 
3459  /*
3460  * Value of TZH might be negative. And the issue is that we
3461  * might swallow minus sign as the separator. So, if we have
3462  * skipped more characters than specified in the format
3463  * string, then we consider prepending last skipped minus to
3464  * TZH.
3465  */
3466  if (*s == '+' || *s == '-' || *s == ' ')
3467  {
3468  out->tzsign = *s == '-' ? -1 : +1;
3469  s++;
3470  }
3471  else
3472  {
3473  if (extra_skip > 0 && *(s - 1) == '-')
3474  out->tzsign = -1;
3475  else
3476  out->tzsign = +1;
3477  }
3478 
3479  from_char_parse_int_len(&out->tzh, &s, 2, n, have_error);
3480  CHECK_ERROR;
3481  break;
3482  case DCH_TZM:
3483  /* assign positive timezone sign if TZH was not seen before */
3484  if (!out->tzsign)
3485  out->tzsign = +1;
3486  from_char_parse_int_len(&out->tzm, &s, 2, n, have_error);
3487  CHECK_ERROR;
3488  break;
3489  case DCH_A_D:
3490  case DCH_B_C:
3491  case DCH_a_d:
3492  case DCH_b_c:
3494  NULL, InvalidOid,
3495  n, have_error);
3496  CHECK_ERROR;
3497  from_char_set_int(&out->bc, value % 2, n, have_error);
3498  CHECK_ERROR;
3499  break;
3500  case DCH_AD:
3501  case DCH_BC:
3502  case DCH_ad:
3503  case DCH_bc:
3504  from_char_seq_search(&value, &s, adbc_strings,
3505  NULL, InvalidOid,
3506  n, have_error);
3507  CHECK_ERROR;
3508  from_char_set_int(&out->bc, value % 2, n, have_error);
3509  CHECK_ERROR;
3510  break;
3511  case DCH_MONTH:
3512  case DCH_Month:
3513  case DCH_month:
3514  from_char_seq_search(&value, &s, months_full,
3515  S_TM(n->suffix) ? localized_full_months : NULL,
3516  collid,
3517  n, have_error);
3518  CHECK_ERROR;
3519  from_char_set_int(&out->mm, value + 1, n, have_error);
3520  CHECK_ERROR;
3521  break;
3522  case DCH_MON:
3523  case DCH_Mon:
3524  case DCH_mon:
3525  from_char_seq_search(&value, &s, months,
3526  S_TM(n->suffix) ? localized_abbrev_months : NULL,
3527  collid,
3528  n, have_error);
3529  CHECK_ERROR;
3530  from_char_set_int(&out->mm, value + 1, n, have_error);
3531  CHECK_ERROR;
3532  break;
3533  case DCH_MM:
3534  from_char_parse_int(&out->mm, &s, n, have_error);
3535  CHECK_ERROR;
3536  SKIP_THth(s, n->suffix);
3537  break;
3538  case DCH_DAY:
3539  case DCH_Day:
3540  case DCH_day:
3541  from_char_seq_search(&value, &s, days,
3542  S_TM(n->suffix) ? localized_full_days : NULL,
3543  collid,
3544  n, have_error);
3545  CHECK_ERROR;
3546  from_char_set_int(&out->d, value, n, have_error);
3547  CHECK_ERROR;
3548  out->d++;
3549  break;
3550  case DCH_DY:
3551  case DCH_Dy:
3552  case DCH_dy:
3553  from_char_seq_search(&value, &s, days_short,
3554  S_TM(n->suffix) ? localized_abbrev_days : NULL,
3555  collid,
3556  n, have_error);
3557  CHECK_ERROR;
3558  from_char_set_int(&out->d, value, n, have_error);
3559  CHECK_ERROR;
3560  out->d++;
3561  break;
3562  case DCH_DDD:
3563  from_char_parse_int(&out->ddd, &s, n, have_error);
3564  CHECK_ERROR;
3565  SKIP_THth(s, n->suffix);
3566  break;
3567  case DCH_IDDD:
3568  from_char_parse_int_len(&out->ddd, &s, 3, n, have_error);
3569  CHECK_ERROR;
3570  SKIP_THth(s, n->suffix);
3571  break;
3572  case DCH_DD:
3573  from_char_parse_int(&out->dd, &s, n, have_error);
3574  CHECK_ERROR;
3575  SKIP_THth(s, n->suffix);
3576  break;
3577  case DCH_D:
3578  from_char_parse_int(&out->d, &s, n, have_error);
3579  CHECK_ERROR;
3580  SKIP_THth(s, n->suffix);
3581  break;
3582  case DCH_ID:
3583  from_char_parse_int_len(&out->d, &s, 1, n, have_error);
3584  CHECK_ERROR;
3585  /* Shift numbering to match Gregorian where Sunday = 1 */
3586  if (++out->d > 7)
3587  out->d = 1;
3588  SKIP_THth(s, n->suffix);
3589  break;
3590  case DCH_WW:
3591  case DCH_IW:
3592  from_char_parse_int(&out->ww, &s, n, have_error);
3593  CHECK_ERROR;
3594  SKIP_THth(s, n->suffix);
3595  break;
3596  case DCH_Q:
3597 
3598  /*
3599  * We ignore 'Q' when converting to date because it is unclear
3600  * which date in the quarter to use, and some people specify
3601  * both quarter and month, so if it was honored it might
3602  * conflict with the supplied month. That is also why we don't
3603  * throw an error.
3604  *
3605  * We still parse the source string for an integer, but it
3606  * isn't stored anywhere in 'out'.
3607  */
3608  from_char_parse_int((int *) NULL, &s, n, have_error);
3609  CHECK_ERROR;
3610  SKIP_THth(s, n->suffix);
3611  break;
3612  case DCH_CC:
3613  from_char_parse_int(&out->cc, &s, n, have_error);
3614  CHECK_ERROR;
3615  SKIP_THth(s, n->suffix);
3616  break;
3617  case DCH_Y_YYY:
3618  {
3619  int matched,
3620  years,
3621  millennia,
3622  nch;
3623 
3624  matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
3625  if (matched < 2)
3627  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3628  errmsg("invalid input string for \"Y,YYY\""))));
3629  years += (millennia * 1000);
3630  from_char_set_int(&out->year, years, n, have_error);
3631  CHECK_ERROR;
3632  out->yysz = 4;
3633  s += nch;
3634  SKIP_THth(s, n->suffix);
3635  }
3636  break;
3637  case DCH_YYYY:
3638  case DCH_IYYY:
3639  from_char_parse_int(&out->year, &s, n, have_error);
3640  CHECK_ERROR;
3641  out->yysz = 4;
3642  SKIP_THth(s, n->suffix);
3643  break;
3644  case DCH_YYY:
3645  case DCH_IYY:
3646  len = from_char_parse_int(&out->year, &s, n, have_error);
3647  CHECK_ERROR;
3648  if (len < 4)
3649  out->year = adjust_partial_year_to_2020(out->year);
3650  out->yysz = 3;
3651  SKIP_THth(s, n->suffix);
3652  break;
3653  case DCH_YY:
3654  case DCH_IY:
3655  len = from_char_parse_int(&out->year, &s, n, have_error);
3656  CHECK_ERROR;
3657  if (len < 4)
3658  out->year = adjust_partial_year_to_2020(out->year);
3659  out->yysz = 2;
3660  SKIP_THth(s, n->suffix);
3661  break;
3662  case DCH_Y:
3663  case DCH_I:
3664  len = from_char_parse_int(&out->year, &s, n, have_error);
3665  CHECK_ERROR;
3666  if (len < 4)
3667  out->year = adjust_partial_year_to_2020(out->year);
3668  out->yysz = 1;
3669  SKIP_THth(s, n->suffix);
3670  break;
3671  case DCH_RM:
3672  case DCH_rm:
3674  NULL, InvalidOid,
3675  n, have_error);
3676  CHECK_ERROR;
3677  from_char_set_int(&out->mm, MONTHS_PER_YEAR - value,
3678  n, have_error);
3679  CHECK_ERROR;
3680  break;
3681  case DCH_W:
3682  from_char_parse_int(&out->w, &s, n, have_error);
3683  CHECK_ERROR;
3684  SKIP_THth(s, n->suffix);
3685  break;
3686  case DCH_J:
3687  from_char_parse_int(&out->j, &s, n, have_error);
3688  CHECK_ERROR;
3689  SKIP_THth(s, n->suffix);
3690  break;
3691  }
3692 
3693  /* Ignore all spaces after fields */
3694  if (!fx_mode)
3695  {
3696  extra_skip = 0;
3697  while (*s != '\0' && isspace((unsigned char) *s))
3698  {
3699  s++;
3700  extra_skip++;
3701  }
3702  }
3703  }
3704 
3705  /*
3706  * Standard parsing mode doesn't allow unmatched format patterns or
3707  * trailing characters in the input string.
3708  */
3709  if (std)
3710  {
3711  if (n->type != NODE_TYPE_END)
3713  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3714  errmsg("input string is too short for datetime format"))));
3715 
3716  while (*s != '\0' && isspace((unsigned char) *s))
3717  s++;
3718 
3719  if (*s != '\0')
3721  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3722  errmsg("trailing characters remain in input string "
3723  "after datetime format"))));
3724  }
3725 
3726 on_error:
3727  return;
3728 }
3729 
3730 /*
3731  * The invariant for DCH cache entry management is that DCHCounter is equal
3732  * to the maximum age value among the existing entries, and we increment it
3733  * whenever an access occurs. If we approach overflow, deal with that by
3734  * halving all the age values, so that we retain a fairly accurate idea of
3735  * which entries are oldest.
3736  */
3737 static inline void
3739 {
3740  if (DCHCounter >= (INT_MAX - 1))
3741  {
3742  for (int i = 0; i < n_DCHCache; i++)
3743  DCHCache[i]->age >>= 1;
3744  DCHCounter >>= 1;
3745  }
3746 }
3747 
3748 /*
3749  * Get mask of date/time/zone components present in format nodes.
3750  *
3751  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
3752  */
3753 static int
3754 DCH_datetime_type(FormatNode *node, bool *have_error)
3755 {
3756  FormatNode *n;
3757  int flags = 0;
3758 
3759  for (n = node; n->type != NODE_TYPE_END; n++)
3760  {
3761  if (n->type != NODE_TYPE_ACTION)
3762  continue;
3763 
3764  switch (n->key->id)
3765  {
3766  case DCH_FX:
3767  break;
3768  case DCH_A_M:
3769  case DCH_P_M:
3770  case DCH_a_m:
3771  case DCH_p_m:
3772  case DCH_AM:
3773  case DCH_PM:
3774  case DCH_am:
3775  case DCH_pm:
3776  case DCH_HH:
3777  case DCH_HH12:
3778  case DCH_HH24:
3779  case DCH_MI:
3780  case DCH_SS:
3781  case DCH_MS: /* millisecond */
3782  case DCH_US: /* microsecond */
3783  case DCH_FF1:
3784  case DCH_FF2:
3785  case DCH_FF3:
3786  case DCH_FF4:
3787  case DCH_FF5:
3788  case DCH_FF6:
3789  case DCH_SSSS:
3790  flags |= DCH_TIMED;
3791  break;
3792  case DCH_tz:
3793  case DCH_TZ:
3794  case DCH_OF:
3796  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3797  errmsg("formatting field \"%s\" is only supported in to_char",
3798  n->key->name))));
3799  flags |= DCH_ZONED;
3800  break;
3801  case DCH_TZH:
3802  case DCH_TZM:
3803  flags |= DCH_ZONED;
3804  break;
3805  case DCH_A_D:
3806  case DCH_B_C:
3807  case DCH_a_d:
3808  case DCH_b_c:
3809  case DCH_AD:
3810  case DCH_BC:
3811  case DCH_ad:
3812  case DCH_bc:
3813  case DCH_MONTH:
3814  case DCH_Month:
3815  case DCH_month:
3816  case DCH_MON:
3817  case DCH_Mon:
3818  case DCH_mon:
3819  case DCH_MM:
3820  case DCH_DAY:
3821  case DCH_Day:
3822  case DCH_day:
3823  case DCH_DY:
3824  case DCH_Dy:
3825  case DCH_dy:
3826  case DCH_DDD:
3827  case DCH_IDDD:
3828  case DCH_DD:
3829  case DCH_D:
3830  case DCH_ID:
3831  case DCH_WW:
3832  case DCH_Q:
3833  case DCH_CC:
3834  case DCH_Y_YYY:
3835  case DCH_YYYY:
3836  case DCH_IYYY:
3837  case DCH_YYY:
3838  case DCH_IYY:
3839  case DCH_YY:
3840  case DCH_IY:
3841  case DCH_Y:
3842  case DCH_I:
3843  case DCH_RM:
3844  case DCH_rm:
3845  case DCH_W:
3846  case DCH_J:
3847  flags |= DCH_DATED;
3848  break;
3849  }
3850  }
3851 
3852 on_error:
3853  return flags;
3854 }
3855 
3856 /* select a DCHCacheEntry to hold the given format picture */
3857 static DCHCacheEntry *
3858 DCH_cache_getnew(const char *str, bool std)
3859 {
3860  DCHCacheEntry *ent;
3861 
3862  /* Ensure we can advance DCHCounter below */
3864 
3865  /*
3866  * If cache is full, remove oldest entry (or recycle first not-valid one)
3867  */
3869  {
3870  DCHCacheEntry *old = DCHCache[0];
3871 
3872 #ifdef DEBUG_TO_FROM_CHAR
3873  elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
3874 #endif
3875  if (old->valid)
3876  {
3877  for (int i = 1; i < DCH_CACHE_ENTRIES; i++)
3878  {
3879  ent = DCHCache[i];
3880  if (!ent->valid)
3881  {
3882  old = ent;
3883  break;
3884  }
3885  if (ent->age < old->age)
3886  old = ent;
3887  }
3888  }
3889 #ifdef DEBUG_TO_FROM_CHAR
3890  elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
3891 #endif
3892  old->valid = false;
3893  strlcpy(old->str, str, DCH_CACHE_SIZE + 1);
3894  old->age = (++DCHCounter);
3895  /* caller is expected to fill format, then set valid */
3896  return old;
3897  }
3898  else
3899  {
3900 #ifdef DEBUG_TO_FROM_CHAR
3901  elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
3902 #endif
3903  Assert(DCHCache[n_DCHCache] == NULL);
3904  DCHCache[n_DCHCache] = ent = (DCHCacheEntry *)
3906  ent->valid = false;
3907  strlcpy(ent->str, str, DCH_CACHE_SIZE + 1);
3908  ent->std = std;
3909  ent->age = (++DCHCounter);
3910  /* caller is expected to fill format, then set valid */
3911  ++n_DCHCache;
3912  return ent;
3913  }
3914 }
3915 
3916 /* look for an existing DCHCacheEntry matching the given format picture */
3917 static DCHCacheEntry *
3918 DCH_cache_search(const char *str, bool std)
3919 {
3920  /* Ensure we can advance DCHCounter below */
3922 
3923  for (int i = 0; i < n_DCHCache; i++)
3924  {
3925  DCHCacheEntry *ent = DCHCache[i];
3926 
3927  if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std)
3928  {
3929  ent->age = (++DCHCounter);
3930  return ent;
3931  }
3932  }
3933 
3934  return NULL;
3935 }
3936 
3937 /* Find or create a DCHCacheEntry for the given format picture */
3938 static DCHCacheEntry *
3939 DCH_cache_fetch(const char *str, bool std)
3940 {
3941  DCHCacheEntry *ent;
3942 
3943  if ((ent = DCH_cache_search(str, std)) == NULL)
3944  {
3945  /*
3946  * Not in the cache, must run parser and save a new format-picture to
3947  * the cache. Do not mark the cache entry valid until parsing
3948  * succeeds.
3949  */
3950  ent = DCH_cache_getnew(str, std);
3951 
3952  parse_format(ent->format, str, DCH_keywords, DCH_suff, DCH_index,
3953  DCH_FLAG | (std ? STD_FLAG : 0), NULL);
3954 
3955  ent->valid = true;
3956  }
3957  return ent;
3958 }
3959 
3960 /*
3961  * Format a date/time or interval into a string according to fmt.
3962  * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char
3963  * for formatting.
3964  */
3965 static text *
3966 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
3967 {
3968  FormatNode *format;
3969  char *fmt_str,
3970  *result;
3971  bool incache;
3972  int fmt_len;
3973  text *res;
3974 
3975  /*
3976  * Convert fmt to C string
3977  */
3978  fmt_str = text_to_cstring(fmt);
3979  fmt_len = strlen(fmt_str);
3980 
3981  /*
3982  * Allocate workspace for result as C string
3983  */
3984  result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
3985  *result = '\0';
3986 
3987  if (fmt_len > DCH_CACHE_SIZE)
3988  {
3989  /*
3990  * Allocate new memory if format picture is bigger than static cache
3991  * and do not use cache (call parser always)
3992  */
3993  incache = false;
3994 
3995  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
3996 
3997  parse_format(format, fmt_str, DCH_keywords,
3998  DCH_suff, DCH_index, DCH_FLAG, NULL);
3999  }
4000  else
4001  {
4002  /*
4003  * Use cache buffers
4004  */
4005  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
4006 
4007  incache = true;
4008  format = ent->format;
4009  }
4010 
4011  /* The real work is here */
4012  DCH_to_char(format, is_interval, tmtc, result, collid);
4013 
4014  if (!incache)
4015  pfree(format);
4016 
4017  pfree(fmt_str);
4018 
4019  /* convert C-string result to TEXT format */
4020  res = cstring_to_text(result);
4021 
4022  pfree(result);
4023  return res;
4024 }
4025 
4026 /****************************************************************************
4027  * Public routines
4028  ***************************************************************************/
4029 
4030 /* -------------------
4031  * TIMESTAMP to_char()
4032  * -------------------
4033  */
4034 Datum
4036 {
4038  text *fmt = PG_GETARG_TEXT_PP(1),
4039  *res;
4040  TmToChar tmtc;
4041  struct pg_tm *tm;
4042  int thisdate;
4043 
4044  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4045  PG_RETURN_NULL();
4046 
4047  ZERO_tmtc(&tmtc);
4048  tm = tmtcTm(&tmtc);
4049 
4050  if (timestamp2tm(dt, NULL, tm, &tmtcFsec(&tmtc), NULL, NULL) != 0)
4051  ereport(ERROR,
4052  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4053  errmsg("timestamp out of range")));
4054 
4055  thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
4056  tm->tm_wday = (thisdate + 1) % 7;
4057  tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
4058 
4059  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4060  PG_RETURN_NULL();
4061 
4062  PG_RETURN_TEXT_P(res);
4063 }
4064 
4065 Datum
4067 {
4069  text *fmt = PG_GETARG_TEXT_PP(1),
4070  *res;
4071  TmToChar tmtc;
4072  int tz;
4073  struct pg_tm *tm;
4074  int thisdate;
4075 
4076  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4077  PG_RETURN_NULL();
4078 
4079  ZERO_tmtc(&tmtc);
4080  tm = tmtcTm(&tmtc);
4081 
4082  if (timestamp2tm(dt, &tz, tm, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
4083  ereport(ERROR,
4084  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4085  errmsg("timestamp out of range")));
4086 
4087  thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
4088  tm->tm_wday = (thisdate + 1) % 7;
4089  tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
4090 
4091  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4092  PG_RETURN_NULL();
4093 
4094  PG_RETURN_TEXT_P(res);
4095 }
4096 
4097 
4098 /* -------------------
4099  * INTERVAL to_char()
4100  * -------------------
4101  */
4102 Datum
4104 {
4105  Interval *it = PG_GETARG_INTERVAL_P(0);
4106  text *fmt = PG_GETARG_TEXT_PP(1),
4107  *res;
4108  TmToChar tmtc;
4109  struct pg_tm *tm;
4110 
4111  if (VARSIZE_ANY_EXHDR(fmt) <= 0)
4112  PG_RETURN_NULL();
4113 
4114  ZERO_tmtc(&tmtc);
4115  tm = tmtcTm(&tmtc);
4116 
4117  if (interval2tm(*it, tm, &tmtcFsec(&tmtc)) != 0)
4118  PG_RETURN_NULL();
4119 
4120  /* wday is meaningless, yday approximates the total span in days */
4121  tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday;
4122 
4123  if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
4124  PG_RETURN_NULL();
4125 
4126  PG_RETURN_TEXT_P(res);
4127 }
4128 
4129 /* ---------------------
4130  * TO_TIMESTAMP()
4131  *
4132  * Make Timestamp from date_str which is formatted at argument 'fmt'
4133  * ( to_timestamp is reverse to_char() )
4134  * ---------------------
4135  */
4136 Datum
4138 {
4139  text *date_txt = PG_GETARG_TEXT_PP(0);
4140  text *fmt = PG_GETARG_TEXT_PP(1);
4141  Oid collid = PG_GET_COLLATION();
4142  Timestamp result;
4143  int tz;
4144  struct pg_tm tm;
4145  fsec_t fsec;
4146  int fprec;
4147 
4148  do_to_timestamp(date_txt, fmt, collid, false,
4149  &tm, &fsec, &fprec, NULL, NULL);
4150 
4151  /* Use the specified time zone, if any. */
4152  if (tm.tm_zone)
4153  {
4154  int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), &tz);
4155 
4156  if (dterr)
4157  DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
4158  }
4159  else
4161 
4162  if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
4163  ereport(ERROR,
4164  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4165  errmsg("timestamp out of range")));
4166 
4167  /* Use the specified fractional precision, if any. */
4168  if (fprec)
4169  AdjustTimestampForTypmod(&result, fprec);
4170 
4171  PG_RETURN_TIMESTAMP(result);
4172 }
4173 
4174 /* ----------
4175  * TO_DATE
4176  * Make Date from date_str which is formatted at argument 'fmt'
4177  * ----------
4178  */
4179 Datum
4181 {
4182  text *date_txt = PG_GETARG_TEXT_PP(0);
4183  text *fmt = PG_GETARG_TEXT_PP(1);
4184  Oid collid = PG_GET_COLLATION();
4185  DateADT result;
4186  struct pg_tm tm;
4187  fsec_t fsec;
4188 
4189  do_to_timestamp(date_txt, fmt, collid, false,
4190  &tm, &fsec, NULL, NULL, NULL);
4191 
4192  /* Prevent overflow in Julian-day routines */
4193  if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4194  ereport(ERROR,
4195  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4196  errmsg("date out of range: \"%s\"",
4197  text_to_cstring(date_txt))));
4198 
4199  result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
4200 
4201  /* Now check for just-out-of-range dates */
4202  if (!IS_VALID_DATE(result))
4203  ereport(ERROR,
4204  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4205  errmsg("date out of range: \"%s\"",
4206  text_to_cstring(date_txt))));
4207 
4208  PG_RETURN_DATEADT(result);
4209 }
4210 
4211 /*
4212  * Convert the 'date_txt' input to a datetime type using argument 'fmt'
4213  * as a format string. The collation 'collid' may be used for case-folding
4214  * rules in some cases. 'strict' specifies standard parsing mode.
4215  *
4216  * The actual data type (returned in 'typid', 'typmod') is determined by
4217  * the presence of date/time/zone components in the format string.
4218  *
4219  * When timezone component is present, the corresponding offset is
4220  * returned in '*tz'.
4221  *
4222  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
4223  * and zero value is returned.
4224  */
4225 Datum
4226 parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
4227  Oid *typid, int32 *typmod, int *tz,
4228  bool *have_error)
4229 {
4230  struct pg_tm tm;
4231  fsec_t fsec;
4232  int fprec;
4233  uint32 flags;
4234 
4235  do_to_timestamp(date_txt, fmt, collid, strict,
4236  &tm, &fsec, &fprec, &flags, have_error);
4237  CHECK_ERROR;
4238 
4239  *typmod = fprec ? fprec : -1; /* fractional part precision */
4240 
4241  if (flags & DCH_DATED)
4242  {
4243  if (flags & DCH_TIMED)
4244  {
4245  if (flags & DCH_ZONED)
4246  {
4247  TimestampTz result;
4248 
4249  if (tm.tm_zone)
4250  {
4251  int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
4252 
4253  if (dterr)
4254  DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
4255  }
4256  else
4257  {
4258  /*
4259  * Time zone is present in format string, but not in input
4260  * string. Assuming do_to_timestamp() triggers no error
4261  * this should be possible only in non-strict case.
4262  */
4263  Assert(!strict);
4264 
4266  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4267  errmsg("missing time zone in input string for type timestamptz"))));
4268  }
4269 
4270  if (tm2timestamp(&tm, fsec, tz, &result) != 0)
4272  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4273  errmsg("timestamptz out of range"))));
4274 
4275  AdjustTimestampForTypmod(&result, *typmod);
4276 
4277  *typid = TIMESTAMPTZOID;
4278  return TimestampTzGetDatum(result);
4279  }
4280  else
4281  {
4282  Timestamp result;
4283 
4284  if (tm2timestamp(&tm, fsec, NULL, &result) != 0)
4286  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4287  errmsg("timestamp out of range"))));
4288 
4289  AdjustTimestampForTypmod(&result, *typmod);
4290 
4291  *typid = TIMESTAMPOID;
4292  return TimestampGetDatum(result);
4293  }
4294  }
4295  else
4296  {
4297  if (flags & DCH_ZONED)
4298  {
4300  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4301  errmsg("datetime format is zoned but not timed"))));
4302  }
4303  else
4304  {
4305  DateADT result;
4306 
4307  /* Prevent overflow in Julian-day routines */
4308  if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4310  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4311  errmsg("date out of range: \"%s\"",
4312  text_to_cstring(date_txt)))));
4313 
4314  result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) -
4316 
4317  /* Now check for just-out-of-range dates */
4318  if (!IS_VALID_DATE(result))
4320  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4321  errmsg("date out of range: \"%s\"",
4322  text_to_cstring(date_txt)))));
4323 
4324  *typid = DATEOID;
4325  return DateADTGetDatum(result);
4326  }
4327  }
4328  }
4329  else if (flags & DCH_TIMED)
4330  {
4331  if (flags & DCH_ZONED)
4332  {
4333  TimeTzADT *result = palloc(sizeof(TimeTzADT));
4334 
4335  if (tm.tm_zone)
4336  {
4337  int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
4338 
4339  if (dterr)
4340  RETURN_ERROR(DateTimeParseError(dterr, text_to_cstring(date_txt), "timetz"));
4341  }
4342  else
4343  {
4344  /*
4345  * Time zone is present in format string, but not in input
4346  * string. Assuming do_to_timestamp() triggers no error this
4347  * should be possible only in non-strict case.
4348  */
4349  Assert(!strict);
4350 
4352  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4353  errmsg("missing time zone in input string for type timetz"))));
4354  }
4355 
4356  if (tm2timetz(&tm, fsec, *tz, result) != 0)
4358  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4359  errmsg("timetz out of range"))));
4360 
4361  AdjustTimeForTypmod(&result->time, *typmod);
4362 
4363  *typid = TIMETZOID;
4364  return TimeTzADTPGetDatum(result);
4365  }
4366  else
4367  {
4368  TimeADT result;
4369 
4370  if (tm2time(&tm, fsec, &result) != 0)
4372  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4373  errmsg("time out of range"))));
4374 
4375  AdjustTimeForTypmod(&result, *typmod);
4376 
4377  *typid = TIMEOID;
4378  return TimeADTGetDatum(result);
4379  }
4380  }
4381  else
4382  {
4384  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4385  errmsg("datetime format is not dated and not timed"))));
4386  }
4387 
4388 on_error:
4389  return (Datum) 0;
4390 }
4391 
4392 /*
4393  * do_to_timestamp: shared code for to_timestamp and to_date
4394  *
4395  * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm,
4396  * fractional seconds, and fractional precision.
4397  *
4398  * 'collid' identifies the collation to use, if needed.
4399  * 'std' specifies standard parsing mode.
4400  * Bit mask of date/time/zone components found in 'fmt' is returned in 'flags',
4401  * if that is not NULL.
4402  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
4403  *
4404  * We parse 'fmt' into a list of FormatNodes, which is then passed to
4405  * DCH_from_char to populate a TmFromChar with the parsed contents of
4406  * 'date_txt'.
4407  *
4408  * The TmFromChar is then analysed and converted into the final results in
4409  * struct 'tm', 'fsec', and 'fprec'.
4410  */
4411 static void
4412 do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
4413  struct pg_tm *tm, fsec_t *fsec, int *fprec,
4414  uint32 *flags, bool *have_error)
4415 {
4416  FormatNode *format = NULL;
4417  TmFromChar tmfc;
4418  int fmt_len;
4419  char *date_str;
4420  int fmask;
4421  bool incache = false;
4422 
4423  Assert(tm != NULL);
4424  Assert(fsec != NULL);
4425 
4426  date_str = text_to_cstring(date_txt);
4427 
4428  ZERO_tmfc(&tmfc);
4429  ZERO_tm(tm);
4430  *fsec = 0;
4431  if (fprec)
4432  *fprec = 0;
4433  if (flags)
4434  *flags = 0;
4435  fmask = 0; /* bit mask for ValidateDate() */
4436 
4437  fmt_len = VARSIZE_ANY_EXHDR(fmt);
4438 
4439  if (fmt_len)
4440  {
4441  char *fmt_str;
4442 
4443  fmt_str = text_to_cstring(fmt);
4444 
4445  if (fmt_len > DCH_CACHE_SIZE)
4446  {
4447  /*
4448  * Allocate new memory if format picture is bigger than static
4449  * cache and do not use cache (call parser always)
4450  */
4451  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4452 
4453  parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index,
4454  DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4455  }
4456  else
4457  {
4458  /*
4459  * Use cache buffers
4460  */
4461  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std);
4462 
4463  incache = true;
4464  format = ent->format;
4465  }
4466 
4467 #ifdef DEBUG_TO_FROM_CHAR
4468  /* dump_node(format, fmt_len); */
4469  /* dump_index(DCH_keywords, DCH_index); */
4470 #endif
4471 
4472  DCH_from_char(format, date_str, &tmfc, collid, std, have_error);
4473  CHECK_ERROR;
4474 
4475  pfree(fmt_str);
4476 
4477  if (flags)
4478  *flags = DCH_datetime_type(format, have_error);
4479 
4480  if (!incache)
4481  {
4482  pfree(format);
4483  format = NULL;
4484  }
4485 
4486  CHECK_ERROR;
4487  }
4488 
4489  DEBUG_TMFC(&tmfc);
4490 
4491  /*
4492  * Convert to_date/to_timestamp input fields to standard 'tm'
4493  */
4494  if (tmfc.ssss)
4495  {
4496  int x = tmfc.ssss;
4497 
4498  tm->tm_hour = x / SECS_PER_HOUR;
4499  x %= SECS_PER_HOUR;
4500  tm->tm_min = x / SECS_PER_MINUTE;
4501  x %= SECS_PER_MINUTE;
4502  tm->tm_sec = x;
4503  }
4504 
4505  if (tmfc.ss)
4506  tm->tm_sec = tmfc.ss;
4507  if (tmfc.mi)
4508  tm->tm_min = tmfc.mi;
4509  if (tmfc.hh)
4510  tm->tm_hour = tmfc.hh;
4511 
4512  if (tmfc.clock == CLOCK_12_HOUR)
4513  {
4514  if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
4515  {
4517  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4518  errmsg("hour \"%d\" is invalid for the 12-hour clock",
4519  tm->tm_hour),
4520  errhint("Use the 24-hour clock, or give an hour between 1 and 12."))));
4521  }
4522 
4523  if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
4524  tm->tm_hour += HOURS_PER_DAY / 2;
4525  else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
4526  tm->tm_hour = 0;
4527  }
4528 
4529  if (tmfc.year)
4530  {
4531  /*
4532  * If CC and YY (or Y) are provided, use YY as 2 low-order digits for
4533  * the year in the given century. Keep in mind that the 21st century
4534  * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
4535  * 600BC to 501BC.
4536  */
4537  if (tmfc.cc && tmfc.yysz <= 2)
4538  {
4539  if (tmfc.bc)
4540  tmfc.cc = -tmfc.cc;
4541  tm->tm_year = tmfc.year % 100;
4542  if (tm->tm_year)
4543  {
4544  if (tmfc.cc >= 0)
4545  tm->tm_year += (tmfc.cc - 1) * 100;
4546  else
4547  tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
4548  }
4549  else
4550  {
4551  /* find century year for dates ending in "00" */
4552  tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
4553  }
4554  }
4555  else
4556  {
4557  /* If a 4-digit year is provided, we use that and ignore CC. */
4558  tm->tm_year = tmfc.year;
4559  if (tmfc.bc && tm->tm_year > 0)
4560  tm->tm_year = -(tm->tm_year - 1);
4561  }
4562  fmask |= DTK_M(YEAR);
4563  }
4564  else if (tmfc.cc)
4565  {
4566  /* use first year of century */
4567  if (tmfc.bc)
4568  tmfc.cc = -tmfc.cc;
4569  if (tmfc.cc >= 0)
4570  /* +1 because 21st century started in 2001 */
4571  tm->tm_year = (tmfc.cc - 1) * 100 + 1;
4572  else
4573  /* +1 because year == 599 is 600 BC */
4574  tm->tm_year = tmfc.cc * 100 + 1;
4575  fmask |= DTK_M(YEAR);
4576  }
4577 
4578  if (tmfc.j)
4579  {
4580  j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4581  fmask |= DTK_DATE_M;
4582  }
4583 
4584  if (tmfc.ww)
4585  {
4586  if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4587  {
4588  /*
4589  * If tmfc.d is not set, then the date is left at the beginning of
4590  * the ISO week (Monday).
4591  */
4592  if (tmfc.d)
4593  isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4594  else
4595  isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4596  fmask |= DTK_DATE_M;
4597  }
4598  else
4599  tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
4600  }
4601 
4602  if (tmfc.w)
4603  tmfc.dd = (tmfc.w - 1) * 7 + 1;
4604  if (tmfc.dd)
4605  {
4606  tm->tm_mday = tmfc.dd;
4607  fmask |= DTK_M(DAY);
4608  }
4609  if (tmfc.mm)
4610  {
4611  tm->tm_mon = tmfc.mm;
4612  fmask |= DTK_M(MONTH);
4613  }
4614 
4615  if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
4616  {
4617  /*
4618  * The month and day field have not been set, so we use the
4619  * day-of-year field to populate them. Depending on the date mode,
4620  * this field may be interpreted as a Gregorian day-of-year, or an ISO
4621  * week date day-of-year.
4622  */
4623 
4624  if (!tm->tm_year && !tmfc.bc)
4625  {
4627  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4628  errmsg("cannot calculate day of year without year information"))));
4629  }
4630 
4631  if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4632  {
4633  int j0; /* zeroth day of the ISO year, in Julian */
4634 
4635  j0 = isoweek2j(tm->tm_year, 1) - 1;
4636 
4637  j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4638  fmask |= DTK_DATE_M;
4639  }
4640  else
4641  {
4642  const int *y;
4643  int i;
4644 
4645  static const int ysum[2][13] = {
4646  {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
4647  {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}};
4648 
4649  y = ysum[isleap(tm->tm_year)];
4650 
4651  for (i = 1; i <= MONTHS_PER_YEAR; i++)
4652  {
4653  if (tmfc.ddd <= y[i])
4654  break;
4655  }
4656  if (tm->tm_mon <= 1)
4657  tm->tm_mon = i;
4658 
4659  if (tm->tm_mday <= 1)
4660  tm->tm_mday = tmfc.ddd - y[i - 1];
4661 
4662  fmask |= DTK_M(MONTH) | DTK_M(DAY);
4663  }
4664  }
4665 
4666  if (tmfc.ms)
4667  *fsec += tmfc.ms * 1000;
4668  if (tmfc.us)
4669  *fsec += tmfc.us;
4670  if (fprec)
4671  *fprec = tmfc.ff; /* fractional precision, if specified */
4672 
4673  /* Range-check date fields according to bit mask computed above */
4674  if (fmask != 0)
4675  {
4676  /* We already dealt with AD/BC, so pass isjulian = true */
4677  int dterr = ValidateDate(fmask, true, false, false, tm);
4678 
4679  if (dterr != 0)
4680  {
4681  /*
4682  * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate
4683  * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an
4684  * irrelevant hint about datestyle.
4685  */
4686  RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
4687  }
4688  }
4689 
4690  /* Range-check time fields too */
4691  if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY ||
4692  tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR ||
4693  tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE ||
4694  *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC)
4695  {
4696  RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
4697  }
4698 
4699  /* Save parsed time-zone into tm->tm_zone if it was specified */
4700  if (tmfc.tzsign)
4701  {
4702  char *tz;
4703 
4704  if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR ||
4705  tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR)
4706  {
4707  RETURN_ERROR(DateTimeParseError(DTERR_TZDISP_OVERFLOW, date_str, "timestamp"));
4708  }
4709 
4710  tz = psprintf("%c%02d:%02d",
4711  tmfc.tzsign > 0 ? '+' : '-', tmfc.tzh, tmfc.tzm);
4712 
4713  tm->tm_zone = tz;
4714  }
4715 
4716  DEBUG_TM(tm);
4717 
4718 on_error:
4719 
4720  if (format && !incache)
4721  pfree(format);
4722 
4723  pfree(date_str);
4724 }
4725 
4726 
4727 /**********************************************************************
4728  * the NUMBER version part
4729  *********************************************************************/
4730 
4731 
/*
 * Fill the first "max" bytes of "str" with the character "c" and
 * NUL-terminate the result, so "str" must have room for max + 1 bytes.
 *
 * Returns "str" itself.
 */
static char *
fill_str(char *str, int c, int max)
{
	str[max] = '\0';
	memset(str, c, max);

	return str;
}
4739 
/*
 * Reset a number-format descriptor: each field named below is set to zero.
 *
 * "_n" must be a pointer expression.  It is expanded once per field, so it
 * should be free of side effects.
 */
#define zeroize_NUM(_n) \
do { \
	(_n)->pre = 0; \
	(_n)->post = 0; \
	(_n)->pre_lsign_num = 0; \
	(_n)->lsign = 0; \
	(_n)->zero_start = 0; \
	(_n)->zero_end = 0; \
	(_n)->multi = 0; \
	(_n)->need_locale = 0; \
	(_n)->flag = 0; \
} while(0)
4752 
4753 /* This works the same as DCH_prevent_counter_overflow */
4754 static inline void
4756 {
4757  if (NUMCounter >= (INT_MAX - 1))
4758  {
4759  for (int i = 0; i < n_NUMCache; i++)
4760  NUMCache[i]->age >>= 1;
4761  NUMCounter >>= 1;
4762  }
4763 }
4764 
4765 /* select a NUMCacheEntry to hold the given format picture */
4766 static NUMCacheEntry *
4767 NUM_cache_getnew(const char *str)
4768 {
4769  NUMCacheEntry *ent;
4770 
4771  /* Ensure we can advance NUMCounter below */
4773 
4774  /*
4775  * If cache is full, remove oldest entry (or recycle first not-valid one)
4776  */
4778  {
4779  NUMCacheEntry *old = NUMCache[0];
4780 
4781 #ifdef DEBUG_TO_FROM_CHAR
4782  elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache);
4783 #endif
4784  if (old->valid)
4785  {
4786  for (int i = 1; i < NUM_CACHE_ENTRIES; i++)
4787  {
4788  ent = NUMCache[i];
4789  if (!ent->valid)
4790  {
4791  old = ent;
4792  break;
4793  }
4794  if (ent->age < old->age)
4795  old = ent;
4796  }
4797  }
4798 #ifdef DEBUG_TO_FROM_CHAR
4799  elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age);
4800 #endif
4801  old->valid = false;
4802  strlcpy(old->str, str, NUM_CACHE_SIZE + 1);
4803  old->age = (++NUMCounter);
4804  /* caller is expected to fill format and Num, then set valid */
4805  return old;
4806  }
4807  else
4808  {
4809 #ifdef DEBUG_TO_FROM_CHAR
4810  elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache);
4811 #endif
4812  Assert(NUMCache[n_NUMCache] == NULL);
4813  NUMCache[n_NUMCache] = ent = (NUMCacheEntry *)
4815  ent->valid = false;
4816  strlcpy(ent->str, str, NUM_CACHE_SIZE + 1);
4817  ent->age = (++NUMCounter);
4818  /* caller is expected to fill format and Num, then set valid */
4819  ++n_NUMCache;
4820  return ent;
4821  }
4822 }
4823 
4824 /* look for an existing NUMCacheEntry matching the given format picture */
4825 static NUMCacheEntry *
4826 NUM_cache_search(const char *str)
4827 {
4828  /* Ensure we can advance NUMCounter below */
4830 
4831  for (int i = 0; i < n_NUMCache; i++)
4832  {
4833  NUMCacheEntry *ent = NUMCache[i];
4834 
4835  if (ent->valid && strcmp(ent->str, str) == 0)
4836  {
4837  ent->age = (++NUMCounter);
4838  return ent;
4839  }
4840  }
4841 
4842  return NULL;
4843 }
4844 
4845 /* Find or create a NUMCacheEntry for the given format picture */
4846 static NUMCacheEntry *
4847 NUM_cache_fetch(const char *str)
4848 {
4849  NUMCacheEntry *ent;
4850 
4851  if ((ent = NUM_cache_search(str)) == NULL)
4852  {
4853  /*
4854  * Not in the cache, must run parser and save a new format-picture to
4855  * the cache. Do not mark the cache entry valid until parsing
4856  * succeeds.
4857  */
4858  ent = NUM_cache_getnew(str);
4859 
4860  zeroize_NUM(&ent->Num);
4861 
4862  parse_format(ent->format, str, NUM_keywords,
4863  NULL, NUM_index, NUM_FLAG, &ent->Num);
4864 
4865  ent->valid = true;
4866  }
4867  return ent;
4868 }
4869 
4870 /* ----------
4871  * Cache routine for NUM to_char version
4872  * ----------
4873  */
4874 static FormatNode *
4875 NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
4876 {
4877  FormatNode *format = NULL;
4878  char *str;
4879 
4880  str = text_to_cstring(pars_str);
4881 
4882  if (len > NUM_CACHE_SIZE)
4883  {
4884  /*
4885  * Allocate new memory if format picture is bigger than static cache
4886  * and do not use cache (call parser always)
4887  */
4888  format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode));
4889 
4890  *shouldFree = true;
4891 
4892  zeroize_NUM(Num);
4893 
4894  parse_format(format, str, NUM_keywords,
4895  NULL, NUM_index, NUM_FLAG, Num);
4896  }
4897  else
4898  {
4899  /*
4900  * Use cache buffers
4901  */
4902  NUMCacheEntry *ent = NUM_cache_fetch(str);
4903 
4904  *shouldFree = false;
4905 
4906  format = ent->format;
4907 
4908  /*
4909  * Copy cache to used struct
4910  */
4911  Num->flag = ent->Num.flag;
4912  Num->lsign = ent->Num.lsign;
4913  Num->pre = ent->Num.pre;
4914  Num->post = ent->Num.post;
4915  Num->pre_lsign_num = ent->Num.pre_lsign_num;
4916  Num->need_locale = ent->Num.need_locale;
4917  Num->multi = ent->Num.multi;
4918  Num->zero_start = ent->Num.zero_start;
4919  Num->zero_end = ent->Num.zero_end;
4920  }
4921 
4922 #ifdef DEBUG_TO_FROM_CHAR
4923  /* dump_node(format, len); */
4924  dump_index(NUM_keywords, NUM_index);
4925 #endif
4926 
4927  pfree(str);
4928  return format;
4929 }
4930 
4931 
4932 static char *
4933 int_to_roman(int number)
4934 {
4935  int len = 0,
4936  num = 0;
4937  char *p = NULL,
4938  *result,
4939  numstr[12];
4940 
4941  result = (char *) palloc(16);
4942  *result = '\0';
4943 
4944  if (number > 3999 || number < 1)
4945  {
4946  fill_str(result, '#', 15);
4947  return result;
4948  }
4949  len = snprintf(numstr, sizeof(numstr), "%d", number);
4950 
4951  for (p = numstr; *p != '\0'; p++, --len)
4952  {
4953  num = *p - 49; /* 48 ascii + 1 */
4954  if (num < 0)
4955  continue;
4956 
4957  if (len > 3)
4958  {
4959  while (num-- != -1)
4960  strcat(result, "M");
4961  }
4962  else
4963  {
4964  if (len == 3)
4965  strcat(result, rm100[num]);
4966  else if (len == 2)
4967  strcat(result, rm10[num]);
4968  else if (len == 1)
4969  strcat(result, rm1[num]);
4970  }
4971  }
4972  return result;
4973 }
4974 
4975 
4976 
4977 /* ----------
4978  * Locale
4979  * ----------
4980  */
4981 static void
4983 {
4984  if (Np->Num->need_locale)
4985  {
4986  struct lconv *lconv;
4987 
4988  /*
4989  * Get locales
4990  */
4991  lconv = PGLC_localeconv();
4992 
4993  /*
4994  * Positive / Negative number sign
4995  */
4996  if (lconv->negative_sign && *lconv->negative_sign)
4997  Np->L_negative_sign = lconv->negative_sign;
4998  else
4999  Np->L_negative_sign = "-";
5000 
5001  if (lconv->positive_sign && *lconv->positive_sign)
5002  Np->L_positive_sign = lconv->positive_sign;
5003  else
5004  Np->L_positive_sign = "+";
5005 
5006  /*
5007  * Number decimal point
5008  */
5009  if (lconv->decimal_point && *lconv->decimal_point)
5010  Np->decimal = lconv->decimal_point;
5011 
5012  else
5013  Np->decimal = ".";
5014 
5015  if (!IS_LDECIMAL(Np->Num))
5016  Np->decimal = ".";
5017 
5018  /*
5019  * Number thousands separator
5020  *
5021  * Some locales (e.g. broken glibc pt_BR), have a comma for decimal,
5022  * but "" for thousands_sep, so we set the thousands_sep too.
5023  * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php
5024  */
5025  if (lconv->thousands_sep && *lconv->thousands_sep)
5026  Np->L_thousands_sep = lconv->thousands_sep;
5027  /* Make sure thousands separator doesn't match decimal point symbol. */
5028  else if (strcmp(Np->decimal, ",") != 0)
5029  Np->L_thousands_sep = ",";
5030  else
5031  Np->L_thousands_sep = ".";
5032 
5033  /*
5034  * Currency symbol
5035  */
5036  if (lconv->currency_symbol && *lconv->currency_symbol)
5037  Np->L_currency_symbol = lconv->currency_symbol;
5038  else
5039  Np->L_currency_symbol = " ";
5040  }
5041  else
5042  {
5043  /*
5044  * Default values
5045  */
5046  Np->L_negative_sign = "-";
5047  Np->L_positive_sign = "+";
5048  Np->decimal = ".";
5049 
5050  Np->L_thousands_sep = ",";
5051  Np->L_currency_symbol = " ";
5052  }
5053 }
5054 
/* ----------
 * Return pointer of last relevant number after decimal point
 *	12.0500 --> last relevant is '5'
 *	12.0000 --> last relevant is '.'
 * If there is no decimal point, return NULL (which will result in same
 * behavior as if FM hadn't been specified).
 *
 * "num" is a NUL-terminated string.  The decimal point looked for is
 * always '.', and the returned pointer points into "num".
 * ----------
 */
static char *
get_last_relevant_decnum(char *num)
{
	char	   *result,
			   *p = strchr(num, '.');

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, "get_last_relevant_decnum()");
#endif

	if (!p)
		return NULL;

	/* if nothing but zeroes follows, the decimal point itself is the answer */
	result = p;

	while (*(++p))
	{
		if (*p != '0')
			result = p;
	}

	return result;
}
5086 
/*
 * These macros are used in NUM_processor() and its subsidiary routines.
 * They expect "Np" and "input_len" to be in scope where they are expanded.
 *	OVERLOAD_TEST: true if we've reached end of input string
 *	AMOUNT_TEST(s): true if at least s bytes remain in string
 *
 * AMOUNT_TEST compares offsets rather than pointers, so that no pointer
 * before the start of the input buffer is ever formed when s is larger
 * than input_len.
 */
#define OVERLOAD_TEST	(Np->inout_p >= Np->inout + input_len)
#define AMOUNT_TEST(s)	((Np->inout_p - Np->inout) <= input_len - (int) (s))
5094 
5095 /* ----------
5096  * Number extraction for TO_NUMBER()
5097  * ----------
5098  */
5099 static void
5100 NUM_numpart_from_char(NUMProc *Np, int id, int input_len)
5101 {
5102  bool isread = false;
5103 
5104 #ifdef DEBUG_TO_FROM_CHAR
5105  elog(DEBUG_elog_output, " --- scan start --- id=%s",
5106  (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???");
5107 #endif
5108 
5109  if (OVERLOAD_TEST)
5110  return;
5111 
5112  if (*Np->inout_p == ' ')
5113  Np->inout_p++;
5114 
5115  if (OVERLOAD_TEST)
5116  return;
5117 
5118  /*
5119  * read sign before number
5120  */
5121  if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) &&
5122  (Np->read_pre + Np->read_post) == 0)
5123  {
5124 #ifdef DEBUG_TO_FROM_CHAR
5125  elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s",
5126  *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign);
5127 #endif
5128 
5129  /*
5130  * locale sign
5131  */
5132  if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE)
5133  {
5134  int x = 0;
5135 
5136 #ifdef DEBUG_TO_FROM_CHAR
5137  elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p);
5138 #endif
5139  if ((x = strlen(Np->L_negative_sign)) &&
5140  AMOUNT_TEST(x) &&
5141  strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
5142  {
5143  Np->inout_p += x;
5144  *Np->number = '-';
5145  }
5146  else if ((x = strlen(Np->L_positive_sign)) &&
5147  AMOUNT_TEST(x) &&
5148  strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
5149  {
5150  Np->inout_p += x;
5151  *Np->number = '+';
5152  }
5153  }
5154  else
5155  {
5156 #ifdef DEBUG_TO_FROM_CHAR
5157  elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p);
5158 #endif
5159 
5160  /*
5161  * simple + - < >
5162  */
5163  if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) &&
5164  *Np->inout_p == '<'))
5165  {
5166  *Np->number = '-'; /* set - */
5167  Np->inout_p++;
5168  }
5169  else if (*Np->inout_p == '+')
5170  {
5171  *Np->number = '+'; /* set + */
5172  Np->inout_p++;
5173  }
5174  }
5175  }
5176 
5177  if (OVERLOAD_TEST)
5178  return;
5179 
5180 #ifdef DEBUG_TO_FROM_CHAR
5181  elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number);
5182 #endif
5183 
5184  /*
5185  * read digit or decimal point
5186  */
5187  if (isdigit((unsigned char) *Np->inout_p))
5188  {
5189  if (Np->read_dec && Np->read_post == Np->Num->post)
5190  return;
5191 
5192  *Np->number_p = *Np->inout_p;
5193  Np->number_p++;
5194 
5195  if (Np->read_dec)
5196  Np->read_post++;
5197  else
5198  Np->read_pre++;
5199 
5200  isread = true;
5201 
5202 #ifdef DEBUG_TO_FROM_CHAR
5203  elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p);
5204 #endif
5205  }
5206  else if (IS_DECIMAL(Np->Num) && Np->read_dec == false)
5207  {
5208  /*
5209  * We need not test IS_LDECIMAL(Np->Num) explicitly here, because
5210  * Np->decimal is always just "." if we don't have a D format token.
5211  * So we just unconditionally match to Np->decimal.
5212  */
5213  int x = strlen(Np->decimal);
5214 
5215 #ifdef DEBUG_TO_FROM_CHAR
5216  elog(DEBUG_elog_output, "Try read decimal point (%c)",
5217  *Np->inout_p);
5218 #endif
5219  if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0)
5220  {
5221  Np->inout_p += x - 1;
5222  *Np->number_p = '.';
5223  Np->number_p++;
5224  Np->read_dec = true;
5225  isread = true;
5226  }
5227  }
5228 
5229  if (OVERLOAD_TEST)
5230  return;
5231 
5232  /*
5233  * Read sign behind "last" number
5234  *
5235  * We need sign detection because determine exact position of post-sign is
5236  * difficult:
5237  *
5238  * FM9999.9999999S -> 123.001- 9.9S -> .5- FM9.999999MI ->
5239  * 5.01-
5240  */
5241  if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0)
5242  {
5243  /*
5244  * locale sign (NUM_S) is always anchored behind a last number, if: -
5245  * locale sign expected - last read char was NUM_0/9 or NUM_DEC - and
5246  * next char is not digit
5247  */
5248  if (IS_LSIGN(Np->Num) && isread &&
5249  (Np->inout_p + 1) < Np->inout + input_len &&
5250  !isdigit((unsigned char) *(Np->inout_p + 1)))
5251  {
5252  int x;
5253  char *tmp = Np->inout_p++;
5254 
5255 #ifdef DEBUG_TO_FROM_CHAR
5256  elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p);
5257 #endif
5258  if ((x = strlen(Np->L_negative_sign)) &&
5259  AMOUNT_TEST(x) &&
5260  strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
5261  {
5262  Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
5263  *Np->number = '-';
5264  }
5265  else if ((x = strlen(Np->L_positive_sign)) &&
5266  AMOUNT_TEST(x) &&
5267  strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
5268  {
5269  Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
5270  *Np->number = '+';
5271  }
5272  if (*Np->number == ' ')
5273  /* no sign read */
5274  Np->inout_p = tmp;
5275  }
5276 
5277  /*
5278  * try read non-locale sign, it's happen only if format is not exact
5279  * and we cannot determine sign position of MI/PL/SG, an example:
5280  *
5281  * FM9.999999MI -> 5.01-
5282  *
5283  *