PostgreSQL Source Code  git master
formatting.c
Go to the documentation of this file.
1 /* -----------------------------------------------------------------------
2  * formatting.c
3  *
4  * src/backend/utils/adt/formatting.c
5  *
6  *
7  * Portions Copyright (c) 1999-2020, PostgreSQL Global Development Group
8  *
9  *
10  * TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
11  *
12  * The PostgreSQL routines for a timestamp/int/float/numeric formatting,
13  * inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
14  *
15  *
16  * Cache & Memory:
17  * The routines use their own internal cache for format pictures.
18  *
19  * The cache uses a static buffer and is persistent across transactions. If
20  * the format-picture is bigger than the cache buffer, the parser is always
21  * called.
22  *
23  * NOTE for Number version:
24  * Everything in this version is implemented as keywords ( => suffixes are
25  * not used), because a format picture is for *one* item (number)
26  * only. This differs from the timestamp version, where each keyword can
27  * have a suffix.
28  *
29  * NOTE for Timestamp routines:
30  * In this module the POSIX 'struct tm' type is *not* used, but rather the
31  * PgSQL type, which has tm_mon based on one (*non* zero) and a
32  * year that is *not* based on 1900; the full year number is used instead.
33  * Module supports AD / BC / AM / PM.
34  *
35  * Supported types for to_char():
36  *
37  * Timestamp, Numeric, int4, int8, float4, float8
38  *
39  * Supported types for reverse conversion:
40  *
41  * Timestamp - to_timestamp()
42  * Date - to_date()
43  * Numeric - to_number()
44  *
45  *
46  * Karel Zak
47  *
48  * TODO
49  * - better number building (formatting) / parsing, now it isn't
50  * ideal code
51  * - use Assert()
52  * - add support for roman number to standard number conversion
53  * - add support for number spelling
54  * - add support for string to string formatting (we must be better
55  * than Oracle :-),
56  * to_char('Hello', 'X X X X X') -> 'H e l l o'
57  *
58  * -----------------------------------------------------------------------
59  */
60 
61 #ifdef DEBUG_TO_FROM_CHAR
62 #define DEBUG_elog_output DEBUG3
63 #endif
64 
65 #include "postgres.h"
66 
67 #include <ctype.h>
68 #include <unistd.h>
69 #include <math.h>
70 #include <float.h>
71 #include <limits.h>
72 
73 /*
74  * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
75  * declare them in <wchar.h>, so include that too.
76  */
77 #include <wchar.h>
78 #ifdef HAVE_WCTYPE_H
79 #include <wctype.h>
80 #endif
81 
82 #ifdef USE_ICU
83 #include <unicode/ustring.h>
84 #endif
85 
86 #include "catalog/pg_collation.h"
87 #include "catalog/pg_type.h"
88 #include "mb/pg_wchar.h"
89 #include "parser/scansup.h"
90 #include "utils/builtins.h"
91 #include "utils/date.h"
92 #include "utils/datetime.h"
93 #include "utils/float.h"
94 #include "utils/formatting.h"
95 #include "utils/int8.h"
96 #include "utils/memutils.h"
97 #include "utils/numeric.h"
98 #include "utils/pg_locale.h"
99 
100 /* ----------
101  * Convenience macros for error handling
102  * ----------
103  *
104  * The two macros below help to handle errors in functions that take a
105  * 'bool *have_error' argument. When this argument is not NULL, the function
106  * is expected to suppress ereports when possible; instead it should
107  * return some default value and set the *have_error flag.
108  *
109  * The RETURN_ERROR() macro is intended to wrap ereport() calls. When the
110  * have_error function argument is not NULL, then instead of ereport'ing we
111  * set the *have_error flag and jump to the on_error label. The code at that
112  * label is expected to free resources and return some 'default' value.
113  *
114  * CHECK_ERROR() jumps to the on_error label when have_error is not NULL and
115  * *have_error is set. It is meant for an immediate exit from the function
116  * after calling another function that takes a 'bool *have_error' argument.
 *
 * Both macros reference a variable named have_error and a label named
 * on_error, so both must be visible wherever the macros are expanded.
117  */
118 #define RETURN_ERROR(throw_error) \
119 do { \
120  if (have_error) \
121  { \
122  *have_error = true; \
123  goto on_error; \
124  } \
125  else \
126  { \
127  throw_error; \
128  } \
129 } while (0)
130 
131 #define CHECK_ERROR \
132 do { \
133  if (have_error && *have_error) \
134  goto on_error; \
135 } while (0)
136 
137 /* ----------
138  * Routines flags
 *
 * Distinct single-bit values, so they can be OR'ed together.
139  * ----------
140  */
141 #define DCH_FLAG 0x1 /* DATE-TIME flag */
142 #define NUM_FLAG 0x2 /* NUMBER flag */
143 #define STD_FLAG 0x4 /* STANDARD flag */
144 
145 /* ----------
146  * KeyWord Index (ascii from position 32 (' ') to 126 (~))
 *
 * KeyWord_INDEX_SIZE is the number of slots in an index array.
 * KeyWord_INDEX_FILTER() evaluates to 1 only for characters strictly
 * between ' ' and '~', and to 0 otherwise. index_seq_search() applies the
 * filter first and then subscripts the index array with (character - ' ').
 * Note that the macro evaluates its argument twice.
147  * ----------
148  */
149 #define KeyWord_INDEX_SIZE ('~' - ' ')
150 #define KeyWord_INDEX_FILTER(_c) ((_c) <= ' ' || (_c) >= '~' ? 0 : 1)
151 
152 /* ----------
153  * Maximal length of one node
 *
 * NOTE(review): the value 8 for NUM_MAX_ITEM_SIZ and the "RN has 15 chars"
 * remark beside it do not obviously agree -- confirm against the users of
 * this constant.
154  * ----------
155  */
156 #define DCH_MAX_ITEM_SIZ 12 /* max localized day name */
157 #define NUM_MAX_ITEM_SIZ 8 /* roman number (RN has 15 chars) */
158 
159 
160 /* ----------
161  * Format parser structs
 *
 * KeySuffix describes one keyword prefix/suffix (for example "FM" or "TH").
 * Suffix tables are arrays of KeySuffix terminated by an entry whose name
 * is NULL; suff_search() walks them in order.
162  * ----------
163  */
164 typedef struct
165 {
166  const char *name; /* suffix string; NULL in the table terminator */
167  int len, /* suffix length: bytes of name compared by suff_search() */
168  id, /* used in node->suffix (a DCH_S_* code in DCH_suff) */
169  type; /* prefix / postfix (SUFFTYPE_PREFIX or SUFFTYPE_POSTFIX) */
170 } KeySuffix;
171 
172 /* ----------
173  * FromCharDateMode
174  * ----------
175  *
176  * This value is used to nominate one of several distinct (and mutually
177  * exclusive) date conventions that a keyword can belong to.
 *
 * Each entry of DCH_keywords carries one of these values as its last
 * initializer.
 *
 * NOTE(review): listing line 184 is absent from this extract; it presumably
 * holds the closing "} FromCharDateMode;" -- confirm against the real file.
178  */
179 typedef enum
180 {
181  FROM_CHAR_DATE_NONE = 0, /* Value does not affect date mode. */
182  FROM_CHAR_DATE_GREGORIAN, /* Gregorian (day, month, year) style date */
183  FROM_CHAR_DATE_ISOWEEK /* ISO 8601 week date */
185 
/*
 * One format-picture keyword. Keyword tables (DCH_keywords, NUM_keywords)
 * are arrays of KeyWord terminated by an entry whose name is NULL.
 *
 * NOTE(review): listing line 192 is absent from this extract. The
 * DCH_keywords initializers supply a fifth value described there as
 * "date_mode", so a date_mode member presumably belongs after is_digit --
 * confirm against the real file.
 */
186 typedef struct
187 {
188  const char *name; /* keyword text as written in a format picture */
189  int len; /* number of bytes of name compared by index_seq_search() */
190  int id; /* DCH_* or NUM_* enum value identifying the keyword */
191  bool is_digit; /* presumably: keyword stands for a numeric field -- confirm */
193 } KeyWord;
194 
/*
 * One element of a parsed format picture, as filled in by parse_format().
 * Which of the members below is meaningful depends on 'type'.
 */
195 typedef struct
196 {
197  uint8 type; /* NODE_TYPE_XXX, see below */
198  char character[MAX_MULTIBYTE_CHAR_LEN + 1]; /* if type is CHAR */
199  uint8 suffix; /* keyword prefix/suffix code, if any */
200  const KeyWord *key; /* if type is ACTION */
201 } FormatNode;
202 
/* Values for FormatNode.type */
203 #define NODE_TYPE_END 1
204 #define NODE_TYPE_ACTION 2
205 #define NODE_TYPE_CHAR 3
206 #define NODE_TYPE_SEPARATOR 4
207 #define NODE_TYPE_SPACE 5
208 
/* Values for KeySuffix.type; also the 'type' argument of suff_search() */
209 #define SUFFTYPE_PREFIX 1
210 #define SUFFTYPE_POSTFIX 2
211 
/* Clock kinds; presumably the values stored in TmFromChar.clock -- confirm */
212 #define CLOCK_24_HOUR 0
213 #define CLOCK_12_HOUR 1
214 
215 
216 /* ----------
217  * Full months
 *
 * English month names, January first; the array is NULL-terminated.
218  * ----------
219  */
220 static const char *const months_full[] = {
221  "January", "February", "March", "April", "May", "June", "July",
222  "August", "September", "October", "November", "December", NULL
223 };
224 
/* Abbreviated English day names, Sunday first; the array is NULL-terminated. */
225 static const char *const days_short[] = {
226  "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
227 };
228 
229 /* ----------
230  * AD / BC
231  * ----------
232  * There is no 0 AD. Years go from 1 BC to 1 AD, so we make it
233  * positive and map year == -1 to year zero, and shift all negative
234  * years up one. For interval years, we just return the year.
 *
 * Concretely, ADJUST_YEAR() returns 'year' unchanged when is_interval is
 * true or when year > 0; otherwise it returns 1 - year (0 becomes 1,
 * -1 becomes 2, and so on). The 'year' argument is evaluated more than once.
235  */
236 #define ADJUST_YEAR(year, is_interval) ((is_interval) ? (year) : ((year) <= 0 ? -((year) - 1) : (year)))
237 
238 #define A_D_STR "A.D."
239 #define a_d_STR "a.d."
240 #define AD_STR "AD"
241 #define ad_STR "ad"
242 
243 #define B_C_STR "B.C."
244 #define b_c_STR "b.c."
245 #define BC_STR "BC"
246 #define bc_STR "bc"
247 
248 /*
249  * AD / BC strings for seq_search.
250  *
251  * These are given in two variants, a long form with periods and a standard
252  * form without.
253  *
254  * The array is laid out such that matches for AD have an even index, and
255  * matches for BC have an odd index. So the boolean value for BC is given by
256  * taking the array index of the match, modulo 2.
 *
 * Both arrays are NULL-terminated and list the lower-case pair before the
 * upper-case pair.
257  */
258 static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR, NULL};
259 static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR, NULL};
260 
261 /* ----------
262  * AM / PM
 *
 * Literal spellings of the meridiem indicators, with and without periods,
 * in upper and lower case.
263  * ----------
264  */
265 #define A_M_STR "A.M."
266 #define a_m_STR "a.m."
267 #define AM_STR "AM"
268 #define am_STR "am"
269 
270 #define P_M_STR "P.M."
271 #define p_m_STR "p.m."
272 #define PM_STR "PM"
273 #define pm_STR "pm"
274 
275 /*
276  * AM / PM strings for seq_search.
277  *
278  * These are given in two variants, a long form with periods and a standard
279  * form without.
280  *
281  * The array is laid out such that matches for AM have an even index, and
282  * matches for PM have an odd index. So the boolean value for PM is given by
283  * taking the array index of the match, modulo 2.
 *
 * Both arrays are NULL-terminated and list the lower-case pair before the
 * upper-case pair.
284  */
285 static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR, NULL};
286 static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR, NULL};
287 
288 /* ----------
289  * Months in roman-numeral
290  * (Must be in reverse order for seq_search (in FROM_CHAR), because
291  * 'VIII' must have higher precedence than 'V')
 *
 * The arrays therefore run from XII (index 0) down to I (index 11) and are
 * NULL-terminated; an array index is not the month number.
292  * ----------
293  */
294 static const char *const rm_months_upper[] =
295 {"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I", NULL};
296 
297 static const char *const rm_months_lower[] =
298 {"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL};
299 
300 /* ----------
301  * Roman numbers
 *
 * rm1 holds the numerals for 1..9, rm10 for 10..90 and rm100 for 100..900;
 * entry [i] is the numeral for (i + 1) units, tens or hundreds. Each array
 * is NULL-terminated.
302  * ----------
303  */
304 static const char *const rm1[] = {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", NULL};
305 static const char *const rm10[] = {"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC", NULL};
306 static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", NULL};
307 
308 /* ----------
309  * Ordinal postfixes
 *
 * Upper-case and lower-case variants in the same order (st, nd, rd, th);
 * both arrays are NULL-terminated.
310  * ----------
311  */
312 static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL};
313 static const char *const numth[] = {"st", "nd", "rd", "th", NULL};
314 
315 /* ----------
316  * Flags & Options:
 *
 * TH_UPPER / TH_LOWER are the values produced by S_TH_TYPE(); they are
 * presumably the 'type' accepted by get_th() and str_numth() -- confirm.
317  * ----------
318  */
319 #define TH_UPPER 1
320 #define TH_LOWER 2
321 
322 /* ----------
323  * Number description struct
 *
 * Describes a parsed number format picture. The 'flag' member is a bitmask
 * of the NUM_F_* values and is what the IS_*() test macros inspect.
324  * ----------
325  */
326 typedef struct
327 {
328  int pre, /* (count) numbers before decimal */
329  post, /* (count) numbers after decimal */
330  lsign, /* want locales sign */
331  flag, /* number parameters */
332  pre_lsign_num, /* tmp value for lsign */
333  multi, /* multiplier for 'V' */
334  zero_start, /* position of first zero */
335  zero_end, /* position of last zero */
336  need_locale; /* needs it locale */
337 } NUMDesc;
338 
339 /* ----------
340  * Flags for NUMBER version
 *
 * Single-bit values OR'ed into NUMDesc.flag. Bit 0 is not used by any of
 * the flags below.
341  * ----------
342  */
343 #define NUM_F_DECIMAL (1 << 1)
344 #define NUM_F_LDECIMAL (1 << 2)
345 #define NUM_F_ZERO (1 << 3)
346 #define NUM_F_BLANK (1 << 4)
347 #define NUM_F_FILLMODE (1 << 5)
348 #define NUM_F_LSIGN (1 << 6)
349 #define NUM_F_BRACKET (1 << 7)
350 #define NUM_F_MINUS (1 << 8)
351 #define NUM_F_PLUS (1 << 9)
352 #define NUM_F_ROMAN (1 << 10)
353 #define NUM_F_MULTI (1 << 11)
354 #define NUM_F_PLUS_POST (1 << 12)
355 #define NUM_F_MINUS_POST (1 << 13)
356 #define NUM_F_EEEE (1 << 14)
357 
/* Named values for the position of a locale sign (before, after, or none). */
358 #define NUM_LSIGN_PRE (-1)
359 #define NUM_LSIGN_POST 1
360 #define NUM_LSIGN_NONE 0
361 
362 /* ----------
363  * Tests
 *
 * Each macro takes a pointer to a struct with a 'flag' member (NUMDesc) and
 * yields the masked bit itself, i.e. zero or the flag's value -- not
 * necessarily 1. There are no test macros here for NUM_F_PLUS_POST and
 * NUM_F_MINUS_POST.
364  * ----------
365  */
366 #define IS_DECIMAL(_f) ((_f)->flag & NUM_F_DECIMAL)
367 #define IS_LDECIMAL(_f) ((_f)->flag & NUM_F_LDECIMAL)
368 #define IS_ZERO(_f) ((_f)->flag & NUM_F_ZERO)
369 #define IS_BLANK(_f) ((_f)->flag & NUM_F_BLANK)
370 #define IS_FILLMODE(_f) ((_f)->flag & NUM_F_FILLMODE)
371 #define IS_BRACKET(_f) ((_f)->flag & NUM_F_BRACKET)
372 #define IS_MINUS(_f) ((_f)->flag & NUM_F_MINUS)
373 #define IS_LSIGN(_f) ((_f)->flag & NUM_F_LSIGN)
374 #define IS_PLUS(_f) ((_f)->flag & NUM_F_PLUS)
375 #define IS_ROMAN(_f) ((_f)->flag & NUM_F_ROMAN)
376 #define IS_MULTI(_f) ((_f)->flag & NUM_F_MULTI)
377 #define IS_EEEE(_f) ((_f)->flag & NUM_F_EEEE)
378 
379 /* ----------
380  * Format picture cache
381  *
382  * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long;
383  * likewise number format pictures up to NUM_CACHE_SIZE bytes long.
384  *
385  * For simplicity, the cache entries are fixed-size, so they allow for the
386  * worst case of a FormatNode for each byte in the picture string.
387  *
388  * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and
389  * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that
390  * we don't waste too much space by palloc'ing them individually. Be sure
391  * to adjust those macros if you add fields to those structs.
392  *
393  * The maximum number of entries in the datetime cache is DCH_CACHE_ENTRIES,
394  * and in the number cache NUM_CACHE_ENTRIES.
 *
 * NOTE(review): listing lines 412, 421, 425, 429 and 434 are absent from
 * this extract. They presumably hold the FormatNode array member of each
 * entry struct, the NUMDesc member of NUMCacheEntry, and the two cache
 * array definitions -- confirm against the real file.
 *
 * NOTE(review): DCH_CACHE_OVERHEAD counts a single bool although
 * DCHCacheEntry shows two (std and valid); MAXALIGN rounding likely absorbs
 * the difference -- confirm before relying on the size computation.
395  * ----------
396  */
397 #define DCH_CACHE_OVERHEAD \
398  MAXALIGN(sizeof(bool) + sizeof(int))
399 #define NUM_CACHE_OVERHEAD \
400  MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc))
401 
402 #define DCH_CACHE_SIZE \
403  ((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
404 #define NUM_CACHE_SIZE \
405  ((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
406 
407 #define DCH_CACHE_ENTRIES 20
408 #define NUM_CACHE_ENTRIES 20
409 
/* One cached datetime format picture. */
410 typedef struct
411 {
413  char str[DCH_CACHE_SIZE + 1]; /* picture text; +1 presumably for the terminating NUL */
414  bool std; /* presumably the 'std' argument of the DCH_cache_*() functions -- confirm */
415  bool valid;
416  int age; /* presumably compared against DCHCounter -- confirm */
417 } DCHCacheEntry;
418 
/* One cached number format picture. */
419 typedef struct
420 {
422  char str[NUM_CACHE_SIZE + 1]; /* picture text; +1 presumably for the terminating NUL */
423  bool valid;
424  int age; /* presumably compared against NUMCounter -- confirm */
426 } NUMCacheEntry;
427 
428 /* global cache for date/time format pictures */
430 static int n_DCHCache = 0; /* current number of entries */
431 static int DCHCounter = 0; /* aging-event counter */
432 
433 /* global cache for number format pictures */
435 static int n_NUMCache = 0; /* current number of entries */
436 static int NUMCounter = 0; /* aging-event counter */
437 
438 /* ----------
439  * For char->date/time conversion
 *
 * Collects the individual fields read from an input string; it is the
 * output argument of DCH_from_char(). ZERO_tmfc() clears the whole struct
 * with memset().
 *
 * NOTE(review): listing line 444 is absent from this extract. DEBUG_TMFC()
 * reads a 'mode' member and from_char_set_mode() takes a FromCharDateMode,
 * so a "FromCharDateMode mode;" member presumably belongs there -- confirm.
440  * ----------
441  */
442 typedef struct
443 {
445  int hh,
446  pm,
447  mi,
448  ss,
449  ssss,
450  d, /* stored as 1-7, Sunday = 1, 0 means missing */
451  dd,
452  ddd,
453  mm,
454  ms,
455  year,
456  bc,
457  ww,
458  w,
459  cc,
460  j,
461  us,
462  yysz, /* is it YY or YYYY ? */
463  clock, /* 12 or 24 hour clock? */
464  tzsign, /* +1, -1 or 0 if timezone info is absent */
465  tzh,
466  tzm,
467  ff; /* fractional precision */
468 } TmFromChar;
469 
470 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
471 
472 /* ----------
473  * Debug
 *
 * With DEBUG_TO_FROM_CHAR defined, DEBUG_TMFC() logs the members of a
 * TmFromChar and DEBUG_TM() logs the members of a tm-style struct via
 * elog() at level DEBUG_elog_output. Otherwise both expand to nothing.
 * DEBUG_TMFC() does not print tzsign, tzh, tzm or ff.
 *
 * NOTE(review): the DEBUG_TM() format string labels tm_isdst as "nisdst",
 * which looks like a typo for "isdst"; it is left untouched here because it
 * is runtime text.
474  * ----------
475  */
476 #ifdef DEBUG_TO_FROM_CHAR
477 #define DEBUG_TMFC(_X) \
478  elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d", \
479  (_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
480  (_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
481  (_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
482  (_X)->yysz, (_X)->clock)
483 #define DEBUG_TM(_X) \
484  elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nnisdst %d\nmon %d\n",\
485  (_X)->tm_sec, (_X)->tm_year,\
486  (_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\
487  (_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
488 #else
489 #define DEBUG_TMFC(_X)
490 #define DEBUG_TM(_X)
491 #endif
492 
493 /* ----------
494  * Datetime to char conversion
 *
 * TmToChar is the input of DCH_to_char(). The tmtc*() macros are accessors
 * for its members; tmtcTm() yields a pointer, the other two yield lvalues.
495  * ----------
496  */
497 typedef struct TmToChar
498 {
499  struct pg_tm tm; /* classic 'tm' struct */
500  fsec_t fsec; /* fractional seconds */
501  const char *tzn; /* timezone */
502 } TmToChar;
503 
504 #define tmtcTm(_X) (&(_X)->tm)
505 #define tmtcTzn(_X) ((_X)->tzn)
506 #define tmtcFsec(_X) ((_X)->fsec)
507 
/*
 * Reset a tm-style struct: tm_mday and tm_mon are set to 1, tm_zone to NULL,
 * and the other listed members to 0.
 */
508 #define ZERO_tm(_X) \
509 do { \
510  (_X)->tm_sec = (_X)->tm_year = (_X)->tm_min = (_X)->tm_wday = \
511  (_X)->tm_hour = (_X)->tm_yday = (_X)->tm_isdst = 0; \
512  (_X)->tm_mday = (_X)->tm_mon = 1; \
513  (_X)->tm_zone = NULL; \
514 } while(0)
515 
/* Reset a TmToChar: ZERO_tm() on its tm member, fsec = 0, tzn = NULL. */
516 #define ZERO_tmtc(_X) \
517 do { \
518  ZERO_tm( tmtcTm(_X) ); \
519  tmtcFsec(_X) = 0; \
520  tmtcTzn(_X) = NULL; \
521 } while(0)
522 
523 /*
524  * to_char(time) appears to to_char() as an interval, so this check
525  * is really for interval and time data types.
 *
 * Raises an ERROR when a variable named is_interval, which must be in scope
 * where the macro is expanded, is true.
526  */
527 #define INVALID_FOR_INTERVAL \
528 do { \
529  if (is_interval) \
530  ereport(ERROR, \
531  (errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
532  errmsg("invalid format specification for an interval value"), \
533  errhint("Intervals are not tied to specific calendar dates."))); \
534 } while(0)
535 
536 /*****************************************************************************
537  * KeyWord definitions
538  *****************************************************************************/
539 
540 /* ----------
541  * Suffixes (FormatNode.suffix is an OR of these codes)
542  * ----------
543  */
544 #define DCH_S_FM 0x01
545 #define DCH_S_TH 0x02
546 #define DCH_S_th 0x04
547 #define DCH_S_SP 0x08
548 #define DCH_S_TM 0x10
549 
550 /* ----------
551  * Suffix tests
 *
 * Each test takes a suffix bitmask and yields 1 or 0, except S_TH_TYPE(),
 * which yields TH_UPPER when DCH_S_TH is set and TH_LOWER otherwise (so it
 * is only meaningful once S_THth() is known to be true).
552  * ----------
553  */
554 #define S_THth(_s) ((((_s) & DCH_S_TH) || ((_s) & DCH_S_th)) ? 1 : 0)
555 #define S_TH(_s) (((_s) & DCH_S_TH) ? 1 : 0)
556 #define S_th(_s) (((_s) & DCH_S_th) ? 1 : 0)
557 #define S_TH_TYPE(_s) (((_s) & DCH_S_TH) ? TH_UPPER : TH_LOWER)
558 
559 /* Oracle toggles FM behavior, we don't; see docs. */
560 #define S_FM(_s) (((_s) & DCH_S_FM) ? 1 : 0)
561 #define S_SP(_s) (((_s) & DCH_S_SP) ? 1 : 0)
562 #define S_TM(_s) (((_s) & DCH_S_TM) ? 1 : 0)
563 
564 /* ----------
565  * Suffixes definition for DATE-TIME TO/FROM CHAR
 *
 * Columns are: name, len, id, type. "FM"/"fm" and "TM"/"tm" are prefixes;
 * "TH", "th" and "SP" are postfixes. There is no lower-case "sp" entry, and
 * "TH" and "th" map to different codes. The table ends with a NULL-name
 * entry.
566  * ----------
567  */
568 #define TM_SUFFIX_LEN 2
569 
570 static const KeySuffix DCH_suff[] = {
571  {"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
572  {"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
573  {"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX},
574  {"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
575  {"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
576  {"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
577  {"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
578  /* last */
579  {NULL, 0, 0, 0}
580 };
581 
582 
583 /* ----------
584  * Format-pictures (KeyWord).
585  *
586  * The KeyWord tables are sorted alphabetically, *BUT* keywords that share
587  * a prefix are ordered from the most complicated (longest) to the simplest:
588  *
589  * (example: "DDD","DD","Day","D" )
590  *
591  * This ordering is required by the sequential search, because the input
592  * string has no explicit end of keyword: which keyword is in "HH12blabla",
593  * "HH" or "HH12"? "HH12" must be tried first, because "HH" also matches the
594  * string but is not the intended keyword.
595  *
596  * (!)
597  * - Keyword positions in the tables follow the order of the DCH/NUM_poz
 *   enums: index_seq_search() uses the enum values stored in the index
 *   arrays directly as positions in the keyword table.
598  * (!)
599  *
600  * For fast search an 'int index[]' is used; the index is an ASCII table from
601  * position 32 (' ') to 126 (~), holding the DCH_ / NUM_ enum value for each
602  * ASCII position, or -1 if no KeyWord starts with that char. Search example
603  * for string "MM":
604  * 1) look up index['M' - 32],
605  * 2) take the keyword position (enum DCH_MI) from the index
606  * 3) run a sequential search in keywords[] from this position
607  *
 * NOTE(review): almost all enumerator lines of DCH_poz and NUM_poz (and the
 * final enumerator after each "last" comment) are absent from this extract;
 * only DCH_FX is visible. Confirm against the real file.
 *
608  * ----------
609  */
610 
611 typedef enum
612 {
633  DCH_FX, /* global suffix */
722 
723  /* last */
725 } DCH_poz;
726 
727 typedef enum
728 {
765 
766  /* last */
768 } NUM_poz;
769 
770 /* ----------
771  * KeyWords for DATE-TIME version
 *
 * Entries are grouped by first character (marked in the right-hand
 * comments); within a group, longer keywords precede their own prefixes so
 * that index_seq_search() finds the longest match first. Most lower-case
 * keywords reuse the id of their upper-case counterpart; the text-valued
 * ones (a.d., day, dy, month, mon, rm, tz, ...) have ids of their own.
 * "SSSSS" and "SSSS" both map to DCH_SSSS. There are no lower-case entries
 * for "OF", "TZH" or "TZM". The table ends with a NULL-name entry.
772  * ----------
773  */
774 static const KeyWord DCH_keywords[] = {
775 /* name, len, id, is_digit, date_mode */
776  {"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE}, /* A */
777  {"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
778  {"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
779  {"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
780  {"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE}, /* B */
781  {"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
782  {"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* C */
783  {"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE}, /* D */
784  {"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
785  {"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
786  {"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
787  {"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
788  {"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
789  {"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
790  {"FF1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* F */
791  {"FF2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
792  {"FF3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
793  {"FF4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
794  {"FF5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
795  {"FF6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
796  {"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
797  {"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* H */
798  {"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
799  {"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
800  {"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* I */
801  {"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
802  {"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
803  {"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
804  {"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
805  {"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
806  {"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
807  {"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
808  {"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* M */
809  {"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
810  {"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
811  {"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
812  {"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
813  {"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
814  {"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
815  {"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* O */
816  {"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE}, /* P */
817  {"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
818  {"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
819  {"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
820  {"SSSSS", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* S */
821  {"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
822  {"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
823  {"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* T */
824  {"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
825  {"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
826  {"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* U */
827  {"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* W */
828  {"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
829  {"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* Y */
830  {"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
831  {"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
832  {"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
833  {"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
834  {"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE}, /* a */
835  {"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
836  {"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
837  {"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
838  {"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE}, /* b */
839  {"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
840  {"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* c */
841  {"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE}, /* d */
842  {"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
843  {"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
844  {"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
845  {"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
846  {"ff1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* f */
847  {"ff2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
848  {"ff3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
849  {"ff4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
850  {"ff5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
851  {"ff6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
852  {"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
853  {"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* h */
854  {"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
855  {"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
856  {"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* i */
857  {"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
858  {"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
859  {"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
860  {"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
861  {"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
862  {"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
863  {"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
864  {"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* m */
865  {"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
866  {"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
867  {"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
868  {"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
869  {"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE}, /* p */
870  {"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
871  {"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
872  {"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
873  {"sssss", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* s */
874  {"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
875  {"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
876  {"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE}, /* t */
877  {"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* u */
878  {"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* w */
879  {"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
880  {"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* y */
881  {"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
882  {"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
883  {"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
884  {"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
885 
886  /* last */
887  {NULL, 0, 0, 0, 0}
888 };
889 
890 /* ----------
891  * KeyWords for NUMBER version
892  *
893  * The is_digit and date_mode fields are not relevant here.
 *
 * Only name, len and id are initialized, so the remaining members are
 * zero-initialized. Most lower-case keywords reuse the id of their
 * upper-case counterpart; "rn" and "th" have ids of their own (NUM_rn,
 * NUM_th). Within the 'S' and 's' groups the two-letter keywords precede
 * the bare "S"/"s". The table ends with a NULL-name entry.
894  * ----------
895  */
896 static const KeyWord NUM_keywords[] = {
897 /* name, len, id is in Index */
898  {",", 1, NUM_COMMA}, /* , */
899  {".", 1, NUM_DEC}, /* . */
900  {"0", 1, NUM_0}, /* 0 */
901  {"9", 1, NUM_9}, /* 9 */
902  {"B", 1, NUM_B}, /* B */
903  {"C", 1, NUM_C}, /* C */
904  {"D", 1, NUM_D}, /* D */
905  {"EEEE", 4, NUM_E}, /* E */
906  {"FM", 2, NUM_FM}, /* F */
907  {"G", 1, NUM_G}, /* G */
908  {"L", 1, NUM_L}, /* L */
909  {"MI", 2, NUM_MI}, /* M */
910  {"PL", 2, NUM_PL}, /* P */
911  {"PR", 2, NUM_PR},
912  {"RN", 2, NUM_RN}, /* R */
913  {"SG", 2, NUM_SG}, /* S */
914  {"SP", 2, NUM_SP},
915  {"S", 1, NUM_S},
916  {"TH", 2, NUM_TH}, /* T */
917  {"V", 1, NUM_V}, /* V */
918  {"b", 1, NUM_B}, /* b */
919  {"c", 1, NUM_C}, /* c */
920  {"d", 1, NUM_D}, /* d */
921  {"eeee", 4, NUM_E}, /* e */
922  {"fm", 2, NUM_FM}, /* f */
923  {"g", 1, NUM_G}, /* g */
924  {"l", 1, NUM_L}, /* l */
925  {"mi", 2, NUM_MI}, /* m */
926  {"pl", 2, NUM_PL}, /* p */
927  {"pr", 2, NUM_PR},
928  {"rn", 2, NUM_rn}, /* r */
929  {"sg", 2, NUM_SG}, /* s */
930  {"sp", 2, NUM_SP},
931  {"s", 1, NUM_S},
932  {"th", 2, NUM_th}, /* t */
933  {"v", 1, NUM_V}, /* v */
934 
935  /* last */
936  {NULL, 0, 0}
937 };
938 
939 
940 /* ----------
941  * KeyWords index for DATE-TIME version
 *
 * Slot [c - ' '] holds the enum value, used by index_seq_search() as the
 * position in DCH_keywords of the first keyword starting with character c,
 * or -1 when no keyword starts with c.
 *
 * NOTE(review): listing line 955 (one row of initializers, presumably the
 * 'P'..'Y' range) is absent from this extract -- confirm against the real
 * file.
942  * ----------
943  */
944 static const int DCH_index[KeyWord_INDEX_SIZE] = {
945 /*
946 0 1 2 3 4 5 6 7 8 9
947 */
948  /*---- first 0..31 chars are skipped ----*/
949 
950  -1, -1, -1, -1, -1, -1, -1, -1,
951  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
952  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
953  -1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,
954  DCH_FF1, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF,
956  -1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,
957  DCH_day, -1, DCH_ff1, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,
958  -1, -1, DCH_p_m, DCH_q, DCH_rm, DCH_sssss, DCH_tz, DCH_us, -1, DCH_ww,
959  -1, DCH_y_yyy, -1, -1, -1, -1
960 
961  /*---- chars over 126 are skipped ----*/
962 };
963 
964 /* ----------
965  * KeyWords index for NUMBER version
 *
 * Slot [c - ' '] holds the enum value, used by index_seq_search() as the
 * position in NUM_keywords of the first keyword starting with character c,
 * or -1 when no keyword starts with c.
966  * ----------
967  */
968 static const int NUM_index[KeyWord_INDEX_SIZE] = {
969 /*
970 0 1 2 3 4 5 6 7 8 9
971 */
972  /*---- first 0..31 chars are skipped ----*/
973 
974  -1, -1, -1, -1, -1, -1, -1, -1,
975  -1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,
976  -1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,
977  -1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E,
978  NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,
979  NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,
980  -1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,
981  NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi,
982  -1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,
983  -1, -1, -1, -1, -1, -1
984 
985  /*---- chars over 126 are skipped ----*/
986 };
987 
988 /* ----------
989  * Number processor struct
 *
 * Working state shared by the NUM_*() helpers declared below
 * (NUM_prepare_locale(), NUM_numpart_from_char(), NUM_numpart_to_char()).
 *
 * NOTE(review): listing line 994 is absent from this extract, so at least
 * one leading member is not visible here -- confirm against the real file.
990  * ----------
991  */
992 typedef struct NUMProc
993 {
995  NUMDesc *Num; /* number description */
996 
997  int sign, /* '-' or '+' */
998  sign_wrote, /* was sign write */
999  num_count, /* number of write digits */
1000  num_in, /* is inside number */
1001  num_curr, /* current position in number */
1002  out_pre_spaces, /* spaces before first digit */
1003 
1004  read_dec, /* to_number - was read dec. point */
1005  read_post, /* to_number - number of dec. digit */
1006  read_pre; /* to_number - number non-dec. digit */
1007 
1008  char *number, /* string with number */
1009  *number_p, /* pointer to current number position */
1010  *inout, /* in / out buffer */
1011  *inout_p, /* pointer to current inout position */
1012  *last_relevant, /* last relevant number after decimal point */
1013 
1014  *L_negative_sign, /* Locale */
1015  *L_positive_sign,
1016  *decimal,
1017  *L_thousands_sep,
1018  *L_currency_symbol;
1019 } NUMProc;
1020 
1021 /* Return flags for DCH_from_char() */
/* (distinct single-bit values, so they can be OR'ed together) */
1022 #define DCH_DATED 0x01
1023 #define DCH_TIMED 0x02
1024 #define DCH_ZONED 0x04
1025 
1026 /* ----------
1027  * Functions
 *
 * Forward declarations of the file-local (static) functions. The two
 * dump_*() helpers exist only when DEBUG_TO_FROM_CHAR is defined. Functions
 * taking a 'bool *have_error' argument follow the RETURN_ERROR() /
 * CHECK_ERROR() convention described near the top of the file.
1028  * ----------
1029  */
1030 static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
1031  const int *index);
1032 static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
1033 static bool is_separator_char(const char *str);
1034 static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
1035 static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1036  const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num);
1037 
1038 static void DCH_to_char(FormatNode *node, bool is_interval,
1039  TmToChar *in, char *out, Oid collid);
1040 static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
1041  Oid collid, bool std, bool *have_error);
1042 
1043 #ifdef DEBUG_TO_FROM_CHAR
1044 static void dump_index(const KeyWord *k, const int *index);
1045 static void dump_node(FormatNode *node, int max);
1046 #endif
1047 
1048 static const char *get_th(char *num, int type);
1049 static char *str_numth(char *dest, char *num, int type);
1050 static int adjust_partial_year_to_2020(int year);
1051 static int strspace_len(const char *str);
1052 static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode,
1053  bool *have_error);
1054 static void from_char_set_int(int *dest, const int value, const FormatNode *node,
1055  bool *have_error);
1056 static int from_char_parse_int_len(int *dest, const char **src, const int len,
1057  FormatNode *node, bool *have_error);
1058 static int from_char_parse_int(int *dest, const char **src, FormatNode *node,
1059  bool *have_error);
1060 static int seq_search_ascii(const char *name, const char *const *array, int *len);
1061 static int seq_search_localized(const char *name, char **array, int *len,
1062  Oid collid);
1063 static int from_char_seq_search(int *dest, const char **src,
1064  const char *const *array,
1065  char **localized_array, Oid collid,
1066  FormatNode *node, bool *have_error);
1067 static void do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
1068  struct pg_tm *tm, fsec_t *fsec, int *fprec,
1069  uint32 *flags, bool *have_error);
1070 static char *fill_str(char *str, int c, int max);
1071 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
1072 static char *int_to_roman(int number);
1073 static void NUM_prepare_locale(NUMProc *Np);
1074 static char *get_last_relevant_decnum(char *num);
1075 static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
1076 static void NUM_numpart_to_char(NUMProc *Np, int id);
1077 static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
1078  char *number, int input_len, int to_char_out_pre_spaces,
1079  int sign, bool is_to_char, Oid collid);
1080 static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std);
1081 static DCHCacheEntry *DCH_cache_search(const char *str, bool std);
1082 static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std);
1083 static NUMCacheEntry *NUM_cache_getnew(const char *str);
1084 static NUMCacheEntry *NUM_cache_search(const char *str);
1085 static NUMCacheEntry *NUM_cache_fetch(const char *str);
1086 
1087 
1088 /* ----------
1089  * Fast sequential search, use index for data selection which
1090  * go to seq. cycle (it is very fast for unwanted strings)
1091  * (can't be used binary search in format parsing)
1092  * ----------
1093  */
1094 static const KeyWord *
1095 index_seq_search(const char *str, const KeyWord *kw, const int *index)
1096 {
1097  int poz;
1098 
1099  if (!KeyWord_INDEX_FILTER(*str))
1100  return NULL;
1101 
1102  if ((poz = *(index + (*str - ' '))) > -1)
1103  {
1104  const KeyWord *k = kw + poz;
1105 
1106  do
1107  {
1108  if (strncmp(str, k->name, k->len) == 0)
1109  return k;
1110  k++;
1111  if (!k->name)
1112  return NULL;
1113  } while (*str == *k->name);
1114  }
1115  return NULL;
1116 }
1117 
1118 static const KeySuffix *
1119 suff_search(const char *str, const KeySuffix *suf, int type)
1120 {
1121  const KeySuffix *s;
1122 
1123  for (s = suf; s->name != NULL; s++)
1124  {
1125  if (s->type != type)
1126  continue;
1127 
1128  if (strncmp(str, s->name, s->len) == 0)
1129  return s;
1130  }
1131  return NULL;
1132 }
1133 
/*
 * Report whether the first byte of 'str' is a separator character, i.e. a
 * printable ASCII character (0x21..0x7E) that is neither a letter nor a
 * digit.  Space, control characters, the string terminator and non-ASCII
 * bytes are not separators.
 */
static bool
is_separator_char(const char *str)
{
    /* ASCII printable character, but not letter or digit */
    return (*str > 0x20 && *str < 0x7F &&
            !(*str >= 'A' && *str <= 'Z') &&
            !(*str >= 'a' && *str <= 'z') &&
            !(*str >= '0' && *str <= '9'));
}
1143 
1144 /* ----------
1145  * Prepare NUMDesc (number description struct) via FormatNode struct
1146  * ----------
1147  */
1148 static void
1150 {
1151  if (n->type != NODE_TYPE_ACTION)
1152  return;
1153 
1154  if (IS_EEEE(num) && n->key->id != NUM_E)
1155  ereport(ERROR,
1156  (errcode(ERRCODE_SYNTAX_ERROR),
1157  errmsg("\"EEEE\" must be the last pattern used")));
1158 
1159  switch (n->key->id)
1160  {
1161  case NUM_9:
1162  if (IS_BRACKET(num))
1163  ereport(ERROR,
1164  (errcode(ERRCODE_SYNTAX_ERROR),
1165  errmsg("\"9\" must be ahead of \"PR\"")));
1166  if (IS_MULTI(num))
1167  {
1168  ++num->multi;
1169  break;
1170  }
1171  if (IS_DECIMAL(num))
1172  ++num->post;
1173  else
1174  ++num->pre;
1175  break;
1176 
1177  case NUM_0:
1178  if (IS_BRACKET(num))
1179  ereport(ERROR,
1180  (errcode(ERRCODE_SYNTAX_ERROR),
1181  errmsg("\"0\" must be ahead of \"PR\"")));
1182  if (!IS_ZERO(num) && !IS_DECIMAL(num))
1183  {
1184  num->flag |= NUM_F_ZERO;
1185  num->zero_start = num->pre + 1;
1186  }
1187  if (!IS_DECIMAL(num))
1188  ++num->pre;
1189  else
1190  ++num->post;
1191 
1192  num->zero_end = num->pre + num->post;
1193  break;
1194 
1195  case NUM_B:
1196  if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
1197  num->flag |= NUM_F_BLANK;
1198  break;
1199 
1200  case NUM_D:
1201  num->flag |= NUM_F_LDECIMAL;
1202  num->need_locale = true;
1203  /* FALLTHROUGH */
1204  case NUM_DEC:
1205  if (IS_DECIMAL(num))
1206  ereport(ERROR,
1207  (errcode(ERRCODE_SYNTAX_ERROR),
1208  errmsg("multiple decimal points")));
1209  if (IS_MULTI(num))
1210  ereport(ERROR,
1211  (errcode(ERRCODE_SYNTAX_ERROR),
1212  errmsg("cannot use \"V\" and decimal point together")));
1213  num->flag |= NUM_F_DECIMAL;
1214  break;
1215 
1216  case NUM_FM:
1217  num->flag |= NUM_F_FILLMODE;
1218  break;
1219 
1220  case NUM_S:
1221  if (IS_LSIGN(num))
1222  ereport(ERROR,
1223  (errcode(ERRCODE_SYNTAX_ERROR),
1224  errmsg("cannot use \"S\" twice")));
1225  if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
1226  ereport(ERROR,
1227  (errcode(ERRCODE_SYNTAX_ERROR),
1228  errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
1229  if (!IS_DECIMAL(num))
1230  {
1231  num->lsign = NUM_LSIGN_PRE;
1232  num->pre_lsign_num = num->pre;
1233  num->need_locale = true;
1234  num->flag |= NUM_F_LSIGN;
1235  }
1236  else if (num->lsign == NUM_LSIGN_NONE)
1237  {
1238  num->lsign = NUM_LSIGN_POST;
1239  num->need_locale = true;
1240  num->flag |= NUM_F_LSIGN;
1241  }
1242  break;
1243 
1244  case NUM_MI:
1245  if (IS_LSIGN(num))
1246  ereport(ERROR,
1247  (errcode(ERRCODE_SYNTAX_ERROR),
1248  errmsg("cannot use \"S\" and \"MI\" together")));
1249  num->flag |= NUM_F_MINUS;
1250  if (IS_DECIMAL(num))
1251  num->flag |= NUM_F_MINUS_POST;
1252  break;
1253 
1254  case NUM_PL:
1255  if (IS_LSIGN(num))
1256  ereport(ERROR,
1257  (errcode(ERRCODE_SYNTAX_ERROR),
1258  errmsg("cannot use \"S\" and \"PL\" together")));
1259  num->flag |= NUM_F_PLUS;
1260  if (IS_DECIMAL(num))
1261  num->flag |= NUM_F_PLUS_POST;
1262  break;
1263 
1264  case NUM_SG:
1265  if (IS_LSIGN(num))
1266  ereport(ERROR,
1267  (errcode(ERRCODE_SYNTAX_ERROR),
1268  errmsg("cannot use \"S\" and \"SG\" together")));
1269  num->flag |= NUM_F_MINUS;
1270  num->flag |= NUM_F_PLUS;
1271  break;
1272 
1273  case NUM_PR:
1274  if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
1275  ereport(ERROR,
1276  (errcode(ERRCODE_SYNTAX_ERROR),
1277  errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
1278  num->flag |= NUM_F_BRACKET;
1279  break;
1280 
1281  case NUM_rn:
1282  case NUM_RN:
1283  num->flag |= NUM_F_ROMAN;
1284  break;
1285 
1286  case NUM_L:
1287  case NUM_G:
1288  num->need_locale = true;
1289  break;
1290 
1291  case NUM_V:
1292  if (IS_DECIMAL(num))
1293  ereport(ERROR,
1294  (errcode(ERRCODE_SYNTAX_ERROR),
1295  errmsg("cannot use \"V\" and decimal point together")));
1296  num->flag |= NUM_F_MULTI;
1297  break;
1298 
1299  case NUM_E:
1300  if (IS_EEEE(num))
1301  ereport(ERROR,
1302  (errcode(ERRCODE_SYNTAX_ERROR),
1303  errmsg("cannot use \"EEEE\" twice")));
1304  if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
1305  IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
1306  IS_ROMAN(num) || IS_MULTI(num))
1307  ereport(ERROR,
1308  (errcode(ERRCODE_SYNTAX_ERROR),
1309  errmsg("\"EEEE\" is incompatible with other formats"),
1310  errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
1311  num->flag |= NUM_F_EEEE;
1312  break;
1313  }
1314 }
1315 
1316 /* ----------
1317  * Format parser, search small keywords and keyword's suffixes, and make
1318  * format-node tree.
1319  *
1320  * for DATE-TIME & NUMBER version
1321  * ----------
1322  */
1323 static void
1324 parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1325  const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num)
1326 {
1327  FormatNode *n;
1328 
1329 #ifdef DEBUG_TO_FROM_CHAR
1330  elog(DEBUG_elog_output, "to_char/number(): run parser");
1331 #endif
1332 
1333  n = node;
1334 
1335  while (*str)
1336  {
1337  int suffix = 0;
1338  const KeySuffix *s;
1339 
1340  /*
1341  * Prefix
1342  */
1343  if ((flags & DCH_FLAG) &&
1344  (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
1345  {
1346  suffix |= s->id;
1347  if (s->len)
1348  str += s->len;
1349  }
1350 
1351  /*
1352  * Keyword
1353  */
1354  if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
1355  {
1356  n->type = NODE_TYPE_ACTION;
1357  n->suffix = suffix;
1358  if (n->key->len)
1359  str += n->key->len;
1360 
1361  /*
1362  * NUM version: Prepare global NUMDesc struct
1363  */
1364  if (flags & NUM_FLAG)
1365  NUMDesc_prepare(Num, n);
1366 
1367  /*
1368  * Postfix
1369  */
1370  if ((flags & DCH_FLAG) && *str &&
1371  (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
1372  {
1373  n->suffix |= s->id;
1374  if (s->len)
1375  str += s->len;
1376  }
1377 
1378  n++;
1379  }
1380  else if (*str)
1381  {
1382  int chlen;
1383 
1384  if ((flags & STD_FLAG) && *str != '"')
1385  {
1386  /*
1387  * Standard mode, allow only following separators: "-./,':; ".
1388  * However, we support double quotes even in standard mode
1389  * (see below). This is our extension of standard mode.
1390  */
1391  if (strchr("-./,':; ", *str) == NULL)
1392  ereport(ERROR,
1393  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
1394  errmsg("invalid datetime format separator: \"%s\"",
1395  pnstrdup(str, pg_mblen(str)))));
1396 
1397  if (*str == ' ')
1398  n->type = NODE_TYPE_SPACE;
1399  else
1401 
1402  n->character[0] = *str;
1403  n->character[1] = '\0';
1404  n->key = NULL;
1405  n->suffix = 0;
1406  n++;
1407  str++;
1408  }
1409  else if (*str == '"')
1410  {
1411  /*
1412  * Process double-quoted literal string, if any
1413  */
1414  str++;
1415  while (*str)
1416  {
1417  if (*str == '"')
1418  {
1419  str++;
1420  break;
1421  }
1422  /* backslash quotes the next character, if any */
1423  if (*str == '\\' && *(str + 1))
1424  str++;
1425  chlen = pg_mblen(str);
1426  n->type = NODE_TYPE_CHAR;
1427  memcpy(n->character, str, chlen);
1428  n->character[chlen] = '\0';
1429  n->key = NULL;
1430  n->suffix = 0;
1431  n++;
1432  str += chlen;
1433  }
1434  }
1435  else
1436  {
1437  /*
1438  * Outside double-quoted strings, backslash is only special if
1439  * it immediately precedes a double quote.
1440  */
1441  if (*str == '\\' && *(str + 1) == '"')
1442  str++;
1443  chlen = pg_mblen(str);
1444 
1445  if ((flags & DCH_FLAG) && is_separator_char(str))
1447  else if (isspace((unsigned char) *str))
1448  n->type = NODE_TYPE_SPACE;
1449  else
1450  n->type = NODE_TYPE_CHAR;
1451 
1452  memcpy(n->character, str, chlen);
1453  n->character[chlen] = '\0';
1454  n->key = NULL;
1455  n->suffix = 0;
1456  n++;
1457  str += chlen;
1458  }
1459  }
1460  }
1461 
1462  n->type = NODE_TYPE_END;
1463  n->suffix = 0;
1464 }
1465 
/* ----------
 * DEBUG: print the FormatNode array, one elog line per node.
 *
 * Walks at most 'max' + 1 nodes and stops early at NODE_TYPE_END.
 * Compiled only when DEBUG_TO_FROM_CHAR is defined.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR

/* printable form of the TH/th and FM suffix bits */
#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
#define DUMP_FM(_suf)   (S_FM(_suf) ? "FM" : " ")

static void
dump_node(FormatNode *node, int max)
{
    int         pos;

    elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");

    for (pos = 0; pos <= max; pos++)
    {
        FormatNode *cur = &node[pos];

        switch (cur->type)
        {
            case NODE_TYPE_ACTION:
                elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
                     pos, cur->key->name,
                     DUMP_THth(cur->suffix), DUMP_FM(cur->suffix));
                break;

            case NODE_TYPE_CHAR:
                elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
                     pos, cur->character);
                break;

            case NODE_TYPE_END:
                elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", pos);
                return;

            default:
                elog(DEBUG_elog_output, "%d:\t unknown NODE!", pos);
                break;
        }
    }
}
#endif                          /* DEBUG */
1501 
1502 /*****************************************************************************
1503  * Private utils
1504  *****************************************************************************/
1505 
1506 /* ----------
1507  * Return ST/ND/RD/TH for simple (1..9) numbers
1508  * type --> 0 upper, 1 lower
1509  * ----------
1510  */
1511 static const char *
1512 get_th(char *num, int type)
1513 {
1514  int len = strlen(num),
1515  last;
1516 
1517  last = *(num + (len - 1));
1518  if (!isdigit((unsigned char) last))
1519  ereport(ERROR,
1520  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1521  errmsg("\"%s\" is not a number", num)));
1522 
1523  /*
1524  * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
1525  * 'ST/st', 'ND/nd', 'RD/rd', respectively
1526  */
1527  if ((len > 1) && (num[len - 2] == '1'))
1528  last = 0;
1529 
1530  switch (last)
1531  {
1532  case '1':
1533  if (type == TH_UPPER)
1534  return numTH[0];
1535  return numth[0];
1536  case '2':
1537  if (type == TH_UPPER)
1538  return numTH[1];
1539  return numth[1];
1540  case '3':
1541  if (type == TH_UPPER)
1542  return numTH[2];
1543  return numth[2];
1544  default:
1545  if (type == TH_UPPER)
1546  return numTH[3];
1547  return numth[3];
1548  }
1549 }
1550 
/* ----------
 * Turn a number string into its ordinal form by appending the suffix
 * chosen by get_th().
 *
 * 'num' is copied into 'dest' first unless both point to the same buffer;
 * the suffix is then appended to 'dest', which is returned.  'dest' must
 * have room for the number plus the suffix.
 *
 *	type	passed through to get_th()
 * ----------
 */
static char *
str_numth(char *dest, char *num, int type)
{
    const char *ordinal;

    if (dest != num)
        strcpy(dest, num);

    /* pick the suffix before 'dest' (possibly the same buffer) is extended */
    ordinal = get_th(num, type);
    strcat(dest, ordinal);

    return dest;
}
1564 
1565 /*****************************************************************************
1566  * upper/lower/initcap functions
1567  *****************************************************************************/
1568 
1569 #ifdef USE_ICU
1570 
1571 typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
1572  const UChar *src, int32_t srcLength,
1573  const char *locale,
1574  UErrorCode *pErrorCode);
1575 
1576 static int32_t
1577 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
1578  UChar **buff_dest, UChar *buff_source, int32_t len_source)
1579 {
1580  UErrorCode status;
1581  int32_t len_dest;
1582 
1583  len_dest = len_source; /* try first with same length */
1584  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1585  status = U_ZERO_ERROR;
1586  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1587  mylocale->info.icu.locale, &status);
1588  if (status == U_BUFFER_OVERFLOW_ERROR)
1589  {
1590  /* try again with adjusted length */
1591  pfree(*buff_dest);
1592  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1593  status = U_ZERO_ERROR;
1594  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1595  mylocale->info.icu.locale, &status);
1596  }
1597  if (U_FAILURE(status))
1598  ereport(ERROR,
1599  (errmsg("case conversion failed: %s", u_errorName(status))));
1600  return len_dest;
1601 }
1602 
1603 static int32_t
1604 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
1605  const UChar *src, int32_t srcLength,
1606  const char *locale,
1607  UErrorCode *pErrorCode)
1608 {
1609  return u_strToTitle(dest, destCapacity, src, srcLength,
1610  NULL, locale, pErrorCode);
1611 }
1612 
1613 #endif /* USE_ICU */
1614 
1615 /*
1616  * If the system provides the needed functions for wide-character manipulation
1617  * (which are all standardized by C99), then we implement upper/lower/initcap
1618  * using wide-character functions, if necessary. Otherwise we use the
1619  * traditional <ctype.h> functions, which of course will not work as desired
1620  * in multibyte character sets. Note that in either case we are effectively
1621  * assuming that the database character encoding matches the encoding implied
1622  * by LC_CTYPE.
1623  *
1624  * If the system provides locale_t and associated functions (which are
1625  * standardized by Open Group's XBD), we can support collations that are
1626  * neither default nor C. The code is written to handle both combinations
1627  * of have-wide-characters and have-locale_t, though it's rather unlikely
1628  * a platform would have the latter without the former.
1629  */
1630 
1631 /*
1632  * collation-aware, wide-character-aware lower function
1633  *
1634  * We pass the number of bytes so we can pass varlena and char*
1635  * to this function. The result is a palloc'd, null-terminated string.
1636  */
1637 char *
1638 str_tolower(const char *buff, size_t nbytes, Oid collid)
1639 {
1640  char *result;
1641 
1642  if (!buff)
1643  return NULL;
1644 
1645  /* C/POSIX collations use this path regardless of database encoding */
1646  if (lc_ctype_is_c(collid))
1647  {
1648  result = asc_tolower(buff, nbytes);
1649  }
1650  else
1651  {
1652  pg_locale_t mylocale = 0;
1653 
1654  if (collid != DEFAULT_COLLATION_OID)
1655  {
1656  if (!OidIsValid(collid))
1657  {
1658  /*
1659  * This typically means that the parser could not resolve a
1660  * conflict of implicit collations, so report it that way.
1661  */
1662  ereport(ERROR,
1663  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1664  errmsg("could not determine which collation to use for %s function",
1665  "lower()"),
1666  errhint("Use the COLLATE clause to set the collation explicitly.")));
1667  }
1668  mylocale = pg_newlocale_from_collation(collid);
1669  }
1670 
1671 #ifdef USE_ICU
1672  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1673  {
1674  int32_t len_uchar;
1675  int32_t len_conv;
1676  UChar *buff_uchar;
1677  UChar *buff_conv;
1678 
1679  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1680  len_conv = icu_convert_case(u_strToLower, mylocale,
1681  &buff_conv, buff_uchar, len_uchar);
1682  icu_from_uchar(&result, buff_conv, len_conv);
1683  pfree(buff_uchar);
1684  pfree(buff_conv);
1685  }
1686  else
1687 #endif
1688  {
1690  {
1691  wchar_t *workspace;
1692  size_t curr_char;
1693  size_t result_size;
1694 
1695  /* Overflow paranoia */
1696  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1697  ereport(ERROR,
1698  (errcode(ERRCODE_OUT_OF_MEMORY),
1699  errmsg("out of memory")));
1700 
1701  /* Output workspace cannot have more codes than input bytes */
1702  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1703 
1704  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1705 
1706  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1707  {
1708 #ifdef HAVE_LOCALE_T
1709  if (mylocale)
1710  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1711  else
1712 #endif
1713  workspace[curr_char] = towlower(workspace[curr_char]);
1714  }
1715 
1716  /*
1717  * Make result large enough; case change might change number
1718  * of bytes
1719  */
1720  result_size = curr_char * pg_database_encoding_max_length() + 1;
1721  result = palloc(result_size);
1722 
1723  wchar2char(result, workspace, result_size, mylocale);
1724  pfree(workspace);
1725  }
1726  else
1727  {
1728  char *p;
1729 
1730  result = pnstrdup(buff, nbytes);
1731 
1732  /*
1733  * Note: we assume that tolower_l() will not be so broken as
1734  * to need an isupper_l() guard test. When using the default
1735  * collation, we apply the traditional Postgres behavior that
1736  * forces ASCII-style treatment of I/i, but in non-default
1737  * collations you get exactly what the collation says.
1738  */
1739  for (p = result; *p; p++)
1740  {
1741 #ifdef HAVE_LOCALE_T
1742  if (mylocale)
1743  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1744  else
1745 #endif
1746  *p = pg_tolower((unsigned char) *p);
1747  }
1748  }
1749  }
1750  }
1751 
1752  return result;
1753 }
1754 
1755 /*
1756  * collation-aware, wide-character-aware upper function
1757  *
1758  * We pass the number of bytes so we can pass varlena and char*
1759  * to this function. The result is a palloc'd, null-terminated string.
1760  */
1761 char *
1762 str_toupper(const char *buff, size_t nbytes, Oid collid)
1763 {
1764  char *result;
1765 
1766  if (!buff)
1767  return NULL;
1768 
1769  /* C/POSIX collations use this path regardless of database encoding */
1770  if (lc_ctype_is_c(collid))
1771  {
1772  result = asc_toupper(buff, nbytes);
1773  }
1774  else
1775  {
1776  pg_locale_t mylocale = 0;
1777 
1778  if (collid != DEFAULT_COLLATION_OID)
1779  {
1780  if (!OidIsValid(collid))
1781  {
1782  /*
1783  * This typically means that the parser could not resolve a
1784  * conflict of implicit collations, so report it that way.
1785  */
1786  ereport(ERROR,
1787  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1788  errmsg("could not determine which collation to use for %s function",
1789  "upper()"),
1790  errhint("Use the COLLATE clause to set the collation explicitly.")));
1791  }
1792  mylocale = pg_newlocale_from_collation(collid);
1793  }
1794 
1795 #ifdef USE_ICU
1796  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1797  {
1798  int32_t len_uchar,
1799  len_conv;
1800  UChar *buff_uchar;
1801  UChar *buff_conv;
1802 
1803  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1804  len_conv = icu_convert_case(u_strToUpper, mylocale,
1805  &buff_conv, buff_uchar, len_uchar);
1806  icu_from_uchar(&result, buff_conv, len_conv);
1807  pfree(buff_uchar);
1808  pfree(buff_conv);
1809  }
1810  else
1811 #endif
1812  {
1814  {
1815  wchar_t *workspace;
1816  size_t curr_char;
1817  size_t result_size;
1818 
1819  /* Overflow paranoia */
1820  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1821  ereport(ERROR,
1822  (errcode(ERRCODE_OUT_OF_MEMORY),
1823  errmsg("out of memory")));
1824 
1825  /* Output workspace cannot have more codes than input bytes */
1826  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1827 
1828  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1829 
1830  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1831  {
1832 #ifdef HAVE_LOCALE_T
1833  if (mylocale)
1834  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1835  else
1836 #endif
1837  workspace[curr_char] = towupper(workspace[curr_char]);
1838  }
1839 
1840  /*
1841  * Make result large enough; case change might change number
1842  * of bytes
1843  */
1844  result_size = curr_char * pg_database_encoding_max_length() + 1;
1845  result = palloc(result_size);
1846 
1847  wchar2char(result, workspace, result_size, mylocale);
1848  pfree(workspace);
1849  }
1850  else
1851  {
1852  char *p;
1853 
1854  result = pnstrdup(buff, nbytes);
1855 
1856  /*
1857  * Note: we assume that toupper_l() will not be so broken as
1858  * to need an islower_l() guard test. When using the default
1859  * collation, we apply the traditional Postgres behavior that
1860  * forces ASCII-style treatment of I/i, but in non-default
1861  * collations you get exactly what the collation says.
1862  */
1863  for (p = result; *p; p++)
1864  {
1865 #ifdef HAVE_LOCALE_T
1866  if (mylocale)
1867  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1868  else
1869 #endif
1870  *p = pg_toupper((unsigned char) *p);
1871  }
1872  }
1873  }
1874  }
1875 
1876  return result;
1877 }
1878 
1879 /*
1880  * collation-aware, wide-character-aware initcap function
1881  *
1882  * We pass the number of bytes so we can pass varlena and char*
1883  * to this function. The result is a palloc'd, null-terminated string.
1884  */
1885 char *
1886 str_initcap(const char *buff, size_t nbytes, Oid collid)
1887 {
1888  char *result;
1889  int wasalnum = false;
1890 
1891  if (!buff)
1892  return NULL;
1893 
1894  /* C/POSIX collations use this path regardless of database encoding */
1895  if (lc_ctype_is_c(collid))
1896  {
1897  result = asc_initcap(buff, nbytes);
1898  }
1899  else
1900  {
1901  pg_locale_t mylocale = 0;
1902 
1903  if (collid != DEFAULT_COLLATION_OID)
1904  {
1905  if (!OidIsValid(collid))
1906  {
1907  /*
1908  * This typically means that the parser could not resolve a
1909  * conflict of implicit collations, so report it that way.
1910  */
1911  ereport(ERROR,
1912  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1913  errmsg("could not determine which collation to use for %s function",
1914  "initcap()"),
1915  errhint("Use the COLLATE clause to set the collation explicitly.")));
1916  }
1917  mylocale = pg_newlocale_from_collation(collid);
1918  }
1919 
1920 #ifdef USE_ICU
1921  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1922  {
1923  int32_t len_uchar,
1924  len_conv;
1925  UChar *buff_uchar;
1926  UChar *buff_conv;
1927 
1928  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1929  len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
1930  &buff_conv, buff_uchar, len_uchar);
1931  icu_from_uchar(&result, buff_conv, len_conv);
1932  pfree(buff_uchar);
1933  pfree(buff_conv);
1934  }
1935  else
1936 #endif
1937  {
1939  {
1940  wchar_t *workspace;
1941  size_t curr_char;
1942  size_t result_size;
1943 
1944  /* Overflow paranoia */
1945  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1946  ereport(ERROR,
1947  (errcode(ERRCODE_OUT_OF_MEMORY),
1948  errmsg("out of memory")));
1949 
1950  /* Output workspace cannot have more codes than input bytes */
1951  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1952 
1953  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1954 
1955  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1956  {
1957 #ifdef HAVE_LOCALE_T
1958  if (mylocale)
1959  {
1960  if (wasalnum)
1961  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1962  else
1963  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1964  wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
1965  }
1966  else
1967 #endif
1968  {
1969  if (wasalnum)
1970  workspace[curr_char] = towlower(workspace[curr_char]);
1971  else
1972  workspace[curr_char] = towupper(workspace[curr_char]);
1973  wasalnum = iswalnum(workspace[curr_char]);
1974  }
1975  }
1976 
1977  /*
1978  * Make result large enough; case change might change number
1979  * of bytes
1980  */
1981  result_size = curr_char * pg_database_encoding_max_length() + 1;
1982  result = palloc(result_size);
1983 
1984  wchar2char(result, workspace, result_size, mylocale);
1985  pfree(workspace);
1986  }
1987  else
1988  {
1989  char *p;
1990 
1991  result = pnstrdup(buff, nbytes);
1992 
1993  /*
1994  * Note: we assume that toupper_l()/tolower_l() will not be so
1995  * broken as to need guard tests. When using the default
1996  * collation, we apply the traditional Postgres behavior that
1997  * forces ASCII-style treatment of I/i, but in non-default
1998  * collations you get exactly what the collation says.
1999  */
2000  for (p = result; *p; p++)
2001  {
2002 #ifdef HAVE_LOCALE_T
2003  if (mylocale)
2004  {
2005  if (wasalnum)
2006  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
2007  else
2008  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
2009  wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
2010  }
2011  else
2012 #endif
2013  {
2014  if (wasalnum)
2015  *p = pg_tolower((unsigned char) *p);
2016  else
2017  *p = pg_toupper((unsigned char) *p);
2018  wasalnum = isalnum((unsigned char) *p);
2019  }
2020  }
2021  }
2022  }
2023  }
2024 
2025  return result;
2026 }
2027 
/*
 * ASCII-only lower-casing.
 *
 * Takes an explicit byte count so that both varlena payloads and plain
 * char* strings can be passed.  Returns a palloc'd, null-terminated copy
 * with every byte run through pg_ascii_tolower(), or NULL if 'buff' is
 * NULL.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
    char       *result;
    char       *cp;

    if (buff == NULL)
        return NULL;

    result = pnstrdup(buff, nbytes);

    cp = result;
    while (*cp != '\0')
    {
        *cp = pg_ascii_tolower((unsigned char) *cp);
        cp++;
    }

    return result;
}
2050 
/*
 * ASCII-only upper-casing.
 *
 * Takes an explicit byte count so that both varlena payloads and plain
 * char* strings can be passed.  Returns a palloc'd, null-terminated copy
 * with every byte run through pg_ascii_toupper(), or NULL if 'buff' is
 * NULL.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
    char       *result;
    char       *cp;

    if (buff == NULL)
        return NULL;

    result = pnstrdup(buff, nbytes);

    cp = result;
    while (*cp != '\0')
    {
        *cp = pg_ascii_toupper((unsigned char) *cp);
        cp++;
    }

    return result;
}
2073 
/*
 * ASCII-only initcap.
 *
 * Takes an explicit byte count so that both varlena payloads and plain
 * char* strings can be passed.  Returns a palloc'd, null-terminated copy in
 * which a byte is upper-cased when the preceding byte was not an ASCII
 * letter or digit and lower-cased otherwise, or NULL if 'buff' is NULL.
 */
char *
asc_initcap(const char *buff, size_t nbytes)
{
    char       *result;
    char       *cp;
    bool        in_word = false;

    if (buff == NULL)
        return NULL;

    result = pnstrdup(buff, nbytes);

    for (cp = result; *cp != '\0'; cp++)
    {
        char        c;

        if (in_word)
            c = pg_ascii_tolower((unsigned char) *cp);
        else
            c = pg_ascii_toupper((unsigned char) *cp);
        *cp = c;

        /* we don't trust isalnum() here */
        in_word = ((c >= '0' && c <= '9') ||
                   (c >= 'A' && c <= 'Z') ||
                   (c >= 'a' && c <= 'z'));
    }

    return result;
}
2108 
2109 /* convenience routines for when the input is null-terminated */
2110 
2111 static char *
2112 str_tolower_z(const char *buff, Oid collid)
2113 {
2114  return str_tolower(buff, strlen(buff), collid);
2115 }
2116 
2117 static char *
2118 str_toupper_z(const char *buff, Oid collid)
2119 {
2120  return str_toupper(buff, strlen(buff), collid);
2121 }
2122 
2123 static char *
2124 str_initcap_z(const char *buff, Oid collid)
2125 {
2126  return str_initcap(buff, strlen(buff), collid);
2127 }
2128 
/* asc_tolower() for a null-terminated input string */
static char *
asc_tolower_z(const char *buff)
{
    size_t      nbytes = strlen(buff);

    return asc_tolower(buff, nbytes);
}
2134 
/* asc_toupper() for a null-terminated input string */
static char *
asc_toupper_z(const char *buff)
{
    size_t      nbytes = strlen(buff);

    return asc_toupper(buff, nbytes);
}
2140 
2141 /* asc_initcap_z is not currently needed */
2142 
2143 
/* ----------
 * Skip a TH / th ordinal suffix in FROM_CHAR input
 *
 * When S_THth(_suf) is true, advance 'ptr' over up to two characters
 * (each measured with pg_mblen), stopping early at the end of the string.
 * 'ptr' is evaluated several times, so it must be a side-effect-free lvalue.
 * ----------
 */
#define SKIP_THth(ptr, _suf) \
    do { \
        if (S_THth(_suf)) \
        { \
            if (*(ptr)) \
                (ptr) += pg_mblen(ptr); \
            if (*(ptr)) \
                (ptr) += pg_mblen(ptr); \
        } \
    } while (0)
2158 
2159 
#ifdef DEBUG_TO_FROM_CHAR
/* -----------
 * DEBUG: dump a keyword index for inspection.
 *
 * For every index slot, log either the character it stands for and the
 * keyword it points to, or the slot as free (-1).  A final line reports
 * how many slots are used and how many are free.
 * ----------
 */
static void
dump_index(const KeyWord *k, const int *index)
{
    int         slot;
    int         used = 0;
    int         unused = 0;

    elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");

    for (slot = 0; slot < KeyWord_INDEX_SIZE; slot++)
    {
        if (index[slot] == -1)
        {
            unused++;
            elog(DEBUG_elog_output, "\t(%d) %c %d", slot, slot + 32, index[slot]);
            continue;
        }

        elog(DEBUG_elog_output, "\t%c: %s, ", slot + 32, k[index[slot]].name);
        used++;
    }

    elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
         used, unused);
}
#endif                          /* DEBUG */
2192 
2193 /* ----------
2194  * Return true if next format picture is not digit value
2195  * ----------
2196  */
2197 static bool
2199 {
2200  if (n->type == NODE_TYPE_END)
2201  return false;
2202 
2203  if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
2204  return true;
2205 
2206  /*
2207  * Next node
2208  */
2209  n++;
2210 
2211  /* end of format string is treated like a non-digit separator */
2212  if (n->type == NODE_TYPE_END)
2213  return true;
2214 
2215  if (n->type == NODE_TYPE_ACTION)
2216  {
2217  if (n->key->is_digit)
2218  return false;
2219 
2220  return true;
2221  }
2222  else if (n->character[1] == '\0' &&
2223  isdigit((unsigned char) n->character[0]))
2224  return false;
2225 
2226  return true; /* some non-digit input (separator) */
2227 }
2228 
2229 
/*
 * Expand a partial (one- to three-digit) year to a full year near 2020.
 *
 * Values of 1000 and above are returned unchanged.
 */
static int
adjust_partial_year_to_2020(int year)
{
    /*
     * Adjust all dates toward 2020; this is effectively what happens when we
     * assume '70' is 1970 and '69' is 2069.
     */
    /* Force 0-69 into the 2000's */
    if (year < 70)
        return year + 2000;
    /* Force 70-99 into the 1900's */
    else if (year < 100)
        return year + 1900;
    /* Force 100-519 into the 2000's */
    else if (year < 520)
        return year + 2000;
    /* Force 520-999 into the 1000's */
    else if (year < 1000)
        return year + 1000;
    else
        return year;
}
2252 
2253 
/*
 * Count the whitespace characters (per isspace()) at the start of 'str'.
 */
static int
strspace_len(const char *str)
{
    const char *scan;

    for (scan = str; *scan != '\0'; scan++)
    {
        if (!isspace((unsigned char) *scan))
            break;
    }
    return (int) (scan - str);
}
2266 
2267 /*
2268  * Set the date mode of a from-char conversion.
2269  *
2270  * Puke if the date mode has already been set, and the caller attempts to set
2271  * it to a conflicting mode.
2272  *
2273  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
2274  */
2275 static void
2276 from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode, bool *have_error)
2277 {
2278  if (mode != FROM_CHAR_DATE_NONE)
2279  {
2280  if (tmfc->mode == FROM_CHAR_DATE_NONE)
2281  tmfc->mode = mode;
2282  else if (tmfc->mode != mode)
2284  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2285  errmsg("invalid combination of date conventions"),
2286  errhint("Do not mix Gregorian and ISO week date "
2287  "conventions in a formatting template."))));
2288  }
2289 
2290 on_error:
2291  return;
2292 }
2293 
2294 /*
2295  * Set the integer pointed to by 'dest' to the given value.
2296  *
2297  * Puke if the destination integer has previously been set to some other
2298  * non-zero value.
2299  *
2300  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
2301  */
2302 static void
2303 from_char_set_int(int *dest, const int value, const FormatNode *node,
2304  bool *have_error)
2305 {
2306  if (*dest != 0 && *dest != value)
2308  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2309  errmsg("conflicting values for \"%s\" field in "
2310  "formatting string",
2311  node->key->name),
2312  errdetail("This value contradicts a previous setting "
2313  "for the same field type."))));
2314  *dest = value;
2315 
2316 on_error:
2317  return;
2318 }
2319 
2320 /*
2321  * Read a single integer from the source string, into the int pointed to by
2322  * 'dest'. If 'dest' is NULL, the result is discarded.
2323  *
2324  * In fixed-width mode (the node does not have the FM suffix), consume at most
2325  * 'len' characters. However, any leading whitespace isn't counted in 'len'.
2326  *
2327  * We use strtol() to recover the integer value from the source string, in
2328  * accordance with the given FormatNode.
2329  *
2330  * If the conversion completes successfully, src will have been advanced to
2331  * point at the character immediately following the last character used in the
2332  * conversion.
2333  *
2334  * Return the number of characters consumed.
2335  *
2336  * Note that from_char_parse_int() provides a more convenient wrapper where
2337  * the length of the field is the same as the length of the format keyword (as
2338  * with DD and MI).
2339  *
2340  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
2341  * and -1 is returned.
2342  */
2343 static int
2344 from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node,
2345  bool *have_error)
2346 {
2347  long result;
2348  char copy[DCH_MAX_ITEM_SIZ + 1];
2349  const char *init = *src;
2350  int used;
2351 
2352  /*
2353  * Skip any whitespace before parsing the integer.
2354  */
2355  *src += strspace_len(*src);
2356 
2357  Assert(len <= DCH_MAX_ITEM_SIZ);
2358  used = (int) strlcpy(copy, *src, len + 1);
2359 
2360  if (S_FM(node->suffix) || is_next_separator(node))
2361  {
2362  /*
2363  * This node is in Fill Mode, or the next node is known to be a
2364  * non-digit value, so we just slurp as many characters as we can get.
2365  */
2366  char *endptr;
2367 
2368  errno = 0;
2369  result = strtol(init, &endptr, 10);
2370  *src = endptr;
2371  }
2372  else
2373  {
2374  /*
2375  * We need to pull exactly the number of characters given in 'len' out
2376  * of the string, and convert those.
2377  */
2378  char *last;
2379 
2380  if (used < len)
2382  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2383  errmsg("source string too short for \"%s\" "
2384  "formatting field",
2385  node->key->name),
2386  errdetail("Field requires %d characters, "
2387  "but only %d remain.",
2388  len, used),
2389  errhint("If your source string is not fixed-width, "
2390  "try using the \"FM\" modifier."))));
2391 
2392  errno = 0;
2393  result = strtol(copy, &last, 10);
2394  used = last - copy;
2395 
2396  if (used > 0 && used < len)
2398  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2399  errmsg("invalid value \"%s\" for \"%s\"",
2400  copy, node->key->name),
2401  errdetail("Field requires %d characters, "
2402  "but only %d could be parsed.",
2403  len, used),
2404  errhint("If your source string is not fixed-width, "
2405  "try using the \"FM\" modifier."))));
2406 
2407  *src += used;
2408  }
2409 
2410  if (*src == init)
2412  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2413  errmsg("invalid value \"%s\" for \"%s\"",
2414  copy, node->key->name),
2415  errdetail("Value must be an integer."))));
2416 
2417  if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
2419  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2420  errmsg("value for \"%s\" in source string is out of range",
2421  node->key->name),
2422  errdetail("Value must be in the range %d to %d.",
2423  INT_MIN, INT_MAX))));
2424 
2425  if (dest != NULL)
2426  {
2427  from_char_set_int(dest, (int) result, node, have_error);
2428  CHECK_ERROR;
2429  }
2430 
2431  return *src - init;
2432 
2433 on_error:
2434  return -1;
2435 }
2436 
2437 /*
2438  * Call from_char_parse_int_len(), using the length of the format keyword as
2439  * the expected length of the field.
2440  *
2441  * Don't call this function if the field differs in length from the format
2442  * keyword (as with HH24; the keyword length is 4, but the field length is 2).
2443  * In such cases, call from_char_parse_int_len() instead to specify the
2444  * required length explicitly.
2445  */
2446 static int
2447 from_char_parse_int(int *dest, const char **src, FormatNode *node, bool *have_error)
2448 {
2449  return from_char_parse_int_len(dest, src, node->key->len, node, have_error);
2450 }
2451 
/*
 * Look through the NULL-terminated "array" for the first entry that is a
 * case-insensitive prefix of "name".
 *
 * Returns the index of the matching entry, or -1 if there is none.
 *
 * *len receives the number of characters of "name" that were matched
 * (i.e. the length of the matching entry), or 0 if nothing matched.
 *
 * Characters are compared after pg_ascii_tolower(), so this is only
 * suitable for comparisons to ASCII strings.
 */
static int
seq_search_ascii(const char *name, const char *const *array, int *len)
{
	unsigned char first;
	int			idx;

	*len = 0;

	/* an empty input cannot match any entry */
	if (*name == '\0')
		return -1;

	/* fold the first character once; it rejects most entries cheaply */
	first = pg_ascii_tolower((unsigned char) *name);

	for (idx = 0; array[idx] != NULL; idx++)
	{
		const char *entry = array[idx];
		int			pos;

		if (pg_ascii_tolower((unsigned char) entry[0]) != first)
			continue;

		/* advance while both strings continue and their characters agree */
		pos = 1;
		while (entry[pos] != '\0' &&
			   name[pos] != '\0' &&
			   pg_ascii_tolower((unsigned char) entry[pos]) ==
			   pg_ascii_tolower((unsigned char) name[pos]))
			pos++;

		/* success only if the whole array entry was consumed */
		if (entry[pos] == '\0')
		{
			*len = pos;
			return idx;
		}
	}

	return -1;
}
2508 
2509 /*
2510  * Sequentially search an array of possibly non-English words for
2511  * a case-insensitive match to the initial character(s) of "name".
2512  *
2513  * This has the same API as seq_search_ascii(), but we use a more general
2514  * case-folding transformation to achieve case-insensitivity. Case folding
2515  * is done per the rules of the collation identified by "collid".
2516  *
2517  * The array is treated as const, but we don't declare it that way because
2518  * the arrays exported by pg_locale.c aren't const.
2519  */
2520 static int
2521 seq_search_localized(const char *name, char **array, int *len, Oid collid)
2522 {
2523  char **a;
2524  char *upper_name;
2525  char *lower_name;
2526 
2527  *len = 0;
2528 
2529  /* empty string can't match anything */
2530  if (!*name)
2531  return -1;
2532 
2533  /*
2534  * The case-folding processing done below is fairly expensive, so before
2535  * doing that, make a quick pass to see if there is an exact match.
2536  */
2537  for (a = array; *a != NULL; a++)
2538  {
2539  int element_len = strlen(*a);
2540 
2541  if (strncmp(name, *a, element_len) == 0)
2542  {
2543  *len = element_len;
2544  return a - array;
2545  }
2546  }
2547 
2548  /*
2549  * Fold to upper case, then to lower case, so that we can match reliably
2550  * even in languages in which case conversions are not injective.
2551  */
2552  upper_name = str_toupper(unconstify(char *, name), strlen(name), collid);
2553  lower_name = str_tolower(upper_name, strlen(upper_name), collid);
2554  pfree(upper_name);
2555 
2556  for (a = array; *a != NULL; a++)
2557  {
2558  char *upper_element;
2559  char *lower_element;
2560  int element_len;
2561 
2562  /* Likewise upper/lower-case array element */
2563  upper_element = str_toupper(*a, strlen(*a), collid);
2564  lower_element = str_tolower(upper_element, strlen(upper_element),
2565  collid);
2566  pfree(upper_element);
2567  element_len = strlen(lower_element);
2568 
2569  /* Match? */
2570  if (strncmp(lower_name, lower_element, element_len) == 0)
2571  {
2572  *len = element_len;
2573  pfree(lower_element);
2574  pfree(lower_name);
2575  return a - array;
2576  }
2577  pfree(lower_element);
2578  }
2579 
2580  pfree(lower_name);
2581  return -1;
2582 }
2583 
2584 /*
2585  * Perform a sequential search in 'array' (or 'localized_array', if that's
2586  * not NULL) for an entry matching the first character(s) of the 'src'
2587  * string case-insensitively.
2588  *
2589  * The 'array' is presumed to be English words (all-ASCII), but
2590  * if 'localized_array' is supplied, that might be non-English
2591  * so we need a more expensive case-folding transformation
2592  * (which will follow the rules of the collation 'collid').
2593  *
2594  * If a match is found, copy the array index of the match into the integer
2595  * pointed to by 'dest', advance 'src' to the end of the part of the string
2596  * which matched, and return the number of characters consumed.
2597  *
2598  * If the string doesn't match, throw an error if 'have_error' is NULL,
2599  * otherwise set '*have_error' and return -1.
2600  *
2601  * 'node' is used only for error reports: node->key->name identifies the
2602  * field type we were searching for.
2603  */
2604 static int
2605 from_char_seq_search(int *dest, const char **src, const char *const *array,
2606  char **localized_array, Oid collid,
2607  FormatNode *node, bool *have_error)
2608 {
2609  int len;
2610 
2611  if (localized_array == NULL)
2612  *dest = seq_search_ascii(*src, array, &len);
2613  else
2614  *dest = seq_search_localized(*src, localized_array, &len, collid);
2615 
2616  if (len <= 0)
2617  {
2618  /*
2619  * In the error report, truncate the string at the next whitespace (if
2620  * any) to avoid including irrelevant data.
2621  */
2622  char *copy = pstrdup(*src);
2623  char *c;
2624 
2625  for (c = copy; *c; c++)
2626  {
2627  if (scanner_isspace(*c))
2628  {
2629  *c = '\0';
2630  break;
2631  }
2632  }
2633 
2635  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2636  errmsg("invalid value \"%s\" for \"%s\"",
2637  copy, node->key->name),
2638  errdetail("The given value did not match any of "
2639  "the allowed values for this field."))));
2640  }
2641  *src += len;
2642  return len;
2643 
2644 on_error:
2645  return -1;
2646 }
2647 
2648 /* ----------
2649  * Process a TmToChar struct as denoted by a list of FormatNodes.
2650  * The formatted data is written to the string pointed to by 'out'.
2651  * ----------
2652  */
2653 static void
2654 DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
2655 {
2656  FormatNode *n;
2657  char *s;
2658  struct pg_tm *tm = &in->tm;
2659  int i;
2660 
2661  /* cache localized days and months */
2663 
2664  s = out;
2665  for (n = node; n->type != NODE_TYPE_END; n++)
2666  {
2667  if (n->type != NODE_TYPE_ACTION)
2668  {
2669  strcpy(s, n->character);
2670  s += strlen(s);
2671  continue;
2672  }
2673 
2674  switch (n->key->id)
2675  {
2676  case DCH_A_M:
2677  case DCH_P_M:
2678  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2679  ? P_M_STR : A_M_STR);
2680  s += strlen(s);
2681  break;
2682  case DCH_AM:
2683  case DCH_PM:
2684  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2685  ? PM_STR : AM_STR);
2686  s += strlen(s);
2687  break;
2688  case DCH_a_m:
2689  case DCH_p_m:
2690  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2691  ? p_m_STR : a_m_STR);
2692  s += strlen(s);
2693  break;
2694  case DCH_am:
2695  case DCH_pm:
2696  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2697  ? pm_STR : am_STR);
2698  s += strlen(s);
2699  break;
2700  case DCH_HH:
2701  case DCH_HH12:
2702 
2703  /*
2704  * display time as shown on a 12-hour clock, even for
2705  * intervals
2706  */
2707  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2708  tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ? HOURS_PER_DAY / 2 :
2709  tm->tm_hour % (HOURS_PER_DAY / 2));
2710  if (S_THth(n->suffix))
2711  str_numth(s, s, S_TH_TYPE(n->suffix));
2712  s += strlen(s);
2713  break;
2714  case DCH_HH24:
2715  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2716  tm->tm_hour);
2717  if (S_THth(n->suffix))
2718  str_numth(s, s, S_TH_TYPE(n->suffix));
2719  s += strlen(s);
2720  break;
2721  case DCH_MI:
2722  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
2723  tm->tm_min);
2724  if (S_THth(n->suffix))
2725  str_numth(s, s, S_TH_TYPE(n->suffix));
2726  s += strlen(s);
2727  break;
2728  case DCH_SS:
2729  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
2730  tm->tm_sec);
2731  if (S_THth(n->suffix))
2732  str_numth(s, s, S_TH_TYPE(n->suffix));
2733  s += strlen(s);
2734  break;
2735 
2736 #define DCH_to_char_fsec(frac_fmt, frac_val) \
2737  sprintf(s, frac_fmt, (int) (frac_val)); \
2738  if (S_THth(n->suffix)) \
2739  str_numth(s, s, S_TH_TYPE(n->suffix)); \
2740  s += strlen(s)
2741 
2742  case DCH_FF1: /* tenth of second */
2743  DCH_to_char_fsec("%01d", in->fsec / 100000);
2744  break;
2745  case DCH_FF2: /* hundredth of second */
2746  DCH_to_char_fsec("%02d", in->fsec / 10000);
2747  break;
2748  case DCH_FF3:
2749  case DCH_MS: /* millisecond */
2750  DCH_to_char_fsec("%03d", in->fsec / 1000);
2751  break;
2752  case DCH_FF4: /* tenth of a millisecond */
2753  DCH_to_char_fsec("%04d", in->fsec / 100);
2754  break;
2755  case DCH_FF5: /* hundredth of a millisecond */
2756  DCH_to_char_fsec("%05d", in->fsec / 10);
2757  break;
2758  case DCH_FF6:
2759  case DCH_US: /* microsecond */
2760  DCH_to_char_fsec("%06d", in->fsec);
2761  break;
2762 #undef DCH_to_char_fsec
2763  case DCH_SSSS:
2764  sprintf(s, "%d", tm->tm_hour * SECS_PER_HOUR +
2765  tm->tm_min * SECS_PER_MINUTE +
2766  tm->tm_sec);
2767  if (S_THth(n->suffix))
2768  str_numth(s, s, S_TH_TYPE(n->suffix));
2769  s += strlen(s);
2770  break;
2771  case DCH_tz:
2773  if (tmtcTzn(in))
2774  {
2775  /* We assume here that timezone names aren't localized */
2776  char *p = asc_tolower_z(tmtcTzn(in));
2777 
2778  strcpy(s, p);
2779  pfree(p);
2780  s += strlen(s);
2781  }
2782  break;
2783  case DCH_TZ:
2785  if (tmtcTzn(in))
2786  {
2787  strcpy(s, tmtcTzn(in));
2788  s += strlen(s);
2789  }
2790  break;
2791  case DCH_TZH:
2793  sprintf(s, "%c%02d",
2794  (tm->tm_gmtoff >= 0) ? '+' : '-',
2795  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2796  s += strlen(s);
2797  break;
2798  case DCH_TZM:
2800  sprintf(s, "%02d",
2801  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2802  s += strlen(s);
2803  break;
2804  case DCH_OF:
2806  sprintf(s, "%c%0*d",
2807  (tm->tm_gmtoff >= 0) ? '+' : '-',
2808  S_FM(n->suffix) ? 0 : 2,
2809  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2810  s += strlen(s);
2811  if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
2812  {
2813  sprintf(s, ":%02d",
2814  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2815  s += strlen(s);
2816  }
2817  break;
2818  case DCH_A_D:
2819  case DCH_B_C:
2821  strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
2822  s += strlen(s);
2823  break;
2824  case DCH_AD:
2825  case DCH_BC:
2827  strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
2828  s += strlen(s);
2829  break;
2830  case DCH_a_d:
2831  case DCH_b_c:
2833  strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
2834  s += strlen(s);
2835  break;
2836  case DCH_ad:
2837  case DCH_bc:
2839  strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
2840  s += strlen(s);
2841  break;
2842  case DCH_MONTH:
2844  if (!tm->tm_mon)
2845  break;
2846  if (S_TM(n->suffix))
2847  {
2848  char *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid);
2849 
2850  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2851  strcpy(s, str);
2852  else
2853  ereport(ERROR,
2854  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2855  errmsg("localized string format value too long")));
2856  }
2857  else
2858  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2859  asc_toupper_z(months_full[tm->tm_mon - 1]));
2860  s += strlen(s);
2861  break;
2862  case DCH_Month:
2864  if (!tm->tm_mon)
2865  break;
2866  if (S_TM(n->suffix))
2867  {
2868  char *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid);
2869 
2870  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2871  strcpy(s, str);
2872  else
2873  ereport(ERROR,
2874  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2875  errmsg("localized string format value too long")));
2876  }
2877  else
2878  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2879  months_full[tm->tm_mon - 1]);
2880  s += strlen(s);
2881  break;
2882  case DCH_month:
2884  if (!tm->tm_mon)
2885  break;
2886  if (S_TM(n->suffix))
2887  {
2888  char *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid);
2889 
2890  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2891  strcpy(s, str);
2892  else
2893  ereport(ERROR,
2894  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2895  errmsg("localized string format value too long")));
2896  }
2897  else
2898  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2899  asc_tolower_z(months_full[tm->tm_mon - 1]));
2900  s += strlen(s);
2901  break;
2902  case DCH_MON:
2904  if (!tm->tm_mon)
2905  break;
2906  if (S_TM(n->suffix))
2907  {
2908  char *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2909 
2910  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2911  strcpy(s, str);
2912  else
2913  ereport(ERROR,
2914  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2915  errmsg("localized string format value too long")));
2916  }
2917  else
2918  strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
2919  s += strlen(s);
2920  break;
2921  case DCH_Mon:
2923  if (!tm->tm_mon)
2924  break;
2925  if (S_TM(n->suffix))
2926  {
2927  char *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2928 
2929  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2930  strcpy(s, str);
2931  else
2932  ereport(ERROR,
2933  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2934  errmsg("localized string format value too long")));
2935  }
2936  else
2937  strcpy(s, months[tm->tm_mon - 1]);
2938  s += strlen(s);
2939  break;
2940  case DCH_mon:
2942  if (!tm->tm_mon)
2943  break;
2944  if (S_TM(n->suffix))
2945  {
2946  char *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2947 
2948  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2949  strcpy(s, str);
2950  else
2951  ereport(ERROR,
2952  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2953  errmsg("localized string format value too long")));
2954  }
2955  else
2956  strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
2957  s += strlen(s);
2958  break;
2959  case DCH_MM:
2960  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
2961  tm->tm_mon);
2962  if (S_THth(n->suffix))
2963  str_numth(s, s, S_TH_TYPE(n->suffix));
2964  s += strlen(s);
2965  break;
2966  case DCH_DAY:
2968  if (S_TM(n->suffix))
2969  {
2970  char *str = str_toupper_z(localized_full_days[tm->tm_wday], collid);
2971 
2972  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2973  strcpy(s, str);
2974  else
2975  ereport(ERROR,
2976  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2977  errmsg("localized string format value too long")));
2978  }
2979  else
2980  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2981  asc_toupper_z(days[tm->tm_wday]));
2982  s += strlen(s);
2983  break;
2984  case DCH_Day:
2986  if (S_TM(n->suffix))
2987  {
2988  char *str = str_initcap_z(localized_full_days[tm->tm_wday], collid);
2989 
2990  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2991  strcpy(s, str);
2992  else
2993  ereport(ERROR,
2994  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2995  errmsg("localized string format value too long")));
2996  }
2997  else
2998  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2999  days[tm->tm_wday]);
3000  s += strlen(s);
3001  break;
3002  case DCH_day:
3004  if (S_TM(n->suffix))
3005  {
3006  char *str = str_tolower_z(localized_full_days[tm->tm_wday], collid);
3007 
3008  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3009  strcpy(s, str);
3010  else
3011  ereport(ERROR,
3012  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3013  errmsg("localized string format value too long")));
3014  }
3015  else
3016  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3017  asc_tolower_z(days[tm->tm_wday]));
3018  s += strlen(s);
3019  break;
3020  case DCH_DY:
3022  if (S_TM(n->suffix))
3023  {
3024  char *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid);
3025 
3026  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3027  strcpy(s, str);
3028  else
3029  ereport(ERROR,
3030  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3031  errmsg("localized string format value too long")));
3032  }
3033  else
3034  strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
3035  s += strlen(s);
3036  break;
3037  case DCH_Dy:
3039  if (S_TM(n->suffix))
3040  {
3041  char *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid);
3042 
3043  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3044  strcpy(s, str);
3045  else
3046  ereport(ERROR,
3047  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3048  errmsg("localized string format value too long")));
3049  }
3050  else
3051  strcpy(s, days_short[tm->tm_wday]);
3052  s += strlen(s);
3053  break;
3054  case DCH_dy:
3056  if (S_TM(n->suffix))
3057  {
3058  char *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid);
3059 
3060  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3061  strcpy(s, str);
3062  else
3063  ereport(ERROR,
3064  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3065  errmsg("localized string format value too long")));
3066  }
3067  else
3068  strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
3069  s += strlen(s);
3070  break;
3071  case DCH_DDD:
3072  case DCH_IDDD:
3073  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
3074  (n->key->id == DCH_DDD) ?
3075  tm->tm_yday :
3076  date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday));
3077  if (S_THth(n->suffix))
3078  str_numth(s, s, S_TH_TYPE(n->suffix));
3079  s += strlen(s);
3080  break;
3081  case DCH_DD:
3082  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
3083  if (S_THth(n->suffix))
3084  str_numth(s, s, S_TH_TYPE(n->suffix));
3085  s += strlen(s);
3086  break;
3087  case DCH_D:
3089  sprintf(s, "%d", tm->tm_wday + 1);
3090  if (S_THth(n->suffix))
3091  str_numth(s, s, S_TH_TYPE(n->suffix));
3092  s += strlen(s);
3093  break;
3094  case DCH_ID:
3096  sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
3097  if (S_THth(n->suffix))
3098  str_numth(s, s, S_TH_TYPE(n->suffix));
3099  s += strlen(s);
3100  break;
3101  case DCH_WW:
3102  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3103  (tm->tm_yday - 1) / 7 + 1);
3104  if (S_THth(n->suffix))
3105  str_numth(s, s, S_TH_TYPE(n->suffix));
3106  s += strlen(s);
3107  break;
3108  case DCH_IW:
3109  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3110  date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday));
3111  if (S_THth(n->suffix))
3112  str_numth(s, s, S_TH_TYPE(n->suffix));
3113  s += strlen(s);
3114  break;
3115  case DCH_Q:
3116  if (!tm->tm_mon)
3117  break;
3118  sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
3119  if (S_THth(n->suffix))
3120  str_numth(s, s, S_TH_TYPE(n->suffix));
3121  s += strlen(s);
3122  break;
3123  case DCH_CC:
3124  if (is_interval) /* straight calculation */
3125  i = tm->tm_year / 100;
3126  else
3127  {
3128  if (tm->tm_year > 0)
3129  /* Century 20 == 1901 - 2000 */
3130  i = (tm->tm_year - 1) / 100 + 1;
3131  else
3132  /* Century 6BC == 600BC - 501BC */
3133  i = tm->tm_year / 100 - 1;
3134  }
3135  if (i <= 99 && i >= -99)
3136  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
3137  else
3138  sprintf(s, "%d", i);
3139  if (S_THth(n->suffix))
3140  str_numth(s, s, S_TH_TYPE(n->suffix));
3141  s += strlen(s);
3142  break;
3143  case DCH_Y_YYY:
3144  i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
3145  sprintf(s, "%d,%03d", i,
3146  ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
3147  if (S_THth(n->suffix))
3148  str_numth(s, s, S_TH_TYPE(n->suffix));
3149  s += strlen(s);
3150  break;
3151  case DCH_YYYY:
3152  case DCH_IYYY:
3153  sprintf(s, "%0*d",
3154  S_FM(n->suffix) ? 0 :
3155  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
3156  (n->key->id == DCH_YYYY ?
3157  ADJUST_YEAR(tm->tm_year, is_interval) :
3159  tm->tm_mon,
3160  tm->tm_mday),
3161  is_interval)));
3162  if (S_THth(n->suffix))
3163  str_numth(s, s, S_TH_TYPE(n->suffix));
3164  s += strlen(s);
3165  break;
3166  case DCH_YYY:
3167  case DCH_IYY:
3168  sprintf(s, "%0*d",
3169  S_FM(n->suffix) ? 0 :
3170  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
3171  (n->key->id == DCH_YYY ?
3172  ADJUST_YEAR(tm->tm_year, is_interval) :
3174  tm->tm_mon,
3175  tm->tm_mday),
3176  is_interval)) % 1000);
3177  if (S_THth(n->suffix))
3178  str_numth(s, s, S_TH_TYPE(n->suffix));
3179  s += strlen(s);
3180  break;
3181  case DCH_YY:
3182  case DCH_IY:
3183  sprintf(s, "%0*d",
3184  S_FM(n->suffix) ? 0 :
3185  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
3186  (n->key->id == DCH_YY ?
3187  ADJUST_YEAR(tm->tm_year, is_interval) :
3189  tm->tm_mon,
3190  tm->tm_mday),
3191  is_interval)) % 100);
3192  if (S_THth(n->suffix))
3193  str_numth(s, s, S_TH_TYPE(n->suffix));
3194  s += strlen(s);
3195  break;
3196  case DCH_Y:
3197  case DCH_I:
3198  sprintf(s, "%1d",
3199  (n->key->id == DCH_Y ?
3200  ADJUST_YEAR(tm->tm_year, is_interval) :
3202  tm->tm_mon,
3203  tm->tm_mday),
3204  is_interval)) % 10);
3205  if (S_THth(n->suffix))
3206  str_numth(s, s, S_TH_TYPE(n->suffix));
3207  s += strlen(s);
3208  break;
3209  case DCH_RM:
3210  if (!tm->tm_mon)
3211  break;
3212  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
3214  s += strlen(s);
3215  break;
3216  case DCH_rm:
3217  if (!tm->tm_mon)
3218  break;
3219  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
3221  s += strlen(s);
3222  break;
3223  case DCH_W:
3224  sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
3225  if (S_THth(n->suffix))
3226  str_numth(s, s, S_TH_TYPE(n->suffix));
3227  s += strlen(s);
3228  break;
3229  case DCH_J:
3230  sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
3231  if (S_THth(n->suffix))
3232  str_numth(s, s, S_TH_TYPE(n->suffix));
3233  s += strlen(s);
3234  break;
3235  }
3236  }
3237 
3238  *s = '\0';
3239 }
3240 
3241 /*
3242  * Process the string 'in' as denoted by the array of FormatNodes 'node[]'.
3243  * The TmFromChar struct pointed to by 'out' is populated with the results.
3244  *
3245  * 'collid' identifies the collation to use, if needed.
3246  * 'std' specifies standard parsing mode.
3247  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
3248  *
3249  * Note: we currently don't have any to_interval() function, so there
3250  * is no need here for INVALID_FOR_INTERVAL checks.
3251  */
3252 static void
3253 DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
3254  Oid collid, bool std, bool *have_error)
3255 {
3256  FormatNode *n;
3257  const char *s;
3258  int len,
3259  value;
3260  bool fx_mode = std;
3261 
3262  /* number of extra skipped characters (more than given in format string) */
3263  int extra_skip = 0;
3264 
3265  /* cache localized days and months */
3267 
3268  for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
3269  {
3270  /*
3271  * Ignore spaces at the beginning of the string and before fields when
3272  * not in FX (fixed width) mode.
3273  */
3274  if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) &&
3275  (n->type == NODE_TYPE_ACTION || n == node))
3276  {
3277  while (*s != '\0' && isspace((unsigned char) *s))
3278  {
3279  s++;
3280  extra_skip++;
3281  }
3282  }
3283 
3284  if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
3285  {
3286  if (std)
3287  {
3288  /*
3289  * Standard mode requires strict matching between format
3290  * string separators/spaces and input string.
3291  */
3292  Assert(n->character[0] && !n->character[1]);
3293 
3294  if (*s == n->character[0])
3295  s++;
3296  else
3298  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3299  errmsg("unmatched format separator \"%c\"",
3300  n->character[0]))));
3301  }
3302  else if (!fx_mode)
3303  {
3304  /*
3305  * In non FX (fixed format) mode one format string space or
3306  * separator match to one space or separator in input string.
3307  * Or match nothing if there is no space or separator in the
3308  * current position of input string.
3309  */
3310  extra_skip--;
3311  if (isspace((unsigned char) *s) || is_separator_char(s))
3312  {
3313  s++;
3314  extra_skip++;
3315  }
3316  }
3317  else
3318  {
3319  /*
3320  * In FX mode, on format string space or separator we consume
3321  * exactly one character from input string. Notice we don't
3322  * insist that the consumed character match the format's
3323  * character.
3324  */
3325  s += pg_mblen(s);
3326  }
3327  continue;
3328  }
3329  else if (n->type != NODE_TYPE_ACTION)
3330  {
3331  /*
3332  * Text character, so consume one character from input string.
3333  * Notice we don't insist that the consumed character match the
3334  * format's character.
3335  */
3336  if (!fx_mode)
3337  {
3338  /*
3339  * In non FX mode we might have skipped some extra characters
3340  * (more than specified in format string) before. In this
3341  * case we don't skip input string character, because it might
3342  * be part of field.
3343  */
3344  if (extra_skip > 0)
3345  extra_skip--;
3346  else
3347  s += pg_mblen(s);
3348  }
3349  else
3350  {
3351  int chlen = pg_mblen(s);
3352 
3353  /*
3354  * Standard mode requires strict match of format characters.
3355  */
3356  if (std && n->type == NODE_TYPE_CHAR &&
3357  strncmp(s, n->character, chlen) != 0)
3359  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3360  errmsg("unmatched format character \"%s\"",
3361  n->character))));
3362 
3363  s += chlen;
3364  }
3365  continue;
3366  }
3367 
3368  from_char_set_mode(out, n->key->date_mode, have_error);
3369  CHECK_ERROR;
3370 
3371  switch (n->key->id)
3372  {
3373  case DCH_FX:
3374  fx_mode = true;
3375  break;
3376  case DCH_A_M:
3377  case DCH_P_M:
3378  case DCH_a_m:
3379  case DCH_p_m:
3381  NULL, InvalidOid,
3382  n, have_error);
3383  CHECK_ERROR;
3384  from_char_set_int(&out->pm, value % 2, n, have_error);
3385  CHECK_ERROR;
3386  out->clock = CLOCK_12_HOUR;
3387  break;
3388  case DCH_AM:
3389  case DCH_PM:
3390  case DCH_am:
3391  case DCH_pm:
3392  from_char_seq_search(&value, &s, ampm_strings,
3393  NULL, InvalidOid,
3394  n, have_error);
3395  CHECK_ERROR;
3396  from_char_set_int(&out->pm, value % 2, n, have_error);
3397  CHECK_ERROR;
3398  out->clock = CLOCK_12_HOUR;
3399  break;
3400  case DCH_HH:
3401  case DCH_HH12:
3402  from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
3403  CHECK_ERROR;
3404  out->clock = CLOCK_12_HOUR;
3405  SKIP_THth(s, n->suffix);
3406  break;
3407  case DCH_HH24:
3408  from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
3409  CHECK_ERROR;
3410  SKIP_THth(s, n->suffix);
3411  break;
3412  case DCH_MI:
3413  from_char_parse_int(&out->mi, &s, n, have_error);
3414  CHECK_ERROR;
3415  SKIP_THth(s, n->suffix);
3416  break;
3417  case DCH_SS:
3418  from_char_parse_int(&out->ss, &s, n, have_error);
3419  CHECK_ERROR;
3420  SKIP_THth(s, n->suffix);
3421  break;
3422  case DCH_MS: /* millisecond */
3423  len = from_char_parse_int_len(&out->ms, &s, 3, n, have_error);
3424  CHECK_ERROR;
3425 
3426  /*
3427  * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
3428  */
3429  out->ms *= len == 1 ? 100 :
3430  len == 2 ? 10 : 1;
3431 
3432  SKIP_THth(s, n->suffix);
3433  break;
3434  case DCH_FF1:
3435  case DCH_FF2:
3436  case DCH_FF3:
3437  case DCH_FF4:
3438  case DCH_FF5:
3439  case DCH_FF6:
3440  out->ff = n->key->id - DCH_FF1 + 1;
3441  /* fall through */
3442  case DCH_US: /* microsecond */
3443  len = from_char_parse_int_len(&out->us, &s,
3444  n->key->id == DCH_US ? 6 :
3445  out->ff, n, have_error);
3446  CHECK_ERROR;
3447 
3448  out->us *= len == 1 ? 100000 :
3449  len == 2 ? 10000 :
3450  len == 3 ? 1000 :
3451  len == 4 ? 100 :
3452  len == 5 ? 10 : 1;
3453 
3454  SKIP_THth(s, n->suffix);
3455  break;
3456  case DCH_SSSS:
3457  from_char_parse_int(&out->ssss, &s, n, have_error);
3458  CHECK_ERROR;
3459  SKIP_THth(s, n->suffix);
3460  break;
3461  case DCH_tz:
3462  case DCH_TZ:
3463  case DCH_OF:
3465  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3466  errmsg("formatting field \"%s\" is only supported in to_char",
3467  n->key->name))));
3468  CHECK_ERROR;
3469  break;
3470  case DCH_TZH:
3471 
3472  /*
3473  * Value of TZH might be negative. And the issue is that we
3474  * might swallow minus sign as the separator. So, if we have
3475  * skipped more characters than specified in the format
3476  * string, then we consider prepending last skipped minus to
3477  * TZH.
3478  */
3479  if (*s == '+' || *s == '-' || *s == ' ')
3480  {
3481  out->tzsign = *s == '-' ? -1 : +1;
3482  s++;
3483  }
3484  else
3485  {
3486  if (extra_skip > 0 && *(s - 1) == '-')
3487  out->tzsign = -1;
3488  else
3489  out->tzsign = +1;
3490  }
3491 
3492  from_char_parse_int_len(&out->tzh, &s, 2, n, have_error);
3493  CHECK_ERROR;
3494  break;
3495  case DCH_TZM:
3496  /* assign positive timezone sign if TZH was not seen before */
3497  if (!out->tzsign)
3498  out->tzsign = +1;
3499  from_char_parse_int_len(&out->tzm, &s, 2, n, have_error);
3500  CHECK_ERROR;
3501  break;
3502  case DCH_A_D:
3503  case DCH_B_C:
3504  case DCH_a_d:
3505  case DCH_b_c:
3507  NULL, InvalidOid,
3508  n, have_error);
3509  CHECK_ERROR;
3510  from_char_set_int(&out->bc, value % 2, n, have_error);
3511  CHECK_ERROR;
3512  break;
3513  case DCH_AD:
3514  case DCH_BC:
3515  case DCH_ad:
3516  case DCH_bc:
3517  from_char_seq_search(&value, &s, adbc_strings,
3518  NULL, InvalidOid,
3519  n, have_error);
3520  CHECK_ERROR;
3521  from_char_set_int(&out->bc, value % 2, n, have_error);
3522  CHECK_ERROR;
3523  break;
3524  case DCH_MONTH:
3525  case DCH_Month:
3526  case DCH_month:
3527  from_char_seq_search(&value, &s, months_full,
3528  S_TM(n->suffix) ? localized_full_months : NULL,
3529  collid,
3530  n, have_error);
3531  CHECK_ERROR;
3532  from_char_set_int(&out->mm, value + 1, n, have_error);
3533  CHECK_ERROR;
3534  break;
3535  case DCH_MON:
3536  case DCH_Mon:
3537  case DCH_mon:
3538  from_char_seq_search(&value, &s, months,
3539  S_TM(n->suffix) ? localized_abbrev_months : NULL,
3540  collid,
3541  n, have_error);
3542  CHECK_ERROR;
3543  from_char_set_int(&out->mm, value + 1, n, have_error);
3544  CHECK_ERROR;
3545  break;
3546  case DCH_MM:
3547  from_char_parse_int(&out->mm, &s, n, have_error);
3548  CHECK_ERROR;
3549  SKIP_THth(s, n->suffix);
3550  break;
3551  case DCH_DAY:
3552  case DCH_Day:
3553  case DCH_day:
3554  from_char_seq_search(&value, &s, days,
3555  S_TM(n->suffix) ? localized_full_days : NULL,
3556  collid,
3557  n, have_error);
3558  CHECK_ERROR;
3559  from_char_set_int(&out->d, value, n, have_error);
3560  CHECK_ERROR;
3561  out->d++;
3562  break;
3563  case DCH_DY:
3564  case DCH_Dy:
3565  case DCH_dy:
3566  from_char_seq_search(&value, &s, days_short,
3567  S_TM(n->suffix) ? localized_abbrev_days : NULL,
3568  collid,
3569  n, have_error);
3570  CHECK_ERROR;
3571  from_char_set_int(&out->d, value, n, have_error);
3572  CHECK_ERROR;
3573  out->d++;
3574  break;
3575  case DCH_DDD:
3576  from_char_parse_int(&out->ddd, &s, n, have_error);
3577  CHECK_ERROR;
3578  SKIP_THth(s, n->suffix);
3579  break;
3580  case DCH_IDDD:
3581  from_char_parse_int_len(&out->ddd, &s, 3, n, have_error);
3582  CHECK_ERROR;
3583  SKIP_THth(s, n->suffix);
3584  break;
3585  case DCH_DD:
3586  from_char_parse_int(&out->dd, &s, n, have_error);
3587  CHECK_ERROR;
3588  SKIP_THth(s, n->suffix);
3589  break;
3590  case DCH_D:
3591  from_char_parse_int(&out->d, &s, n, have_error);
3592  CHECK_ERROR;
3593  SKIP_THth(s, n->suffix);
3594  break;
3595  case DCH_ID:
3596  from_char_parse_int_len(&out->d, &s, 1, n, have_error);
3597  CHECK_ERROR;
3598  /* Shift numbering to match Gregorian where Sunday = 1 */
3599  if (++out->d > 7)
3600  out->d = 1;
3601  SKIP_THth(s, n->suffix);
3602  break;
3603  case DCH_WW:
3604  case DCH_IW:
3605  from_char_parse_int(&out->ww, &s, n, have_error);
3606  CHECK_ERROR;
3607  SKIP_THth(s, n->suffix);
3608  break;
3609  case DCH_Q:
3610 
3611  /*
3612  * We ignore 'Q' when converting to date because it is unclear
3613  * which date in the quarter to use, and some people specify
3614  * both quarter and month, so if it was honored it might
3615  * conflict with the supplied month. That is also why we don't
3616  * throw an error.
3617  *
3618  * We still parse the source string for an integer, but it
3619  * isn't stored anywhere in 'out'.
3620  */
3621  from_char_parse_int((int *) NULL, &s, n, have_error);
3622  CHECK_ERROR;
3623  SKIP_THth(s, n->suffix);
3624  break;
3625  case DCH_CC:
3626  from_char_parse_int(&out->cc, &s, n, have_error);
3627  CHECK_ERROR;
3628  SKIP_THth(s, n->suffix);
3629  break;
3630  case DCH_Y_YYY:
3631  {
3632  int matched,
3633  years,
3634  millennia,
3635  nch;
3636 
3637  matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
3638  if (matched < 2)
3640  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3641  errmsg("invalid input string for \"Y,YYY\""))));
3642  years += (millennia * 1000);
3643  from_char_set_int(&out->year, years, n, have_error);
3644  CHECK_ERROR;
3645  out->yysz = 4;
3646  s += nch;
3647  SKIP_THth(s, n->suffix);
3648  }
3649  break;
3650  case DCH_YYYY:
3651  case DCH_IYYY:
3652  from_char_parse_int(&out->year, &s, n, have_error);
3653  CHECK_ERROR;
3654  out->yysz = 4;
3655  SKIP_THth(s, n->suffix);
3656  break;
3657  case DCH_YYY:
3658  case DCH_IYY:
3659  len = from_char_parse_int(&out->year, &s, n, have_error);
3660  CHECK_ERROR;
3661  if (len < 4)
3662  out->year = adjust_partial_year_to_2020(out->year);
3663  out->yysz = 3;
3664  SKIP_THth(s, n->suffix);
3665  break;
3666  case DCH_YY:
3667  case DCH_IY:
3668  len = from_char_parse_int(&out->year, &s, n, have_error);
3669  CHECK_ERROR;
3670  if (len < 4)
3671  out->year = adjust_partial_year_to_2020(out->year);
3672  out->yysz = 2;
3673  SKIP_THth(s, n->suffix);
3674  break;
3675  case DCH_Y:
3676  case DCH_I:
3677  len = from_char_parse_int(&out->year, &s, n, have_error);
3678  CHECK_ERROR;
3679  if (len < 4)
3680  out->year = adjust_partial_year_to_2020(out->year);
3681  out->yysz = 1;
3682  SKIP_THth(s, n->suffix);
3683  break;
3684  case DCH_RM:
3685  case DCH_rm:
3687  NULL, InvalidOid,
3688  n, have_error);
3689  CHECK_ERROR;
3690  from_char_set_int(&out->mm, MONTHS_PER_YEAR - value,
3691  n, have_error);
3692  CHECK_ERROR;
3693  break;
3694  case DCH_W:
3695  from_char_parse_int(&out->w, &s, n, have_error);
3696  CHECK_ERROR;
3697  SKIP_THth(s, n->suffix);
3698  break;
3699  case DCH_J:
3700  from_char_parse_int(&out->j, &s, n, have_error);
3701  CHECK_ERROR;
3702  SKIP_THth(s, n->suffix);
3703  break;
3704  }
3705 
3706  /* Ignore all spaces after fields */
3707  if (!fx_mode)
3708  {
3709  extra_skip = 0;
3710  while (*s != '\0' && isspace((unsigned char) *s))
3711  {
3712  s++;
3713  extra_skip++;
3714  }
3715  }
3716  }
3717 
3718  /*
3719  * Standard parsing mode doesn't allow unmatched format patterns or
3720  * trailing characters in the input string.
3721  */
3722  if (std)
3723  {
3724  if (n->type != NODE_TYPE_END)
3726  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3727  errmsg("input string is too short for datetime format"))));
3728 
3729  while (*s != '\0' && isspace((unsigned char) *s))
3730  s++;
3731 
3732  if (*s != '\0')
3734  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3735  errmsg("trailing characters remain in input string "
3736  "after datetime format"))));
3737  }
3738 
3739 on_error:
3740  return;
3741 }
3742 
3743 /*
3744  * The invariant for DCH cache entry management is that DCHCounter is equal
3745  * to the maximum age value among the existing entries, and we increment it
3746  * whenever an access occurs. If we approach overflow, deal with that by
3747  * halving all the age values, so that we retain a fairly accurate idea of
3748  * which entries are oldest.
3749  */
3750 static inline void
3752 {
3753  if (DCHCounter >= (INT_MAX - 1))
3754  {
3755  for (int i = 0; i < n_DCHCache; i++)
3756  DCHCache[i]->age >>= 1;
3757  DCHCounter >>= 1;
3758  }
3759 }
3760 
3761 /*
3762  * Get mask of date/time/zone components present in format nodes.
3763  *
3764  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
3765  */
3766 static int
3767 DCH_datetime_type(FormatNode *node, bool *have_error)
3768 {
3769  FormatNode *n;
3770  int flags = 0;
3771 
3772  for (n = node; n->type != NODE_TYPE_END; n++)
3773  {
3774  if (n->type != NODE_TYPE_ACTION)
3775  continue;
3776 
3777  switch (n->key->id)
3778  {
3779  case DCH_FX:
3780  break;
3781  case DCH_A_M:
3782  case DCH_P_M:
3783  case DCH_a_m:
3784  case DCH_p_m:
3785  case DCH_AM:
3786  case DCH_PM:
3787  case DCH_am:
3788  case DCH_pm:
3789  case DCH_HH:
3790  case DCH_HH12:
3791  case DCH_HH24:
3792  case DCH_MI:
3793  case DCH_SS:
3794  case DCH_MS: /* millisecond */
3795  case DCH_US: /* microsecond */
3796  case DCH_FF1:
3797  case DCH_FF2:
3798  case DCH_FF3:
3799  case DCH_FF4:
3800  case DCH_FF5:
3801  case DCH_FF6:
3802  case DCH_SSSS:
3803  flags |= DCH_TIMED;
3804  break;
3805  case DCH_tz:
3806  case DCH_TZ:
3807  case DCH_OF:
3809  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3810  errmsg("formatting field \"%s\" is only supported in to_char",
3811  n->key->name))));
3812  flags |= DCH_ZONED;
3813  break;
3814  case DCH_TZH:
3815  case DCH_TZM:
3816  flags |= DCH_ZONED;
3817  break;
3818  case DCH_A_D:
3819  case DCH_B_C:
3820  case DCH_a_d:
3821  case DCH_b_c:
3822  case DCH_AD:
3823  case DCH_BC:
3824  case DCH_ad:
3825  case DCH_bc:
3826  case DCH_MONTH:
3827  case DCH_Month:
3828  case DCH_month:
3829  case DCH_MON:
3830  case DCH_Mon:
3831  case DCH_mon:
3832  case DCH_MM:
3833  case DCH_DAY:
3834  case DCH_Day:
3835  case DCH_day:
3836  case DCH_DY:
3837  case DCH_Dy:
3838  case DCH_dy:
3839  case DCH_DDD:
3840  case DCH_IDDD:
3841  case DCH_DD:
3842  case DCH_D:
3843  case DCH_ID:
3844  case DCH_WW:
3845  case DCH_Q:
3846  case DCH_CC:
3847  case DCH_Y_YYY:
3848  case DCH_YYYY:
3849  case DCH_IYYY:
3850  case DCH_YYY:
3851  case DCH_IYY:
3852  case DCH_YY:
3853  case DCH_IY:
3854  case DCH_Y:
3855  case DCH_I:
3856  case DCH_RM:
3857  case DCH_rm:
3858  case DCH_W:
3859  case DCH_J:
3860  flags |= DCH_DATED;
3861  break;
3862  }
3863  }
3864 
3865 on_error:
3866  return flags;
3867 }
3868 
3869 /* select a DCHCacheEntry to hold the given format picture */
3870 static DCHCacheEntry *
3871 DCH_cache_getnew(const char *str, bool std)
3872 {
3873  DCHCacheEntry *ent;
3874 
3875  /* Ensure we can advance DCHCounter below */
3877 
3878  /*
3879  * If cache is full, remove oldest entry (or recycle first not-valid one)
3880  */
3882  {
3883  DCHCacheEntry *old = DCHCache[0];
3884 
3885 #ifdef DEBUG_TO_FROM_CHAR
3886  elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
3887 #endif
3888  if (old->valid)
3889  {
3890  for (int i = 1; i < DCH_CACHE_ENTRIES; i++)
3891  {
3892  ent = DCHCache[i];
3893  if (!ent->valid)
3894  {
3895  old = ent;
3896  break;
3897  }
3898  if (ent->age < old->age)
3899  old = ent;
3900  }
3901  }
3902 #ifdef DEBUG_TO_FROM_CHAR
3903  elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
3904 #endif
3905  old->valid = false;
3906  strlcpy(old->str, str, DCH_CACHE_SIZE + 1);
3907  old->age = (++DCHCounter);
3908  /* caller is expected to fill format, then set valid */
3909  return old;
3910  }
3911  else
3912  {
3913 #ifdef DEBUG_TO_FROM_CHAR
3914  elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
3915 #endif
3916  Assert(DCHCache[n_DCHCache] == NULL);
3917  DCHCache[n_DCHCache] = ent = (DCHCacheEntry *)
3919  ent->valid = false;
3920  strlcpy(ent->str, str, DCH_CACHE_SIZE + 1);
3921  ent->std = std;
3922  ent->age = (++DCHCounter);
3923  /* caller is expected to fill format, then set valid */
3924  ++n_DCHCache;
3925  return ent;
3926  }
3927 }
3928 
3929 /* look for an existing DCHCacheEntry matching the given format picture */
3930 static DCHCacheEntry *
3931 DCH_cache_search(const char *str, bool std)
3932 {
3933  /* Ensure we can advance DCHCounter below */
3935 
3936  for (int i = 0; i < n_DCHCache; i++)
3937  {
3938  DCHCacheEntry *ent = DCHCache[i];
3939 
3940  if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std)
3941  {
3942  ent->age = (++DCHCounter);
3943  return ent;
3944  }
3945  }
3946 
3947  return NULL;
3948 }
3949 
3950 /* Find or create a DCHCacheEntry for the given format picture */
3951 static DCHCacheEntry *
3952 DCH_cache_fetch(const char *str, bool std)
3953 {
3954  DCHCacheEntry *ent;
3955 
3956  if ((ent = DCH_cache_search(str, std)) == NULL)
3957  {
3958  /*
3959  * Not in the cache, must run parser and save a new format-picture to
3960  * the cache. Do not mark the cache entry valid until parsing
3961  * succeeds.
3962  */
3963  ent = DCH_cache_getnew(str, std);
3964 
3965  parse_format(ent->format, str, DCH_keywords, DCH_suff, DCH_index,
3966  DCH_FLAG | (std ? STD_FLAG : 0), NULL);
3967 
3968  ent->valid = true;
3969  }
3970  return ent;
3971 }
3972 
3973 /*
3974  * Format a date/time or interval into a string according to fmt.
3975  * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char
3976  * for formatting.
3977  */
3978 static text *
3979 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
3980 {
3981  FormatNode *format;
3982  char *fmt_str,
3983  *result;
3984  bool incache;
3985  int fmt_len;
3986  text *res;
3987 
3988  /*
3989  * Convert fmt to C string
3990  */
3991  fmt_str = text_to_cstring(fmt);
3992  fmt_len = strlen(fmt_str);
3993 
3994  /*
3995  * Allocate workspace for result as C string
3996  */
3997  result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
3998  *result = '\0';
3999 
4000  if (fmt_len > DCH_CACHE_SIZE)
4001  {
4002  /*
4003  * Allocate new memory if format picture is bigger than static cache
4004  * and do not use cache (call parser always)
4005  */
4006  incache = false;
4007 
4008  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4009 
4010  parse_format(format, fmt_str, DCH_keywords,
4011  DCH_suff, DCH_index, DCH_FLAG, NULL);
4012  }
4013  else
4014  {
4015  /*
4016  * Use cache buffers
4017  */
4018  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
4019 
4020  incache = true;
4021  format = ent->format;
4022  }
4023 
4024  /* The real work is here */
4025  DCH_to_char(format, is_interval, tmtc, result, collid);
4026 
4027  if (!incache)
4028  pfree(format);
4029 
4030  pfree(fmt_str);
4031 
4032  /* convert C-string result to TEXT format */
4033  res = cstring_to_text(result);
4034 
4035  pfree(result);
4036  return res;
4037 }
4038 
4039 /****************************************************************************
4040  * Public routines
4041  ***************************************************************************/
4042 
4043 /* -------------------
4044  * TIMESTAMP to_char()
4045  * -------------------
4046  */
4047 Datum
4049 {
4051  text *fmt = PG_GETARG_TEXT_PP(1),
4052  *res;
4053  TmToChar tmtc;
4054  struct pg_tm *tm;
4055  int thisdate;
4056 
4057  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4058  PG_RETURN_NULL();
4059 
4060  ZERO_tmtc(&tmtc);
4061  tm = tmtcTm(&tmtc);
4062 
4063  if (timestamp2tm(dt, NULL, tm, &tmtcFsec(&tmtc), NULL, NULL) != 0)
4064  ereport(ERROR,
4065  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4066  errmsg("timestamp out of range")));
4067 
4068  thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
4069  tm->tm_wday = (thisdate + 1) % 7;
4070  tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
4071 
4072  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4073  PG_RETURN_NULL();
4074 
4075  PG_RETURN_TEXT_P(res);
4076 }
4077 
4078 Datum
4080 {
4082  text *fmt = PG_GETARG_TEXT_PP(1),
4083  *res;
4084  TmToChar tmtc;
4085  int tz;
4086  struct pg_tm *tm;
4087  int thisdate;
4088 
4089  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4090  PG_RETURN_NULL();
4091 
4092  ZERO_tmtc(&tmtc);
4093  tm = tmtcTm(&tmtc);
4094 
4095  if (timestamp2tm(dt, &tz, tm, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
4096  ereport(ERROR,
4097  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4098  errmsg("timestamp out of range")));
4099 
4100  thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
4101  tm->tm_wday = (thisdate + 1) % 7;
4102  tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
4103 
4104  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4105  PG_RETURN_NULL();
4106 
4107  PG_RETURN_TEXT_P(res);
4108 }
4109 
4110 
4111 /* -------------------
4112  * INTERVAL to_char()
4113  * -------------------
4114  */
4115 Datum
4117 {
4118  Interval *it = PG_GETARG_INTERVAL_P(0);
4119  text *fmt = PG_GETARG_TEXT_PP(1),
4120  *res;
4121  TmToChar tmtc;
4122  struct pg_tm *tm;
4123 
4124  if (VARSIZE_ANY_EXHDR(fmt) <= 0)
4125  PG_RETURN_NULL();
4126 
4127  ZERO_tmtc(&tmtc);
4128  tm = tmtcTm(&tmtc);
4129 
4130  if (interval2tm(*it, tm, &tmtcFsec(&tmtc)) != 0)
4131  PG_RETURN_NULL();
4132 
4133  /* wday is meaningless, yday approximates the total span in days */
4134  tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday;
4135 
4136  if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
4137  PG_RETURN_NULL();
4138 
4139  PG_RETURN_TEXT_P(res);
4140 }
4141 
4142 /* ---------------------
4143  * TO_TIMESTAMP()
4144  *
4145  * Make Timestamp from date_str which is formatted at argument 'fmt'
4146  * ( to_timestamp is reverse to_char() )
4147  * ---------------------
4148  */
4149 Datum
4151 {
4152  text *date_txt = PG_GETARG_TEXT_PP(0);
4153  text *fmt = PG_GETARG_TEXT_PP(1);
4154  Oid collid = PG_GET_COLLATION();
4155  Timestamp result;
4156  int tz;
4157  struct pg_tm tm;
4158  fsec_t fsec;
4159  int fprec;
4160 
4161  do_to_timestamp(date_txt, fmt, collid, false,
4162  &tm, &fsec, &fprec, NULL, NULL);
4163 
4164  /* Use the specified time zone, if any. */
4165  if (tm.tm_zone)
4166  {
4167  int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), &tz);
4168 
4169  if (dterr)
4170  DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
4171  }
4172  else
4174 
4175  if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
4176  ereport(ERROR,
4177  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4178  errmsg("timestamp out of range")));
4179 
4180  /* Use the specified fractional precision, if any. */
4181  if (fprec)
4182  AdjustTimestampForTypmod(&result, fprec);
4183 
4184  PG_RETURN_TIMESTAMP(result);
4185 }
4186 
4187 /* ----------
4188  * TO_DATE
4189  * Make Date from date_str which is formatted at argument 'fmt'
4190  * ----------
4191  */
4192 Datum
4194 {
4195  text *date_txt = PG_GETARG_TEXT_PP(0);
4196  text *fmt = PG_GETARG_TEXT_PP(1);
4197  Oid collid = PG_GET_COLLATION();
4198  DateADT result;
4199  struct pg_tm tm;
4200  fsec_t fsec;
4201 
4202  do_to_timestamp(date_txt, fmt, collid, false,
4203  &tm, &fsec, NULL, NULL, NULL);
4204 
4205  /* Prevent overflow in Julian-day routines */
4206  if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4207  ereport(ERROR,
4208  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4209  errmsg("date out of range: \"%s\"",
4210  text_to_cstring(date_txt))));
4211 
4212  result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
4213 
4214  /* Now check for just-out-of-range dates */
4215  if (!IS_VALID_DATE(result))
4216  ereport(ERROR,
4217  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4218  errmsg("date out of range: \"%s\"",
4219  text_to_cstring(date_txt))));
4220 
4221  PG_RETURN_DATEADT(result);
4222 }
4223 
4224 /*
4225  * Convert the 'date_txt' input to a datetime type using argument 'fmt'
4226  * as a format string. The collation 'collid' may be used for case-folding
4227  * rules in some cases. 'strict' specifies standard parsing mode.
4228  *
4229  * The actual data type (returned in 'typid', 'typmod') is determined by
4230  * the presence of date/time/zone components in the format string.
4231  *
4232  * When timezone component is present, the corresponding offset is
4233  * returned in '*tz'.
4234  *
4235  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
4236  * and zero value is returned.
4237  */
4238 Datum
4239 parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
4240  Oid *typid, int32 *typmod, int *tz,
4241  bool *have_error)
4242 {
4243  struct pg_tm tm;
4244  fsec_t fsec;
4245  int fprec;
4246  uint32 flags;
4247 
4248  do_to_timestamp(date_txt, fmt, collid, strict,
4249  &tm, &fsec, &fprec, &flags, have_error);
4250  CHECK_ERROR;
4251 
4252  *typmod = fprec ? fprec : -1; /* fractional part precision */
4253 
4254  if (flags & DCH_DATED)
4255  {
4256  if (flags & DCH_TIMED)
4257  {
4258  if (flags & DCH_ZONED)
4259  {
4260  TimestampTz result;
4261 
4262  if (tm.tm_zone)
4263  {
4264  int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
4265 
4266  if (dterr)
4267  DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
4268  }
4269  else
4270  {
4271  /*
4272  * Time zone is present in format string, but not in input
4273  * string. Assuming do_to_timestamp() triggers no error
4274  * this should be possible only in non-strict case.
4275  */
4276  Assert(!strict);
4277 
4279  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4280  errmsg("missing time zone in input string for type timestamptz"))));
4281  }
4282 
4283  if (tm2timestamp(&tm, fsec, tz, &result) != 0)
4285  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4286  errmsg("timestamptz out of range"))));
4287 
4288  AdjustTimestampForTypmod(&result, *typmod);
4289 
4290  *typid = TIMESTAMPTZOID;
4291  return TimestampTzGetDatum(result);
4292  }
4293  else
4294  {
4295  Timestamp result;
4296 
4297  if (tm2timestamp(&tm, fsec, NULL, &result) != 0)
4299  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4300  errmsg("timestamp out of range"))));
4301 
4302  AdjustTimestampForTypmod(&result, *typmod);
4303 
4304  *typid = TIMESTAMPOID;
4305  return TimestampGetDatum(result);
4306  }
4307  }
4308  else
4309  {
4310  if (flags & DCH_ZONED)
4311  {
4313  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4314  errmsg("datetime format is zoned but not timed"))));
4315  }
4316  else
4317  {
4318  DateADT result;
4319 
4320  /* Prevent overflow in Julian-day routines */
4321  if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4323  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4324  errmsg("date out of range: \"%s\"",
4325  text_to_cstring(date_txt)))));
4326 
4327  result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) -
4329 
4330  /* Now check for just-out-of-range dates */
4331  if (!IS_VALID_DATE(result))
4333  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4334  errmsg("date out of range: \"%s\"",
4335  text_to_cstring(date_txt)))));
4336 
4337  *typid = DATEOID;
4338  return DateADTGetDatum(result);
4339  }
4340  }
4341  }
4342  else if (flags & DCH_TIMED)
4343  {
4344  if (flags & DCH_ZONED)
4345  {
4346  TimeTzADT *result = palloc(sizeof(TimeTzADT));
4347 
4348  if (tm.tm_zone)
4349  {
4350  int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
4351 
4352  if (dterr)
4353  RETURN_ERROR(DateTimeParseError(dterr, text_to_cstring(date_txt), "timetz"));
4354  }
4355  else
4356  {
4357  /*
4358  * Time zone is present in format string, but not in input
4359  * string. Assuming do_to_timestamp() triggers no error this
4360  * should be possible only in non-strict case.
4361  */
4362  Assert(!strict);
4363 
4365  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4366  errmsg("missing time zone in input string for type timetz"))));
4367  }
4368 
4369  if (tm2timetz(&tm, fsec, *tz, result) != 0)
4371  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4372  errmsg("timetz out of range"))));
4373 
4374  AdjustTimeForTypmod(&result->time, *typmod);
4375 
4376  *typid = TIMETZOID;
4377  return TimeTzADTPGetDatum(result);
4378  }
4379  else
4380  {
4381  TimeADT result;
4382 
4383  if (tm2time(&tm, fsec, &result) != 0)
4385  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4386  errmsg("time out of range"))));
4387 
4388  AdjustTimeForTypmod(&result, *typmod);
4389 
4390  *typid = TIMEOID;
4391  return TimeADTGetDatum(result);
4392  }
4393  }
4394  else
4395  {
4397  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4398  errmsg("datetime format is not dated and not timed"))));
4399  }
4400 
4401 on_error:
4402  return (Datum) 0;
4403 }
4404 
4405 /*
4406  * do_to_timestamp: shared code for to_timestamp and to_date
4407  *
4408  * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm,
4409  * fractional seconds, and fractional precision.
4410  *
4411  * 'collid' identifies the collation to use, if needed.
4412  * 'std' specifies standard parsing mode.
4413  * Bit mask of date/time/zone components found in 'fmt' is returned in 'flags',
4414  * if that is not NULL.
4415  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
4416  *
4417  * We parse 'fmt' into a list of FormatNodes, which is then passed to
4418  * DCH_from_char to populate a TmFromChar with the parsed contents of
4419  * 'date_txt'.
4420  *
4421  * The TmFromChar is then analysed and converted into the final results in
4422  * struct 'tm', 'fsec', and 'fprec'.
4423  */
4424 static void
4425 do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
4426  struct pg_tm *tm, fsec_t *fsec, int *fprec,
4427  uint32 *flags, bool *have_error)
4428 {
4429  FormatNode *format = NULL;
4430  TmFromChar tmfc;
4431  int fmt_len;
4432  char *date_str;
4433  int fmask;
4434  bool incache = false;
4435 
4436  Assert(tm != NULL);
4437  Assert(fsec != NULL);
4438 
4439  date_str = text_to_cstring(date_txt);
4440 
4441  ZERO_tmfc(&tmfc);
4442  ZERO_tm(tm);
4443  *fsec = 0;
4444  if (fprec)
4445  *fprec = 0;
4446  if (flags)
4447  *flags = 0;
4448  fmask = 0; /* bit mask for ValidateDate() */
4449 
4450  fmt_len = VARSIZE_ANY_EXHDR(fmt);
4451 
4452  if (fmt_len)
4453  {
4454  char *fmt_str;
4455 
4456  fmt_str = text_to_cstring(fmt);
4457 
4458  if (fmt_len > DCH_CACHE_SIZE)
4459  {
4460  /*
4461  * Allocate new memory if format picture is bigger than static
4462  * cache and do not use cache (call parser always)
4463  */
4464  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4465 
4466  parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index,
4467  DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4468  }
4469  else
4470  {
4471  /*
4472  * Use cache buffers
4473  */
4474  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std);
4475 
4476  incache = true;
4477  format = ent->format;
4478  }
4479 
4480 #ifdef DEBUG_TO_FROM_CHAR
4481  /* dump_node(format, fmt_len); */
4482  /* dump_index(DCH_keywords, DCH_index); */
4483 #endif
4484 
4485  DCH_from_char(format, date_str, &tmfc, collid, std, have_error);
4486  CHECK_ERROR;
4487 
4488  pfree(fmt_str);
4489 
4490  if (flags)
4491  *flags = DCH_datetime_type(format, have_error);
4492 
4493  if (!incache)
4494  {
4495  pfree(format);
4496  format = NULL;
4497  }
4498 
4499  CHECK_ERROR;
4500  }
4501 
4502  DEBUG_TMFC(&tmfc);
4503 
4504  /*
4505  * Convert to_date/to_timestamp input fields to standard 'tm'
4506  */
4507  if (tmfc.ssss)
4508  {
4509  int x = tmfc.ssss;
4510 
4511  tm->tm_hour = x / SECS_PER_HOUR;
4512  x %= SECS_PER_HOUR;
4513  tm->tm_min = x / SECS_PER_MINUTE;
4514  x %= SECS_PER_MINUTE;
4515  tm->tm_sec = x;
4516  }
4517 
4518  if (tmfc.ss)
4519  tm->tm_sec = tmfc.ss;
4520  if (tmfc.mi)
4521  tm->tm_min = tmfc.mi;
4522  if (tmfc.hh)
4523  tm->tm_hour = tmfc.hh;
4524 
4525  if (tmfc.clock == CLOCK_12_HOUR)
4526  {
4527  if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
4528  {
4530  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4531  errmsg("hour \"%d\" is invalid for the 12-hour clock",
4532  tm->tm_hour),
4533  errhint("Use the 24-hour clock, or give an hour between 1 and 12."))));
4534  }
4535 
4536  if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
4537  tm->tm_hour += HOURS_PER_DAY / 2;
4538  else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
4539  tm->tm_hour = 0;
4540  }
4541 
4542  if (tmfc.year)
4543  {
4544  /*
4545  * If CC and YY (or Y) are provided, use YY as 2 low-order digits for
4546  * the year in the given century. Keep in mind that the 21st century
4547  * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
4548  * 600BC to 501BC.
4549  */
4550  if (tmfc.cc && tmfc.yysz <= 2)
4551  {
4552  if (tmfc.bc)
4553  tmfc.cc = -tmfc.cc;
4554  tm->tm_year = tmfc.year % 100;
4555  if (tm->tm_year)
4556  {
4557  if (tmfc.cc >= 0)
4558  tm->tm_year += (tmfc.cc - 1) * 100;
4559  else
4560  tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
4561  }
4562  else
4563  {
4564  /* find century year for dates ending in "00" */
4565  tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
4566  }
4567  }
4568  else
4569  {
4570  /* If a 4-digit year is provided, we use that and ignore CC. */
4571  tm->tm_year = tmfc.year;
4572  if (tmfc.bc)
4573  tm->tm_year = -tm->tm_year;
4574  /* correct for our representation of BC years */
4575  if (tm->tm_year < 0)
4576  tm->tm_year++;
4577  }
4578  fmask |= DTK_M(YEAR);
4579  }
4580  else if (tmfc.cc)
4581  {
4582  /* use first year of century */
4583  if (tmfc.bc)
4584  tmfc.cc = -tmfc.cc;
4585  if (tmfc.cc >= 0)
4586  /* +1 because 21st century started in 2001 */
4587  tm->tm_year = (tmfc.cc - 1) * 100 + 1;
4588  else
4589  /* +1 because year == 599 is 600 BC */
4590  tm->tm_year = tmfc.cc * 100 + 1;
4591  fmask |= DTK_M(YEAR);
4592  }
4593 
4594  if (tmfc.j)
4595  {
4596  j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4597  fmask |= DTK_DATE_M;
4598  }
4599 
4600  if (tmfc.ww)
4601  {
4602  if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4603  {
4604  /*
4605  * If tmfc.d is not set, then the date is left at the beginning of
4606  * the ISO week (Monday).
4607  */
4608  if (tmfc.d)
4609  isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4610  else
4611  isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4612  fmask |= DTK_DATE_M;
4613  }
4614  else
4615  tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
4616  }
4617 
4618  if (tmfc.w)
4619  tmfc.dd = (tmfc.w - 1) * 7 + 1;
4620  if (tmfc.dd)
4621  {
4622  tm->tm_mday = tmfc.dd;
4623  fmask |= DTK_M(DAY);
4624  }
4625  if (tmfc.mm)
4626  {
4627  tm->tm_mon = tmfc.mm;
4628  fmask |= DTK_M(MONTH);
4629  }
4630 
4631  if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
4632  {
4633  /*
4634  * The month and day field have not been set, so we use the
4635  * day-of-year field to populate them. Depending on the date mode,
4636  * this field may be interpreted as a Gregorian day-of-year, or an ISO
4637  * week date day-of-year.
4638  */
4639 
4640  if (!tm->tm_year && !tmfc.bc)
4641  {
4643  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4644  errmsg("cannot calculate day of year without year information"))));
4645  }
4646 
4647  if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4648  {
4649  int j0; /* zeroth day of the ISO year, in Julian */
4650 
4651  j0 = isoweek2j(tm->tm_year, 1) - 1;
4652 
4653  j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4654  fmask |= DTK_DATE_M;
4655  }
4656  else
4657  {
4658  const int *y;
4659  int i;
4660 
4661  static const int ysum[2][13] = {
4662  {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
4663  {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}};
4664 
4665  y = ysum[isleap(tm->tm_year)];
4666 
4667  for (i = 1; i <= MONTHS_PER_YEAR; i++)
4668  {
4669  if (tmfc.ddd <= y[i])
4670  break;
4671  }
4672  if (tm->tm_mon <= 1)
4673  tm->tm_mon = i;
4674 
4675  if (tm->tm_mday <= 1)
4676  tm->tm_mday = tmfc.ddd - y[i - 1];
4677 
4678  fmask |= DTK_M(MONTH) | DTK_M(DAY);
4679  }
4680  }
4681 
4682  if (tmfc.ms)
4683  *fsec += tmfc.ms * 1000;
4684  if (tmfc.us)
4685  *fsec += tmfc.us;
4686  if (fprec)
4687  *fprec = tmfc.ff; /* fractional precision, if specified */
4688 
4689  /* Range-check date fields according to bit mask computed above */
4690  if (fmask != 0)
4691  {
4692  /* We already dealt with AD/BC, so pass isjulian = true */
4693  int dterr = ValidateDate(fmask, true, false, false, tm);
4694 
4695  if (dterr != 0)
4696  {
4697  /*
4698  * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate
4699  * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an
4700  * irrelevant hint about datestyle.
4701  */
4702  RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
4703  }
4704  }
4705 
4706  /* Range-check time fields too */
4707  if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY ||
4708  tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR ||
4709  tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE ||
4710  *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC)
4711  {
4712  RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
4713  }
4714 
4715  /* Save parsed time-zone into tm->tm_zone if it was specified */
4716  if (tmfc.tzsign)
4717  {
4718  char *tz;
4719 
4720  if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR ||
4721  tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR)
4722  {
4723  RETURN_ERROR(DateTimeParseError(DTERR_TZDISP_OVERFLOW, date_str, "timestamp"));
4724  }
4725 
4726  tz = psprintf("%c%02d:%02d",
4727  tmfc.tzsign > 0 ? '+' : '-', tmfc.tzh, tmfc.tzm);
4728 
4729  tm->tm_zone = tz;
4730  }
4731 
4732  DEBUG_TM(tm);
4733 
4734 on_error:
4735 
4736  if (format && !incache)
4737  pfree(format);
4738 
4739  pfree(date_str);
4740 }
4741 
4742 
4743 /**********************************************************************
4744  * the NUMBER version part
4745  *********************************************************************/
4746 
4747 
/*
 * Fill the first "max" bytes of str with the character c and NUL-terminate
 * the result.  The buffer must therefore have room for max + 1 bytes.
 * Returns str.
 */
static char *
fill_str(char *str, int c, int max)
{
	char	   *end = (char *) memset(str, c, max) + max;

	*end = '\0';
	return str;
}
4755 
/*
 * Reset every field of the number description pointed to by _n to zero,
 * so that a subsequent format-picture parse starts from a clean state.
 */
#define zeroize_NUM(_n) \
do { \
	(_n)->pre = 0; \
	(_n)->post = 0; \
	(_n)->pre_lsign_num = 0; \
	(_n)->lsign = 0; \
	(_n)->zero_start = 0; \
	(_n)->zero_end = 0; \
	(_n)->multi = 0; \
	(_n)->need_locale = 0; \
	(_n)->flag = 0; \
} while (0)
4768 
4769 /* This works the same as DCH_prevent_counter_overflow */
4770 static inline void
4772 {
4773  if (NUMCounter >= (INT_MAX - 1))
4774  {
4775  for (int i = 0; i < n_NUMCache; i++)
4776  NUMCache[i]->age >>= 1;
4777  NUMCounter >>= 1;
4778  }
4779 }
4780 
4781 /* select a NUMCacheEntry to hold the given format picture */
4782 static NUMCacheEntry *
4783 NUM_cache_getnew(const char *str)
4784 {
4785  NUMCacheEntry *ent;
4786 
4787  /* Ensure we can advance NUMCounter below */
4789 
4790  /*
4791  * If cache is full, remove oldest entry (or recycle first not-valid one)
4792  */
4794  {
4795  NUMCacheEntry *old = NUMCache[0];
4796 
4797 #ifdef DEBUG_TO_FROM_CHAR
4798  elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache);
4799 #endif
4800  if (old->valid)
4801  {
4802  for (int i = 1; i < NUM_CACHE_ENTRIES; i++)
4803  {
4804  ent = NUMCache[i];
4805  if (!ent->valid)
4806  {
4807  old = ent;
4808  break;
4809  }
4810  if (ent->age < old->age)
4811  old = ent;
4812  }
4813  }
4814 #ifdef DEBUG_TO_FROM_CHAR
4815  elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age);
4816 #endif
4817  old->valid = false;
4818  strlcpy(old->str, str, NUM_CACHE_SIZE + 1);
4819  old->age = (++NUMCounter);
4820  /* caller is expected to fill format and Num, then set valid */
4821  return old;
4822  }
4823  else
4824  {
4825 #ifdef DEBUG_TO_FROM_CHAR
4826  elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache);
4827 #endif
4828  Assert(NUMCache[n_NUMCache] == NULL);
4829  NUMCache[n_NUMCache] = ent = (NUMCacheEntry *)
4831  ent->valid = false;
4832  strlcpy(ent->str, str, NUM_CACHE_SIZE + 1);
4833  ent->age = (++NUMCounter);
4834  /* caller is expected to fill format and Num, then set valid */
4835  ++n_NUMCache;
4836  return ent;
4837  }
4838 }
4839 
4840 /* look for an existing NUMCacheEntry matching the given format picture */
4841 static NUMCacheEntry *
4842 NUM_cache_search(const char *str)
4843 {
4844  /* Ensure we can advance NUMCounter below */
4846 
4847  for (int i = 0; i < n_NUMCache; i++)
4848  {
4849  NUMCacheEntry *ent = NUMCache[i];
4850 
4851  if (ent->valid && strcmp(ent->str, str) == 0)
4852  {
4853  ent->age = (++NUMCounter);
4854  return ent;
4855  }
4856  }
4857 
4858  return NULL;
4859 }
4860 
4861 /* Find or create a NUMCacheEntry for the given format picture */
4862 static NUMCacheEntry *
4863 NUM_cache_fetch(const char *str)
4864 {
4865  NUMCacheEntry *ent;
4866 
4867  if ((ent = NUM_cache_search(str)) == NULL)
4868  {
4869  /*
4870  * Not in the cache, must run parser and save a new format-picture to
4871  * the cache. Do not mark the cache entry valid until parsing
4872  * succeeds.
4873  */
4874  ent = NUM_cache_getnew(str);
4875 
4876  zeroize_NUM(&ent->Num);
4877 
4878  parse_format(ent->format, str, NUM_keywords,
4879  NULL, NUM_index, NUM_FLAG, &ent->Num);
4880 
4881  ent->valid = true;
4882  }
4883  return ent;
4884 }
4885 
4886 /* ----------
4887  * Cache routine for NUM to_char version
4888  * ----------
4889  */
4890 static FormatNode *
4891 NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
4892 {
4893  FormatNode *format = NULL;
4894  char *str;
4895 
4896  str = text_to_cstring(pars_str);
4897 
4898  if (len > NUM_CACHE_SIZE)
4899  {
4900  /*
4901  * Allocate new memory if format picture is bigger than static cache
4902  * and do not use cache (call parser always)
4903  */
4904  format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode));
4905 
4906  *shouldFree = true;
4907 
4908  zeroize_NUM(Num);
4909 
4910  parse_format(format, str, NUM_keywords,
4911  NULL, NUM_index, NUM_FLAG, Num);
4912  }
4913  else
4914  {
4915  /*
4916  * Use cache buffers
4917  */
4918  NUMCacheEntry *ent = NUM_cache_fetch(str);
4919 
4920  *shouldFree = false;
4921 
4922  format = ent->format;
4923 
4924  /*
4925  * Copy cache to used struct
4926  */
4927  Num->flag = ent->Num.flag;
4928  Num->lsign = ent->Num.lsign;
4929  Num->pre = ent->Num.pre;
4930  Num->post = ent->Num.post;
4931  Num->pre_lsign_num = ent->Num.pre_lsign_num;
4932  Num->need_locale = ent->Num.need_locale;
4933  Num->multi = ent->Num.multi;
4934  Num->zero_start = ent->Num.zero_start;
4935  Num->zero_end = ent->Num.zero_end;
4936  }
4937 
4938 #ifdef DEBUG_TO_FROM_CHAR
4939  /* dump_node(format, len); */
4940  dump_index(NUM_keywords, NUM_index);
4941 #endif
4942 
4943  pfree(str);
4944  return format;
4945 }
4946 
4947 
4948 static char *
4949 int_to_roman(int number)
4950 {
4951  int len,
4952  num;
4953  char *p,
4954  *result,
4955  numstr[12];
4956 
4957  result = (char *) palloc(16);
4958  *result = '\0';
4959 
4960  if (number > 3999 || number < 1)
4961  {
4962  fill_str(result, '#', 15);
4963  return result;
4964  }
4965  len = snprintf(numstr, sizeof(numstr), "%d", number);
4966 
4967  for (p = numstr; *p != '\0'; p++, --len)
4968  {
4969  num = *p - ('0' + 1);
4970  if (num < 0)
4971  continue;
4972 
4973  if (len > 3)
4974  {
4975  while (num-- != -1)
4976  strcat(result, "M");
4977  }
4978  else
4979  {
4980  if (len == 3)
4981  strcat(result, rm100[num]);
4982  else if (len == 2)
4983  strcat(result, rm10[num]);
4984  else if (len == 1)
4985  strcat(result, rm1[num]);
4986  }
4987  }
4988  return result;
4989 }
4990 
4991 
4992 
4993 /* ----------
4994  * Locale
4995  * ----------
4996  */
4997 static void
4999 {
5000  if (Np->Num->need_locale)
5001  {
5002  struct lconv *lconv;
5003 
5004  /*
5005  * Get locales
5006  */
5007  lconv = PGLC_localeconv();
5008 
5009  /*
5010  * Positive / Negative number sign
5011  */
5012  if (lconv->negative_sign && *lconv->negative_sign)
5013  Np->L_negative_sign = lconv->negative_sign;
5014  else
5015  Np->L_negative_sign = "-";
5016 
5017  if (lconv->positive_sign && *lconv->positive_sign)
5018  Np->L_positive_sign = lconv->positive_sign;
5019  else
5020  Np->L_positive_sign = "+";
5021 
5022  /*
5023  * Number decimal point
5024  */
5025  if (lconv->decimal_point && *lconv->decimal_point)
5026  Np->decimal = lconv->decimal_point;
5027 
5028  else
5029  Np->decimal = ".";
5030 
5031  if (!IS_LDECIMAL(Np->Num))
5032  Np->decimal = ".";
5033 
5034  /*
5035  * Number thousands separator
5036  *
5037  * Some locales (e.g. broken glibc pt_BR), have a comma for decimal,
5038  * but "" for thousands_sep, so we set the thousands_sep too.
5039  * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php
5040  */
5041  if (lconv->thousands_sep && *lconv->thousands_sep)
5042  Np->L_thousands_sep = lconv->thousands_sep;
5043  /* Make sure thousands separator doesn't match decimal point symbol. */
5044  else if (strcmp(Np->decimal, ",") != 0)
5045  Np->L_thousands_sep = ",";
5046  else
5047  Np->L_thousands_sep = ".";
5048 
5049  /*
5050  * Currency symbol
5051  */
5052  if (lconv->currency_symbol && *lconv->currency_symbol)
5053  Np->L_currency_symbol = lconv->currency_symbol;
5054  else
5055  Np->L_currency_symbol = " ";
5056  }
5057  else
5058  {
5059  /*
5060  * Default values
5061  */
5062  Np->L_negative_sign = "-";
5063  Np->L_positive_sign = "+";
5064  Np->decimal = ".";
5065 
5066  Np->L_thousands_sep = ",";
5067  Np->L_currency_symbol = " ";
5068  }
5069 }
5070 
/* ----------
 * Return pointer of last relevant number after decimal point
 *	12.0500 --> last relevant is '5'
 *	12.0000 --> last relevant is '.'
 * If there is no decimal point, return NULL (which will result in same
 * behavior as if FM hadn't been specified).
 *
 * The returned pointer, when not NULL, points into the string "num".
 * ----------
 */
static char *
get_last_relevant_decnum(char *num)
{
	char	   *result,
			   *p = strchr(num, '.');

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, "get_last_relevant_decnum()");
#endif

	if (!p)
		return NULL;

	/* start at the decimal point; advance to each later non-'0' character */
	result = p;

	while (*(++p))
	{
		if (*p != '0')
			result = p;
	}

	return result;
}
5102 
/*
 * These macros are used in NUM_processor() and its subsidiary routines.
 * Both expect "Np" and "input_len" to be in scope at the point of use.
 * OVERLOAD_TEST: true if we've reached end of input string
 * AMOUNT_TEST(s): true if at least s bytes remain in string
 *
 * AMOUNT_TEST compares the number of bytes remaining against s rather than
 * forming the pointer "Np->inout + (input_len - s)", which would point before
 * the start of the buffer whenever s exceeds input_len.
 */
#define OVERLOAD_TEST	(Np->inout_p >= Np->inout + input_len)
#define AMOUNT_TEST(s)	((input_len) - (Np->inout_p - Np->inout) >= (s))
5110 
5111 /* ----------
5112  * Number extraction for TO_NUMBER()
5113  * ----------
5114  */
5115 static void
5116 NUM_numpart_from_char(NUMProc *Np, int id, int input_len)
5117 {
5118  bool isread = false;
5119 
5120 #ifdef DEBUG_TO_FROM_CHAR
5121  elog(DEBUG_elog_output, " --- scan start --- id=%s",
5122  (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???");
5123 #endif
5124 
5125  if (OVERLOAD_TEST)
5126  return;
5127 
5128  if (*Np->inout_p == ' ')
5129  Np->inout_p++;
5130 
5131  if (OVERLOAD_TEST)
5132  return;
5133 
5134  /*
5135  * read sign before number
5136  */
5137  if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) &&
5138  (Np->read_pre + Np->read_post) == 0)
5139  {
5140 #ifdef DEBUG_TO_FROM_CHAR
5141  elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s",
5142  *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign);
5143 #endif
5144 
5145  /*
5146  * locale sign
5147  */
5148  if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE)
5149  {
5150  int x = 0;
5151 
5152 #ifdef DEBUG_TO_FROM_CHAR
5153  elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p);
5154 #endif
5155  if ((x = strlen(Np->L_negative_sign)) &&
5156  AMOUNT_TEST(x) &&
5157  strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
5158  {
5159  Np->inout_p += x;
5160  *Np->number = '-';
5161  }
5162  else if ((x = strlen(Np->L_positive_sign)) &&
5163  AMOUNT_TEST(x) &&
5164  strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
5165  {
5166  Np->inout_p += x;
5167  *Np->number = '+';
5168  }
5169  }
5170  else
5171  {
5172 #ifdef DEBUG_TO_FROM_CHAR
5173  elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p);
5174 #endif
5175 
5176  /*
5177  * simple + - < >
5178  */
5179  if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) &&
5180  *Np->inout_p == '<'))
5181  {
5182  *Np->number = '-'; /* set - */
5183  Np->inout_p++;
5184  }
5185  else if (*Np->inout_p == '+')
5186  {
5187  *Np->number = '+'; /* set + */
5188  Np->inout_p++;
5189  }
5190  }
5191  }
5192 
5193  if (OVERLOAD_TEST)
5194  return;
5195 
5196 #ifdef DEBUG_TO_FROM_CHAR
5197  elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number);
5198 #endif
5199 
5200  /*
5201  * read digit or decimal point
5202  */
5203  if (isdigit((unsigned char) *Np->inout_p))
5204  {
5205  if (Np->read_dec && Np->read_post == Np->Num->post)
5206  return;
5207 
5208  *Np->number_p = *Np->inout_p;
5209  Np->number_p++;
5210 
5211  if (Np->read_dec)
5212  Np->read_post++;
5213  else
5214  Np->read_pre++;
5215 
5216  isread = true;
5217 
5218 #ifdef DEBUG_TO_FROM_CHAR
5219  elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p);
5220 #endif
5221  }
5222  else if (IS_DECIMAL(Np->Num) && Np->read_dec == false)
5223  {
5224  /*
5225  * We need not test IS_LDECIMAL(Np->Num) explicitly here, because
5226  * Np->decimal is always just "." if we don't have a D format token.
5227  * So we just unconditionally match to Np->decimal.
5228  */
5229  int x = strlen(Np->decimal);
5230 
5231 #ifdef DEBUG_TO_FROM_CHAR
5232  elog(DEBUG_elog_output, "Try read decimal point (%c)",
5233  *Np->inout_p);
5234 #endif
5235  if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0)
5236  {
5237  Np->inout_p += x - 1;
5238  *Np->number_p = '.';
5239  Np->number_p++;
5240  Np->read_dec = true;
5241  isread = true;
5242  }
5243  }
5244 
5245  if (OVERLOAD_TEST)
5246  return;
5247 
5248  /*
5249  * Read sign behind "last" number
5250  *
5251  * We need sign detection because determine exact position of post-sign is
5252  * difficult:
5253  *
5254  * FM9999.9999999S -> 123.001- 9.9S -> .5- FM9.999999MI ->
5255  * 5.01-
5256  */
5257  if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0)
5258  {
5259  /*
5260  * locale sign (NUM_S) is always anchored behind a last number, if: -
5261  * locale sign expected - last read char was NUM_0/9 or NUM_DEC - and
5262  * next char is not digit
5263  */
5264  if (IS_LSIGN(Np->Num) && isread &&
5265  (Np->inout_p + 1) < Np->inout + input_len &&
5266  !isdigit((unsigned char) *(Np->inout_p + 1)))
5267  {
5268  int x;
5269  char *tmp = Np->inout_p++;
5270 
5271 #ifdef DEBUG_TO_FROM_CHAR
5272  elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p);
5273 #endif
5274  if ((x = strlen(Np->L_negative_sign)) &&
5275  AMOUNT_TEST(x) &&
5276  strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
5277  {
5278  Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
5279  *Np->number = '-';
5280  }
5281