PostgreSQL Source Code  git master
formatting.c
Go to the documentation of this file.
1 /* -----------------------------------------------------------------------
2  * formatting.c
3  *
4  * src/backend/utils/adt/formatting.c
5  *
6  *
7  * Portions Copyright (c) 1999-2019, PostgreSQL Global Development Group
8  *
9  *
10  * TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
11  *
12  * The PostgreSQL routines for a timestamp/int/float/numeric formatting,
13  * inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
14  *
15  *
16  * Cache & Memory:
17  * The routines keep their own internal cache for format pictures.
18  *
19  * The cache uses a static buffer and is persistent across transactions. If
20  * a format picture is bigger than the cache buffer, the parser is called
21  * every time that picture is used.
22  *
23  * NOTE for Number version:
24  * Everything in this version is implemented as keywords (i.e. suffixes
25  * are not used), because a format picture describes *one* item (number)
26  * only. This differs from the timestamp version, where each keyword can
27  * have a suffix.
28  *
29  * NOTE for Timestamp routines:
30  * This module does *not* use the POSIX 'struct tm' type, but rather the
31  * PgSQL type, in which tm_mon is one-based (*not* zero-based) and the
32  * year is *not* relative to 1900: the full year number is used.
33  * The module supports AD / BC / AM / PM.
34  *
35  * Supported types for to_char():
36  *
37  * Timestamp, Numeric, int4, int8, float4, float8
38  *
39  * Supported types for reverse conversion:
40  *
41  * Timestamp - to_timestamp()
42  * Date - to_date()
43  * Numeric - to_number()
44  *
45  *
46  * Karel Zak
47  *
48  * TODO
49  * - better number building (formatting) / parsing, now it isn't
50  * ideal code
51  * - use Assert()
52  * - add support for roman number to standard number conversion
53  * - add support for number spelling
54  * - add support for string to string formatting (we must be better
55  * than Oracle :-),
56  * to_char('Hello', 'X X X X X') -> 'H e l l o'
57  *
58  * -----------------------------------------------------------------------
59  */
60 
61 #ifdef DEBUG_TO_FROM_CHAR
62 #define DEBUG_elog_output DEBUG3
63 #endif
64 
65 #include "postgres.h"
66 
67 #include <ctype.h>
68 #include <unistd.h>
69 #include <math.h>
70 #include <float.h>
71 #include <limits.h>
72 
73 /*
74  * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
75  * declare them in <wchar.h>.
76  */
77 #ifdef HAVE_WCHAR_H
78 #include <wchar.h>
79 #endif
80 #ifdef HAVE_WCTYPE_H
81 #include <wctype.h>
82 #endif
83 
84 #ifdef USE_ICU
85 #include <unicode/ustring.h>
86 #endif
87 
88 #include "catalog/pg_collation.h"
89 #include "catalog/pg_type.h"
90 #include "mb/pg_wchar.h"
91 #include "utils/builtins.h"
92 #include "utils/date.h"
93 #include "utils/datetime.h"
94 #include "utils/float.h"
95 #include "utils/formatting.h"
96 #include "utils/int8.h"
97 #include "utils/memutils.h"
98 #include "utils/numeric.h"
99 #include "utils/pg_locale.h"
100 
101 /* ----------
102  * Convenience macros for error handling
103  * ----------
104  *
105  * The two macros below help to handle errors in functions that take a
106  * 'bool *have_error' argument. When this argument is not NULL, the
107  * function is expected to suppress ereports when possible. Instead it should
108  * return some default value and set the *have_error flag.
109  *
110  * The RETURN_ERROR() macro is intended to wrap ereport() calls. When the
111  * have_error function argument is not NULL, then instead of ereport'ing we
112  * set the *have_error flag and go to the on_error label. It is expected that
113  * at that label resources will be freed and some 'default' value returned.
114  *
115  * CHECK_ERROR() jumps to the on_error label when have_error is non-NULL and
116  * *have_error is set. It is meant for an immediate exit from the function
117  * after calling another function that takes a 'bool *have_error' argument.
118  */
119 #define RETURN_ERROR(throw_error) \
120 do { \
121  if (have_error) \
122  { \
123  *have_error = true; \
124  goto on_error; \
125  } \
126  else \
127  { \
128  throw_error; \
129  } \
130 } while (0)
131 
132 #define CHECK_ERROR \
133 do { \
134  if (have_error && *have_error) \
135  goto on_error; \
136 } while (0)
137 
138 /* ----------
139  * Routines flags
140  * ----------
141  */
142 #define DCH_FLAG 0x1 /* DATE-TIME flag */
143 #define NUM_FLAG 0x2 /* NUMBER flag */
144 #define STD_FLAG 0x4 /* STANDARD flag */
145 
146 /* ----------
147  * KeyWord Index (ascii from position 32 (' ') to 126 (~))
148  * ----------
149  */
150 #define KeyWord_INDEX_SIZE ('~' - ' ')
151 #define KeyWord_INDEX_FILTER(_c) ((_c) <= ' ' || (_c) >= '~' ? 0 : 1)
152 
153 /* ----------
154  * Maximal length of one node
155  * ----------
156  */
157 #define DCH_MAX_ITEM_SIZ 12 /* max localized day name */
158 #define NUM_MAX_ITEM_SIZ 8 /* roman number (RN has 15 chars) */
159 
160 
161 /* ----------
162  * Format parser structs
163  * ----------
164  */
/*
 * One entry of a suffix table (see DCH_suff[]): a short modifier string
 * that may be attached to a keyword in a format picture.
 */
typedef struct
{
	const char *name;			/* text of the suffix */
	int			len;			/* length of the suffix text */
	int			id;				/* code that is used in node->suffix */
	int			type;			/* SUFFTYPE_PREFIX or SUFFTYPE_POSTFIX */
} KeySuffix;
172 
173 /* ----------
174  * FromCharDateMode
175  * ----------
176  *
177  * This value is used to nominate one of several distinct (and mutually
178  * exclusive) date conventions that a keyword can belong to.
179  */
180 typedef enum
181 {
182  FROM_CHAR_DATE_NONE = 0, /* Value does not affect date mode. */
183  FROM_CHAR_DATE_GREGORIAN, /* Gregorian (day, month, year) style date */
184  FROM_CHAR_DATE_ISOWEEK /* ISO 8601 week date */
186 
187 typedef struct
188 {
189  const char *name;
190  int len;
191  int id;
192  bool is_digit;
194 } KeyWord;
195 
196 typedef struct
197 {
198  uint8 type; /* NODE_TYPE_XXX, see below */
199  char character[MAX_MULTIBYTE_CHAR_LEN + 1]; /* if type is CHAR */
200  uint8 suffix; /* keyword prefix/suffix code, if any */
201  const KeyWord *key; /* if type is ACTION */
202 } FormatNode;
203 
204 #define NODE_TYPE_END 1
205 #define NODE_TYPE_ACTION 2
206 #define NODE_TYPE_CHAR 3
207 #define NODE_TYPE_SEPARATOR 4
208 #define NODE_TYPE_SPACE 5
209 
210 #define SUFFTYPE_PREFIX 1
211 #define SUFFTYPE_POSTFIX 2
212 
213 #define CLOCK_24_HOUR 0
214 #define CLOCK_12_HOUR 1
215 
216 
217 /* ----------
218  * Full months
219  * ----------
220  */
221 static const char *const months_full[] = {
222  "January", "February", "March", "April", "May", "June", "July",
223  "August", "September", "October", "November", "December", NULL
224 };
225 
226 static const char *const days_short[] = {
227  "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
228 };
229 
230 /* ----------
231  * AD / BC
232  * ----------
233  * There is no 0 AD. Years go from 1 BC to 1 AD, so we make it
234  * positive and map year == -1 to year zero, and shift all negative
235  * years up one. For interval years, we just return the year.
236  */
237 #define ADJUST_YEAR(year, is_interval) ((is_interval) ? (year) : ((year) <= 0 ? -((year) - 1) : (year)))
238 
239 #define A_D_STR "A.D."
240 #define a_d_STR "a.d."
241 #define AD_STR "AD"
242 #define ad_STR "ad"
243 
244 #define B_C_STR "B.C."
245 #define b_c_STR "b.c."
246 #define BC_STR "BC"
247 #define bc_STR "bc"
248 
249 /*
250  * AD / BC strings for seq_search.
251  *
252  * These are given in two variants, a long form with periods and a standard
253  * form without.
254  *
255  * The array is laid out such that matches for AD have an even index, and
256  * matches for BC have an odd index. So the boolean value for BC is given by
257  * taking the array index of the match, modulo 2.
258  */
259 static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR, NULL};
260 static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR, NULL};
261 
262 /* ----------
263  * AM / PM
264  * ----------
265  */
266 #define A_M_STR "A.M."
267 #define a_m_STR "a.m."
268 #define AM_STR "AM"
269 #define am_STR "am"
270 
271 #define P_M_STR "P.M."
272 #define p_m_STR "p.m."
273 #define PM_STR "PM"
274 #define pm_STR "pm"
275 
276 /*
277  * AM / PM strings for seq_search.
278  *
279  * These are given in two variants, a long form with periods and a standard
280  * form without.
281  *
282  * The array is laid out such that matches for AM have an even index, and
283  * matches for PM have an odd index. So the boolean value for PM is given by
284  * taking the array index of the match, modulo 2.
285  */
286 static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR, NULL};
287 static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR, NULL};
288 
289 /* ----------
290  * Months in roman-numeral
291  * (Must be in reverse order for seq_search (in FROM_CHAR), because
292  * 'VIII' must have higher precedence than 'V')
293  * ----------
294  */
295 static const char *const rm_months_upper[] =
296 {"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I", NULL};
297 
298 static const char *const rm_months_lower[] =
299 {"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL};
300 
301 /* ----------
302  * Roman numbers
303  * ----------
304  */
305 static const char *const rm1[] = {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", NULL};
306 static const char *const rm10[] = {"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC", NULL};
307 static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", NULL};
308 
309 /* ----------
310  * Ordinal postfixes
311  * ----------
312  */
313 static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL};
314 static const char *const numth[] = {"st", "nd", "rd", "th", NULL};
315 
316 /* ----------
317  * Flags & Options:
318  * ----------
319  */
320 #define ONE_UPPER 1 /* Name */
321 #define ALL_UPPER 2 /* NAME */
322 #define ALL_LOWER 3 /* name */
323 
324 #define MAX_MONTH_LEN 9
325 #define MAX_MON_LEN 3
326 #define MAX_DAY_LEN 9
327 #define MAX_DY_LEN 3
328 #define MAX_RM_LEN 4
329 
330 #define TH_UPPER 1
331 #define TH_LOWER 2
332 
333 /* ----------
334  * Number description struct
335  * ----------
336  */
/*
 * Description of a number format picture.  All members are plain ints.
 */
typedef struct
{
	int			pre;			/* count of numbers before the decimal */
	int			post;			/* count of numbers after the decimal */
	int			lsign;			/* locale's sign is wanted */
	int			flag;			/* number parameters (NUM_F_* bits) */
	int			pre_lsign_num;	/* temporary value for lsign */
	int			multi;			/* multiplier for 'V' */
	int			zero_start;		/* position of the first zero */
	int			zero_end;		/* position of the last zero */
	int			need_locale;	/* locale is needed */
} NUMDesc;
349 
350 /* ----------
351  * Flags for NUMBER version
352  * ----------
353  */
354 #define NUM_F_DECIMAL (1 << 1)
355 #define NUM_F_LDECIMAL (1 << 2)
356 #define NUM_F_ZERO (1 << 3)
357 #define NUM_F_BLANK (1 << 4)
358 #define NUM_F_FILLMODE (1 << 5)
359 #define NUM_F_LSIGN (1 << 6)
360 #define NUM_F_BRACKET (1 << 7)
361 #define NUM_F_MINUS (1 << 8)
362 #define NUM_F_PLUS (1 << 9)
363 #define NUM_F_ROMAN (1 << 10)
364 #define NUM_F_MULTI (1 << 11)
365 #define NUM_F_PLUS_POST (1 << 12)
366 #define NUM_F_MINUS_POST (1 << 13)
367 #define NUM_F_EEEE (1 << 14)
368 
369 #define NUM_LSIGN_PRE (-1)
370 #define NUM_LSIGN_POST 1
371 #define NUM_LSIGN_NONE 0
372 
373 /* ----------
374  * Tests
375  * ----------
376  */
377 #define IS_DECIMAL(_f) ((_f)->flag & NUM_F_DECIMAL)
378 #define IS_LDECIMAL(_f) ((_f)->flag & NUM_F_LDECIMAL)
379 #define IS_ZERO(_f) ((_f)->flag & NUM_F_ZERO)
380 #define IS_BLANK(_f) ((_f)->flag & NUM_F_BLANK)
381 #define IS_FILLMODE(_f) ((_f)->flag & NUM_F_FILLMODE)
382 #define IS_BRACKET(_f) ((_f)->flag & NUM_F_BRACKET)
383 #define IS_MINUS(_f) ((_f)->flag & NUM_F_MINUS)
384 #define IS_LSIGN(_f) ((_f)->flag & NUM_F_LSIGN)
385 #define IS_PLUS(_f) ((_f)->flag & NUM_F_PLUS)
386 #define IS_ROMAN(_f) ((_f)->flag & NUM_F_ROMAN)
387 #define IS_MULTI(_f) ((_f)->flag & NUM_F_MULTI)
388 #define IS_EEEE(_f) ((_f)->flag & NUM_F_EEEE)
389 
390 /* ----------
391  * Format picture cache
392  *
393  * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long;
394  * likewise number format pictures up to NUM_CACHE_SIZE bytes long.
395  *
396  * For simplicity, the cache entries are fixed-size, so they allow for the
397  * worst case of a FormatNode for each byte in the picture string.
398  *
399  * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and
400  * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that
401  * we don't waste too much space by palloc'ing them individually. Be sure
402  * to adjust those macros if you add fields to those structs.
403  *
404  * The max number of entries in each cache is DCH_CACHE_ENTRIES
405  * resp. NUM_CACHE_ENTRIES.
406  * ----------
407  */
408 #define DCH_CACHE_OVERHEAD \
409  MAXALIGN(sizeof(bool) + sizeof(int))
410 #define NUM_CACHE_OVERHEAD \
411  MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc))
412 
413 #define DCH_CACHE_SIZE \
414  ((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
415 #define NUM_CACHE_SIZE \
416  ((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
417 
418 #define DCH_CACHE_ENTRIES 20
419 #define NUM_CACHE_ENTRIES 20
420 
421 typedef struct
422 {
424  char str[DCH_CACHE_SIZE + 1];
425  bool std;
426  bool valid;
427  int age;
428 } DCHCacheEntry;
429 
430 typedef struct
431 {
433  char str[NUM_CACHE_SIZE + 1];
434  bool valid;
435  int age;
437 } NUMCacheEntry;
438 
439 /* global cache for date/time format pictures */
441 static int n_DCHCache = 0; /* current number of entries */
442 static int DCHCounter = 0; /* aging-event counter */
443 
444 /* global cache for number format pictures */
446 static int n_NUMCache = 0; /* current number of entries */
447 static int NUMCounter = 0; /* aging-event counter */
448 
449 /* ----------
450  * For char->date/time conversion
451  * ----------
452  */
453 typedef struct
454 {
456  int hh,
457  pm,
458  mi,
459  ss,
460  ssss,
461  d, /* stored as 1-7, Sunday = 1, 0 means missing */
462  dd,
463  ddd,
464  mm,
465  ms,
466  year,
467  bc,
468  ww,
469  w,
470  cc,
471  j,
472  us,
473  yysz, /* is it YY or YYYY ? */
474  clock, /* 12 or 24 hour clock? */
475  tzsign, /* +1, -1 or 0 if timezone info is absent */
476  tzh,
477  tzm,
478  ff; /* fractional precision */
479 } TmFromChar;
480 
481 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
482 
483 /* ----------
484  * Debug
485  * ----------
486  */
#ifdef DEBUG_TO_FROM_CHAR
/*
 * DEBUG_TMFC(_X): log the listed members of *_X (accessed as (_X)->field,
 * so _X must be a pointer) via elog() at level DEBUG_elog_output.
 *
 * NOTE(review): this expansion ends with its own ';' whereas DEBUG_TM's
 * does not; kept as is so that existing call sites are unaffected.
 */
#define DEBUG_TMFC(_X) \
		elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d", \
			(_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
			(_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
			(_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
			(_X)->yysz, (_X)->clock);
/*
 * DEBUG_TM(_X): log the tm_* members of *_X via elog() at level
 * DEBUG_elog_output.  The label for tm_isdst used to be misspelled
 * "nisdst"; it now reads "isdst".
 */
#define DEBUG_TM(_X) \
		elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nisdst %d\nmon %d\n",\
			(_X)->tm_sec, (_X)->tm_year,\
			(_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\
			(_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
#else
/* Without DEBUG_TO_FROM_CHAR both macros expand to nothing. */
#define DEBUG_TMFC(_X)
#define DEBUG_TM(_X)
#endif
503 
504 /* ----------
505  * Datetime to char conversion
506  * ----------
507  */
508 typedef struct TmToChar
509 {
510  struct pg_tm tm; /* classic 'tm' struct */
511  fsec_t fsec; /* fractional seconds */
512  const char *tzn; /* timezone */
513 } TmToChar;
514 
515 #define tmtcTm(_X) (&(_X)->tm)
516 #define tmtcTzn(_X) ((_X)->tzn)
517 #define tmtcFsec(_X) ((_X)->fsec)
518 
519 #define ZERO_tm(_X) \
520 do { \
521  (_X)->tm_sec = (_X)->tm_year = (_X)->tm_min = (_X)->tm_wday = \
522  (_X)->tm_hour = (_X)->tm_yday = (_X)->tm_isdst = 0; \
523  (_X)->tm_mday = (_X)->tm_mon = 1; \
524  (_X)->tm_zone = NULL; \
525 } while(0)
526 
527 #define ZERO_tmtc(_X) \
528 do { \
529  ZERO_tm( tmtcTm(_X) ); \
530  tmtcFsec(_X) = 0; \
531  tmtcTzn(_X) = NULL; \
532 } while(0)
533 
534 /*
535  * to_char(time) appears to to_char() as an interval, so this check
536  * is really for interval and time data types.
537  */
538 #define INVALID_FOR_INTERVAL \
539 do { \
540  if (is_interval) \
541  ereport(ERROR, \
542  (errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
543  errmsg("invalid format specification for an interval value"), \
544  errhint("Intervals are not tied to specific calendar dates."))); \
545 } while(0)
546 
547 /*****************************************************************************
548  * KeyWord definitions
549  *****************************************************************************/
550 
551 /* ----------
552  * Suffixes (FormatNode.suffix is an OR of these codes)
553  * ----------
554  */
555 #define DCH_S_FM 0x01
556 #define DCH_S_TH 0x02
557 #define DCH_S_th 0x04
558 #define DCH_S_SP 0x08
559 #define DCH_S_TM 0x10
560 
561 /* ----------
562  * Suffix tests
563  * ----------
564  */
565 #define S_THth(_s) ((((_s) & DCH_S_TH) || ((_s) & DCH_S_th)) ? 1 : 0)
566 #define S_TH(_s) (((_s) & DCH_S_TH) ? 1 : 0)
567 #define S_th(_s) (((_s) & DCH_S_th) ? 1 : 0)
568 #define S_TH_TYPE(_s) (((_s) & DCH_S_TH) ? TH_UPPER : TH_LOWER)
569 
570 /* Oracle toggles FM behavior, we don't; see docs. */
571 #define S_FM(_s) (((_s) & DCH_S_FM) ? 1 : 0)
572 #define S_SP(_s) (((_s) & DCH_S_SP) ? 1 : 0)
573 #define S_TM(_s) (((_s) & DCH_S_TM) ? 1 : 0)
574 
575 /* ----------
576  * Suffixes definition for DATE-TIME TO/FROM CHAR
577  * ----------
578  */
579 #define TM_SUFFIX_LEN 2
580 
581 static const KeySuffix DCH_suff[] = {
582  {"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
583  {"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
584  {"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX},
585  {"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
586  {"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
587  {"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
588  {"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
589  /* last */
590  {NULL, 0, 0, 0}
591 };
592 
593 
594 /* ----------
595  * Format-pictures (KeyWord).
596  *
597  * The KeyWord tables are sorted alphabetically, *BUT* similar strings are
598  * sorted from complicated to easy:
599  *
600  * (example: "DDD","DD","Day","D" )
601  *
602  * This specific ordering is needed by the sequential search, because the
603  * keyword in the input has no explicit end. Which keyword is in
604  * "HH12blabla": "HH" or "HH12"? "HH12" must be tried first, because "HH"
605  * also matches the string but is not the right answer.
606  *
607  * (!)
608  * - A keyword's position in the table matches its position in enum DCH/NUM_poz.
609  * (!)
610  *
611  * For a fast search the 'int index[]' is used. The index is an ASCII table
612  * from position 32 (' ') to 126 (~); it holds the DCH_ / NUM_ enum value for
613  * each ASCII position, or -1 if no keyword starts with that character.
614  * Search example for the string "MM":
615  * 1) look at index['M' - 32],
616  * 2) take the keyword position (enum DCH_MI) from the index,
617  * 3) run a sequential search in keywords[] from this position.
618  *
619  * ----------
620  */
621 
622 typedef enum
623 {
644  DCH_FX, /* global suffix */
733 
734  /* last */
736 } DCH_poz;
737 
738 typedef enum
739 {
776 
777  /* last */
779 } NUM_poz;
780 
781 /* ----------
782  * KeyWords for DATE-TIME version
783  * ----------
784  */
785 static const KeyWord DCH_keywords[] = {
786 /* name, len, id, is_digit, date_mode */
787  {"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE}, /* A */
788  {"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
789  {"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
790  {"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
791  {"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE}, /* B */
792  {"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
793  {"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* C */
794  {"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE}, /* D */
795  {"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
796  {"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
797  {"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
798  {"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
799  {"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
800  {"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
801  {"FF1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* F */
802  {"FF2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
803  {"FF3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
804  {"FF4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
805  {"FF5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
806  {"FF6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
807  {"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
808  {"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* H */
809  {"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
810  {"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
811  {"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* I */
812  {"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
813  {"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
814  {"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
815  {"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
816  {"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
817  {"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
818  {"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
819  {"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* M */
820  {"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
821  {"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
822  {"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
823  {"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
824  {"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
825  {"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
826  {"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* O */
827  {"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE}, /* P */
828  {"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
829  {"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
830  {"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
831  {"SSSSS", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* S */
832  {"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
833  {"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
834  {"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* T */
835  {"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
836  {"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
837  {"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* U */
838  {"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* W */
839  {"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
840  {"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* Y */
841  {"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
842  {"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
843  {"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
844  {"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
845  {"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE}, /* a */
846  {"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
847  {"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
848  {"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
849  {"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE}, /* b */
850  {"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
851  {"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* c */
852  {"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE}, /* d */
853  {"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
854  {"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
855  {"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
856  {"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
857  {"ff1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* f */
858  {"ff2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
859  {"ff3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
860  {"ff4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
861  {"ff5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
862  {"ff6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
863  {"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
864  {"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* h */
865  {"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
866  {"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
867  {"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* i */
868  {"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
869  {"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
870  {"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
871  {"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
872  {"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
873  {"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
874  {"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
875  {"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* m */
876  {"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
877  {"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
878  {"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
879  {"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
880  {"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE}, /* p */
881  {"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
882  {"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
883  {"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
884  {"sssss", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* s */
885  {"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
886  {"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
887  {"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE}, /* t */
888  {"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* u */
889  {"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* w */
890  {"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
891  {"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* y */
892  {"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
893  {"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
894  {"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
895  {"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
896 
897  /* last */
898  {NULL, 0, 0, 0, 0}
899 };
900 
901 /* ----------
902  * KeyWords for NUMBER version
903  *
904  * The is_digit and date_mode fields are not relevant here.
905  * ----------
906  */
907 static const KeyWord NUM_keywords[] = {
908 /* name, len, id is in Index */
909  {",", 1, NUM_COMMA}, /* , */
910  {".", 1, NUM_DEC}, /* . */
911  {"0", 1, NUM_0}, /* 0 */
912  {"9", 1, NUM_9}, /* 9 */
913  {"B", 1, NUM_B}, /* B */
914  {"C", 1, NUM_C}, /* C */
915  {"D", 1, NUM_D}, /* D */
916  {"EEEE", 4, NUM_E}, /* E */
917  {"FM", 2, NUM_FM}, /* F */
918  {"G", 1, NUM_G}, /* G */
919  {"L", 1, NUM_L}, /* L */
920  {"MI", 2, NUM_MI}, /* M */
921  {"PL", 2, NUM_PL}, /* P */
922  {"PR", 2, NUM_PR},
923  {"RN", 2, NUM_RN}, /* R */
924  {"SG", 2, NUM_SG}, /* S */
925  {"SP", 2, NUM_SP},
926  {"S", 1, NUM_S},
927  {"TH", 2, NUM_TH}, /* T */
928  {"V", 1, NUM_V}, /* V */
929  {"b", 1, NUM_B}, /* b */
930  {"c", 1, NUM_C}, /* c */
931  {"d", 1, NUM_D}, /* d */
932  {"eeee", 4, NUM_E}, /* e */
933  {"fm", 2, NUM_FM}, /* f */
934  {"g", 1, NUM_G}, /* g */
935  {"l", 1, NUM_L}, /* l */
936  {"mi", 2, NUM_MI}, /* m */
937  {"pl", 2, NUM_PL}, /* p */
938  {"pr", 2, NUM_PR},
939  {"rn", 2, NUM_rn}, /* r */
940  {"sg", 2, NUM_SG}, /* s */
941  {"sp", 2, NUM_SP},
942  {"s", 1, NUM_S},
943  {"th", 2, NUM_th}, /* t */
944  {"v", 1, NUM_V}, /* v */
945 
946  /* last */
947  {NULL, 0, 0}
948 };
949 
950 
951 /* ----------
952  * KeyWords index for DATE-TIME version
953  * ----------
954  */
955 static const int DCH_index[KeyWord_INDEX_SIZE] = {
956 /*
957 0 1 2 3 4 5 6 7 8 9
958 */
959  /*---- first 0..31 chars are skipped ----*/
960 
961  -1, -1, -1, -1, -1, -1, -1, -1,
962  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
963  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
964  -1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,
965  DCH_FF1, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF,
967  -1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,
968  DCH_day, -1, DCH_ff1, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,
969  -1, -1, DCH_p_m, DCH_q, DCH_rm, DCH_sssss, DCH_tz, DCH_us, -1, DCH_ww,
970  -1, DCH_y_yyy, -1, -1, -1, -1
971 
972  /*---- chars over 126 are skipped ----*/
973 };
974 
975 /* ----------
976  * KeyWords index for NUMBER version
977  * ----------
978  */
979 static const int NUM_index[KeyWord_INDEX_SIZE] = {
980 /*
981 0 1 2 3 4 5 6 7 8 9
982 */
983  /*---- first 0..31 chars are skipped ----*/
984 
985  -1, -1, -1, -1, -1, -1, -1, -1,
986  -1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,
987  -1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,
988  -1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E,
989  NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,
990  NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,
991  -1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,
992  NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi,
993  -1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,
994  -1, -1, -1, -1, -1, -1
995 
996  /*---- chars over 126 are skipped ----*/
997 };
998 
999 /* ----------
1000  * Number processor struct
1001  * ----------
1002  */
1003 typedef struct NUMProc
1004 {
1006  NUMDesc *Num; /* number description */
1007 
1008  int sign, /* '-' or '+' */
1009  sign_wrote, /* was sign write */
1010  num_count, /* number of write digits */
1011  num_in, /* is inside number */
1012  num_curr, /* current position in number */
1013  out_pre_spaces, /* spaces before first digit */
1014 
1015  read_dec, /* to_number - was read dec. point */
1016  read_post, /* to_number - number of dec. digit */
1017  read_pre; /* to_number - number non-dec. digit */
1018 
1019  char *number, /* string with number */
1020  *number_p, /* pointer to current number position */
1021  *inout, /* in / out buffer */
1022  *inout_p, /* pointer to current inout position */
1023  *last_relevant, /* last relevant number after decimal point */
1024 
1025  *L_negative_sign, /* Locale */
1026  *L_positive_sign,
1027  *decimal,
1028  *L_thousands_sep,
1029  *L_currency_symbol;
1030 } NUMProc;
1031 
1032 /* Return flags for DCH_from_char() */
1033 #define DCH_DATED 0x01
1034 #define DCH_TIMED 0x02
1035 #define DCH_ZONED 0x04
1036 
1037 /* ----------
1038  * Functions
1039  * ----------
1040  */
1041 static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
1042  const int *index);
1043 static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
1044 static bool is_separator_char(const char *str);
1045 static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
1046 static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1047  const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num);
1048 
1049 static void DCH_to_char(FormatNode *node, bool is_interval,
1050  TmToChar *in, char *out, Oid collid);
1051 static void DCH_from_char(FormatNode *node, char *in, TmFromChar *out,
1052  bool std, bool *have_error);
1053 
1054 #ifdef DEBUG_TO_FROM_CHAR
1055 static void dump_index(const KeyWord *k, const int *index);
1056 static void dump_node(FormatNode *node, int max);
1057 #endif
1058 
1059 static const char *get_th(char *num, int type);
1060 static char *str_numth(char *dest, char *num, int type);
1061 static int adjust_partial_year_to_2020(int year);
1062 static int strspace_len(char *str);
1063 static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode,
1064  bool *have_error);
1065 static void from_char_set_int(int *dest, const int value, const FormatNode *node,
1066  bool *have_error);
1067 static int from_char_parse_int_len(int *dest, char **src, const int len,
1068  FormatNode *node, bool *have_error);
1069 static int from_char_parse_int(int *dest, char **src, FormatNode *node,
1070  bool *have_error);
1071 static int seq_search(char *name, const char *const *array, int type, int max, int *len);
1072 static int from_char_seq_search(int *dest, char **src,
1073  const char *const *array, int type, int max,
1074  FormatNode *node, bool *have_error);
1075 static void do_to_timestamp(text *date_txt, text *fmt, bool std,
1076  struct pg_tm *tm, fsec_t *fsec, int *fprec,
1077  uint32 *flags, bool *have_error);
1078 static char *fill_str(char *str, int c, int max);
1079 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
1080 static char *int_to_roman(int number);
1081 static void NUM_prepare_locale(NUMProc *Np);
1082 static char *get_last_relevant_decnum(char *num);
1083 static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
1084 static void NUM_numpart_to_char(NUMProc *Np, int id);
1085 static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
1086  char *number, int input_len, int to_char_out_pre_spaces,
1087  int sign, bool is_to_char, Oid collid);
1088 static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std);
1089 static DCHCacheEntry *DCH_cache_search(const char *str, bool std);
1090 static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std);
1091 static NUMCacheEntry *NUM_cache_getnew(const char *str);
1092 static NUMCacheEntry *NUM_cache_search(const char *str);
1093 static NUMCacheEntry *NUM_cache_fetch(const char *str);
1094 
1095 
1096 /* ----------
1097  * Fast sequential search, use index for data selection which
1098  * go to seq. cycle (it is very fast for unwanted strings)
1099  * (can't be used binary search in format parsing)
1100  * ----------
1101  */
1102 static const KeyWord *
1103 index_seq_search(const char *str, const KeyWord *kw, const int *index)
1104 {
1105  int poz;
1106 
1107  if (!KeyWord_INDEX_FILTER(*str))
1108  return NULL;
1109 
1110  if ((poz = *(index + (*str - ' '))) > -1)
1111  {
1112  const KeyWord *k = kw + poz;
1113 
1114  do
1115  {
1116  if (strncmp(str, k->name, k->len) == 0)
1117  return k;
1118  k++;
1119  if (!k->name)
1120  return NULL;
1121  } while (*str == *k->name);
1122  }
1123  return NULL;
1124 }
1125 
1126 static const KeySuffix *
1127 suff_search(const char *str, const KeySuffix *suf, int type)
1128 {
1129  const KeySuffix *s;
1130 
1131  for (s = suf; s->name != NULL; s++)
1132  {
1133  if (s->type != type)
1134  continue;
1135 
1136  if (strncmp(str, s->name, s->len) == 0)
1137  return s;
1138  }
1139  return NULL;
1140 }
1141 
/*
 * Report whether the first byte of 'str' is a separator character, i.e. a
 * printable ASCII character (0x21 .. 0x7E) that is neither a letter nor a
 * digit.  Only the first byte is examined; the terminating NUL, whitespace,
 * control characters and bytes outside the ASCII range are not separators.
 */
static bool
is_separator_char(const char *str)
{
	/* ASCII printable character, but not letter or digit */
	return (*str > 0x20 && *str < 0x7F &&
			!(*str >= 'A' && *str <= 'Z') &&
			!(*str >= 'a' && *str <= 'z') &&
			!(*str >= '0' && *str <= '9'));
}
1151 
1152 /* ----------
1153  * Prepare NUMDesc (number description struct) via FormatNode struct
1154  * ----------
1155  */
1156 static void
1158 {
1159  if (n->type != NODE_TYPE_ACTION)
1160  return;
1161 
1162  if (IS_EEEE(num) && n->key->id != NUM_E)
1163  ereport(ERROR,
1164  (errcode(ERRCODE_SYNTAX_ERROR),
1165  errmsg("\"EEEE\" must be the last pattern used")));
1166 
1167  switch (n->key->id)
1168  {
1169  case NUM_9:
1170  if (IS_BRACKET(num))
1171  ereport(ERROR,
1172  (errcode(ERRCODE_SYNTAX_ERROR),
1173  errmsg("\"9\" must be ahead of \"PR\"")));
1174  if (IS_MULTI(num))
1175  {
1176  ++num->multi;
1177  break;
1178  }
1179  if (IS_DECIMAL(num))
1180  ++num->post;
1181  else
1182  ++num->pre;
1183  break;
1184 
1185  case NUM_0:
1186  if (IS_BRACKET(num))
1187  ereport(ERROR,
1188  (errcode(ERRCODE_SYNTAX_ERROR),
1189  errmsg("\"0\" must be ahead of \"PR\"")));
1190  if (!IS_ZERO(num) && !IS_DECIMAL(num))
1191  {
1192  num->flag |= NUM_F_ZERO;
1193  num->zero_start = num->pre + 1;
1194  }
1195  if (!IS_DECIMAL(num))
1196  ++num->pre;
1197  else
1198  ++num->post;
1199 
1200  num->zero_end = num->pre + num->post;
1201  break;
1202 
1203  case NUM_B:
1204  if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
1205  num->flag |= NUM_F_BLANK;
1206  break;
1207 
1208  case NUM_D:
1209  num->flag |= NUM_F_LDECIMAL;
1210  num->need_locale = true;
1211  /* FALLTHROUGH */
1212  case NUM_DEC:
1213  if (IS_DECIMAL(num))
1214  ereport(ERROR,
1215  (errcode(ERRCODE_SYNTAX_ERROR),
1216  errmsg("multiple decimal points")));
1217  if (IS_MULTI(num))
1218  ereport(ERROR,
1219  (errcode(ERRCODE_SYNTAX_ERROR),
1220  errmsg("cannot use \"V\" and decimal point together")));
1221  num->flag |= NUM_F_DECIMAL;
1222  break;
1223 
1224  case NUM_FM:
1225  num->flag |= NUM_F_FILLMODE;
1226  break;
1227 
1228  case NUM_S:
1229  if (IS_LSIGN(num))
1230  ereport(ERROR,
1231  (errcode(ERRCODE_SYNTAX_ERROR),
1232  errmsg("cannot use \"S\" twice")));
1233  if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
1234  ereport(ERROR,
1235  (errcode(ERRCODE_SYNTAX_ERROR),
1236  errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
1237  if (!IS_DECIMAL(num))
1238  {
1239  num->lsign = NUM_LSIGN_PRE;
1240  num->pre_lsign_num = num->pre;
1241  num->need_locale = true;
1242  num->flag |= NUM_F_LSIGN;
1243  }
1244  else if (num->lsign == NUM_LSIGN_NONE)
1245  {
1246  num->lsign = NUM_LSIGN_POST;
1247  num->need_locale = true;
1248  num->flag |= NUM_F_LSIGN;
1249  }
1250  break;
1251 
1252  case NUM_MI:
1253  if (IS_LSIGN(num))
1254  ereport(ERROR,
1255  (errcode(ERRCODE_SYNTAX_ERROR),
1256  errmsg("cannot use \"S\" and \"MI\" together")));
1257  num->flag |= NUM_F_MINUS;
1258  if (IS_DECIMAL(num))
1259  num->flag |= NUM_F_MINUS_POST;
1260  break;
1261 
1262  case NUM_PL:
1263  if (IS_LSIGN(num))
1264  ereport(ERROR,
1265  (errcode(ERRCODE_SYNTAX_ERROR),
1266  errmsg("cannot use \"S\" and \"PL\" together")));
1267  num->flag |= NUM_F_PLUS;
1268  if (IS_DECIMAL(num))
1269  num->flag |= NUM_F_PLUS_POST;
1270  break;
1271 
1272  case NUM_SG:
1273  if (IS_LSIGN(num))
1274  ereport(ERROR,
1275  (errcode(ERRCODE_SYNTAX_ERROR),
1276  errmsg("cannot use \"S\" and \"SG\" together")));
1277  num->flag |= NUM_F_MINUS;
1278  num->flag |= NUM_F_PLUS;
1279  break;
1280 
1281  case NUM_PR:
1282  if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
1283  ereport(ERROR,
1284  (errcode(ERRCODE_SYNTAX_ERROR),
1285  errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
1286  num->flag |= NUM_F_BRACKET;
1287  break;
1288 
1289  case NUM_rn:
1290  case NUM_RN:
1291  num->flag |= NUM_F_ROMAN;
1292  break;
1293 
1294  case NUM_L:
1295  case NUM_G:
1296  num->need_locale = true;
1297  break;
1298 
1299  case NUM_V:
1300  if (IS_DECIMAL(num))
1301  ereport(ERROR,
1302  (errcode(ERRCODE_SYNTAX_ERROR),
1303  errmsg("cannot use \"V\" and decimal point together")));
1304  num->flag |= NUM_F_MULTI;
1305  break;
1306 
1307  case NUM_E:
1308  if (IS_EEEE(num))
1309  ereport(ERROR,
1310  (errcode(ERRCODE_SYNTAX_ERROR),
1311  errmsg("cannot use \"EEEE\" twice")));
1312  if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
1313  IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
1314  IS_ROMAN(num) || IS_MULTI(num))
1315  ereport(ERROR,
1316  (errcode(ERRCODE_SYNTAX_ERROR),
1317  errmsg("\"EEEE\" is incompatible with other formats"),
1318  errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
1319  num->flag |= NUM_F_EEEE;
1320  break;
1321  }
1322 }
1323 
1324 /* ----------
1325  * Format parser, search small keywords and keyword's suffixes, and make
1326  * format-node tree.
1327  *
1328  * for DATE-TIME & NUMBER version
1329  * ----------
1330  */
1331 static void
1332 parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1333  const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num)
1334 {
1335  FormatNode *n;
1336 
1337 #ifdef DEBUG_TO_FROM_CHAR
1338  elog(DEBUG_elog_output, "to_char/number(): run parser");
1339 #endif
1340 
1341  n = node;
1342 
1343  while (*str)
1344  {
1345  int suffix = 0;
1346  const KeySuffix *s;
1347 
1348  /*
1349  * Prefix
1350  */
1351  if ((flags & DCH_FLAG) &&
1352  (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
1353  {
1354  suffix |= s->id;
1355  if (s->len)
1356  str += s->len;
1357  }
1358 
1359  /*
1360  * Keyword
1361  */
1362  if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
1363  {
1364  n->type = NODE_TYPE_ACTION;
1365  n->suffix = suffix;
1366  if (n->key->len)
1367  str += n->key->len;
1368 
1369  /*
1370  * NUM version: Prepare global NUMDesc struct
1371  */
1372  if (flags & NUM_FLAG)
1373  NUMDesc_prepare(Num, n);
1374 
1375  /*
1376  * Postfix
1377  */
1378  if ((flags & DCH_FLAG) && *str &&
1379  (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
1380  {
1381  n->suffix |= s->id;
1382  if (s->len)
1383  str += s->len;
1384  }
1385 
1386  n++;
1387  }
1388  else if (*str)
1389  {
1390  int chlen;
1391 
1392  if (flags & STD_FLAG)
1393  {
1394  /*
1395  * Standard mode, allow only following separators: "-./,':; "
1396  */
1397  if (strchr("-./,':; ", *str) == NULL)
1398  ereport(ERROR,
1399  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
1400  errmsg("invalid datetime format separator: \"%s\"",
1401  pnstrdup(str, pg_mblen(str)))));
1402 
1403  if (*str == ' ')
1404  n->type = NODE_TYPE_SPACE;
1405  else
1407 
1408  n->character[0] = *str;
1409  n->character[1] = '\0';
1410  n->key = NULL;
1411  n->suffix = 0;
1412  n++;
1413  str++;
1414  }
1415  else if (*str == '"')
1416  {
1417  /*
1418  * Process double-quoted literal string, if any
1419  */
1420  str++;
1421  while (*str)
1422  {
1423  if (*str == '"')
1424  {
1425  str++;
1426  break;
1427  }
1428  /* backslash quotes the next character, if any */
1429  if (*str == '\\' && *(str + 1))
1430  str++;
1431  chlen = pg_mblen(str);
1432  n->type = NODE_TYPE_CHAR;
1433  memcpy(n->character, str, chlen);
1434  n->character[chlen] = '\0';
1435  n->key = NULL;
1436  n->suffix = 0;
1437  n++;
1438  str += chlen;
1439  }
1440  }
1441  else
1442  {
1443  /*
1444  * Outside double-quoted strings, backslash is only special if
1445  * it immediately precedes a double quote.
1446  */
1447  if (*str == '\\' && *(str + 1) == '"')
1448  str++;
1449  chlen = pg_mblen(str);
1450 
1451  if ((flags & DCH_FLAG) && is_separator_char(str))
1453  else if (isspace((unsigned char) *str))
1454  n->type = NODE_TYPE_SPACE;
1455  else
1456  n->type = NODE_TYPE_CHAR;
1457 
1458  memcpy(n->character, str, chlen);
1459  n->character[chlen] = '\0';
1460  n->key = NULL;
1461  n->suffix = 0;
1462  n++;
1463  str += chlen;
1464  }
1465  }
1466  }
1467 
1468  n->type = NODE_TYPE_END;
1469  n->suffix = 0;
1470 }
1471 
/* ----------
 * DEBUG: print the contents of a FormatNode array
 *
 * Compiled only when DEBUG_TO_FROM_CHAR is defined.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR

#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
#define DUMP_FM(_suf)	(S_FM(_suf) ? "FM" : " ")

/*
 * Log the nodes of 'node' one per line, stopping after the END node or
 * after index 'max', whichever comes first.  Node types other than ACTION,
 * CHAR and END are reported as unknown.
 */
static void
dump_node(FormatNode *node, int max)
{
	int			pos;

	elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");

	for (pos = 0; pos <= max; pos++)
	{
		FormatNode *cur = &node[pos];

		if (cur->type == NODE_TYPE_END)
		{
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", pos);
			return;
		}

		if (cur->type == NODE_TYPE_ACTION)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
				 pos, cur->key->name, DUMP_THth(cur->suffix), DUMP_FM(cur->suffix));
		else if (cur->type == NODE_TYPE_CHAR)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
				 pos, cur->character);
		else
			elog(DEBUG_elog_output, "%d:\t unknown NODE!", pos);
	}
}
#endif							/* DEBUG */
1507 
1508 /*****************************************************************************
1509  * Private utils
1510  *****************************************************************************/
1511 
1512 /* ----------
1513  * Return ST/ND/RD/TH for simple (1..9) numbers
1514  * type --> 0 upper, 1 lower
1515  * ----------
1516  */
1517 static const char *
1518 get_th(char *num, int type)
1519 {
1520  int len = strlen(num),
1521  last,
1522  seclast;
1523 
1524  last = *(num + (len - 1));
1525  if (!isdigit((unsigned char) last))
1526  ereport(ERROR,
1527  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1528  errmsg("\"%s\" is not a number", num)));
1529 
1530  /*
1531  * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
1532  * 'ST/st', 'ND/nd', 'RD/rd', respectively
1533  */
1534  if ((len > 1) && ((seclast = num[len - 2]) == '1'))
1535  last = 0;
1536 
1537  switch (last)
1538  {
1539  case '1':
1540  if (type == TH_UPPER)
1541  return numTH[0];
1542  return numth[0];
1543  case '2':
1544  if (type == TH_UPPER)
1545  return numTH[1];
1546  return numth[1];
1547  case '3':
1548  if (type == TH_UPPER)
1549  return numTH[2];
1550  return numth[2];
1551  default:
1552  if (type == TH_UPPER)
1553  return numTH[3];
1554  return numth[3];
1555  }
1556 }
1557 
/* ----------
 * Build the ordinal form of the number string 'num' in 'dest' and return
 * 'dest'.
 *
 * 'dest' may be the same buffer as 'num', in which case the suffix is
 * appended in place.  No length is checked: the caller must make sure that
 * 'dest' can hold 'num' plus the suffix chosen by get_th().
 *
 * 'type' is passed straight through to get_th().
 * ----------
 */
static char *
str_numth(char *dest, char *num, int type)
{
	const char *suffix;

	if (dest != num)
		strcpy(dest, num);

	suffix = get_th(num, type);
	strcat(dest, suffix);

	return dest;
}
1571 
1572 /*****************************************************************************
1573  * upper/lower/initcap functions
1574  *****************************************************************************/
1575 
1576 #ifdef USE_ICU
1577 
1578 typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
1579  const UChar *src, int32_t srcLength,
1580  const char *locale,
1581  UErrorCode *pErrorCode);
1582 
1583 static int32_t
1584 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
1585  UChar **buff_dest, UChar *buff_source, int32_t len_source)
1586 {
1587  UErrorCode status;
1588  int32_t len_dest;
1589 
1590  len_dest = len_source; /* try first with same length */
1591  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1592  status = U_ZERO_ERROR;
1593  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1594  mylocale->info.icu.locale, &status);
1595  if (status == U_BUFFER_OVERFLOW_ERROR)
1596  {
1597  /* try again with adjusted length */
1598  pfree(*buff_dest);
1599  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1600  status = U_ZERO_ERROR;
1601  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1602  mylocale->info.icu.locale, &status);
1603  }
1604  if (U_FAILURE(status))
1605  ereport(ERROR,
1606  (errmsg("case conversion failed: %s", u_errorName(status))));
1607  return len_dest;
1608 }
1609 
1610 static int32_t
1611 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
1612  const UChar *src, int32_t srcLength,
1613  const char *locale,
1614  UErrorCode *pErrorCode)
1615 {
1616  return u_strToTitle(dest, destCapacity, src, srcLength,
1617  NULL, locale, pErrorCode);
1618 }
1619 
1620 #endif /* USE_ICU */
1621 
1622 /*
1623  * If the system provides the needed functions for wide-character manipulation
1624  * (which are all standardized by C99), then we implement upper/lower/initcap
1625  * using wide-character functions, if necessary. Otherwise we use the
1626  * traditional <ctype.h> functions, which of course will not work as desired
1627  * in multibyte character sets. Note that in either case we are effectively
1628  * assuming that the database character encoding matches the encoding implied
1629  * by LC_CTYPE.
1630  *
1631  * If the system provides locale_t and associated functions (which are
1632  * standardized by Open Group's XBD), we can support collations that are
1633  * neither default nor C. The code is written to handle both combinations
1634  * of have-wide-characters and have-locale_t, though it's rather unlikely
1635  * a platform would have the latter without the former.
1636  */
1637 
1638 /*
1639  * collation-aware, wide-character-aware lower function
1640  *
1641  * We pass the number of bytes so we can pass varlena and char*
1642  * to this function. The result is a palloc'd, null-terminated string.
1643  */
1644 char *
1645 str_tolower(const char *buff, size_t nbytes, Oid collid)
1646 {
1647  char *result;
1648 
1649  if (!buff)
1650  return NULL;
1651 
1652  /* C/POSIX collations use this path regardless of database encoding */
1653  if (lc_ctype_is_c(collid))
1654  {
1655  result = asc_tolower(buff, nbytes);
1656  }
1657  else
1658  {
1659  pg_locale_t mylocale = 0;
1660 
1661  if (collid != DEFAULT_COLLATION_OID)
1662  {
1663  if (!OidIsValid(collid))
1664  {
1665  /*
1666  * This typically means that the parser could not resolve a
1667  * conflict of implicit collations, so report it that way.
1668  */
1669  ereport(ERROR,
1670  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1671  errmsg("could not determine which collation to use for %s function",
1672  "lower()"),
1673  errhint("Use the COLLATE clause to set the collation explicitly.")));
1674  }
1675  mylocale = pg_newlocale_from_collation(collid);
1676  }
1677 
1678 #ifdef USE_ICU
1679  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1680  {
1681  int32_t len_uchar;
1682  int32_t len_conv;
1683  UChar *buff_uchar;
1684  UChar *buff_conv;
1685 
1686  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1687  len_conv = icu_convert_case(u_strToLower, mylocale,
1688  &buff_conv, buff_uchar, len_uchar);
1689  icu_from_uchar(&result, buff_conv, len_conv);
1690  pfree(buff_uchar);
1691  pfree(buff_conv);
1692  }
1693  else
1694 #endif
1695  {
1697  {
1698  wchar_t *workspace;
1699  size_t curr_char;
1700  size_t result_size;
1701 
1702  /* Overflow paranoia */
1703  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1704  ereport(ERROR,
1705  (errcode(ERRCODE_OUT_OF_MEMORY),
1706  errmsg("out of memory")));
1707 
1708  /* Output workspace cannot have more codes than input bytes */
1709  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1710 
1711  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1712 
1713  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1714  {
1715 #ifdef HAVE_LOCALE_T
1716  if (mylocale)
1717  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1718  else
1719 #endif
1720  workspace[curr_char] = towlower(workspace[curr_char]);
1721  }
1722 
1723  /*
1724  * Make result large enough; case change might change number
1725  * of bytes
1726  */
1727  result_size = curr_char * pg_database_encoding_max_length() + 1;
1728  result = palloc(result_size);
1729 
1730  wchar2char(result, workspace, result_size, mylocale);
1731  pfree(workspace);
1732  }
1733  else
1734  {
1735  char *p;
1736 
1737  result = pnstrdup(buff, nbytes);
1738 
1739  /*
1740  * Note: we assume that tolower_l() will not be so broken as
1741  * to need an isupper_l() guard test. When using the default
1742  * collation, we apply the traditional Postgres behavior that
1743  * forces ASCII-style treatment of I/i, but in non-default
1744  * collations you get exactly what the collation says.
1745  */
1746  for (p = result; *p; p++)
1747  {
1748 #ifdef HAVE_LOCALE_T
1749  if (mylocale)
1750  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1751  else
1752 #endif
1753  *p = pg_tolower((unsigned char) *p);
1754  }
1755  }
1756  }
1757  }
1758 
1759  return result;
1760 }
1761 
1762 /*
1763  * collation-aware, wide-character-aware upper function
1764  *
1765  * We pass the number of bytes so we can pass varlena and char*
1766  * to this function. The result is a palloc'd, null-terminated string.
1767  */
1768 char *
1769 str_toupper(const char *buff, size_t nbytes, Oid collid)
1770 {
1771  char *result;
1772 
1773  if (!buff)
1774  return NULL;
1775 
1776  /* C/POSIX collations use this path regardless of database encoding */
1777  if (lc_ctype_is_c(collid))
1778  {
1779  result = asc_toupper(buff, nbytes);
1780  }
1781  else
1782  {
1783  pg_locale_t mylocale = 0;
1784 
1785  if (collid != DEFAULT_COLLATION_OID)
1786  {
1787  if (!OidIsValid(collid))
1788  {
1789  /*
1790  * This typically means that the parser could not resolve a
1791  * conflict of implicit collations, so report it that way.
1792  */
1793  ereport(ERROR,
1794  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1795  errmsg("could not determine which collation to use for %s function",
1796  "upper()"),
1797  errhint("Use the COLLATE clause to set the collation explicitly.")));
1798  }
1799  mylocale = pg_newlocale_from_collation(collid);
1800  }
1801 
1802 #ifdef USE_ICU
1803  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1804  {
1805  int32_t len_uchar,
1806  len_conv;
1807  UChar *buff_uchar;
1808  UChar *buff_conv;
1809 
1810  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1811  len_conv = icu_convert_case(u_strToUpper, mylocale,
1812  &buff_conv, buff_uchar, len_uchar);
1813  icu_from_uchar(&result, buff_conv, len_conv);
1814  pfree(buff_uchar);
1815  pfree(buff_conv);
1816  }
1817  else
1818 #endif
1819  {
1821  {
1822  wchar_t *workspace;
1823  size_t curr_char;
1824  size_t result_size;
1825 
1826  /* Overflow paranoia */
1827  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1828  ereport(ERROR,
1829  (errcode(ERRCODE_OUT_OF_MEMORY),
1830  errmsg("out of memory")));
1831 
1832  /* Output workspace cannot have more codes than input bytes */
1833  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1834 
1835  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1836 
1837  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1838  {
1839 #ifdef HAVE_LOCALE_T
1840  if (mylocale)
1841  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1842  else
1843 #endif
1844  workspace[curr_char] = towupper(workspace[curr_char]);
1845  }
1846 
1847  /*
1848  * Make result large enough; case change might change number
1849  * of bytes
1850  */
1851  result_size = curr_char * pg_database_encoding_max_length() + 1;
1852  result = palloc(result_size);
1853 
1854  wchar2char(result, workspace, result_size, mylocale);
1855  pfree(workspace);
1856  }
1857  else
1858  {
1859  char *p;
1860 
1861  result = pnstrdup(buff, nbytes);
1862 
1863  /*
1864  * Note: we assume that toupper_l() will not be so broken as
1865  * to need an islower_l() guard test. When using the default
1866  * collation, we apply the traditional Postgres behavior that
1867  * forces ASCII-style treatment of I/i, but in non-default
1868  * collations you get exactly what the collation says.
1869  */
1870  for (p = result; *p; p++)
1871  {
1872 #ifdef HAVE_LOCALE_T
1873  if (mylocale)
1874  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1875  else
1876 #endif
1877  *p = pg_toupper((unsigned char) *p);
1878  }
1879  }
1880  }
1881  }
1882 
1883  return result;
1884 }
1885 
1886 /*
1887  * collation-aware, wide-character-aware initcap function
1888  *
1889  * We pass the number of bytes so we can pass varlena and char*
1890  * to this function. The result is a palloc'd, null-terminated string.
1891  */
1892 char *
1893 str_initcap(const char *buff, size_t nbytes, Oid collid)
1894 {
1895  char *result;
1896  int wasalnum = false;
1897 
1898  if (!buff)
1899  return NULL;
1900 
1901  /* C/POSIX collations use this path regardless of database encoding */
1902  if (lc_ctype_is_c(collid))
1903  {
1904  result = asc_initcap(buff, nbytes);
1905  }
1906  else
1907  {
1908  pg_locale_t mylocale = 0;
1909 
1910  if (collid != DEFAULT_COLLATION_OID)
1911  {
1912  if (!OidIsValid(collid))
1913  {
1914  /*
1915  * This typically means that the parser could not resolve a
1916  * conflict of implicit collations, so report it that way.
1917  */
1918  ereport(ERROR,
1919  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1920  errmsg("could not determine which collation to use for %s function",
1921  "initcap()"),
1922  errhint("Use the COLLATE clause to set the collation explicitly.")));
1923  }
1924  mylocale = pg_newlocale_from_collation(collid);
1925  }
1926 
1927 #ifdef USE_ICU
1928  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1929  {
1930  int32_t len_uchar,
1931  len_conv;
1932  UChar *buff_uchar;
1933  UChar *buff_conv;
1934 
1935  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1936  len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
1937  &buff_conv, buff_uchar, len_uchar);
1938  icu_from_uchar(&result, buff_conv, len_conv);
1939  pfree(buff_uchar);
1940  pfree(buff_conv);
1941  }
1942  else
1943 #endif
1944  {
1946  {
1947  wchar_t *workspace;
1948  size_t curr_char;
1949  size_t result_size;
1950 
1951  /* Overflow paranoia */
1952  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1953  ereport(ERROR,
1954  (errcode(ERRCODE_OUT_OF_MEMORY),
1955  errmsg("out of memory")));
1956 
1957  /* Output workspace cannot have more codes than input bytes */
1958  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1959 
1960  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1961 
1962  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1963  {
1964 #ifdef HAVE_LOCALE_T
1965  if (mylocale)
1966  {
1967  if (wasalnum)
1968  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1969  else
1970  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1971  wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
1972  }
1973  else
1974 #endif
1975  {
1976  if (wasalnum)
1977  workspace[curr_char] = towlower(workspace[curr_char]);
1978  else
1979  workspace[curr_char] = towupper(workspace[curr_char]);
1980  wasalnum = iswalnum(workspace[curr_char]);
1981  }
1982  }
1983 
1984  /*
1985  * Make result large enough; case change might change number
1986  * of bytes
1987  */
1988  result_size = curr_char * pg_database_encoding_max_length() + 1;
1989  result = palloc(result_size);
1990 
1991  wchar2char(result, workspace, result_size, mylocale);
1992  pfree(workspace);
1993  }
1994  else
1995  {
1996  char *p;
1997 
1998  result = pnstrdup(buff, nbytes);
1999 
2000  /*
2001  * Note: we assume that toupper_l()/tolower_l() will not be so
2002  * broken as to need guard tests. When using the default
2003  * collation, we apply the traditional Postgres behavior that
2004  * forces ASCII-style treatment of I/i, but in non-default
2005  * collations you get exactly what the collation says.
2006  */
2007  for (p = result; *p; p++)
2008  {
2009 #ifdef HAVE_LOCALE_T
2010  if (mylocale)
2011  {
2012  if (wasalnum)
2013  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
2014  else
2015  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
2016  wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
2017  }
2018  else
2019 #endif
2020  {
2021  if (wasalnum)
2022  *p = pg_tolower((unsigned char) *p);
2023  else
2024  *p = pg_toupper((unsigned char) *p);
2025  wasalnum = isalnum((unsigned char) *p);
2026  }
2027  }
2028  }
2029  }
2030  }
2031 
2032  return result;
2033 }
2034 
/*
 * ASCII-only lower function
 *
 * Takes a byte count rather than relying on termination, so both varlena
 * contents and plain char* can be passed.  Returns a palloc'd,
 * null-terminated copy with each byte run through pg_ascii_tolower(), or
 * NULL if 'buff' is NULL.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *copy;
	size_t		i;

	if (buff == NULL)
		return NULL;

	copy = pnstrdup(buff, nbytes);

	for (i = 0; copy[i] != '\0'; i++)
		copy[i] = pg_ascii_tolower((unsigned char) copy[i]);

	return copy;
}
2057 
/*
 * ASCII-only upper function
 *
 * Takes a byte count rather than relying on termination, so both varlena
 * contents and plain char* can be passed.  Returns a palloc'd,
 * null-terminated copy with each byte run through pg_ascii_toupper(), or
 * NULL if 'buff' is NULL.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *copy;
	size_t		i;

	if (buff == NULL)
		return NULL;

	copy = pnstrdup(buff, nbytes);

	for (i = 0; copy[i] != '\0'; i++)
		copy[i] = pg_ascii_toupper((unsigned char) copy[i]);

	return copy;
}
2080 
/*
 * ASCII-only initcap function
 *
 * Takes a byte count rather than relying on termination, so both varlena
 * contents and plain char* can be passed.  Returns a palloc'd,
 * null-terminated copy in which a byte is upper-cased when the byte before
 * it (after conversion) is not an ASCII letter or digit, and lower-cased
 * otherwise.  Returns NULL if 'buff' is NULL.
 */
char *
asc_initcap(const char *buff, size_t nbytes)
{
	char	   *copy;
	size_t		i;
	int			in_word = false;

	if (buff == NULL)
		return NULL;

	copy = pnstrdup(buff, nbytes);

	for (i = 0; copy[i] != '\0'; i++)
	{
		char		c;

		if (in_word)
			c = pg_ascii_tolower((unsigned char) copy[i]);
		else
			c = pg_ascii_toupper((unsigned char) copy[i]);
		copy[i] = c;

		/* we don't trust isalnum() here */
		in_word = ((c >= '0' && c <= '9') ||
				   (c >= 'A' && c <= 'Z') ||
				   (c >= 'a' && c <= 'z'));
	}

	return copy;
}
2115 
2116 /* convenience routines for when the input is null-terminated */
2117 
2118 static char *
2119 str_tolower_z(const char *buff, Oid collid)
2120 {
2121  return str_tolower(buff, strlen(buff), collid);
2122 }
2123 
2124 static char *
2125 str_toupper_z(const char *buff, Oid collid)
2126 {
2127  return str_toupper(buff, strlen(buff), collid);
2128 }
2129 
2130 static char *
2131 str_initcap_z(const char *buff, Oid collid)
2132 {
2133  return str_initcap(buff, strlen(buff), collid);
2134 }
2135 
/* asc_tolower() for a null-terminated input; 'buff' must not be NULL */
static char *
asc_tolower_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_tolower(buff, nbytes);
}
2141 
/* asc_toupper() for a null-terminated input; 'buff' must not be NULL */
static char *
asc_toupper_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_toupper(buff, nbytes);
}
2147 
2148 /* asc_initcap_z is not currently needed */
2149 
2150 
/* ----------
 * Skip TH / th in FROM_CHAR
 *
 * If S_THth is on for '_suf', advance 'ptr' over up to two characters
 * (each measured with pg_mblen), stopping early at the end of the string.
 * 'ptr' is evaluated several times and must be a modifiable lvalue.
 * ----------
 */
#define SKIP_THth(ptr, _suf) \
	do { \
		if (S_THth(_suf) && *(ptr)) \
		{ \
			(ptr) += pg_mblen(ptr); \
			if (*(ptr)) \
				(ptr) += pg_mblen(ptr); \
		} \
	} while (0)
2165 
2166 
#ifdef DEBUG_TO_FROM_CHAR
/* -----------
 * DEBUG: dump the keyword index, for debugging and index checking.
 *
 * For every index slot, logs the ASCII character it stands for together
 * with the keyword it points at, or the slot number and raw value when the
 * slot is unused (-1).  Finishes with the number of used and free slots.
 * ----------
 */
static void
dump_index(const KeyWord *k, const int *index)
{
	int			slot;
	int			used = 0;
	int			unused = 0;

	elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");

	for (slot = 0; slot < KeyWord_INDEX_SIZE; slot++)
	{
		if (index[slot] == -1)
		{
			unused++;
			elog(DEBUG_elog_output, "\t(%d) %c %d", slot, slot + 32, index[slot]);
			continue;
		}

		elog(DEBUG_elog_output, "\t%c: %s, ", slot + 32, k[index[slot]].name);
		used++;
	}

	elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
		 used, unused);
}
#endif							/* DEBUG */
2199 
2200 /* ----------
2201  * Return true if next format picture is not digit value
2202  * ----------
2203  */
2204 static bool
2206 {
2207  if (n->type == NODE_TYPE_END)
2208  return false;
2209 
2210  if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
2211  return true;
2212 
2213  /*
2214  * Next node
2215  */
2216  n++;
2217 
2218  /* end of format string is treated like a non-digit separator */
2219  if (n->type == NODE_TYPE_END)
2220  return true;
2221 
2222  if (n->type == NODE_TYPE_ACTION)
2223  {
2224  if (n->key->is_digit)
2225  return false;
2226 
2227  return true;
2228  }
2229  else if (n->character[1] == '\0' &&
2230  isdigit((unsigned char) n->character[0]))
2231  return false;
2232 
2233  return true; /* some non-digit input (separator) */
2234 }
2235 
2236 
/*
 * Expand a year given with fewer than four digits into a full year.
 *
 * Values below 1000 are shifted by a whole number of centuries/millennia
 * as laid out below; values of 1000 and above are returned unchanged.
 */
static int
adjust_partial_year_to_2020(int year)
{
	/*
	 * Adjust all dates toward 2020; this is effectively what happens when we
	 * assume '70' is 1970 and '69' is 2069.
	 */
	/* Force 0-69 into the 2000's */
	if (year < 70)
		return year + 2000;
	/* Force 70-99 into the 1900's */
	else if (year < 100)
		return year + 1900;
	/* Force 100-519 into the 2000's */
	else if (year < 520)
		return year + 2000;
	/* Force 520-999 into the 1000's */
	else if (year < 1000)
		return year + 1000;
	else
		return year;
}
2259 
2260 
/*
 * Count the whitespace characters (as classified by isspace) at the start
 * of 'str'.  The string itself is not modified.
 */
static int
strspace_len(char *str)
{
	int			len = 0;

	while (*str && isspace((unsigned char) *str))
	{
		str++;
		len++;
	}
	return len;
}
2273 
2274 /*
2275  * Set the date mode of a from-char conversion.
2276  *
2277  * Puke if the date mode has already been set, and the caller attempts to set
2278  * it to a conflicting mode.
2279  *
2280  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
2281  */
2282 static void
2283 from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode, bool *have_error)
2284 {
2285  if (mode != FROM_CHAR_DATE_NONE)
2286  {
2287  if (tmfc->mode == FROM_CHAR_DATE_NONE)
2288  tmfc->mode = mode;
2289  else if (tmfc->mode != mode)
2291  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2292  errmsg("invalid combination of date conventions"),
2293  errhint("Do not mix Gregorian and ISO week date "
2294  "conventions in a formatting template."))));
2295  }
2296 
2297 on_error:
2298  return;
2299 }
2300 
2301 /*
2302  * Set the integer pointed to by 'dest' to the given value.
2303  *
2304  * Puke if the destination integer has previously been set to some other
2305  * non-zero value.
2306  *
2307  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
2308  */
2309 static void
2310 from_char_set_int(int *dest, const int value, const FormatNode *node,
2311  bool *have_error)
2312 {
2313  if (*dest != 0 && *dest != value)
2315  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2316  errmsg("conflicting values for \"%s\" field in "
2317  "formatting string",
2318  node->key->name),
2319  errdetail("This value contradicts a previous setting "
2320  "for the same field type."))));
2321  *dest = value;
2322 
2323 on_error:
2324  return;
2325 }
2326 
2327 /*
2328  * Read a single integer from the source string, into the int pointed to by
2329  * 'dest'. If 'dest' is NULL, the result is discarded.
2330  *
2331  * In fixed-width mode (the node does not have the FM suffix), consume at most
2332  * 'len' characters. However, any leading whitespace isn't counted in 'len'.
2333  *
2334  * We use strtol() to recover the integer value from the source string, in
2335  * accordance with the given FormatNode.
2336  *
2337  * If the conversion completes successfully, src will have been advanced to
2338  * point at the character immediately following the last character used in the
2339  * conversion.
2340  *
2341  * Return the number of characters consumed.
2342  *
2343  * Note that from_char_parse_int() provides a more convenient wrapper where
2344  * the length of the field is the same as the length of the format keyword (as
2345  * with DD and MI).
2346  *
2347  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
2348  * and -1 is returned.
2349  */
2350 static int
2351 from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node,
2352  bool *have_error)
2353 {
2354  long result;
2355  char copy[DCH_MAX_ITEM_SIZ + 1];
2356  char *init = *src;
2357  int used;
2358 
2359  /*
2360  * Skip any whitespace before parsing the integer.
2361  */
2362  *src += strspace_len(*src);
2363 
2364  Assert(len <= DCH_MAX_ITEM_SIZ);
2365  used = (int) strlcpy(copy, *src, len + 1);
2366 
2367  if (S_FM(node->suffix) || is_next_separator(node))
2368  {
2369  /*
2370  * This node is in Fill Mode, or the next node is known to be a
2371  * non-digit value, so we just slurp as many characters as we can get.
2372  */
2373  errno = 0;
2374  result = strtol(init, src, 10);
2375  }
2376  else
2377  {
2378  /*
2379  * We need to pull exactly the number of characters given in 'len' out
2380  * of the string, and convert those.
2381  */
2382  char *last;
2383 
2384  if (used < len)
2386  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2387  errmsg("source string too short for \"%s\" "
2388  "formatting field",
2389  node->key->name),
2390  errdetail("Field requires %d characters, "
2391  "but only %d remain.",
2392  len, used),
2393  errhint("If your source string is not fixed-width, "
2394  "try using the \"FM\" modifier."))));
2395 
2396  errno = 0;
2397  result = strtol(copy, &last, 10);
2398  used = last - copy;
2399 
2400  if (used > 0 && used < len)
2402  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2403  errmsg("invalid value \"%s\" for \"%s\"",
2404  copy, node->key->name),
2405  errdetail("Field requires %d characters, "
2406  "but only %d could be parsed.",
2407  len, used),
2408  errhint("If your source string is not fixed-width, "
2409  "try using the \"FM\" modifier."))));
2410 
2411  *src += used;
2412  }
2413 
2414  if (*src == init)
2416  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2417  errmsg("invalid value \"%s\" for \"%s\"",
2418  copy, node->key->name),
2419  errdetail("Value must be an integer."))));
2420 
2421  if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
2423  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2424  errmsg("value for \"%s\" in source string is out of range",
2425  node->key->name),
2426  errdetail("Value must be in the range %d to %d.",
2427  INT_MIN, INT_MAX))));
2428 
2429  if (dest != NULL)
2430  {
2431  from_char_set_int(dest, (int) result, node, have_error);
2432  CHECK_ERROR;
2433  }
2434 
2435  return *src - init;
2436 
2437 on_error:
2438  return -1;
2439 }
2440 
2441 /*
2442  * Call from_char_parse_int_len(), using the length of the format keyword as
2443  * the expected length of the field.
2444  *
2445  * Don't call this function if the field differs in length from the format
2446  * keyword (as with HH24; the keyword length is 4, but the field length is 2).
2447  * In such cases, call from_char_parse_int_len() instead to specify the
2448  * required length explicitly.
2449  */
2450 static int
2451 from_char_parse_int(int *dest, char **src, FormatNode *node, bool *have_error)
2452 {
2453  return from_char_parse_int_len(dest, src, node->key->len, node, have_error);
2454 }
2455 
2456 /* ----------
2457  * Sequential search with to upper/lower conversion
2458  * ----------
2459  */
2460 static int
2461 seq_search(char *name, const char *const *array, int type, int max, int *len)
2462 {
2463  const char *p;
2464  const char *const *a;
2465  char *n;
2466  int last,
2467  i;
2468 
2469  *len = 0;
2470 
2471  if (!*name)
2472  return -1;
2473 
2474  /* set first char */
2475  if (type == ONE_UPPER || type == ALL_UPPER)
2476  *name = pg_toupper((unsigned char) *name);
2477  else if (type == ALL_LOWER)
2478  *name = pg_tolower((unsigned char) *name);
2479 
2480  for (last = 0, a = array; *a != NULL; a++)
2481  {
2482  /* compare first chars */
2483  if (*name != **a)
2484  continue;
2485 
2486  for (i = 1, p = *a + 1, n = name + 1;; n++, p++, i++)
2487  {
2488  /* search fragment (max) only */
2489  if (max && i == max)
2490  {
2491  *len = i;
2492  return a - array;
2493  }
2494  /* full size */
2495  if (*p == '\0')
2496  {
2497  *len = i;
2498  return a - array;
2499  }
2500  /* Not found in array 'a' */
2501  if (*n == '\0')
2502  break;
2503 
2504  /*
2505  * Convert (but convert new chars only)
2506  */
2507  if (i > last)
2508  {
2509  if (type == ONE_UPPER || type == ALL_LOWER)
2510  *n = pg_tolower((unsigned char) *n);
2511  else if (type == ALL_UPPER)
2512  *n = pg_toupper((unsigned char) *n);
2513  last = i;
2514  }
2515 
2516 #ifdef DEBUG_TO_FROM_CHAR
2517  elog(DEBUG_elog_output, "N: %c, P: %c, A: %s (%s)",
2518  *n, *p, *a, name);
2519 #endif
2520  if (*n != *p)
2521  break;
2522  }
2523  }
2524 
2525  return -1;
2526 }
2527 
2528 /*
2529  * Perform a sequential search in 'array' for text matching the first 'max'
2530  * characters of the source string.
2531  *
2532  * If a match is found, copy the array index of the match into the integer
2533  * pointed to by 'dest', advance 'src' to the end of the part of the string
2534  * which matched, and return the number of characters consumed.
2535  *
2536  * If the string doesn't match, throw an error if 'have_error' is NULL,
2537  * otherwise set '*have_error' and return -1.
2538  */
2539 static int
2540 from_char_seq_search(int *dest, char **src, const char *const *array, int type,
2541  int max, FormatNode *node, bool *have_error)
2542 {
2543  int len;
2544 
2545  *dest = seq_search(*src, array, type, max, &len);
2546  if (len <= 0)
2547  {
2548  char copy[DCH_MAX_ITEM_SIZ + 1];
2549 
2550  Assert(max <= DCH_MAX_ITEM_SIZ);
2551  strlcpy(copy, *src, max + 1);
2552 
2554  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2555  errmsg("invalid value \"%s\" for \"%s\"",
2556  copy, node->key->name),
2557  errdetail("The given value did not match any of "
2558  "the allowed values for this field."))));
2559  }
2560  *src += len;
2561  return len;
2562 
2563 on_error:
2564  return -1;
2565 }
2566 
2567 /* ----------
2568  * Process a TmToChar struct as denoted by a list of FormatNodes.
2569  * The formatted data is written to the string pointed to by 'out'.
 *
 * 'node' is walked entry by entry until a NODE_TYPE_END entry is reached.
 * Non-action nodes contribute their literal 'character' text; action nodes
 * are dispatched on n->key->id.  Every branch writes at the cursor 's' and
 * then advances 's' by strlen(s); the terminating NUL is stored after the
 * loop.
 *
 * 'is_interval' is handed to ADJUST_YEAR and selects the plain division used
 * for the century (CC) field.  'collid' is passed to the str_toupper_z /
 * str_initcap_z / str_tolower_z calls used for localized (TM) names.
 *
 * No size checks on 'out' are made here; the only length test is the one
 * applied to localized names before they are copied.
 *
 * NOTE(review): the line numbers embedded in this listing skip in several
 * places inside this function (for example right after the "cache localized
 * days and months" comment and directly after many case labels), so some
 * statements appear to be absent from this copy -- confirm against the
 * upstream file before relying on it.
2570  * ----------
2571  */
2572 static void
2573 DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
2574 {
2575  FormatNode *n;
2576  char *s;
2577  struct pg_tm *tm = &in->tm;
2578  int i;
2579 
2580  /* cache localized days and months */
2582 
2583  s = out;
2584  for (n = node; n->type != NODE_TYPE_END; n++)
2585  {
 /* anything that is not a keyword is copied to the output verbatim */
2586  if (n->type != NODE_TYPE_ACTION)
2587  {
2588  strcpy(s, n->character);
2589  s += strlen(s);
2590  continue;
2591  }
2592 
2593  switch (n->key->id)
2594  {
 /* meridiem indicators: chosen by whether the hour falls in the second half of the day */
2595  case DCH_A_M:
2596  case DCH_P_M:
2597  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2598  ? P_M_STR : A_M_STR);
2599  s += strlen(s);
2600  break;
2601  case DCH_AM:
2602  case DCH_PM:
2603  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2604  ? PM_STR : AM_STR);
2605  s += strlen(s);
2606  break;
2607  case DCH_a_m:
2608  case DCH_p_m:
2609  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2610  ? p_m_STR : a_m_STR);
2611  s += strlen(s);
2612  break;
2613  case DCH_am:
2614  case DCH_pm:
2615  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2616  ? pm_STR : am_STR);
2617  s += strlen(s);
2618  break;
2619  case DCH_HH:
2620  case DCH_HH12:
2621 
2622  /*
2623  * display time as shown on a 12-hour clock, even for
2624  * intervals
2625  */
 /* a remainder of zero is printed as HOURS_PER_DAY / 2 rather than 0 */
2626  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2627  tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ? HOURS_PER_DAY / 2 :
2628  tm->tm_hour % (HOURS_PER_DAY / 2));
2629  if (S_THth(n->suffix))
2630  str_numth(s, s, S_TH_TYPE(n->suffix));
2631  s += strlen(s);
2632  break;
2633  case DCH_HH24:
 /* zero-pad width: 0 with the FM suffix, else 2, or 3 when the value is negative */
2634  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2635  tm->tm_hour);
2636  if (S_THth(n->suffix))
2637  str_numth(s, s, S_TH_TYPE(n->suffix));
2638  s += strlen(s);
2639  break;
2640  case DCH_MI:
2641  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
2642  tm->tm_min);
2643  if (S_THth(n->suffix))
2644  str_numth(s, s, S_TH_TYPE(n->suffix));
2645  s += strlen(s);
2646  break;
2647  case DCH_SS:
2648  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
2649  tm->tm_sec);
2650  if (S_THth(n->suffix))
2651  str_numth(s, s, S_TH_TYPE(n->suffix));
2652  s += strlen(s);
2653  break;
 /* helper shared by the fractional-second cases: print, optional TH suffix, advance */
2654 #define DCH_to_char_fsec(frac_fmt, frac_val) \
2655  sprintf(s, frac_fmt, (int) (frac_val)); \
2656  if (S_THth(n->suffix)) \
2657  str_numth(s, s, S_TH_TYPE(n->suffix)); \
2658  s += strlen(s);
2659  case DCH_FF1: /* tenth of second */
2660  DCH_to_char_fsec("%01d", in->fsec / 100000);
2661  break;
2662  case DCH_FF2: /* hundredth of second */
2663  DCH_to_char_fsec("%02d", in->fsec / 10000);
2664  break;
2665  case DCH_FF3:
2666  case DCH_MS: /* millisecond */
2667  DCH_to_char_fsec("%03d", in->fsec / 1000);
2668  break;
2669  case DCH_FF4: /* tenth of a millisecond */
2670  DCH_to_char_fsec("%04d", in->fsec / 100);
2671  break;
2672  case DCH_FF5: /* hundredth of a millisecond */
2673  DCH_to_char_fsec("%05d", in->fsec / 10);
2674  break;
2675  case DCH_FF6:
2676  case DCH_US: /* microsecond */
2677  DCH_to_char_fsec("%06d", in->fsec);
2678  break;
2679 #undef DCH_to_char_fsec
2680  case DCH_SSSS:
 /* hour, minute and second fields combined into a single count of seconds */
2681  sprintf(s, "%d", tm->tm_hour * SECS_PER_HOUR +
2682  tm->tm_min * SECS_PER_MINUTE +
2683  tm->tm_sec);
2684  if (S_THth(n->suffix))
2685  str_numth(s, s, S_TH_TYPE(n->suffix));
2686  s += strlen(s);
2687  break;
2688  case DCH_tz:
2690  if (tmtcTzn(in))
2691  {
2692  /* We assume here that timezone names aren't localized */
2693  char *p = asc_tolower_z(tmtcTzn(in));
2694 
2695  strcpy(s, p);
2696  pfree(p);
2697  s += strlen(s);
2698  }
2699  break;
2700  case DCH_TZ:
2702  if (tmtcTzn(in))
2703  {
2704  strcpy(s, tmtcTzn(in));
2705  s += strlen(s);
2706  }
2707  break;
2708  case DCH_TZH:
 /* sign taken from tm_gmtoff, followed by the whole hours of its absolute value */
2710  sprintf(s, "%c%02d",
2711  (tm->tm_gmtoff >= 0) ? '+' : '-',
2712  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2713  s += strlen(s);
2714  break;
2715  case DCH_TZM:
2717  sprintf(s, "%02d",
2718  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2719  s += strlen(s);
2720  break;
2721  case DCH_OF:
 /* the ":MM" part is appended only when the offset is not a whole number of hours */
2723  sprintf(s, "%c%0*d",
2724  (tm->tm_gmtoff >= 0) ? '+' : '-',
2725  S_FM(n->suffix) ? 0 : 2,
2726  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2727  s += strlen(s);
2728  if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
2729  {
2730  sprintf(s, ":%02d",
2731  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2732  s += strlen(s);
2733  }
2734  break;
 /* era indicators: the BC form is used when tm_year <= 0 */
2735  case DCH_A_D:
2736  case DCH_B_C:
2738  strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
2739  s += strlen(s);
2740  break;
2741  case DCH_AD:
2742  case DCH_BC:
2744  strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
2745  s += strlen(s);
2746  break;
2747  case DCH_a_d:
2748  case DCH_b_c:
2750  strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
2751  s += strlen(s);
2752  break;
2753  case DCH_ad:
2754  case DCH_bc:
2756  strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
2757  s += strlen(s);
2758  break;
 /*
  * Month names.  Nothing is emitted when tm_mon is zero.  With the TM
  * suffix the localized name is used, subject to a length check;
  * otherwise the built-in English name is used, and full names are
  * left-justified in 9 columns unless FM is given.
  */
2759  case DCH_MONTH:
2761  if (!tm->tm_mon)
2762  break;
2763  if (S_TM(n->suffix))
2764  {
2765  char *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid);
2766 
2767  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2768  strcpy(s, str);
2769  else
2770  ereport(ERROR,
2771  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2772  errmsg("localized string format value too long")));
2773  }
2774  else
2775  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2776  asc_toupper_z(months_full[tm->tm_mon - 1]));
2777  s += strlen(s);
2778  break;
2779  case DCH_Month:
2781  if (!tm->tm_mon)
2782  break;
2783  if (S_TM(n->suffix))
2784  {
2785  char *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid);
2786 
2787  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2788  strcpy(s, str);
2789  else
2790  ereport(ERROR,
2791  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2792  errmsg("localized string format value too long")));
2793  }
2794  else
2795  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2796  months_full[tm->tm_mon - 1]);
2797  s += strlen(s);
2798  break;
2799  case DCH_month:
2801  if (!tm->tm_mon)
2802  break;
2803  if (S_TM(n->suffix))
2804  {
2805  char *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid);
2806 
2807  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2808  strcpy(s, str);
2809  else
2810  ereport(ERROR,
2811  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2812  errmsg("localized string format value too long")));
2813  }
2814  else
2815  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2816  asc_tolower_z(months_full[tm->tm_mon - 1]));
2817  s += strlen(s);
2818  break;
2819  case DCH_MON:
2821  if (!tm->tm_mon)
2822  break;
2823  if (S_TM(n->suffix))
2824  {
2825  char *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2826 
2827  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2828  strcpy(s, str);
2829  else
2830  ereport(ERROR,
2831  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2832  errmsg("localized string format value too long")));
2833  }
2834  else
2835  strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
2836  s += strlen(s);
2837  break;
2838  case DCH_Mon:
2840  if (!tm->tm_mon)
2841  break;
2842  if (S_TM(n->suffix))
2843  {
2844  char *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2845 
2846  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2847  strcpy(s, str);
2848  else
2849  ereport(ERROR,
2850  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2851  errmsg("localized string format value too long")));
2852  }
2853  else
2854  strcpy(s, months[tm->tm_mon - 1]);
2855  s += strlen(s);
2856  break;
2857  case DCH_mon:
2859  if (!tm->tm_mon)
2860  break;
2861  if (S_TM(n->suffix))
2862  {
2863  char *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2864 
2865  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2866  strcpy(s, str);
2867  else
2868  ereport(ERROR,
2869  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2870  errmsg("localized string format value too long")));
2871  }
2872  else
2873  strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
2874  s += strlen(s);
2875  break;
2876  case DCH_MM:
2877  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
2878  tm->tm_mon);
2879  if (S_THth(n->suffix))
2880  str_numth(s, s, S_TH_TYPE(n->suffix));
2881  s += strlen(s);
2882  break;
 /*
  * Day names, indexed by tm_wday.  Same TM / FM handling as the month
  * names above.
  */
2883  case DCH_DAY:
2885  if (S_TM(n->suffix))
2886  {
2887  char *str = str_toupper_z(localized_full_days[tm->tm_wday], collid);
2888 
2889  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2890  strcpy(s, str);
2891  else
2892  ereport(ERROR,
2893  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2894  errmsg("localized string format value too long")));
2895  }
2896  else
2897  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2898  asc_toupper_z(days[tm->tm_wday]));
2899  s += strlen(s);
2900  break;
2901  case DCH_Day:
2903  if (S_TM(n->suffix))
2904  {
2905  char *str = str_initcap_z(localized_full_days[tm->tm_wday], collid);
2906 
2907  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2908  strcpy(s, str);
2909  else
2910  ereport(ERROR,
2911  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2912  errmsg("localized string format value too long")));
2913  }
2914  else
2915  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2916  days[tm->tm_wday]);
2917  s += strlen(s);
2918  break;
2919  case DCH_day:
2921  if (S_TM(n->suffix))
2922  {
2923  char *str = str_tolower_z(localized_full_days[tm->tm_wday], collid);
2924 
2925  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2926  strcpy(s, str);
2927  else
2928  ereport(ERROR,
2929  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2930  errmsg("localized string format value too long")));
2931  }
2932  else
2933  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2934  asc_tolower_z(days[tm->tm_wday]));
2935  s += strlen(s);
2936  break;
2937  case DCH_DY:
2939  if (S_TM(n->suffix))
2940  {
2941  char *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid);
2942 
2943  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2944  strcpy(s, str);
2945  else
2946  ereport(ERROR,
2947  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2948  errmsg("localized string format value too long")));
2949  }
2950  else
2951  strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
2952  s += strlen(s);
2953  break;
2954  case DCH_Dy:
2956  if (S_TM(n->suffix))
2957  {
2958  char *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid);
2959 
2960  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2961  strcpy(s, str);
2962  else
2963  ereport(ERROR,
2964  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2965  errmsg("localized string format value too long")));
2966  }
2967  else
2968  strcpy(s, days_short[tm->tm_wday]);
2969  s += strlen(s);
2970  break;
2971  case DCH_dy:
2973  if (S_TM(n->suffix))
2974  {
2975  char *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid);
2976 
2977  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2978  strcpy(s, str);
2979  else
2980  ereport(ERROR,
2981  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2982  errmsg("localized string format value too long")));
2983  }
2984  else
2985  strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
2986  s += strlen(s);
2987  break;
2988  case DCH_DDD:
2989  case DCH_IDDD:
 /* DDD prints tm_yday; IDDD prints the result of date2isoyearday() */
2990  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
2991  (n->key->id == DCH_DDD) ?
2992  tm->tm_yday :
2993  date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday));
2994  if (S_THth(n->suffix))
2995  str_numth(s, s, S_TH_TYPE(n->suffix));
2996  s += strlen(s);
2997  break;
2998  case DCH_DD:
2999  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
3000  if (S_THth(n->suffix))
3001  str_numth(s, s, S_TH_TYPE(n->suffix));
3002  s += strlen(s);
3003  break;
3004  case DCH_D:
 /* tm_wday shifted up by one */
3006  sprintf(s, "%d", tm->tm_wday + 1);
3007  if (S_THth(n->suffix))
3008  str_numth(s, s, S_TH_TYPE(n->suffix));
3009  s += strlen(s);
3010  break;
3011  case DCH_ID:
 /* a tm_wday of 0 is reported as 7; other values are printed as they are */
3013  sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
3014  if (S_THth(n->suffix))
3015  str_numth(s, s, S_TH_TYPE(n->suffix));
3016  s += strlen(s);
3017  break;
3018  case DCH_WW:
 /* week number derived from tm_yday in blocks of seven days */
3019  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3020  (tm->tm_yday - 1) / 7 + 1);
3021  if (S_THth(n->suffix))
3022  str_numth(s, s, S_TH_TYPE(n->suffix));
3023  s += strlen(s);
3024  break;
3025  case DCH_IW:
3026  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3027  date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday));
3028  if (S_THth(n->suffix))
3029  str_numth(s, s, S_TH_TYPE(n->suffix));
3030  s += strlen(s);
3031  break;
3032  case DCH_Q:
 /* nothing is emitted when tm_mon is zero */
3033  if (!tm->tm_mon)
3034  break;
3035  sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
3036  if (S_THth(n->suffix))
3037  str_numth(s, s, S_TH_TYPE(n->suffix));
3038  s += strlen(s);
3039  break;
3040  case DCH_CC:
3041  if (is_interval) /* straight calculation */
3042  i = tm->tm_year / 100;
3043  else
3044  {
3045  if (tm->tm_year > 0)
3046  /* Century 20 == 1901 - 2000 */
3047  i = (tm->tm_year - 1) / 100 + 1;
3048  else
3049  /* Century 6BC == 600BC - 501BC */
3050  i = tm->tm_year / 100 - 1;
3051  }
 /* zero-padding is applied only while the value fits in two digits */
3052  if (i <= 99 && i >= -99)
3053  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
3054  else
3055  sprintf(s, "%d", i);
3056  if (S_THth(n->suffix))
3057  str_numth(s, s, S_TH_TYPE(n->suffix));
3058  s += strlen(s);
3059  break;
3060  case DCH_Y_YYY:
 /* thousands part, a comma, then the remainder padded to three digits */
3061  i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
3062  sprintf(s, "%d,%03d", i,
3063  ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
3064  if (S_THth(n->suffix))
3065  str_numth(s, s, S_TH_TYPE(n->suffix));
3066  s += strlen(s);
3067  break;
 /*
  * NOTE(review): in the four year cases below, the second branch of
  * the conditional that chooses between the plain and the ISO year is
  * cut short in this listing (its opening line is not present) --
  * confirm against the upstream file.
  */
3068  case DCH_YYYY:
3069  case DCH_IYYY:
3070  sprintf(s, "%0*d",
3071  S_FM(n->suffix) ? 0 :
3072  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
3073  (n->key->id == DCH_YYYY ?
3074  ADJUST_YEAR(tm->tm_year, is_interval) :
3076  tm->tm_mon,
3077  tm->tm_mday),
3078  is_interval)));
3079  if (S_THth(n->suffix))
3080  str_numth(s, s, S_TH_TYPE(n->suffix));
3081  s += strlen(s);
3082  break;
3083  case DCH_YYY:
3084  case DCH_IYY:
3085  sprintf(s, "%0*d",
3086  S_FM(n->suffix) ? 0 :
3087  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
3088  (n->key->id == DCH_YYY ?
3089  ADJUST_YEAR(tm->tm_year, is_interval) :
3091  tm->tm_mon,
3092  tm->tm_mday),
3093  is_interval)) % 1000);
3094  if (S_THth(n->suffix))
3095  str_numth(s, s, S_TH_TYPE(n->suffix));
3096  s += strlen(s);
3097  break;
3098  case DCH_YY:
3099  case DCH_IY:
3100  sprintf(s, "%0*d",
3101  S_FM(n->suffix) ? 0 :
3102  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
3103  (n->key->id == DCH_YY ?
3104  ADJUST_YEAR(tm->tm_year, is_interval) :
3106  tm->tm_mon,
3107  tm->tm_mday),
3108  is_interval)) % 100);
3109  if (S_THth(n->suffix))
3110  str_numth(s, s, S_TH_TYPE(n->suffix));
3111  s += strlen(s);
3112  break;
3113  case DCH_Y:
3114  case DCH_I:
3115  sprintf(s, "%1d",
3116  (n->key->id == DCH_Y ?
3117  ADJUST_YEAR(tm->tm_year, is_interval) :
3119  tm->tm_mon,
3120  tm->tm_mday),
3121  is_interval)) % 10);
3122  if (S_THth(n->suffix))
3123  str_numth(s, s, S_TH_TYPE(n->suffix));
3124  s += strlen(s);
3125  break;
 /*
  * NOTE(review): for RM and rm the final argument of the sprintf call
  * (the string to print) is not present in this listing -- confirm
  * against the upstream file.
  */
3126  case DCH_RM:
3127  if (!tm->tm_mon)
3128  break;
3129  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
3131  s += strlen(s);
3132  break;
3133  case DCH_rm:
3134  if (!tm->tm_mon)
3135  break;
3136  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
3138  s += strlen(s);
3139  break;
3140  case DCH_W:
 /* week number within the month, derived from tm_mday in blocks of seven days */
3141  sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
3142  if (S_THth(n->suffix))
3143  str_numth(s, s, S_TH_TYPE(n->suffix));
3144  s += strlen(s);
3145  break;
3146  case DCH_J:
3147  sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
3148  if (S_THth(n->suffix))
3149  str_numth(s, s, S_TH_TYPE(n->suffix));
3150  s += strlen(s);
3151  break;
3152  }
3153  }
3154 
 /* terminate the output string */
3155  *s = '\0';
3156 }
3157 
3158 /* ----------
3159  * Process a string as denoted by a list of FormatNodes.
3160  * The TmFromChar struct pointed to by 'out' is populated with the results.
3161  *
3162  * Note: we currently don't have any to_interval() function, so there
3163  * is no need here for INVALID_FOR_INTERVAL checks.
3164  *
3165  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
3166  * ----------
3167  */
3168 static void
3169 DCH_from_char(FormatNode *node, char *in, TmFromChar *out, bool std,
3170  bool *have_error)
3171 {
3172  FormatNode *n;
3173  char *s;
3174  int len,
3175  value;
3176  bool fx_mode = std;
3177 
3178  /* number of extra skipped characters (more than given in format string) */
3179  int extra_skip = 0;
3180 
3181  for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
3182  {
3183  /*
3184  * Ignore spaces at the beginning of the string and before fields when
3185  * not in FX (fixed width) mode.
3186  */
3187  if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) &&
3188  (n->type == NODE_TYPE_ACTION || n == node))
3189  {
3190  while (*s != '\0' && isspace((unsigned char) *s))
3191  {
3192  s++;
3193  extra_skip++;
3194  }
3195  }
3196 
3197  if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
3198  {
3199  if (std)
3200  {
3201  /*
3202  * Standard mode requires strict matching between format
3203  * string separators/spaces and input string.
3204  */
3205  Assert(n->character[0] && !n->character[1]);
3206 
3207  if (*s == n->character[0])
3208  s++;
3209  else
3211  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3212  errmsg("unmatched format separator \"%c\"",
3213  n->character[0]))));
3214  }
3215  else if (!fx_mode)
3216  {
3217  /*
3218  * In non FX (fixed format) mode one format string space or
3219  * separator match to one space or separator in input string.
3220  * Or match nothing if there is no space or separator in the
3221  * current position of input string.
3222  */
3223  extra_skip--;
3224  if (isspace((unsigned char) *s) || is_separator_char(s))
3225  {
3226  s++;
3227  extra_skip++;
3228  }
3229  }
3230  else
3231  {
3232  /*
3233  * In FX mode, on format string space or separator we consume
3234  * exactly one character from input string. Notice we don't
3235  * insist that the consumed character match the format's
3236  * character.
3237  */
3238  s += pg_mblen(s);
3239  }
3240  continue;
3241  }
3242  else if (n->type != NODE_TYPE_ACTION)
3243  {
3244  /*
3245  * Text character, so consume one character from input string.
3246  * Notice we don't insist that the consumed character match the
3247  * format's character.
3248  */
3249  if (!fx_mode)
3250  {
3251  /*
3252  * In non FX mode we might have skipped some extra characters
3253  * (more than specified in format string) before. In this
3254  * case we don't skip input string character, because it might
3255  * be part of field.
3256  */
3257  if (extra_skip > 0)
3258  extra_skip--;
3259  else
3260  s += pg_mblen(s);
3261  }
3262  else
3263  {
3264  s += pg_mblen(s);
3265  }
3266  continue;
3267  }
3268 
3269  from_char_set_mode(out, n->key->date_mode, have_error);
3270  CHECK_ERROR;
3271 
3272  switch (n->key->id)
3273  {
3274  case DCH_FX:
3275  fx_mode = true;
3276  break;
3277  case DCH_A_M:
3278  case DCH_P_M:
3279  case DCH_a_m:
3280  case DCH_p_m:
3282  ALL_UPPER, n->key->len, n, have_error);
3283  CHECK_ERROR;
3284  from_char_set_int(&out->pm, value % 2, n, have_error);
3285  CHECK_ERROR;
3286  out->clock = CLOCK_12_HOUR;
3287  break;
3288  case DCH_AM:
3289  case DCH_PM:
3290  case DCH_am:
3291  case DCH_pm:
3292  from_char_seq_search(&value, &s, ampm_strings,
3293  ALL_UPPER, n->key->len, n, have_error);
3294  CHECK_ERROR;
3295  from_char_set_int(&out->pm, value % 2, n, have_error);
3296  CHECK_ERROR;
3297  out->clock = CLOCK_12_HOUR;
3298  break;
3299  case DCH_HH:
3300  case DCH_HH12:
3301  from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
3302  CHECK_ERROR;
3303  out->clock = CLOCK_12_HOUR;
3304  SKIP_THth(s, n->suffix);
3305  break;
3306  case DCH_HH24:
3307  from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
3308  CHECK_ERROR;
3309  SKIP_THth(s, n->suffix);
3310  break;
3311  case DCH_MI:
3312  from_char_parse_int(&out->mi, &s, n, have_error);
3313  CHECK_ERROR;
3314  SKIP_THth(s, n->suffix);
3315  break;
3316  case DCH_SS:
3317  from_char_parse_int(&out->ss, &s, n, have_error);
3318  CHECK_ERROR;
3319  SKIP_THth(s, n->suffix);
3320  break;
3321  case DCH_MS: /* millisecond */
3322  len = from_char_parse_int_len(&out->ms, &s, 3, n, have_error);
3323  CHECK_ERROR;
3324 
3325  /*
3326  * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
3327  */
3328  out->ms *= len == 1 ? 100 :
3329  len == 2 ? 10 : 1;
3330 
3331  SKIP_THth(s, n->suffix);
3332  break;
3333  case DCH_FF1:
3334  case DCH_FF2:
3335  case DCH_FF3:
3336  case DCH_FF4:
3337  case DCH_FF5:
3338  case DCH_FF6:
3339  out->ff = n->key->id - DCH_FF1 + 1;
3340  /* fall through */
3341  case DCH_US: /* microsecond */
3342  len = from_char_parse_int_len(&out->us, &s,
3343  n->key->id == DCH_US ? 6 :
3344  out->ff, n, have_error);
3345  CHECK_ERROR;
3346 
3347  out->us *= len == 1 ? 100000 :
3348  len == 2 ? 10000 :
3349  len == 3 ? 1000 :
3350  len == 4 ? 100 :
3351  len == 5 ? 10 : 1;
3352 
3353  SKIP_THth(s, n->suffix);
3354  break;
3355  case DCH_SSSS:
3356  from_char_parse_int(&out->ssss, &s, n, have_error);
3357  CHECK_ERROR;
3358  SKIP_THth(s, n->suffix);
3359  break;
3360  case DCH_tz:
3361  case DCH_TZ:
3362  case DCH_OF:
3364  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3365  errmsg("formatting field \"%s\" is only supported in to_char",
3366  n->key->name))));
3367  CHECK_ERROR;
3368  break;
3369  case DCH_TZH:
3370 
3371  /*
3372  * Value of TZH might be negative. And the issue is that we
3373  * might swallow minus sign as the separator. So, if we have
3374  * skipped more characters than specified in the format
3375  * string, then we consider prepending last skipped minus to
3376  * TZH.
3377  */
3378  if (*s == '+' || *s == '-' || *s == ' ')
3379  {
3380  out->tzsign = *s == '-' ? -1 : +1;
3381  s++;
3382  }
3383  else
3384  {
3385  if (extra_skip > 0 && *(s - 1) == '-')
3386  out->tzsign = -1;
3387  else
3388  out->tzsign = +1;
3389  }
3390 
3391  from_char_parse_int_len(&out->tzh, &s, 2, n, have_error);
3392  CHECK_ERROR;
3393  break;
3394  case DCH_TZM:
3395  /* assign positive timezone sign if TZH was not seen before */
3396  if (!out->tzsign)
3397  out->tzsign = +1;
3398  from_char_parse_int_len(&out->tzm, &s, 2, n, have_error);
3399  CHECK_ERROR;
3400  break;
3401  case DCH_A_D:
3402  case DCH_B_C:
3403  case DCH_a_d:
3404  case DCH_b_c:
3406  ALL_UPPER, n->key->len, n, have_error);
3407  CHECK_ERROR;
3408  from_char_set_int(&out->bc, value % 2, n, have_error);
3409  CHECK_ERROR;
3410  break;
3411  case DCH_AD:
3412  case DCH_BC:
3413  case DCH_ad:
3414  case DCH_bc:
3415  from_char_seq_search(&value, &s, adbc_strings,
3416  ALL_UPPER, n->key->len, n, have_error);
3417  CHECK_ERROR;
3418  from_char_set_int(&out->bc, value % 2, n, have_error);
3419  CHECK_ERROR;
3420  break;
3421  case DCH_MONTH:
3422  case DCH_Month:
3423  case DCH_month:
3425  MAX_MONTH_LEN, n, have_error);
3426  CHECK_ERROR;
3427  from_char_set_int(&out->mm, value + 1, n, have_error);
3428  CHECK_ERROR;
3429  break;
3430  case DCH_MON:
3431  case DCH_Mon:
3432  case DCH_mon:
3433  from_char_seq_search(&value, &s, months, ONE_UPPER,
3434  MAX_MON_LEN, n, have_error);
3435  CHECK_ERROR;
3436  from_char_set_int(&out->mm, value + 1, n, have_error);
3437  CHECK_ERROR;
3438  break;
3439  case DCH_MM:
3440  from_char_parse_int(&out->mm, &s, n, have_error);
3441  CHECK_ERROR;
3442  SKIP_THth(s, n->suffix);
3443  break;
3444  case DCH_DAY:
3445  case DCH_Day:
3446  case DCH_day:
3447  from_char_seq_search(&value, &s, days, ONE_UPPER,
3448  MAX_DAY_LEN, n, have_error);
3449  CHECK_ERROR;
3450  from_char_set_int(&out->d, value, n, have_error);
3451  CHECK_ERROR;
3452  out->d++;
3453  break;
3454  case DCH_DY:
3455  case DCH_Dy:
3456  case DCH_dy:
3457  from_char_seq_search(&value, &s, days, ONE_UPPER,
3458  MAX_DY_LEN, n, have_error);
3459  CHECK_ERROR;
3460  from_char_set_int(&out->d, value, n, have_error);
3461  CHECK_ERROR;
3462  out->d++;
3463  break;
3464  case DCH_DDD:
3465  from_char_parse_int(&out->ddd, &s, n, have_error);
3466  CHECK_ERROR;
3467  SKIP_THth(s, n->suffix);
3468  break;
3469  case DCH_IDDD:
3470  from_char_parse_int_len(&out->ddd, &s, 3, n, have_error);
3471  CHECK_ERROR;
3472  SKIP_THth(s, n->suffix);
3473  break;
3474  case DCH_DD:
3475  from_char_parse_int(&out->dd, &s, n, have_error);
3476  CHECK_ERROR;
3477  SKIP_THth(s, n->suffix);
3478  break;
3479  case DCH_D:
3480  from_char_parse_int(&out->d, &s, n, have_error);
3481  CHECK_ERROR;
3482  SKIP_THth(s, n->suffix);
3483  break;
3484  case DCH_ID:
3485  from_char_parse_int_len(&out->d, &s, 1, n, have_error);
3486  CHECK_ERROR;
3487  /* Shift numbering to match Gregorian where Sunday = 1 */
3488  if (++out->d > 7)
3489  out->d = 1;
3490  SKIP_THth(s, n->suffix);
3491  break;
3492  case DCH_WW:
3493  case DCH_IW:
3494  from_char_parse_int(&out->ww, &s, n, have_error);
3495  CHECK_ERROR;
3496  SKIP_THth(s, n->suffix);
3497  break;
3498  case DCH_Q:
3499 
3500  /*
3501  * We ignore 'Q' when converting to date because it is unclear
3502  * which date in the quarter to use, and some people specify
3503  * both quarter and month, so if it was honored it might
3504  * conflict with the supplied month. That is also why we don't
3505  * throw an error.
3506  *
3507  * We still parse the source string for an integer, but it
3508  * isn't stored anywhere in 'out'.
3509  */
3510  from_char_parse_int((int *) NULL, &s, n, have_error);
3511  CHECK_ERROR;
3512  SKIP_THth(s, n->suffix);
3513  break;
3514  case DCH_CC:
3515  from_char_parse_int(&out->cc, &s, n, have_error);
3516  CHECK_ERROR;
3517  SKIP_THth(s, n->suffix);
3518  break;
3519  case DCH_Y_YYY:
3520  {
3521  int matched,
3522  years,
3523  millennia,
3524  nch;
3525 
3526  matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
3527  if (matched < 2)
3529  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3530  errmsg("invalid input string for \"Y,YYY\""))));
3531  years += (millennia * 1000);
3532  from_char_set_int(&out->year, years, n, have_error);
3533  CHECK_ERROR;
3534  out->yysz = 4;
3535  s += nch;
3536  SKIP_THth(s, n->suffix);
3537  }
3538  break;
3539  case DCH_YYYY:
3540  case DCH_IYYY:
3541  from_char_parse_int(&out->year, &s, n, have_error);
3542  CHECK_ERROR;
3543  out->yysz = 4;
3544  SKIP_THth(s, n->suffix);
3545  break;
3546  case DCH_YYY:
3547  case DCH_IYY:
3548  len = from_char_parse_int(&out->year, &s, n, have_error);
3549  CHECK_ERROR;
3550  if (len < 4)
3551  out->year = adjust_partial_year_to_2020(out->year);
3552  out->yysz = 3;
3553  SKIP_THth(s, n->suffix);
3554  break;
3555  case DCH_YY:
3556  case DCH_IY:
3557  len = from_char_parse_int(&out->year, &s, n, have_error);
3558  CHECK_ERROR;
3559  if (len < 4)
3560  out->year = adjust_partial_year_to_2020(out->year);
3561  out->yysz = 2;
3562  SKIP_THth(s, n->suffix);
3563  break;
3564  case DCH_Y:
3565  case DCH_I:
3566  len = from_char_parse_int(&out->year, &s, n, have_error);
3567  CHECK_ERROR;
3568  if (len < 4)
3569  out->year = adjust_partial_year_to_2020(out->year);
3570  out->yysz = 1;
3571  SKIP_THth(s, n->suffix);
3572  break;
3573  case DCH_RM:
3575  ALL_UPPER, MAX_RM_LEN, n, have_error);
3576  CHECK_ERROR;
3577  from_char_set_int(&out->mm, MONTHS_PER_YEAR - value,
3578  n, have_error);
3579  CHECK_ERROR;
3580  break;
3581  case DCH_rm:
3583  ALL_LOWER, MAX_RM_LEN, n, have_error);
3584  CHECK_ERROR;
3585  from_char_set_int(&out->mm, MONTHS_PER_YEAR - value,
3586  n, have_error);
3587  CHECK_ERROR;
3588  break;
3589  case DCH_W:
3590  from_char_parse_int(&out->w, &s, n, have_error);
3591  CHECK_ERROR;
3592  SKIP_THth(s, n->suffix);
3593  break;
3594  case DCH_J:
3595  from_char_parse_int(&out->j, &s, n, have_error);
3596  CHECK_ERROR;
3597  SKIP_THth(s, n->suffix);
3598  break;
3599  }
3600 
3601  /* Ignore all spaces after fields */
3602  if (!fx_mode)
3603  {
3604  extra_skip = 0;
3605  while (*s != '\0' && isspace((unsigned char) *s))
3606  {
3607  s++;
3608  extra_skip++;
3609  }
3610  }
3611  }
3612 
3613  /*
3614  * Standard parsing mode doesn't allow unmatched format patterns or
3615  * trailing characters in the input string.
3616  */
3617  if (std)
3618  {
3619  if (n->type != NODE_TYPE_END)
3621  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3622  errmsg("input string is too short for datetime format"))));
3623 
3624  while (*s != '\0' && isspace((unsigned char) *s))
3625  s++;
3626 
3627  if (*s != '\0')
3629  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3630  errmsg("trailing characters remain in input string "
3631  "after datetime format"))));
3632  }
3633 
3634 on_error:
3635  return;
3636 }
3637 
3638 /*
3639  * The invariant for DCH cache entry management is that DCHCounter is equal
3640  * to the maximum age value among the existing entries, and we increment it
3641  * whenever an access occurs. If we approach overflow, deal with that by
3642  * halving all the age values, so that we retain a fairly accurate idea of
3643  * which entries are oldest.
3644  */
3645 static inline void
3647 {
3648  if (DCHCounter >= (INT_MAX - 1))
3649  {
3650  for (int i = 0; i < n_DCHCache; i++)
3651  DCHCache[i]->age >>= 1;
3652  DCHCounter >>= 1;
3653  }
3654 }
3655 
3656 /*
3657  * Get mask of date/time/zone components present in format nodes.
3658  *
3659  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
3660  */
3661 static int
3662 DCH_datetime_type(FormatNode *node, bool *have_error)
3663 {
3664  FormatNode *n;
3665  int flags = 0;
3666 
3667  for (n = node; n->type != NODE_TYPE_END; n++)
3668  {
3669  if (n->type != NODE_TYPE_ACTION)
3670  continue;
3671 
3672  switch (n->key->id)
3673  {
3674  case DCH_FX:
3675  break;
3676  case DCH_A_M:
3677  case DCH_P_M:
3678  case DCH_a_m:
3679  case DCH_p_m:
3680  case DCH_AM:
3681  case DCH_PM:
3682  case DCH_am:
3683  case DCH_pm:
3684  case DCH_HH:
3685  case DCH_HH12:
3686  case DCH_HH24:
3687  case DCH_MI:
3688  case DCH_SS:
3689  case DCH_MS: /* millisecond */
3690  case DCH_US: /* microsecond */
3691  case DCH_FF1:
3692  case DCH_FF2:
3693  case DCH_FF3:
3694  case DCH_FF4:
3695  case DCH_FF5:
3696  case DCH_FF6:
3697  case DCH_SSSS:
3698  flags |= DCH_TIMED;
3699  break;
3700  case DCH_tz:
3701  case DCH_TZ:
3702  case DCH_OF:
3704  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3705  errmsg("formatting field \"%s\" is only supported in to_char",
3706  n->key->name))));
3707  flags |= DCH_ZONED;
3708  break;
3709  case DCH_TZH:
3710  case DCH_TZM:
3711  flags |= DCH_ZONED;
3712  break;
3713  case DCH_A_D:
3714  case DCH_B_C:
3715  case DCH_a_d:
3716  case DCH_b_c:
3717  case DCH_AD:
3718  case DCH_BC:
3719  case DCH_ad:
3720  case DCH_bc:
3721  case DCH_MONTH:
3722  case DCH_Month:
3723  case DCH_month:
3724  case DCH_MON:
3725  case DCH_Mon:
3726  case DCH_mon:
3727  case DCH_MM:
3728  case DCH_DAY:
3729  case DCH_Day:
3730  case DCH_day:
3731  case DCH_DY:
3732  case DCH_Dy:
3733  case DCH_dy:
3734  case DCH_DDD:
3735  case DCH_IDDD:
3736  case DCH_DD:
3737  case DCH_D:
3738  case DCH_ID:
3739  case DCH_WW:
3740  case DCH_Q:
3741  case DCH_CC:
3742  case DCH_Y_YYY:
3743  case DCH_YYYY:
3744  case DCH_IYYY:
3745  case DCH_YYY:
3746  case DCH_IYY:
3747  case DCH_YY:
3748  case DCH_IY:
3749  case DCH_Y:
3750  case DCH_I:
3751  case DCH_RM:
3752  case DCH_rm:
3753  case DCH_W:
3754  case DCH_J:
3755  flags |= DCH_DATED;
3756  break;
3757  }
3758  }
3759 
3760 on_error:
3761  return flags;
3762 }
3763 
3764 /* select a DCHCacheEntry to hold the given format picture */
3765 static DCHCacheEntry *
3766 DCH_cache_getnew(const char *str, bool std)
3767 {
3768  DCHCacheEntry *ent;
3769 
3770  /* Ensure we can advance DCHCounter below */
3772 
3773  /*
3774  * If cache is full, remove oldest entry (or recycle first not-valid one)
3775  */
3777  {
3778  DCHCacheEntry *old = DCHCache[0];
3779 
3780 #ifdef DEBUG_TO_FROM_CHAR
3781  elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
3782 #endif
3783  if (old->valid)
3784  {
3785  for (int i = 1; i < DCH_CACHE_ENTRIES; i++)
3786  {
3787  ent = DCHCache[i];
3788  if (!ent->valid)
3789  {
3790  old = ent;
3791  break;
3792  }
3793  if (ent->age < old->age)
3794  old = ent;
3795  }
3796  }
3797 #ifdef DEBUG_TO_FROM_CHAR
3798  elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
3799 #endif
3800  old->valid = false;
3801  StrNCpy(old->str, str, DCH_CACHE_SIZE + 1);
3802  old->age = (++DCHCounter);
3803  /* caller is expected to fill format, then set valid */
3804  return old;
3805  }
3806  else
3807  {
3808 #ifdef DEBUG_TO_FROM_CHAR
3809  elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
3810 #endif
3811  Assert(DCHCache[n_DCHCache] == NULL);
3812  DCHCache[n_DCHCache] = ent = (DCHCacheEntry *)
3814  ent->valid = false;
3815  StrNCpy(ent->str, str, DCH_CACHE_SIZE + 1);
3816  ent->std = std;
3817  ent->age = (++DCHCounter);
3818  /* caller is expected to fill format, then set valid */
3819  ++n_DCHCache;
3820  return ent;
3821  }
3822 }
3823 
3824 /* look for an existing DCHCacheEntry matching the given format picture */
3825 static DCHCacheEntry *
3826 DCH_cache_search(const char *str, bool std)
3827 {
3828  /* Ensure we can advance DCHCounter below */
3830 
3831  for (int i = 0; i < n_DCHCache; i++)
3832  {
3833  DCHCacheEntry *ent = DCHCache[i];
3834 
3835  if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std)
3836  {
3837  ent->age = (++DCHCounter);
3838  return ent;
3839  }
3840  }
3841 
3842  return NULL;
3843 }
3844 
3845 /* Find or create a DCHCacheEntry for the given format picture */
3846 static DCHCacheEntry *
3847 DCH_cache_fetch(const char *str, bool std)
3848 {
3849  DCHCacheEntry *ent;
3850 
3851  if ((ent = DCH_cache_search(str, std)) == NULL)
3852  {
3853  /*
3854  * Not in the cache, must run parser and save a new format-picture to
3855  * the cache. Do not mark the cache entry valid until parsing
3856  * succeeds.
3857  */
3858  ent = DCH_cache_getnew(str, std);
3859 
3860  parse_format(ent->format, str, DCH_keywords, DCH_suff, DCH_index,
3861  DCH_FLAG | (std ? STD_FLAG : 0), NULL);
3862 
3863  ent->valid = true;
3864  }
3865  return ent;
3866 }
3867 
3868 /*
3869  * Format a date/time or interval into a string according to fmt.
3870  * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char
3871  * for formatting.
3872  */
3873 static text *
3874 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
3875 {
3876  FormatNode *format;
3877  char *fmt_str,
3878  *result;
3879  bool incache;
3880  int fmt_len;
3881  text *res;
3882 
3883  /*
3884  * Convert fmt to C string
3885  */
3886  fmt_str = text_to_cstring(fmt);
3887  fmt_len = strlen(fmt_str);
3888 
3889  /*
3890  * Allocate workspace for result as C string
3891  */
3892  result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
3893  *result = '\0';
3894 
3895  if (fmt_len > DCH_CACHE_SIZE)
3896  {
3897  /*
3898  * Allocate new memory if format picture is bigger than static cache
3899  * and do not use cache (call parser always)
3900  */
3901  incache = false;
3902 
3903  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
3904 
3905  parse_format(format, fmt_str, DCH_keywords,
3906  DCH_suff, DCH_index, DCH_FLAG, NULL);
3907  }
3908  else
3909  {
3910  /*
3911  * Use cache buffers
3912  */
3913  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
3914 
3915  incache = true;
3916  format = ent->format;
3917  }
3918 
3919  /* The real work is here */
3920  DCH_to_char(format, is_interval, tmtc, result, collid);
3921 
3922  if (!incache)
3923  pfree(format);
3924 
3925  pfree(fmt_str);
3926 
3927  /* convert C-string result to TEXT format */
3928  res = cstring_to_text(result);
3929 
3930  pfree(result);
3931  return res;
3932 }
3933 
3934 /****************************************************************************
3935  * Public routines
3936  ***************************************************************************/
3937 
3938 /* -------------------
3939  * TIMESTAMP to_char()
3940  * -------------------
3941  */
3942 Datum
3944 {
3946  text *fmt = PG_GETARG_TEXT_PP(1),
3947  *res;
3948  TmToChar tmtc;
3949  struct pg_tm *tm;
3950  int thisdate;
3951 
3952  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
3953  PG_RETURN_NULL();
3954 
3955  ZERO_tmtc(&tmtc);
3956  tm = tmtcTm(&tmtc);
3957 
3958  if (timestamp2tm(dt, NULL, tm, &tmtcFsec(&tmtc), NULL, NULL) != 0)
3959  ereport(ERROR,
3960  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3961  errmsg("timestamp out of range")));
3962 
3963  thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
3964  tm->tm_wday = (thisdate + 1) % 7;
3965  tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
3966 
3967  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
3968  PG_RETURN_NULL();
3969 
3970  PG_RETURN_TEXT_P(res);
3971 }
3972 
3973 Datum
3975 {
3977  text *fmt = PG_GETARG_TEXT_PP(1),
3978  *res;
3979  TmToChar tmtc;
3980  int tz;
3981  struct pg_tm *tm;
3982  int thisdate;
3983 
3984  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
3985  PG_RETURN_NULL();
3986 
3987  ZERO_tmtc(&tmtc);
3988  tm = tmtcTm(&tmtc);
3989 
3990  if (timestamp2tm(dt, &tz, tm, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
3991  ereport(ERROR,
3992  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3993  errmsg("timestamp out of range")));
3994 
3995  thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
3996  tm->tm_wday = (thisdate + 1) % 7;
3997  tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
3998 
3999  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4000  PG_RETURN_NULL();
4001 
4002  PG_RETURN_TEXT_P(res);
4003 }
4004 
4005 
4006 /* -------------------
4007  * INTERVAL to_char()
4008  * -------------------
4009  */
4010 Datum
4012 {
4013  Interval *it = PG_GETARG_INTERVAL_P(0);
4014  text *fmt = PG_GETARG_TEXT_PP(1),
4015  *res;
4016  TmToChar tmtc;
4017  struct pg_tm *tm;
4018 
4019  if (VARSIZE_ANY_EXHDR(fmt) <= 0)
4020  PG_RETURN_NULL();
4021 
4022  ZERO_tmtc(&tmtc);
4023  tm = tmtcTm(&tmtc);
4024 
4025  if (interval2tm(*it, tm, &tmtcFsec(&tmtc)) != 0)
4026  PG_RETURN_NULL();
4027 
4028  /* wday is meaningless, yday approximates the total span in days */
4029  tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday;
4030 
4031  if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
4032  PG_RETURN_NULL();
4033 
4034  PG_RETURN_TEXT_P(res);
4035 }
4036 
4037 /* ---------------------
4038  * TO_TIMESTAMP()
4039  *
4040  * Make Timestamp from date_str which is formatted at argument 'fmt'
4041  * ( to_timestamp is reverse to_char() )
4042  * ---------------------
4043  */
4044 Datum
4046 {
4047  text *date_txt = PG_GETARG_TEXT_PP(0);
4048  text *fmt = PG_GETARG_TEXT_PP(1);
4049  Timestamp result;
4050  int tz;
4051  struct pg_tm tm;
4052  fsec_t fsec;
4053  int fprec;
4054 
4055  do_to_timestamp(date_txt, fmt, false, &tm, &fsec, &fprec, NULL, NULL);
4056 
4057  /* Use the specified time zone, if any. */
4058  if (tm.tm_zone)
4059  {
4060  int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), &tz);
4061 
4062  if (dterr)
4063  DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
4064  }
4065  else
4067 
4068  if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
4069  ereport(ERROR,
4070  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4071  errmsg("timestamp out of range")));
4072 
4073  /* Use the specified fractional precision, if any. */
4074  if (fprec)
4075  AdjustTimestampForTypmod(&result, fprec);
4076 
4077  PG_RETURN_TIMESTAMP(result);
4078 }
4079 
4080 /* ----------
4081  * TO_DATE
4082  * Make Date from date_str which is formatted at argument 'fmt'
4083  * ----------
4084  */
4085 Datum
4087 {
4088  text *date_txt = PG_GETARG_TEXT_PP(0);
4089  text *fmt = PG_GETARG_TEXT_PP(1);
4090  DateADT result;
4091  struct pg_tm tm;
4092  fsec_t fsec;
4093 
4094  do_to_timestamp(date_txt, fmt, false, &tm, &fsec, NULL, NULL, NULL);
4095 
4096  /* Prevent overflow in Julian-day routines */
4097  if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4098  ereport(ERROR,
4099  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4100  errmsg("date out of range: \"%s\"",
4101  text_to_cstring(date_txt))));
4102 
4103  result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
4104 
4105  /* Now check for just-out-of-range dates */
4106  if (!IS_VALID_DATE(result))
4107  ereport(ERROR,
4108  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4109  errmsg("date out of range: \"%s\"",
4110  text_to_cstring(date_txt))));
4111 
4112  PG_RETURN_DATEADT(result);
4113 }
4114 
4115 /*
4116  * Convert the 'date_txt' input to a datetime type using argument 'fmt' as a format string.
4117  * The actual data type (returned in 'typid', 'typmod') is determined by
4118  * the presence of date/time/zone components in the format string.
4119  *
4120  * When timezone component is present, the corresponding offset is set to '*tz'.
4121  *
4122  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
4123  * and zero value is returned.
4124  */
4125 Datum
4126 parse_datetime(text *date_txt, text *fmt, bool strict, Oid *typid,
4127  int32 *typmod, int *tz, bool *have_error)
4128 {
4129  struct pg_tm tm;
4130  fsec_t fsec;
4131  int fprec = 0;
4132  uint32 flags;
4133 
4134  do_to_timestamp(date_txt, fmt, strict, &tm, &fsec, &fprec, &flags, have_error);
4135  CHECK_ERROR;
4136 
4137  *typmod = fprec ? fprec : -1; /* fractional part precision */
4138 
4139  if (flags & DCH_DATED)
4140  {
4141  if (flags & DCH_TIMED)
4142  {
4143  if (flags & DCH_ZONED)
4144  {
4145  TimestampTz result;
4146 
4147  if (tm.tm_zone)
4148  {
4149  int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
4150 
4151  if (dterr)
4152  DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
4153  }
4154  else
4155  {
4156  /*
4157  * Time zone is present in format string, but not in input
4158  * string. Assuming do_to_timestamp() triggers no error
4159  * this should be possible only in non-strict case.
4160  */
4161  Assert(!strict);
4162 
4164  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4165  errmsg("missing time zone in input string for type timestamptz"))));
4166  }
4167 
4168  if (tm2timestamp(&tm, fsec, tz, &result) != 0)
4170  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4171  errmsg("timestamptz out of range"))));
4172 
4173  AdjustTimestampForTypmod(&result, *typmod);
4174 
4175  *typid = TIMESTAMPTZOID;
4176  return TimestampTzGetDatum(result);
4177  }
4178  else
4179  {
4180  Timestamp result;
4181 
4182  if (tm2timestamp(&tm, fsec, NULL, &result) != 0)
4184  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4185  errmsg("timestamp out of range"))));
4186 
4187  AdjustTimestampForTypmod(&result, *typmod);
4188 
4189  *typid = TIMESTAMPOID;
4190  return TimestampGetDatum(result);
4191  }
4192  }
4193  else
4194  {
4195  if (flags & DCH_ZONED)
4196  {
4198  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4199  errmsg("datetime format is zoned but not timed"))));
4200  }
4201  else
4202  {
4203  DateADT result;
4204 
4205  /* Prevent overflow in Julian-day routines */
4206  if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4208  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4209  errmsg("date out of range: \"%s\"",
4210  text_to_cstring(date_txt)))));
4211 
4212  result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) -
4214 
4215  /* Now check for just-out-of-range dates */
4216  if (!IS_VALID_DATE(result))
4218  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4219  errmsg("date out of range: \"%s\"",
4220  text_to_cstring(date_txt)))));
4221 
4222  *typid = DATEOID;
4223  return DateADTGetDatum(result);
4224  }
4225  }
4226  }
4227  else if (flags & DCH_TIMED)
4228  {
4229  if (flags & DCH_ZONED)
4230  {
4231  TimeTzADT *result = palloc(sizeof(TimeTzADT));
4232 
4233  if (tm.tm_zone)
4234  {
4235  int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
4236 
4237  if (dterr)
4238  RETURN_ERROR(DateTimeParseError(dterr, text_to_cstring(date_txt), "timetz"));
4239  }
4240  else
4241  {
4242  /*
4243  * Time zone is present in format string, but not in input
4244  * string. Assuming do_to_timestamp() triggers no error this
4245  * should be possible only in non-strict case.
4246  */
4247  Assert(!strict);
4248 
4250  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4251  errmsg("missing time zone in input string for type timetz"))));
4252  }
4253 
4254  if (tm2timetz(&tm, fsec, *tz, result) != 0)
4256  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4257  errmsg("timetz out of range"))));
4258 
4259  AdjustTimeForTypmod(&result->time, *typmod);
4260 
4261  *typid = TIMETZOID;
4262  return TimeTzADTPGetDatum(result);
4263  }
4264  else
4265  {
4266  TimeADT result;
4267 
4268  if (tm2time(&tm, fsec, &result) != 0)
4270  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4271  errmsg("time out of range"))));
4272 
4273  AdjustTimeForTypmod(&result, *typmod);
4274 
4275  *typid = TIMEOID;
4276  return TimeADTGetDatum(result);
4277  }
4278  }
4279  else
4280  {
4282  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4283  errmsg("datetime format is not dated and not timed"))));
4284  }
4285 
4286 on_error:
4287  return (Datum) 0;
4288 }
4289 
4290 /*
4291  * do_to_timestamp: shared code for to_timestamp and to_date
4292  *
4293  * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm,
4294  * fractional seconds, and fractional precision.
4295  *
4296  * We parse 'fmt' into a list of FormatNodes, which is then passed to
4297  * DCH_from_char to populate a TmFromChar with the parsed contents of
4298  * 'date_txt'.
4299  *
4300  * The TmFromChar is then analysed and converted into the final results in
4301  * struct 'tm' and 'fsec'.
4302  *
4303  * Bit mask of date/time/zone components found in 'fmt' is returned in 'flags'.
4304  *
4305  * 'std' specifies standard parsing mode.
4306  *
4307  * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
4308  */
4309 static void
4310 do_to_timestamp(text *date_txt, text *fmt, bool std,
4311  struct pg_tm *tm, fsec_t *fsec, int *fprec,
4312  uint32 *flags, bool *have_error)
4313 {
4314  FormatNode *format = NULL;
4315  TmFromChar tmfc;
4316  int fmt_len;
4317  char *date_str;
4318  int fmask;
4319  bool incache = false;
4320 
4321  date_str = text_to_cstring(date_txt);
4322 
4323  ZERO_tmfc(&tmfc);
4324  ZERO_tm(tm);
4325  *fsec = 0;
4326  fmask = 0; /* bit mask for ValidateDate() */
4327 
4328  fmt_len = VARSIZE_ANY_EXHDR(fmt);
4329 
4330  if (fmt_len)
4331  {
4332  char *fmt_str;
4333 
4334  fmt_str = text_to_cstring(fmt);
4335 
4336  if (fmt_len > DCH_CACHE_SIZE)
4337  {
4338  /*
4339  * Allocate new memory if format picture is bigger than static
4340  * cache and do not use cache (call parser always)
4341  */
4342  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4343 
4344  parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index,
4345  DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4346  }
4347  else
4348  {
4349  /*
4350  * Use cache buffers
4351  */
4352  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std);
4353 
4354  incache = true;
4355  format = ent->format;
4356  }
4357 
4358 #ifdef DEBUG_TO_FROM_CHAR
4359  /* dump_node(format, fmt_len); */
4360  /* dump_index(DCH_keywords, DCH_index); */
4361 #endif
4362 
4363  DCH_from_char(format, date_str, &tmfc, std, have_error);
4364  CHECK_ERROR;
4365 
4366  pfree(fmt_str);
4367 
4368  if (flags)
4369  *flags = DCH_datetime_type(format, have_error);
4370 
4371  if (!incache)
4372  {
4373  pfree(format);
4374  format = NULL;
4375  }
4376 
4377  CHECK_ERROR;
4378  }
4379 
4380  DEBUG_TMFC(&tmfc);
4381 
4382  /*
4383  * Convert to_date/to_timestamp input fields to standard 'tm'
4384  */
4385  if (tmfc.ssss)
4386  {
4387  int x = tmfc.ssss;
4388 
4389  tm->tm_hour = x / SECS_PER_HOUR;
4390  x %= SECS_PER_HOUR;
4391  tm->tm_min = x / SECS_PER_MINUTE;
4392  x %= SECS_PER_MINUTE;
4393  tm->tm_sec = x;
4394  }
4395 
4396  if (tmfc.ss)
4397  tm->tm_sec = tmfc.ss;
4398  if (tmfc.mi)
4399  tm->tm_min = tmfc.mi;
4400  if (tmfc.hh)
4401  tm->tm_hour = tmfc.hh;
4402 
4403  if (tmfc.clock == CLOCK_12_HOUR)
4404  {
4405  if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
4406  {
4408  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4409  errmsg("hour \"%d\" is invalid for the 12-hour clock",
4410  tm->tm_hour),
4411  errhint("Use the 24-hour clock, or give an hour between 1 and 12."))));
4412  }
4413 
4414  if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
4415  tm->tm_hour += HOURS_PER_DAY / 2;
4416  else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
4417  tm->tm_hour = 0;
4418  }
4419 
4420  if (tmfc.year)
4421  {
4422  /*
4423  * If CC and YY (or Y) are provided, use YY as 2 low-order digits for
4424  * the year in the given century. Keep in mind that the 21st century
4425  * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
4426  * 600BC to 501BC.
4427  */
4428  if (tmfc.cc && tmfc.yysz <= 2)
4429  {
4430  if (tmfc.bc)
4431  tmfc.cc = -tmfc.cc;
4432  tm->tm_year = tmfc.year % 100;
4433  if (tm->tm_year)
4434  {
4435  if (tmfc.cc >= 0)
4436  tm->tm_year += (tmfc.cc - 1) * 100;
4437  else
4438  tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
4439  }
4440  else
4441  {
4442  /* find century year for dates ending in "00" */
4443  tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
4444  }
4445  }
4446  else
4447  {
4448  /* If a 4-digit year is provided, we use that and ignore CC. */
4449  tm->tm_year = tmfc.year;
4450  if (tmfc.bc && tm->tm_year > 0)
4451  tm->tm_year = -(tm->tm_year - 1);
4452  }
4453  fmask |= DTK_M(YEAR);
4454  }
4455  else if (tmfc.cc)
4456  {
4457  /* use first year of century */
4458  if (tmfc.bc)
4459  tmfc.cc = -tmfc.cc;
4460  if (tmfc.cc >= 0)
4461  /* +1 because 21st century started in 2001 */
4462  tm->tm_year = (tmfc.cc - 1) * 100 + 1;
4463  else
4464  /* +1 because year == 599 is 600 BC */
4465  tm->tm_year = tmfc.cc * 100 + 1;
4466  fmask |= DTK_M(YEAR);
4467  }
4468 
4469  if (tmfc.j)
4470  {
4471  j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4472  fmask |= DTK_DATE_M;
4473  }
4474 
4475  if (tmfc.ww)
4476  {
4477  if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4478  {
4479  /*
4480  * If tmfc.d is not set, then the date is left at the beginning of
4481  * the ISO week (Monday).
4482  */
4483  if (tmfc.d)
4484  isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4485  else
4486  isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4487  fmask |= DTK_DATE_M;
4488  }
4489  else
4490  tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
4491  }
4492 
4493  if (tmfc.w)
4494  tmfc.dd = (tmfc.w - 1) * 7 + 1;
4495  if (tmfc.dd)
4496  {
4497  tm->tm_mday = tmfc.dd;
4498  fmask |= DTK_M(DAY);
4499  }
4500  if (tmfc.mm)
4501  {
4502  tm->tm_mon = tmfc.mm;
4503  fmask |= DTK_M(MONTH);
4504  }
4505 
4506  if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
4507  {
4508  /*
4509  * The month and day field have not been set, so we use the
4510  * day-of-year field to populate them. Depending on the date mode,
4511  * this field may be interpreted as a Gregorian day-of-year, or an ISO
4512  * week date day-of-year.
4513  */
4514 
4515  if (!tm->tm_year && !tmfc.bc)
4516  {
4518  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4519  errmsg("cannot calculate day of year without year information"))));
4520  }
4521 
4522  if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4523  {
4524  int j0; /* zeroth day of the ISO year, in Julian */
4525 
4526  j0 = isoweek2j(tm->tm_year, 1) - 1;
4527 
4528  j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4529  fmask |= DTK_DATE_M;
4530  }
4531  else
4532  {
4533  const int *y;
4534  int i;
4535 
4536  static const int ysum[2][13] = {
4537  {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
4538  {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}};
4539 
4540  y = ysum[isleap(tm->tm_year)];
4541 
4542  for (i = 1; i <= MONTHS_PER_YEAR; i++)
4543  {
4544  if (tmfc.ddd <= y[i])
4545  break;
4546  }
4547  if (tm->tm_mon <= 1)
4548  tm->tm_mon = i;
4549 
4550  if (tm->tm_mday <= 1)
4551  tm->tm_mday = tmfc.ddd - y[i - 1];
4552 
4553  fmask |= DTK_M(MONTH) | DTK_M(DAY);
4554  }
4555  }
4556 
4557  if (tmfc.ms)
4558  *fsec += tmfc.ms * 1000;
4559  if (tmfc.us)
4560  *fsec += tmfc.us;
4561  if (fprec)
4562  *fprec = tmfc.ff; /* fractional precision, if specified */
4563 
4564  /* Range-check date fields according to bit mask computed above */
4565  if (fmask != 0)
4566  {
4567  /* We already dealt with AD/BC, so pass isjulian = true */
4568  int dterr = ValidateDate(fmask, true, false, false, tm);
4569 
4570  if (dterr != 0)
4571  {
4572  /*
4573  * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate
4574  * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an
4575  * irrelevant hint about datestyle.
4576  */
4577  RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
4578  }
4579  }
4580 
4581  /* Range-check time fields too */
4582  if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY ||
4583  tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR ||
4584  tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE ||
4585  *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC)
4586  {
4587  RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
4588  }
4589 
4590  /* Save parsed time-zone into tm->tm_zone if it was specified */
4591  if (tmfc.tzsign)
4592  {
4593  char *tz;
4594 
4595  if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR ||
4596  tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR)
4597  {
4598  RETURN_ERROR(DateTimeParseError(DTERR_TZDISP_OVERFLOW, date_str, "timestamp"));
4599  }
4600 
4601  tz = psprintf("%c%02d:%02d",
4602  tmfc.tzsign > 0 ? '+' : '-', tmfc.tzh, tmfc.tzm);
4603 
4604  tm->tm_zone = tz;
4605  }
4606 
4607  DEBUG_TM(tm);
4608 
4609 on_error:
4610 
4611  if (format && !incache)
4612  pfree(format);
4613 
4614  pfree(date_str);
4615 }
4616 
4617 
4618 /**********************************************************************
4619  * the NUMBER version part
4620  *********************************************************************/
4621 
4622 
/*
 * Fill the first 'max' bytes of 'str' with the character 'c' and terminate
 * the string right after them.  The buffer must have room for max + 1 bytes.
 * Returns 'str'.
 */
static char *
fill_str(char *str, int c, int max)
{
	memset(str, c, max);
	str[max] = '\0';

	return str;
}
4630 
/*
 * Reset every field of the number description that '_n' points to back to
 * zero.  The argument is evaluated once per field, so it should be free of
 * side effects.
 */
#define zeroize_NUM(_n) \
do { \
	/* digit counts and zero-padding window */ \
	(_n)->pre = 0; \
	(_n)->post = 0; \
	(_n)->zero_start = 0; \
	(_n)->zero_end = 0; \
	/* sign handling */ \
	(_n)->lsign = 0; \
	(_n)->pre_lsign_num = 0; \
	/* remaining flags and modifiers */ \
	(_n)->flag = 0; \
	(_n)->need_locale = 0; \
	(_n)->multi = 0; \
} while(0)
4643 
4644 /* This works the same as DCH_prevent_counter_overflow */
4645 static inline void
4647 {
4648  if (NUMCounter >= (INT_MAX - 1))
4649  {
4650  for (int i = 0; i < n_NUMCache; i++)
4651  NUMCache[i]->age >>= 1;
4652  NUMCounter >>= 1;
4653  }
4654 }
4655 
4656 /* select a NUMCacheEntry to hold the given format picture */
4657 static NUMCacheEntry *
4658 NUM_cache_getnew(const char *str)
4659 {
4660  NUMCacheEntry *ent;
4661 
4662  /* Ensure we can advance NUMCounter below */
4664 
4665  /*
4666  * If cache is full, remove oldest entry (or recycle first not-valid one)
4667  */
4669  {
4670  NUMCacheEntry *old = NUMCache[0];
4671 
4672 #ifdef DEBUG_TO_FROM_CHAR
4673  elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache);
4674 #endif
4675  if (old->valid)
4676  {
4677  for (int i = 1; i < NUM_CACHE_ENTRIES; i++)
4678  {
4679  ent = NUMCache[i];
4680  if (!ent->valid)
4681  {
4682  old = ent;
4683  break;
4684  }
4685  if (ent->age < old->age)
4686  old = ent;
4687  }
4688  }
4689 #ifdef DEBUG_TO_FROM_CHAR
4690  elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age);
4691 #endif
4692  old->valid = false;
4693  StrNCpy(old->str, str, NUM_CACHE_SIZE + 1);
4694  old->age = (++NUMCounter);
4695  /* caller is expected to fill format and Num, then set valid */
4696  return old;
4697  }
4698  else
4699  {
4700 #ifdef DEBUG_TO_FROM_CHAR
4701  elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache);
4702 #endif
4703  Assert(NUMCache[n_NUMCache] == NULL);
4704  NUMCache[n_NUMCache] = ent = (NUMCacheEntry *)
4706  ent->valid = false;
4707  StrNCpy(ent->str, str, NUM_CACHE_SIZE + 1);
4708  ent->age = (++NUMCounter);
4709  /* caller is expected to fill format and Num, then set valid */
4710  ++n_NUMCache;
4711  return ent;
4712  }
4713 }
4714 
4715 /* look for an existing NUMCacheEntry matching the given format picture */
4716 static NUMCacheEntry *
4717 NUM_cache_search(const char *str)
4718 {
4719  /* Ensure we can advance NUMCounter below */
4721 
4722  for (int i = 0; i < n_NUMCache; i++)
4723  {
4724  NUMCacheEntry *ent = NUMCache[i];
4725 
4726  if (ent->valid && strcmp(ent->str, str) == 0)
4727  {
4728  ent->age = (++NUMCounter);
4729  return ent;
4730  }
4731  }
4732 
4733  return NULL;
4734 }
4735 
4736 /* Find or create a NUMCacheEntry for the given format picture */
4737 static NUMCacheEntry *
4738 NUM_cache_fetch(const char *str)
4739 {
4740  NUMCacheEntry *ent;
4741 
4742  if ((ent = NUM_cache_search(str)) == NULL)
4743  {
4744  /*
4745  * Not in the cache, must run parser and save a new format-picture to
4746  * the cache. Do not mark the cache entry valid until parsing
4747  * succeeds.
4748  */
4749  ent = NUM_cache_getnew(str);
4750 
4751  zeroize_NUM(&ent->Num);
4752 
4753  parse_format(ent->format, str, NUM_keywords,
4754  NULL, NUM_index, NUM_FLAG, &ent->Num);
4755 
4756  ent->valid = true;
4757  }
4758  return ent;
4759 }
4760 
4761 /* ----------
4762  * Cache routine for NUM to_char version
4763  * ----------
4764  */
4765 static FormatNode *
4766 NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
4767 {
4768  FormatNode *format = NULL;
4769  char *str;
4770 
4771  str = text_to_cstring(pars_str);
4772 
4773  if (len > NUM_CACHE_SIZE)
4774  {
4775  /*
4776  * Allocate new memory if format picture is bigger than static cache
4777  * and do not use cache (call parser always)
4778  */
4779  format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode));
4780 
4781  *shouldFree = true;
4782 
4783  zeroize_NUM(Num);
4784 
4785  parse_format(format, str, NUM_keywords,
4786  NULL, NUM_index, NUM_FLAG, Num);
4787  }
4788  else
4789  {
4790  /*
4791  * Use cache buffers
4792  */
4793  NUMCacheEntry *ent = NUM_cache_fetch(str);
4794 
4795  *shouldFree = false;
4796 
4797  format = ent->format;
4798 
4799  /*
4800  * Copy cache to used struct
4801  */
4802  Num->flag = ent->Num.flag;
4803  Num->lsign = ent->Num.lsign;
4804  Num->pre = ent->Num.pre;
4805  Num->post = ent->Num.post;
4806  Num->pre_lsign_num = ent->Num.pre_lsign_num;
4807  Num->need_locale = ent->Num.need_locale;
4808  Num->multi = ent->Num.multi;
4809  Num->zero_start = ent->Num.zero_start;
4810  Num->zero_end = ent->Num.zero_end;
4811  }
4812 
4813 #ifdef DEBUG_TO_FROM_CHAR
4814  /* dump_node(format, len); */
4815  dump_index(NUM_keywords, NUM_index);
4816 #endif
4817 
4818  pfree(str);
4819  return format;
4820 }
4821 
4822 
4823 static char *
4824 int_to_roman(int number)
4825 {
4826  int len = 0,
4827  num = 0;
4828  char *p = NULL,
4829  *result,
4830  numstr[12];
4831 
4832  result = (char *) palloc(16);
4833  *result = '\0';
4834 
4835  if (number > 3999 || number < 1)
4836  {
4837  fill_str(result, '#', 15);
4838  return result;
4839  }
4840  len = snprintf(numstr, sizeof(numstr), "%d", number);
4841 
4842  for (p = numstr; *p != '\0'; p++, --len)
4843  {
4844  num = *p - 49; /* 48 ascii + 1 */
4845  if (num < 0)
4846  continue;
4847 
4848  if (len > 3)
4849  {
4850  while (num-- != -1)
4851  strcat(result, "M");
4852  }
4853  else
4854  {
4855  if (len == 3)
4856  strcat(result, rm100[num]);
4857  else if (len == 2)
4858  strcat(result, rm10[num]);
4859  else if (len == 1)
4860  strcat(result, rm1[num]);
4861  }
4862  }
4863  return result;
4864 }
4865 
4866 
4867 
4868 /* ----------
4869  * Locale
4870  * ----------
4871  */
4872 static void
4874 {
4875  if (Np->Num->need_locale)
4876  {
4877  struct lconv *lconv;
4878 
4879  /*
4880  * Get locales
4881  */
4882  lconv = PGLC_localeconv();
4883 
4884  /*
4885  * Positive / Negative number sign
4886  */
4887  if (lconv->negative_sign && *lconv->negative_sign)
4888  Np->L_negative_sign = lconv->negative_sign;
4889  else
4890  Np->L_negative_sign = "-";
4891 
4892  if (lconv->positive_sign && *lconv->positive_sign)
4893  Np->L_positive_sign = lconv->positive_sign;
4894  else
4895  Np->L_positive_sign = "+";
4896 
4897  /*
4898  * Number decimal point
4899  */
4900  if (lconv->decimal_point && *lconv->decimal_point)
4901  Np->decimal = lconv->decimal_point;
4902 
4903  else
4904  Np->decimal = ".";
4905 
4906  if (!IS_LDECIMAL(Np->Num))
4907  Np->decimal = ".";
4908 
4909  /*
4910  * Number thousands separator
4911  *
4912  * Some locales (e.g. broken glibc pt_BR), have a comma for decimal,
4913  * but "" for thousands_sep, so we set the thousands_sep too.
4914  * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php
4915  */
4916  if (lconv->thousands_sep && *lconv->thousands_sep)
4917  Np->L_thousands_sep = lconv->thousands_sep;
4918  /* Make sure thousands separator doesn't match decimal point symbol. */
4919  else if (strcmp(Np->decimal, ",") !=0)
4920  Np->L_thousands_sep = ",";
4921  else
4922  Np->L_thousands_sep = ".";
4923 
4924  /*
4925  * Currency symbol
4926  */
4927  if (lconv->currency_symbol && *lconv->currency_symbol)
4928  Np->L_currency_symbol = lconv->currency_symbol;
4929  else
4930  Np->L_currency_symbol = " ";
4931  }
4932  else
4933  {
4934  /*
4935  * Default values
4936  */
4937  Np->L_negative_sign = "-";
4938  Np->L_positive_sign = "+";
4939  Np->decimal = ".";
4940 
4941  Np->L_thousands_sep = ",";
4942  Np->L_currency_symbol = " ";
4943  }
4944 }
4945 
/* ----------
 * Return pointer of last relevant number after decimal point
 *	12.0500 --> last relevant is '5'
 *	12.0000 --> last relevant is '.'
 * If there is no decimal point, return NULL (which will result in same
 * behavior as if FM hadn't been specified).
 *
 * "Relevant" means any character after the '.' other than '0'; the returned
 * pointer points into num itself.
 * ----------
 */
static char *
get_last_relevant_decnum(char *num)
{
	char	   *result,
			   *p = strchr(num, '.');

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, "get_last_relevant_decnum()");
#endif

	if (!p)
		return NULL;

	/* Start at the decimal point, then advance past each non-'0' character */
	result = p;

	while (*(++p))
	{
		if (*p != '0')
			result = p;
	}

	return result;
}
4977 
/*
 * Bounds-check macros for NUM_processor() and its subsidiary routines.
 * Both expect a NUMProc pointer named Np and an int named input_len to be
 * in scope at the point of use.
 *
 * OVERLOAD_TEST: true if Np->inout_p has reached the end of the input string
 * AMOUNT_TEST(s): true if at least s bytes remain at Np->inout_p
 */
#define OVERLOAD_TEST	(Np->inout + input_len <= Np->inout_p)
#define AMOUNT_TEST(s)	(Np->inout + (input_len - (s)) >= Np->inout_p)
4985 
4986 /* ----------
4987  * Number extraction for TO_NUMBER()
4988  * ----------
4989  */
4990 static void
4991 NUM_numpart_from_char(NUMProc *Np, int id, int input_len)
4992 {
4993  bool isread = false;
4994 
4995 #ifdef DEBUG_TO_FROM_CHAR
4996  elog(DEBUG_elog_output, " --- scan start --- id=%s",
4997  (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???");
4998 #endif
4999 
5000  if (OVERLOAD_TEST)
5001  return;
5002 
5003  if (*Np->inout_p == ' ')
5004  Np->inout_p++;
5005 
5006  if (OVERLOAD_TEST)
5007  return;
5008 
5009  /*
5010  * read sign before number
5011  */
5012  if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) &&
5013  (Np->read_pre + Np->read_post) == 0)
5014  {
5015 #ifdef DEBUG_TO_FROM_CHAR
5016  elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s",
5017  *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign);
5018 #endif
5019 
5020  /*
5021  * locale sign
5022  */
5023  if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE)
5024  {
5025  int x = 0;
5026 
5027 #ifdef DEBUG_TO_FROM_CHAR
5028  elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p);
5029 #endif
5030  if ((x = strlen(Np->L_negative_sign)) &&
5031  AMOUNT_TEST(x) &&
5032  strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
5033  {
5034  Np->inout_p += x;
5035  *Np->number = '-';
5036  }
5037  else if ((x = strlen(Np->L_positive_sign)) &&
5038  AMOUNT_TEST(x) &&
5039  strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
5040  {
5041  Np->inout_p += x;
5042  *Np->number = '+';
5043  }
5044  }
5045  else
5046  {
5047 #ifdef DEBUG_TO_FROM_CHAR
5048  elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p);
5049 #endif
5050 
5051  /*
5052  * simple + - < >
5053  */
5054  if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) &&
5055  *Np->inout_p == '<'))
5056  {
5057  *Np->number = '-'; /* set - */
5058  Np->inout_p++;
5059  }
5060  else if (*Np->inout_p == '+')
5061  {
5062  *Np->number = '+'; /* set + */
5063  Np->inout_p++;
5064  }
5065  }
5066  }
5067 
5068  if (OVERLOAD_TEST)
5069  return;
5070 
5071 #ifdef DEBUG_TO_FROM_CHAR
5072  elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number);
5073 #endif
5074 
5075  /*
5076  * read digit or decimal point
5077  */
5078  if (isdigit((unsigned char) *Np->inout_p))
5079  {
5080  if (Np->read_dec && Np->read_post == Np->Num->post)
5081  return;
5082 
5083  *Np->number_p = *Np->inout_p;
5084  Np->number_p++;
5085 
5086  if (Np->read_dec)
5087  Np->read_post++;
5088  else
5089  Np->read_pre++;
5090 
5091  isread = true;
5092 
5093 #ifdef DEBUG_TO_FROM_CHAR
5094  elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p);
5095 #endif
5096  }
5097  else if (IS_DECIMAL(Np->Num) && Np->read_dec == false)
5098  {
5099  /*
5100  * We need not test IS_LDECIMAL(Np->Num) explicitly here, because
5101  * Np->decimal is always just "." if we don't have a D format token.
5102  * So we just unconditionally match to Np->decimal.
5103  */
5104  int x = strlen(Np->decimal);
5105 
5106 #ifdef DEBUG_TO_FROM_CHAR
5107  elog(DEBUG_elog_output, "Try read decimal point (%c)",
5108  *Np->inout_p);
5109 #endif
5110  if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0)
5111  {
5112  Np->inout_p += x - 1;
5113  *Np->number_p = '.';
5114  Np->number_p++;
5115  Np->read_dec = true;
5116  isread = true;
5117  }
5118  }
5119 
5120  if (OVERLOAD_TEST)
5121  return;
5122 
5123  /*
5124  * Read sign behind "last" number
5125  *
5126  * We need sign detection because determine exact position of post-sign is
5127  * difficult:
5128  *
5129  * FM9999.9999999S -> 123.001- 9.9S -> .5- FM9.999999MI ->
5130  * 5.01-
5131  */
5132  if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0)
5133  {
5134  /*
5135  * locale sign (NUM_S) is always anchored behind a last number, if: -
5136  * locale sign expected - last read char was NUM_0/9 or NUM_DEC - and
5137  * next char is not digit
5138  */
5139  if (IS_LSIGN(Np->Num) && isread &&
5140  (Np->inout_p + 1) < Np->inout + input_len &&
5141  !isdigit((unsigned char) *(Np->inout_p + 1)))
5142  {
5143  int x;
5144  char *tmp = Np->inout_p++;
5145 
5146 #ifdef DEBUG_TO_FROM_CHAR
5147  elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p);
5148 #endif
5149  if ((x = strlen(Np->L_negative_sign)) &&
5150  AMOUNT_TEST(x) &&
5151  strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
5152  {
5153  Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
5154  *Np->number = '-';
5155  }
5156  else if ((x = strlen(Np->L_positive_sign)) &&
5157  AMOUNT_TEST(x) &&
5158  strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
5159  {
5160  Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
5161  *Np->number = '+';
5162  }
5163  if (*Np->number == ' ')
5164  /* no sign read */
5165  Np->inout_p = tmp;
5166  }
5167 
5168  /*
5169  * try read non-locale sign, it's happen only if format is not exact
5170  * and we cannot determine sign position of MI/PL/SG, an example:
5171  *
5172  * FM9.999999MI -> 5.01-
5173  *
5174  * if (.... && IS_LSIGN(Np->Num)==false) prevents read wrong formats
5175  * like to_number('1 -', '9S') where sign is not anchored to last
5176  * number.
5177  */
5178  else if (isread == false && IS_LSIGN(Np->Num) == false &&
5179  (IS_PLUS(Np->Num) || IS_MINUS(Np->Num)))
5180  {
5181 #ifdef DEBUG_TO_FROM_CHAR
5182  elog(DEBUG_elog_output, "Try read simple post-sign (%c)", *Np->inout_p);
5183 #endif
5184 
5185  /*
5186  * simple + -
5187  */
5188  if (*Np->inout_p == '-' || *Np->inout_p == '+')
5189  /* NUM_processor() do inout_p++ */
5190  *Np->number = *Np->inout_p;
5191  }
5192  }
5193 }
5194 
/*
 * True when all of the following hold for the NUMProc _n: the format has no
 * zero flag, nothing has been consumed from the number string yet
 * (number_p still equals number), the number string starts with '0', and
 * the format has digits after the decimal point.
 */
#define IS_PREDEC_SPACE(_n) \
	(!IS_ZERO((_n)->Num) && \
	 (_n)->number_p == (_n)->number && \
	 (_n)->number[0] == '0' && \
	 (_n)->Num->post != 0)
5200 
5201 /* ----------
5202  * Add digit or sign to number-string
5203  * ----------
5204  */
5205 static void
5207 {
5208  int end;
5209 
5210  if (IS_ROMAN(Np->Num))
5211  return;
5212 
5213  /* Note: in this elog() output not set '\0' in 'inout' */
5214 
5215 #ifdef DEBUG_TO_FROM_CHAR
5216 
5217  /*
5218  * Np->num_curr is number of current item in format-picture, it is not
5219  * current position in inout!
5220  */
5221  elog(DEBUG_elog_output,
5222  "SIGN_WROTE: %d, CURRENT: %d, NUMBER_P: \"%s\", INOUT: \"%s\"",
5223  Np->sign_wrote,
5224  Np->num_curr,
5225  Np->number_p,
5226  Np->inout);
5227 #endif
5228  Np->num_in = false;
5229 
5230  /*
5231  * Write sign if real number will write to output Note: IS_PREDEC_SPACE()
5232  * handle "9.9" --> " .1"
5233  */
5234  if (Np->sign_wrote == false &&
5235  (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) &&
5236  (IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.')))
5237  {
5238  if (IS_LSIGN(Np->Num))
5239  {
5240  if (Np->Num->lsign == NUM_LSIGN_PRE)
5241  {
5242  if (Np->sign == '-')
5243  strcpy(Np->inout_p, Np->L_negative_sign);
5244  else
5245  strcpy(Np->inout_p, Np->L_positive_sign);
5246  Np->inout_p += strlen(Np->inout_p);
5247  Np->sign_wrote = true;
5248  }
5249  }
5250  else if (IS_BRACKET(Np->Num))