PostgreSQL Source Code  git master
formatting.c
Go to the documentation of this file.
1 /* -----------------------------------------------------------------------
2  * formatting.c
3  *
4  * src/backend/utils/adt/formatting.c
5  *
6  *
7  * Portions Copyright (c) 1999-2018, PostgreSQL Global Development Group
8  *
9  *
10  * TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
11  *
12  * The PostgreSQL routines for a timestamp/int/float/numeric formatting,
13  * inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
14  *
15  *
16  * Cache & Memory:
17  * The routines use their own internal cache for format pictures.
18  *
19  * The cache uses a static buffer and is persistent across transactions. If
20  * the format-picture is bigger than the cache buffer, the parser is always
21  * called.
22  *
23  * NOTE for Number version:
24  * Everything in this version is implemented as keywords (suffixes are
25  * not used), because a format picture describes *one* item (number)
26  * only. This differs from the timestamp version, where each keyword can
27  * have a suffix.
28  *
29  * NOTE for Timestamp routines:
30  * This module does *not* use the POSIX 'struct tm' type, but rather the
31  * PgSQL type, in which tm_mon is one-based (*not* zero-based) and the
32  * year is the full year number, *not* an offset from 1900.
33  * Module supports AD / BC / AM / PM.
34  *
35  * Supported types for to_char():
36  *
37  * Timestamp, Numeric, int4, int8, float4, float8
38  *
39  * Supported types for reverse conversion:
40  *
41  * Timestamp - to_timestamp()
42  * Date - to_date()
43  * Numeric - to_number()
44  *
45  *
46  * Karel Zak
47  *
48  * TODO
49  * - better number building (formatting) / parsing, now it isn't
50  * ideal code
51  * - use Assert()
52  * - add support for abstime
53  * - add support for roman number to standard number conversion
54  * - add support for number spelling
55  * - add support for string to string formatting (we must be better
56  * than Oracle :-),
57  * to_char('Hello', 'X X X X X') -> 'H e l l o'
58  *
59  * -----------------------------------------------------------------------
60  */
61 
62 #ifdef DEBUG_TO_FROM_CHAR
63 #define DEBUG_elog_output DEBUG3
64 #endif
65 
66 #include "postgres.h"
67 
68 #include <ctype.h>
69 #include <unistd.h>
70 #include <math.h>
71 #include <float.h>
72 #include <limits.h>
73 
74 /*
75  * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
76  * declare them in <wchar.h>.
77  */
78 #ifdef HAVE_WCHAR_H
79 #include <wchar.h>
80 #endif
81 #ifdef HAVE_WCTYPE_H
82 #include <wctype.h>
83 #endif
84 
85 #ifdef USE_ICU
86 #include <unicode/ustring.h>
87 #endif
88 
89 #include "catalog/pg_collation.h"
90 #include "mb/pg_wchar.h"
91 #include "utils/builtins.h"
92 #include "utils/date.h"
93 #include "utils/datetime.h"
94 #include "utils/formatting.h"
95 #include "utils/int8.h"
96 #include "utils/numeric.h"
97 #include "utils/pg_locale.h"
98 
99 /* ----------
100  * Routines type
101  * ----------
102  */
103 #define DCH_TYPE 1 /* DATE-TIME version */
104 #define NUM_TYPE 2 /* NUMBER version */
105 
106 /* ----------
107  * KeyWord Index (ascii from position 32 (' ') to 126 (~))
108  * ----------
109  */
110 #define KeyWord_INDEX_SIZE ('~' - ' ')
111 #define KeyWord_INDEX_FILTER(_c) ((_c) <= ' ' || (_c) >= '~' ? 0 : 1)
112 
113 /* ----------
114  * Maximal length of one node
115  * ----------
116  */
117 #define DCH_MAX_ITEM_SIZ 12 /* max localized day name */
118 #define NUM_MAX_ITEM_SIZ 8 /* roman number (RN has 15 chars) */
119 
120 
121 /* ----------
122  * Format parser structs
123  * ----------
124  */
/*
 * One prefix/postfix modifier that may be attached to a format keyword.
 *
 * The member order is significant: suffix tables in this file initialize
 * their entries positionally as {name, len, id, type}.
 */
typedef struct
{
	char	   *name;			/* text of the suffix */
	int			len;			/* length of the suffix text */
	int			id;				/* code used in node->suffix */
	int			type;			/* prefix or postfix (SUFFTYPE_*) */
} KeySuffix;
132 
133 /* ----------
134  * FromCharDateMode
135  * ----------
136  *
137  * This value is used to nominate one of several distinct (and mutually
138  * exclusive) date conventions that a keyword can belong to.
139  */
140 typedef enum
141 {
142  FROM_CHAR_DATE_NONE = 0, /* Value does not affect date mode. */
143  FROM_CHAR_DATE_GREGORIAN, /* Gregorian (day, month, year) style date */
144  FROM_CHAR_DATE_ISOWEEK /* ISO 8601 week date */
146 
147 typedef struct
148 {
149  const char *name;
150  int len;
151  int id;
152  bool is_digit;
154 } KeyWord;
155 
/*
 * One node of a format picture.  Which of "key" or "character" is
 * meaningful depends on "type" (see the NODE_TYPE_* macros defined just
 * after this struct).
 */
156 typedef struct
157 {
158  int type; /* NODE_TYPE_XXX, see below */
159  const KeyWord *key; /* keyword table entry; used if type is ACTION */
160  char character[MAX_MULTIBYTE_CHAR_LEN + 1]; /* if type is CHAR; the +1 presumably leaves room for a terminator */
161  int suffix; /* keyword prefix/suffix code, if any */
162 } FormatNode;
163 
164 #define NODE_TYPE_END 1
165 #define NODE_TYPE_ACTION 2
166 #define NODE_TYPE_CHAR 3
167 
168 #define SUFFTYPE_PREFIX 1
169 #define SUFFTYPE_POSTFIX 2
170 
171 #define CLOCK_24_HOUR 0
172 #define CLOCK_12_HOUR 1
173 
174 
175 /* ----------
176  * Full months
177  * ----------
178  */
179 static const char *const months_full[] = {
180  "January", "February", "March", "April", "May", "June", "July",
181  "August", "September", "October", "November", "December", NULL
182 };
183 
184 static const char *const days_short[] = {
185  "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
186 };
187 
188 /* ----------
189  * AD / BC
190  * ----------
191  * There is no 0 AD. Years go from 1 BC to 1 AD, so we make it
192  * positive and map year == -1 to year zero, and shift all negative
193  * years up one. For interval years, we just return the year.
194  */
195 #define ADJUST_YEAR(year, is_interval) ((is_interval) ? (year) : ((year) <= 0 ? -((year) - 1) : (year)))
196 
197 #define A_D_STR "A.D."
198 #define a_d_STR "a.d."
199 #define AD_STR "AD"
200 #define ad_STR "ad"
201 
202 #define B_C_STR "B.C."
203 #define b_c_STR "b.c."
204 #define BC_STR "BC"
205 #define bc_STR "bc"
206 
207 /*
208  * AD / BC strings for seq_search.
209  *
210  * These are given in two variants, a long form with periods and a standard
211  * form without.
212  *
213  * The array is laid out such that matches for AD have an even index, and
214  * matches for BC have an odd index. So the boolean value for BC is given by
215  * taking the array index of the match, modulo 2.
216  */
217 static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR, NULL};
218 static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR, NULL};
219 
220 /* ----------
221  * AM / PM
222  * ----------
223  */
224 #define A_M_STR "A.M."
225 #define a_m_STR "a.m."
226 #define AM_STR "AM"
227 #define am_STR "am"
228 
229 #define P_M_STR "P.M."
230 #define p_m_STR "p.m."
231 #define PM_STR "PM"
232 #define pm_STR "pm"
233 
234 /*
235  * AM / PM strings for seq_search.
236  *
237  * These are given in two variants, a long form with periods and a standard
238  * form without.
239  *
240  * The array is laid out such that matches for AM have an even index, and
241  * matches for PM have an odd index. So the boolean value for PM is given by
242  * taking the array index of the match, modulo 2.
243  */
244 static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR, NULL};
245 static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR, NULL};
246 
247 /* ----------
248  * Months in roman-numeral
249  * (Must be in reverse order for seq_search (in FROM_CHAR), because
250  * 'VIII' must have higher precedence than 'V')
251  * ----------
252  */
253 static const char *const rm_months_upper[] =
254 {"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I", NULL};
255 
256 static const char *const rm_months_lower[] =
257 {"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL};
258 
259 /* ----------
260  * Roman numbers
261  * ----------
262  */
263 static const char *const rm1[] = {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", NULL};
264 static const char *const rm10[] = {"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC", NULL};
265 static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", NULL};
266 
267 /* ----------
268  * Ordinal postfixes
269  * ----------
270  */
271 static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL};
272 static const char *const numth[] = {"st", "nd", "rd", "th", NULL};
273 
274 /* ----------
275  * Flags & Options:
276  * ----------
277  */
278 #define ONE_UPPER 1 /* Name */
279 #define ALL_UPPER 2 /* NAME */
280 #define ALL_LOWER 3 /* name */
281 
282 #define FULL_SIZ 0
283 
284 #define MAX_MONTH_LEN 9
285 #define MAX_MON_LEN 3
286 #define MAX_DAY_LEN 9
287 #define MAX_DY_LEN 3
288 #define MAX_RM_LEN 4
289 
290 #define TH_UPPER 1
291 #define TH_LOWER 2
292 
293 /* ----------
294  * Number description struct
295  * ----------
296  */
/*
 * Description of a number format picture.
 *
 * All members are plain ints; the member order is kept exactly as before.
 */
typedef struct
{
	int			pre;			/* count of numbers before the decimal */
	int			post;			/* count of numbers after the decimal */
	int			lsign;			/* locale sign wanted (NUM_LSIGN_*) */
	int			flag;			/* number parameters (NUM_F_* bits) */
	int			pre_lsign_num;	/* temporary value for lsign */
	int			multi;			/* multiplier for 'V' */
	int			zero_start;		/* position of the first zero */
	int			zero_end;		/* position of the last zero */
	int			need_locale;	/* locale information is needed */
} NUMDesc;
309 
310 /* ----------
311  * Flags for NUMBER version
312  * ----------
313  */
314 #define NUM_F_DECIMAL (1 << 1)
315 #define NUM_F_LDECIMAL (1 << 2)
316 #define NUM_F_ZERO (1 << 3)
317 #define NUM_F_BLANK (1 << 4)
318 #define NUM_F_FILLMODE (1 << 5)
319 #define NUM_F_LSIGN (1 << 6)
320 #define NUM_F_BRACKET (1 << 7)
321 #define NUM_F_MINUS (1 << 8)
322 #define NUM_F_PLUS (1 << 9)
323 #define NUM_F_ROMAN (1 << 10)
324 #define NUM_F_MULTI (1 << 11)
325 #define NUM_F_PLUS_POST (1 << 12)
326 #define NUM_F_MINUS_POST (1 << 13)
327 #define NUM_F_EEEE (1 << 14)
328 
329 #define NUM_LSIGN_PRE (-1)
330 #define NUM_LSIGN_POST 1
331 #define NUM_LSIGN_NONE 0
332 
333 /* ----------
334  * Tests
335  * ----------
336  */
337 #define IS_DECIMAL(_f) ((_f)->flag & NUM_F_DECIMAL)
338 #define IS_LDECIMAL(_f) ((_f)->flag & NUM_F_LDECIMAL)
339 #define IS_ZERO(_f) ((_f)->flag & NUM_F_ZERO)
340 #define IS_BLANK(_f) ((_f)->flag & NUM_F_BLANK)
341 #define IS_FILLMODE(_f) ((_f)->flag & NUM_F_FILLMODE)
342 #define IS_BRACKET(_f) ((_f)->flag & NUM_F_BRACKET)
343 #define IS_MINUS(_f) ((_f)->flag & NUM_F_MINUS)
344 #define IS_LSIGN(_f) ((_f)->flag & NUM_F_LSIGN)
345 #define IS_PLUS(_f) ((_f)->flag & NUM_F_PLUS)
346 #define IS_ROMAN(_f) ((_f)->flag & NUM_F_ROMAN)
347 #define IS_MULTI(_f) ((_f)->flag & NUM_F_MULTI)
348 #define IS_EEEE(_f) ((_f)->flag & NUM_F_EEEE)
349 
350 /* ----------
351  * Format picture cache
352  *
353  * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long;
354  * likewise number format pictures up to NUM_CACHE_SIZE bytes long.
355  *
356  * For simplicity, the cache entries are fixed-size, so they allow for the
357  * worst case of a FormatNode for each byte in the picture string.
358  *
359  * The max number of entries in the caches is DCH_CACHE_ENTRIES
360  * resp. NUM_CACHE_ENTRIES.
361  * ----------
362  */
363 #define NUM_CACHE_SIZE 64
364 #define NUM_CACHE_ENTRIES 20
365 #define DCH_CACHE_SIZE 128
366 #define DCH_CACHE_ENTRIES 20
367 
368 typedef struct
369 {
371  char str[DCH_CACHE_SIZE + 1];
372  bool valid;
373  int age;
374 } DCHCacheEntry;
375 
376 typedef struct
377 {
379  char str[NUM_CACHE_SIZE + 1];
380  bool valid;
381  int age;
383 } NUMCacheEntry;
384 
385 /* global cache for date/time format pictures */
387 static int n_DCHCache = 0; /* current number of entries */
388 static int DCHCounter = 0; /* aging-event counter */
389 
390 /* global cache for number format pictures */
392 static int n_NUMCache = 0; /* current number of entries */
393 static int NUMCounter = 0; /* aging-event counter */
394 
395 /* ----------
396  * For char->date/time conversion
397  * ----------
398  */
399 typedef struct
400 {
402  int hh,
403  pm,
404  mi,
405  ss,
406  ssss,
407  d, /* stored as 1-7, Sunday = 1, 0 means missing */
408  dd,
409  ddd,
410  mm,
411  ms,
412  year,
413  bc,
414  ww,
415  w,
416  cc,
417  j,
418  us,
419  yysz, /* is it YY or YYYY ? */
420  clock, /* 12 or 24 hour clock? */
421  tzsign, /* +1, -1 or 0 if timezone info is absent */
422  tzh,
423  tzm;
424 } TmFromChar;
425 
426 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
427 
428 /* ----------
429  * Debug
430  * ----------
431  */
432 #ifdef DEBUG_TO_FROM_CHAR
/*
 * Log the members of *_X from "mode" through "clock" at DEBUG_elog_output
 * level.
 *
 * The expansion deliberately has no trailing semicolon: the caller writes
 * "DEBUG_TMFC(x);", which must expand to exactly one statement so that the
 * macro can be used in an unbraced if/else.
 */
#define DEBUG_TMFC(_X) \
	elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d", \
		(_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
		(_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
		(_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
		(_X)->yysz, (_X)->clock)
/*
 * Log the listed tm_* members of *_X at DEBUG_elog_output level.
 * Each value is printed on its own line as "<label> <value>"; the label for
 * tm_isdst is "isdst".  No trailing semicolon: the caller supplies it.
 */
#define DEBUG_TM(_X) \
	elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nisdst %d\nmon %d\n",\
		(_X)->tm_sec, (_X)->tm_year,\
		(_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\
		(_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
444 #else
445 #define DEBUG_TMFC(_X)
446 #define DEBUG_TM(_X)
447 #endif
448 
449 /* ----------
450  * Datetime to char conversion
451  * ----------
452  */
/*
 * Input bundle for the datetime-to-text direction; DCH_to_char() takes a
 * pointer to one of these.  The members are normally reached through the
 * tmtcTm() / tmtcFsec() / tmtcTzn() accessor macros defined right below.
 */
453 typedef struct TmToChar
454 {
455  struct pg_tm tm; /* classic 'tm' struct */
456  fsec_t fsec; /* fractional seconds */
457  const char *tzn; /* timezone (presumably the zone name/abbreviation; may be NULL, see ZERO_tmtc) */
458 } TmToChar;
459 
460 #define tmtcTm(_X) (&(_X)->tm)
461 #define tmtcTzn(_X) ((_X)->tzn)
462 #define tmtcFsec(_X) ((_X)->fsec)
463 
464 #define ZERO_tm(_X) \
465 do { \
466  (_X)->tm_sec = (_X)->tm_year = (_X)->tm_min = (_X)->tm_wday = \
467  (_X)->tm_hour = (_X)->tm_yday = (_X)->tm_isdst = 0; \
468  (_X)->tm_mday = (_X)->tm_mon = 1; \
469  (_X)->tm_zone = NULL; \
470 } while(0)
471 
472 #define ZERO_tmtc(_X) \
473 do { \
474  ZERO_tm( tmtcTm(_X) ); \
475  tmtcFsec(_X) = 0; \
476  tmtcTzn(_X) = NULL; \
477 } while(0)
478 
479 /*
480  * to_char(time) appears to to_char() as an interval, so this check
481  * is really for interval and time data types.
482  */
483 #define INVALID_FOR_INTERVAL \
484 do { \
485  if (is_interval) \
486  ereport(ERROR, \
487  (errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
488  errmsg("invalid format specification for an interval value"), \
489  errhint("Intervals are not tied to specific calendar dates."))); \
490 } while(0)
491 
492 /*****************************************************************************
493  * KeyWord definitions
494  *****************************************************************************/
495 
496 /* ----------
497  * Suffixes:
498  * ----------
499  */
500 #define DCH_S_FM 0x01
501 #define DCH_S_TH 0x02
502 #define DCH_S_th 0x04
503 #define DCH_S_SP 0x08
504 #define DCH_S_TM 0x10
505 
506 /* ----------
507  * Suffix tests
508  * ----------
509  */
510 #define S_THth(_s) ((((_s) & DCH_S_TH) || ((_s) & DCH_S_th)) ? 1 : 0)
511 #define S_TH(_s) (((_s) & DCH_S_TH) ? 1 : 0)
512 #define S_th(_s) (((_s) & DCH_S_th) ? 1 : 0)
513 #define S_TH_TYPE(_s) (((_s) & DCH_S_TH) ? TH_UPPER : TH_LOWER)
514 
515 /* Oracle toggles FM behavior, we don't; see docs. */
516 #define S_FM(_s) (((_s) & DCH_S_FM) ? 1 : 0)
517 #define S_SP(_s) (((_s) & DCH_S_SP) ? 1 : 0)
518 #define S_TM(_s) (((_s) & DCH_S_TM) ? 1 : 0)
519 
520 /* ----------
521  * Suffixes definition for DATE-TIME TO/FROM CHAR
522  * ----------
523  */
524 #define TM_SUFFIX_LEN 2
525 
/*
 * Keyword modifiers recognized by the date/time version.
 *
 * Each entry is {text, length, DCH_S_* flag, SUFFTYPE_*}.  FM and TM are
 * prefixes and are listed in both upper and lower case; TH and th are
 * postfixes that map to two distinct flags; SP is a postfix with no
 * lower-case entry.  The final entry with a NULL name terminates the table
 * (suff_search() stops when it reaches it).
 */
526 static const KeySuffix DCH_suff[] = {
527  {"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
528  {"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
529  {"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX},
530  {"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
531  {"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
532  {"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
533  {"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
534  /* last */
535  {NULL, 0, 0, 0}
536 };
537 
538 
539 /* ----------
540  * Format-pictures (KeyWord).
541  *
542  * The KeyWord array is sorted alphabetically, *BUT* keywords that look
543  * alike are ordered from the more complicated to the simpler one:
544  *
545  * (example: "DDD","DD","Day","D" )
546  *
547  * (This ordering is needed because the sequential search works on strings
548  * that have no explicit end. Which keyword is in "HH12blabla" - "HH"
549  * or "HH12"? "HH12" must be tried first, because "HH" also matches the
550  * string, but it would be the wrong answer.)
551  *
552  * (!)
553  * - Position for the keyword is similar as position in the enum DCH/NUM_poz.
554  * (!)
555  *
556  * For fast lookup an 'int index[]' is used: the index covers the ASCII table
557  * from position 32 (' ') to 126 (~) and holds the DCH_ / NUM_ enum value for
558  * each ASCII position, or -1 if the char does not start any KeyWord. Search
559  * example for string "MM":
560  * 1) look up index['M' - 32],
561  * 2) take the keyword position (enum DCH_MI) from the index,
562  * 3) run a sequential search in keywords[] starting from this position.
563  *
564  * ----------
565  */
566 
567 typedef enum
568 {
583  DCH_FX, /* global suffix */
664 
665  /* last */
667 } DCH_poz;
668 
669 typedef enum
670 {
707 
708  /* last */
710 } NUM_poz;
711 
712 /* ----------
713  * KeyWords for DATE-TIME version
714  * ----------
715  */
/*
 * Keyword table for the date/time version.
 *
 * Entries are grouped by first character (marked by the trailing letter
 * comments) and, within a group, a keyword that has another keyword as a
 * prefix comes before it (e.g. "HH24", "HH12", then "HH").
 * index_seq_search() returns the first entry that matches, so this order
 * must be preserved.
 *
 * Many lower-case spellings reuse the id of the upper-case keyword (e.g.
 * "dd" -> DCH_DD), while others have their own case-specific id (e.g.
 * DCH_day, DCH_month, DCH_a_d).  There are no lower-case entries for OF,
 * TZH and TZM.  The entry with a NULL name terminates the table.
 */
716 static const KeyWord DCH_keywords[] = {
717 /* name, len, id, is_digit, date_mode */
718  {"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE}, /* A */
719  {"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
720  {"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
721  {"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
722  {"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE}, /* B */
723  {"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
724  {"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* C */
725  {"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE}, /* D */
726  {"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
727  {"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
728  {"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
729  {"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
730  {"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
731  {"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
732  {"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE}, /* F */
733  {"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* H */
734  {"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
735  {"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
736  {"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* I */
737  {"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
738  {"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
739  {"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
740  {"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
741  {"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
742  {"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
743  {"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
744  {"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* M */
745  {"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
746  {"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
747  {"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
748  {"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
749  {"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
750  {"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
751  {"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* O */
752  {"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE}, /* P */
753  {"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
754  {"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
755  {"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
756  {"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* S */
757  {"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
758  {"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* T */
759  {"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
760  {"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
761  {"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* U */
762  {"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* W */
763  {"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
764  {"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* Y */
765  {"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
766  {"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
767  {"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
768  {"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
769  {"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE}, /* a */
770  {"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
771  {"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
772  {"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
773  {"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE}, /* b */
774  {"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
775  {"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* c */
776  {"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE}, /* d */
777  {"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
778  {"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
779  {"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
780  {"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
781  {"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE}, /* f */
782  {"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* h */
783  {"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
784  {"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
785  {"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* i */
786  {"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
787  {"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
788  {"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
789  {"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
790  {"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
791  {"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
792  {"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
793  {"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* m */
794  {"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
795  {"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
796  {"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
797  {"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
798  {"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE}, /* p */
799  {"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
800  {"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
801  {"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
802  {"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* s */
803  {"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
804  {"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE}, /* t */
805  {"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* u */
806  {"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* w */
807  {"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
808  {"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* y */
809  {"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
810  {"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
811  {"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
812  {"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
813 
814  /* last */
815  {NULL, 0, 0, 0, 0}
816 };
817 
818 /* ----------
819  * KeyWords for NUMBER version
820  *
821  * The is_digit and date_mode fields are not relevant here.
822  * ----------
823  */
/*
 * Keyword table for the number version.
 *
 * Only name, len and id are given; the remaining KeyWord members are left
 * to default (zero) initialization.  Entries are grouped by first character
 * and, within the 'S'/'s' groups, the two-character keywords precede the
 * single "S"/"s" so that index_seq_search() tries them first.
 *
 * Lower-case spellings reuse the id of the upper-case keyword, except "rn"
 * and "th", which have their own ids (NUM_rn, NUM_th).  The entry with a
 * NULL name terminates the table.
 */
824 static const KeyWord NUM_keywords[] = {
825 /* name, len, id is in Index */
826  {",", 1, NUM_COMMA}, /* , */
827  {".", 1, NUM_DEC}, /* . */
828  {"0", 1, NUM_0}, /* 0 */
829  {"9", 1, NUM_9}, /* 9 */
830  {"B", 1, NUM_B}, /* B */
831  {"C", 1, NUM_C}, /* C */
832  {"D", 1, NUM_D}, /* D */
833  {"EEEE", 4, NUM_E}, /* E */
834  {"FM", 2, NUM_FM}, /* F */
835  {"G", 1, NUM_G}, /* G */
836  {"L", 1, NUM_L}, /* L */
837  {"MI", 2, NUM_MI}, /* M */
838  {"PL", 2, NUM_PL}, /* P */
839  {"PR", 2, NUM_PR},
840  {"RN", 2, NUM_RN}, /* R */
841  {"SG", 2, NUM_SG}, /* S */
842  {"SP", 2, NUM_SP},
843  {"S", 1, NUM_S},
844  {"TH", 2, NUM_TH}, /* T */
845  {"V", 1, NUM_V}, /* V */
846  {"b", 1, NUM_B}, /* b */
847  {"c", 1, NUM_C}, /* c */
848  {"d", 1, NUM_D}, /* d */
849  {"eeee", 4, NUM_E}, /* e */
850  {"fm", 2, NUM_FM}, /* f */
851  {"g", 1, NUM_G}, /* g */
852  {"l", 1, NUM_L}, /* l */
853  {"mi", 2, NUM_MI}, /* m */
854  {"pl", 2, NUM_PL}, /* p */
855  {"pr", 2, NUM_PR},
856  {"rn", 2, NUM_rn}, /* r */
857  {"sg", 2, NUM_SG}, /* s */
858  {"sp", 2, NUM_SP},
859  {"s", 1, NUM_S},
860  {"th", 2, NUM_th}, /* t */
861  {"v", 1, NUM_V}, /* v */
862 
863  /* last */
864  {NULL, 0, 0}
865 };
866 
867 
868 /* ----------
869  * KeyWords index for DATE-TIME version
870  * ----------
871  */
872 static const int DCH_index[KeyWord_INDEX_SIZE] = {
873 /*
874 0 1 2 3 4 5 6 7 8 9
875 */
876  /*---- first 0..31 chars are skipped ----*/
877 
878  -1, -1, -1, -1, -1, -1, -1, -1,
879  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
880  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
881  -1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,
882  DCH_FX, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF,
884  -1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,
885  DCH_day, -1, DCH_fx, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,
886  -1, -1, DCH_p_m, DCH_q, DCH_rm, DCH_ssss, DCH_tz, DCH_us, -1, DCH_ww,
887  -1, DCH_y_yyy, -1, -1, -1, -1
888 
889  /*---- chars over 126 are skipped ----*/
890 };
891 
892 /* ----------
893  * KeyWords index for NUMBER version
894  * ----------
895  */
/*
 * First-character lookup table for NUM_keywords[].
 *
 * Entry i belongs to the character ' ' + i.  A non-negative entry is the
 * position in NUM_keywords[] at which index_seq_search() starts scanning for
 * keywords beginning with that character; -1 means no number keyword starts
 * with it.  The first row has 8 entries, the following rows 10 each, and the
 * last row 6, for KeyWord_INDEX_SIZE entries in total.
 */
896 static const int NUM_index[KeyWord_INDEX_SIZE] = {
897 /*
898 0 1 2 3 4 5 6 7 8 9
899 */
900  /*---- first 0..31 chars are skipped ----*/
901 
902  -1, -1, -1, -1, -1, -1, -1, -1,
903  -1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,
904  -1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,
905  -1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E,
906  NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,
907  NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,
908  -1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,
909  NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi,
910  -1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,
911  -1, -1, -1, -1, -1, -1
912 
913  /*---- chars over 126 are skipped ----*/
914 };
915 
916 /* ----------
917  * Number processor struct
918  * ----------
919  */
920 typedef struct NUMProc
921 {
923  NUMDesc *Num; /* number description */
924 
925  int sign, /* '-' or '+' */
926  sign_wrote, /* was sign write */
927  num_count, /* number of write digits */
928  num_in, /* is inside number */
929  num_curr, /* current position in number */
930  out_pre_spaces, /* spaces before first digit */
931 
932  read_dec, /* to_number - was read dec. point */
933  read_post, /* to_number - number of dec. digit */
934  read_pre; /* to_number - number non-dec. digit */
935 
936  char *number, /* string with number */
937  *number_p, /* pointer to current number position */
938  *inout, /* in / out buffer */
939  *inout_p, /* pointer to current inout position */
940  *last_relevant, /* last relevant number after decimal point */
941 
942  *L_negative_sign, /* Locale */
943  *L_positive_sign,
944  *decimal,
945  *L_thousands_sep,
946  *L_currency_symbol;
947 } NUMProc;
948 
949 
950 /* ----------
951  * Functions
952  * ----------
953  */
954 static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
955  const int *index);
956 static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
957 static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
958 static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
959  const KeySuffix *suf, const int *index, int ver, NUMDesc *Num);
960 
961 static void DCH_to_char(FormatNode *node, bool is_interval,
962  TmToChar *in, char *out, Oid collid);
963 static void DCH_from_char(FormatNode *node, char *in, TmFromChar *out);
964 
965 #ifdef DEBUG_TO_FROM_CHAR
966 static void dump_index(const KeyWord *k, const int *index);
967 static void dump_node(FormatNode *node, int max);
968 #endif
969 
970 static const char *get_th(char *num, int type);
971 static char *str_numth(char *dest, char *num, int type);
972 static int adjust_partial_year_to_2020(int year);
973 static int strspace_len(char *str);
974 static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode);
975 static void from_char_set_int(int *dest, const int value, const FormatNode *node);
976 static int from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node);
977 static int from_char_parse_int(int *dest, char **src, FormatNode *node);
978 static int seq_search(char *name, const char *const *array, int type, int max, int *len);
979 static int from_char_seq_search(int *dest, char **src, const char *const *array, int type, int max, FormatNode *node);
980 static void do_to_timestamp(text *date_txt, text *fmt,
981  struct pg_tm *tm, fsec_t *fsec);
982 static char *fill_str(char *str, int c, int max);
983 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
984 static char *int_to_roman(int number);
985 static void NUM_prepare_locale(NUMProc *Np);
986 static char *get_last_relevant_decnum(char *num);
987 static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
988 static void NUM_numpart_to_char(NUMProc *Np, int id);
989 static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
990  char *number, int input_len, int to_char_out_pre_spaces,
991  int sign, bool is_to_char, Oid collid);
992 static DCHCacheEntry *DCH_cache_getnew(const char *str);
993 static DCHCacheEntry *DCH_cache_search(const char *str);
994 static DCHCacheEntry *DCH_cache_fetch(const char *str);
995 static NUMCacheEntry *NUM_cache_getnew(const char *str);
996 static NUMCacheEntry *NUM_cache_search(const char *str);
997 static NUMCacheEntry *NUM_cache_fetch(const char *str);
998 
999 
1000 /* ----------
1001  * Fast sequential search, use index for data selection which
1002  * go to seq. cycle (it is very fast for unwanted strings)
1003  * (can't be used binary search in format parsing)
1004  * ----------
1005  */
1006 static const KeyWord *
1007 index_seq_search(const char *str, const KeyWord *kw, const int *index)
1008 {
1009  int poz;
1010 
1011  if (!KeyWord_INDEX_FILTER(*str))
1012  return NULL;
1013 
1014  if ((poz = *(index + (*str - ' '))) > -1)
1015  {
1016  const KeyWord *k = kw + poz;
1017 
1018  do
1019  {
1020  if (strncmp(str, k->name, k->len) == 0)
1021  return k;
1022  k++;
1023  if (!k->name)
1024  return NULL;
1025  } while (*str == *k->name);
1026  }
1027  return NULL;
1028 }
1029 
1030 static const KeySuffix *
1031 suff_search(const char *str, const KeySuffix *suf, int type)
1032 {
1033  const KeySuffix *s;
1034 
1035  for (s = suf; s->name != NULL; s++)
1036  {
1037  if (s->type != type)
1038  continue;
1039 
1040  if (strncmp(str, s->name, s->len) == 0)
1041  return s;
1042  }
1043  return NULL;
1044 }
1045 
1046 /* ----------
1047  * Prepare NUMDesc (number description struct) via FormatNode struct
1048  * ----------
1049  */
1050 static void
1052 {
1053  if (n->type != NODE_TYPE_ACTION)
1054  return;
1055 
1056  if (IS_EEEE(num) && n->key->id != NUM_E)
1057  ereport(ERROR,
1058  (errcode(ERRCODE_SYNTAX_ERROR),
1059  errmsg("\"EEEE\" must be the last pattern used")));
1060 
1061  switch (n->key->id)
1062  {
1063  case NUM_9:
1064  if (IS_BRACKET(num))
1065  ereport(ERROR,
1066  (errcode(ERRCODE_SYNTAX_ERROR),
1067  errmsg("\"9\" must be ahead of \"PR\"")));
1068  if (IS_MULTI(num))
1069  {
1070  ++num->multi;
1071  break;
1072  }
1073  if (IS_DECIMAL(num))
1074  ++num->post;
1075  else
1076  ++num->pre;
1077  break;
1078 
1079  case NUM_0:
1080  if (IS_BRACKET(num))
1081  ereport(ERROR,
1082  (errcode(ERRCODE_SYNTAX_ERROR),
1083  errmsg("\"0\" must be ahead of \"PR\"")));
1084  if (!IS_ZERO(num) && !IS_DECIMAL(num))
1085  {
1086  num->flag |= NUM_F_ZERO;
1087  num->zero_start = num->pre + 1;
1088  }
1089  if (!IS_DECIMAL(num))
1090  ++num->pre;
1091  else
1092  ++num->post;
1093 
1094  num->zero_end = num->pre + num->post;
1095  break;
1096 
1097  case NUM_B:
1098  if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
1099  num->flag |= NUM_F_BLANK;
1100  break;
1101 
1102  case NUM_D:
1103  num->flag |= NUM_F_LDECIMAL;
1104  num->need_locale = true;
1105  /* FALLTHROUGH */
1106  case NUM_DEC:
1107  if (IS_DECIMAL(num))
1108  ereport(ERROR,
1109  (errcode(ERRCODE_SYNTAX_ERROR),
1110  errmsg("multiple decimal points")));
1111  if (IS_MULTI(num))
1112  ereport(ERROR,
1113  (errcode(ERRCODE_SYNTAX_ERROR),
1114  errmsg("cannot use \"V\" and decimal point together")));
1115  num->flag |= NUM_F_DECIMAL;
1116  break;
1117 
1118  case NUM_FM:
1119  num->flag |= NUM_F_FILLMODE;
1120  break;
1121 
1122  case NUM_S:
1123  if (IS_LSIGN(num))
1124  ereport(ERROR,
1125  (errcode(ERRCODE_SYNTAX_ERROR),
1126  errmsg("cannot use \"S\" twice")));
1127  if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
1128  ereport(ERROR,
1129  (errcode(ERRCODE_SYNTAX_ERROR),
1130  errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
1131  if (!IS_DECIMAL(num))
1132  {
1133  num->lsign = NUM_LSIGN_PRE;
1134  num->pre_lsign_num = num->pre;
1135  num->need_locale = true;
1136  num->flag |= NUM_F_LSIGN;
1137  }
1138  else if (num->lsign == NUM_LSIGN_NONE)
1139  {
1140  num->lsign = NUM_LSIGN_POST;
1141  num->need_locale = true;
1142  num->flag |= NUM_F_LSIGN;
1143  }
1144  break;
1145 
1146  case NUM_MI:
1147  if (IS_LSIGN(num))
1148  ereport(ERROR,
1149  (errcode(ERRCODE_SYNTAX_ERROR),
1150  errmsg("cannot use \"S\" and \"MI\" together")));
1151  num->flag |= NUM_F_MINUS;
1152  if (IS_DECIMAL(num))
1153  num->flag |= NUM_F_MINUS_POST;
1154  break;
1155 
1156  case NUM_PL:
1157  if (IS_LSIGN(num))
1158  ereport(ERROR,
1159  (errcode(ERRCODE_SYNTAX_ERROR),
1160  errmsg("cannot use \"S\" and \"PL\" together")));
1161  num->flag |= NUM_F_PLUS;
1162  if (IS_DECIMAL(num))
1163  num->flag |= NUM_F_PLUS_POST;
1164  break;
1165 
1166  case NUM_SG:
1167  if (IS_LSIGN(num))
1168  ereport(ERROR,
1169  (errcode(ERRCODE_SYNTAX_ERROR),
1170  errmsg("cannot use \"S\" and \"SG\" together")));
1171  num->flag |= NUM_F_MINUS;
1172  num->flag |= NUM_F_PLUS;
1173  break;
1174 
1175  case NUM_PR:
1176  if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
1177  ereport(ERROR,
1178  (errcode(ERRCODE_SYNTAX_ERROR),
1179  errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
1180  num->flag |= NUM_F_BRACKET;
1181  break;
1182 
1183  case NUM_rn:
1184  case NUM_RN:
1185  num->flag |= NUM_F_ROMAN;
1186  break;
1187 
1188  case NUM_L:
1189  case NUM_G:
1190  num->need_locale = true;
1191  break;
1192 
1193  case NUM_V:
1194  if (IS_DECIMAL(num))
1195  ereport(ERROR,
1196  (errcode(ERRCODE_SYNTAX_ERROR),
1197  errmsg("cannot use \"V\" and decimal point together")));
1198  num->flag |= NUM_F_MULTI;
1199  break;
1200 
1201  case NUM_E:
1202  if (IS_EEEE(num))
1203  ereport(ERROR,
1204  (errcode(ERRCODE_SYNTAX_ERROR),
1205  errmsg("cannot use \"EEEE\" twice")));
1206  if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
1207  IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
1208  IS_ROMAN(num) || IS_MULTI(num))
1209  ereport(ERROR,
1210  (errcode(ERRCODE_SYNTAX_ERROR),
1211  errmsg("\"EEEE\" is incompatible with other formats"),
1212  errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
1213  num->flag |= NUM_F_EEEE;
1214  break;
1215  }
1216 }
1217 
1218 /* ----------
1219  * Format parser, search small keywords and keyword's suffixes, and make
1220  * format-node tree.
1221  *
1222  * for DATE-TIME & NUMBER version
1223  * ----------
1224  */
1225 static void
1226 parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1227  const KeySuffix *suf, const int *index, int ver, NUMDesc *Num)
1228 {
1229  FormatNode *n;
1230 
1231 #ifdef DEBUG_TO_FROM_CHAR
1232  elog(DEBUG_elog_output, "to_char/number(): run parser");
1233 #endif
1234 
1235  n = node;
1236 
1237  while (*str)
1238  {
1239  int suffix = 0;
1240  const KeySuffix *s;
1241 
1242  /*
1243  * Prefix
1244  */
1245  if (ver == DCH_TYPE &&
1246  (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
1247  {
1248  suffix |= s->id;
1249  if (s->len)
1250  str += s->len;
1251  }
1252 
1253  /*
1254  * Keyword
1255  */
1256  if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
1257  {
1258  n->type = NODE_TYPE_ACTION;
1259  n->suffix = suffix;
1260  if (n->key->len)
1261  str += n->key->len;
1262 
1263  /*
1264  * NUM version: Prepare global NUMDesc struct
1265  */
1266  if (ver == NUM_TYPE)
1267  NUMDesc_prepare(Num, n);
1268 
1269  /*
1270  * Postfix
1271  */
1272  if (ver == DCH_TYPE && *str &&
1273  (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
1274  {
1275  n->suffix |= s->id;
1276  if (s->len)
1277  str += s->len;
1278  }
1279 
1280  n++;
1281  }
1282  else if (*str)
1283  {
1284  int chlen;
1285 
1286  /*
1287  * Process double-quoted literal string, if any
1288  */
1289  if (*str == '"')
1290  {
1291  str++;
1292  while (*str)
1293  {
1294  if (*str == '"')
1295  {
1296  str++;
1297  break;
1298  }
1299  /* backslash quotes the next character, if any */
1300  if (*str == '\\' && *(str + 1))
1301  str++;
1302  chlen = pg_mblen(str);
1303  n->type = NODE_TYPE_CHAR;
1304  memcpy(n->character, str, chlen);
1305  n->character[chlen] = '\0';
1306  n->key = NULL;
1307  n->suffix = 0;
1308  n++;
1309  str += chlen;
1310  }
1311  }
1312  else
1313  {
1314  /*
1315  * Outside double-quoted strings, backslash is only special if
1316  * it immediately precedes a double quote.
1317  */
1318  if (*str == '\\' && *(str + 1) == '"')
1319  str++;
1320  chlen = pg_mblen(str);
1321  n->type = NODE_TYPE_CHAR;
1322  memcpy(n->character, str, chlen);
1323  n->character[chlen] = '\0';
1324  n->key = NULL;
1325  n->suffix = 0;
1326  n++;
1327  str += chlen;
1328  }
1329  }
1330  }
1331 
1332  n->type = NODE_TYPE_END;
1333  n->suffix = 0;
1334 }
1335 
1336 /* ----------
1337  * DEBUG: Dump the FormatNode Tree (debug)
1338  * ----------
1339  */
1340 #ifdef DEBUG_TO_FROM_CHAR
1341 
1342 #define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
1343 #define DUMP_FM(_suf) (S_FM(_suf) ? "FM" : " ")
1344 
1345 static void
1346 dump_node(FormatNode *node, int max)
1347 {
1348  FormatNode *n;
1349  int a;
1350 
1351  elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");
1352 
1353  for (a = 0, n = node; a <= max; n++, a++)
1354  {
1355  if (n->type == NODE_TYPE_ACTION)
1356  elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
1357  a, n->key->name, DUMP_THth(n->suffix), DUMP_FM(n->suffix));
1358  else if (n->type == NODE_TYPE_CHAR)
1359  elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
1360  a, n->character);
1361  else if (n->type == NODE_TYPE_END)
1362  {
1363  elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", a);
1364  return;
1365  }
1366  else
1367  elog(DEBUG_elog_output, "%d:\t unknown NODE!", a);
1368  }
1369 }
1370 #endif /* DEBUG */
1371 
1372 /*****************************************************************************
1373  * Private utils
1374  *****************************************************************************/
1375 
1376 /* ----------
1377  * Return ST/ND/RD/TH for simple (1..9) numbers
1378  * type --> 0 upper, 1 lower
1379  * ----------
1380  */
1381 static const char *
1382 get_th(char *num, int type)
1383 {
1384  int len = strlen(num),
1385  last,
1386  seclast;
1387 
1388  last = *(num + (len - 1));
1389  if (!isdigit((unsigned char) last))
1390  ereport(ERROR,
1391  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1392  errmsg("\"%s\" is not a number", num)));
1393 
1394  /*
1395  * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
1396  * 'ST/st', 'ND/nd', 'RD/rd', respectively
1397  */
1398  if ((len > 1) && ((seclast = num[len - 2]) == '1'))
1399  last = 0;
1400 
1401  switch (last)
1402  {
1403  case '1':
1404  if (type == TH_UPPER)
1405  return numTH[0];
1406  return numth[0];
1407  case '2':
1408  if (type == TH_UPPER)
1409  return numTH[1];
1410  return numth[1];
1411  case '3':
1412  if (type == TH_UPPER)
1413  return numTH[2];
1414  return numth[2];
1415  default:
1416  if (type == TH_UPPER)
1417  return numTH[3];
1418  return numth[3];
1419  }
1420 }
1421 
/* ----------
 * Convert string-number to ordinal string-number
 *
 * Copies 'num' into 'dest' (unless they are the same buffer) and appends
 * the suffix chosen by get_th(num, type).  Returns 'dest'.  The caller
 * must supply a 'dest' with room for the appended suffix.
 * ----------
 */
static char *
str_numth(char *dest, char *num, int type)
{
	const char *ordinal;

	if (dest != num)
		strcpy(dest, num);

	/* look up the suffix before anything is appended to the buffer */
	ordinal = get_th(num, type);

	return strcat(dest, ordinal);
}
1435 
1436 /*****************************************************************************
1437  * upper/lower/initcap functions
1438  *****************************************************************************/
1439 
1440 #ifdef USE_ICU
1441 
/*
 * Signature of the case-conversion callbacks handed to icu_convert_case():
 * convert srcLength UChars from src into dest (capacity destCapacity) for
 * the given locale, reporting status through pErrorCode.
 */
typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
									 const UChar *src, int32_t srcLength,
									 const char *locale,
									 UErrorCode *pErrorCode);
1446 
1447 static int32_t
1448 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
1449  UChar **buff_dest, UChar *buff_source, int32_t len_source)
1450 {
1451  UErrorCode status;
1452  int32_t len_dest;
1453 
1454  len_dest = len_source; /* try first with same length */
1455  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1456  status = U_ZERO_ERROR;
1457  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1458  mylocale->info.icu.locale, &status);
1459  if (status == U_BUFFER_OVERFLOW_ERROR)
1460  {
1461  /* try again with adjusted length */
1462  pfree(*buff_dest);
1463  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1464  status = U_ZERO_ERROR;
1465  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1466  mylocale->info.icu.locale, &status);
1467  }
1468  if (U_FAILURE(status))
1469  ereport(ERROR,
1470  (errmsg("case conversion failed: %s", u_errorName(status))));
1471  return len_dest;
1472 }
1473 
1474 static int32_t
1475 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
1476  const UChar *src, int32_t srcLength,
1477  const char *locale,
1478  UErrorCode *pErrorCode)
1479 {
1480  return u_strToTitle(dest, destCapacity, src, srcLength,
1481  NULL, locale, pErrorCode);
1482 }
1483 
1484 #endif /* USE_ICU */
1485 
1486 /*
1487  * If the system provides the needed functions for wide-character manipulation
1488  * (which are all standardized by C99), then we implement upper/lower/initcap
1489  * using wide-character functions, if necessary. Otherwise we use the
1490  * traditional <ctype.h> functions, which of course will not work as desired
1491  * in multibyte character sets. Note that in either case we are effectively
1492  * assuming that the database character encoding matches the encoding implied
1493  * by LC_CTYPE.
1494  *
1495  * If the system provides locale_t and associated functions (which are
1496  * standardized by Open Group's XBD), we can support collations that are
1497  * neither default nor C. The code is written to handle both combinations
1498  * of have-wide-characters and have-locale_t, though it's rather unlikely
1499  * a platform would have the latter without the former.
1500  */
1501 
1502 /*
1503  * collation-aware, wide-character-aware lower function
1504  *
1505  * We pass the number of bytes so we can pass varlena and char*
1506  * to this function. The result is a palloc'd, null-terminated string.
1507  */
1508 char *
1509 str_tolower(const char *buff, size_t nbytes, Oid collid)
1510 {
1511  char *result;
1512 
1513  if (!buff)
1514  return NULL;
1515 
1516  /* C/POSIX collations use this path regardless of database encoding */
1517  if (lc_ctype_is_c(collid))
1518  {
1519  result = asc_tolower(buff, nbytes);
1520  }
1521  else
1522  {
1523  pg_locale_t mylocale = 0;
1524 
1525  if (collid != DEFAULT_COLLATION_OID)
1526  {
1527  if (!OidIsValid(collid))
1528  {
1529  /*
1530  * This typically means that the parser could not resolve a
1531  * conflict of implicit collations, so report it that way.
1532  */
1533  ereport(ERROR,
1534  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1535  errmsg("could not determine which collation to use for lower() function"),
1536  errhint("Use the COLLATE clause to set the collation explicitly.")));
1537  }
1538  mylocale = pg_newlocale_from_collation(collid);
1539  }
1540 
1541 #ifdef USE_ICU
1542  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1543  {
1544  int32_t len_uchar;
1545  int32_t len_conv;
1546  UChar *buff_uchar;
1547  UChar *buff_conv;
1548 
1549  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1550  len_conv = icu_convert_case(u_strToLower, mylocale,
1551  &buff_conv, buff_uchar, len_uchar);
1552  icu_from_uchar(&result, buff_conv, len_conv);
1553  pfree(buff_uchar);
1554  }
1555  else
1556 #endif
1557  {
1559  {
1560  wchar_t *workspace;
1561  size_t curr_char;
1562  size_t result_size;
1563 
1564  /* Overflow paranoia */
1565  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1566  ereport(ERROR,
1567  (errcode(ERRCODE_OUT_OF_MEMORY),
1568  errmsg("out of memory")));
1569 
1570  /* Output workspace cannot have more codes than input bytes */
1571  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1572 
1573  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1574 
1575  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1576  {
1577 #ifdef HAVE_LOCALE_T
1578  if (mylocale)
1579  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1580  else
1581 #endif
1582  workspace[curr_char] = towlower(workspace[curr_char]);
1583  }
1584 
1585  /*
1586  * Make result large enough; case change might change number
1587  * of bytes
1588  */
1589  result_size = curr_char * pg_database_encoding_max_length() + 1;
1590  result = palloc(result_size);
1591 
1592  wchar2char(result, workspace, result_size, mylocale);
1593  pfree(workspace);
1594  }
1595  else
1596  {
1597  char *p;
1598 
1599  result = pnstrdup(buff, nbytes);
1600 
1601  /*
1602  * Note: we assume that tolower_l() will not be so broken as
1603  * to need an isupper_l() guard test. When using the default
1604  * collation, we apply the traditional Postgres behavior that
1605  * forces ASCII-style treatment of I/i, but in non-default
1606  * collations you get exactly what the collation says.
1607  */
1608  for (p = result; *p; p++)
1609  {
1610 #ifdef HAVE_LOCALE_T
1611  if (mylocale)
1612  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1613  else
1614 #endif
1615  *p = pg_tolower((unsigned char) *p);
1616  }
1617  }
1618  }
1619  }
1620 
1621  return result;
1622 }
1623 
1624 /*
1625  * collation-aware, wide-character-aware upper function
1626  *
1627  * We pass the number of bytes so we can pass varlena and char*
1628  * to this function. The result is a palloc'd, null-terminated string.
1629  */
1630 char *
1631 str_toupper(const char *buff, size_t nbytes, Oid collid)
1632 {
1633  char *result;
1634 
1635  if (!buff)
1636  return NULL;
1637 
1638  /* C/POSIX collations use this path regardless of database encoding */
1639  if (lc_ctype_is_c(collid))
1640  {
1641  result = asc_toupper(buff, nbytes);
1642  }
1643  else
1644  {
1645  pg_locale_t mylocale = 0;
1646 
1647  if (collid != DEFAULT_COLLATION_OID)
1648  {
1649  if (!OidIsValid(collid))
1650  {
1651  /*
1652  * This typically means that the parser could not resolve a
1653  * conflict of implicit collations, so report it that way.
1654  */
1655  ereport(ERROR,
1656  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1657  errmsg("could not determine which collation to use for upper() function"),
1658  errhint("Use the COLLATE clause to set the collation explicitly.")));
1659  }
1660  mylocale = pg_newlocale_from_collation(collid);
1661  }
1662 
1663 #ifdef USE_ICU
1664  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1665  {
1666  int32_t len_uchar,
1667  len_conv;
1668  UChar *buff_uchar;
1669  UChar *buff_conv;
1670 
1671  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1672  len_conv = icu_convert_case(u_strToUpper, mylocale,
1673  &buff_conv, buff_uchar, len_uchar);
1674  icu_from_uchar(&result, buff_conv, len_conv);
1675  pfree(buff_uchar);
1676  }
1677  else
1678 #endif
1679  {
1681  {
1682  wchar_t *workspace;
1683  size_t curr_char;
1684  size_t result_size;
1685 
1686  /* Overflow paranoia */
1687  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1688  ereport(ERROR,
1689  (errcode(ERRCODE_OUT_OF_MEMORY),
1690  errmsg("out of memory")));
1691 
1692  /* Output workspace cannot have more codes than input bytes */
1693  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1694 
1695  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1696 
1697  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1698  {
1699 #ifdef HAVE_LOCALE_T
1700  if (mylocale)
1701  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1702  else
1703 #endif
1704  workspace[curr_char] = towupper(workspace[curr_char]);
1705  }
1706 
1707  /*
1708  * Make result large enough; case change might change number
1709  * of bytes
1710  */
1711  result_size = curr_char * pg_database_encoding_max_length() + 1;
1712  result = palloc(result_size);
1713 
1714  wchar2char(result, workspace, result_size, mylocale);
1715  pfree(workspace);
1716  }
1717  else
1718  {
1719  char *p;
1720 
1721  result = pnstrdup(buff, nbytes);
1722 
1723  /*
1724  * Note: we assume that toupper_l() will not be so broken as
1725  * to need an islower_l() guard test. When using the default
1726  * collation, we apply the traditional Postgres behavior that
1727  * forces ASCII-style treatment of I/i, but in non-default
1728  * collations you get exactly what the collation says.
1729  */
1730  for (p = result; *p; p++)
1731  {
1732 #ifdef HAVE_LOCALE_T
1733  if (mylocale)
1734  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1735  else
1736 #endif
1737  *p = pg_toupper((unsigned char) *p);
1738  }
1739  }
1740  }
1741  }
1742 
1743  return result;
1744 }
1745 
1746 /*
1747  * collation-aware, wide-character-aware initcap function
1748  *
1749  * We pass the number of bytes so we can pass varlena and char*
1750  * to this function. The result is a palloc'd, null-terminated string.
1751  */
1752 char *
1753 str_initcap(const char *buff, size_t nbytes, Oid collid)
1754 {
1755  char *result;
1756  int wasalnum = false;
1757 
1758  if (!buff)
1759  return NULL;
1760 
1761  /* C/POSIX collations use this path regardless of database encoding */
1762  if (lc_ctype_is_c(collid))
1763  {
1764  result = asc_initcap(buff, nbytes);
1765  }
1766  else
1767  {
1768  pg_locale_t mylocale = 0;
1769 
1770  if (collid != DEFAULT_COLLATION_OID)
1771  {
1772  if (!OidIsValid(collid))
1773  {
1774  /*
1775  * This typically means that the parser could not resolve a
1776  * conflict of implicit collations, so report it that way.
1777  */
1778  ereport(ERROR,
1779  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1780  errmsg("could not determine which collation to use for initcap() function"),
1781  errhint("Use the COLLATE clause to set the collation explicitly.")));
1782  }
1783  mylocale = pg_newlocale_from_collation(collid);
1784  }
1785 
1786 #ifdef USE_ICU
1787  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1788  {
1789  int32_t len_uchar,
1790  len_conv;
1791  UChar *buff_uchar;
1792  UChar *buff_conv;
1793 
1794  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1795  len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
1796  &buff_conv, buff_uchar, len_uchar);
1797  icu_from_uchar(&result, buff_conv, len_conv);
1798  pfree(buff_uchar);
1799  }
1800  else
1801 #endif
1802  {
1804  {
1805  wchar_t *workspace;
1806  size_t curr_char;
1807  size_t result_size;
1808 
1809  /* Overflow paranoia */
1810  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1811  ereport(ERROR,
1812  (errcode(ERRCODE_OUT_OF_MEMORY),
1813  errmsg("out of memory")));
1814 
1815  /* Output workspace cannot have more codes than input bytes */
1816  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1817 
1818  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1819 
1820  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1821  {
1822 #ifdef HAVE_LOCALE_T
1823  if (mylocale)
1824  {
1825  if (wasalnum)
1826  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1827  else
1828  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1829  wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
1830  }
1831  else
1832 #endif
1833  {
1834  if (wasalnum)
1835  workspace[curr_char] = towlower(workspace[curr_char]);
1836  else
1837  workspace[curr_char] = towupper(workspace[curr_char]);
1838  wasalnum = iswalnum(workspace[curr_char]);
1839  }
1840  }
1841 
1842  /*
1843  * Make result large enough; case change might change number
1844  * of bytes
1845  */
1846  result_size = curr_char * pg_database_encoding_max_length() + 1;
1847  result = palloc(result_size);
1848 
1849  wchar2char(result, workspace, result_size, mylocale);
1850  pfree(workspace);
1851  }
1852  else
1853  {
1854  char *p;
1855 
1856  result = pnstrdup(buff, nbytes);
1857 
1858  /*
1859  * Note: we assume that toupper_l()/tolower_l() will not be so
1860  * broken as to need guard tests. When using the default
1861  * collation, we apply the traditional Postgres behavior that
1862  * forces ASCII-style treatment of I/i, but in non-default
1863  * collations you get exactly what the collation says.
1864  */
1865  for (p = result; *p; p++)
1866  {
1867 #ifdef HAVE_LOCALE_T
1868  if (mylocale)
1869  {
1870  if (wasalnum)
1871  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1872  else
1873  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1874  wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
1875  }
1876  else
1877 #endif
1878  {
1879  if (wasalnum)
1880  *p = pg_tolower((unsigned char) *p);
1881  else
1882  *p = pg_toupper((unsigned char) *p);
1883  wasalnum = isalnum((unsigned char) *p);
1884  }
1885  }
1886  }
1887  }
1888  }
1889 
1890  return result;
1891 }
1892 
/*
 * ASCII-only lower function
 *
 * Takes a byte count so that both varlena payloads and plain char*
 * strings can be passed.  Returns a palloc'd, null-terminated copy with
 * each byte mapped through pg_ascii_tolower(), or NULL if 'buff' is NULL.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *result;
	size_t		i;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	/* convert in place, stopping at the first null byte of the copy */
	for (i = 0; result[i] != '\0'; i++)
		result[i] = pg_ascii_tolower((unsigned char) result[i]);

	return result;
}
1915 
/*
 * ASCII-only upper function
 *
 * Takes a byte count so that both varlena payloads and plain char*
 * strings can be passed.  Returns a palloc'd, null-terminated copy with
 * each byte mapped through pg_ascii_toupper(), or NULL if 'buff' is NULL.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *result;
	size_t		i;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	/* convert in place, stopping at the first null byte of the copy */
	for (i = 0; result[i] != '\0'; i++)
		result[i] = pg_ascii_toupper((unsigned char) result[i]);

	return result;
}
1938 
/*
 * ASCII-only initcap function
 *
 * Takes a byte count so that both varlena payloads and plain char*
 * strings can be passed.  Returns a palloc'd, null-terminated copy in
 * which a byte is upper-cased when the preceding byte was not an ASCII
 * letter or digit (or there is no preceding byte) and lower-cased
 * otherwise.  Returns NULL if 'buff' is NULL.
 */
char *
asc_initcap(const char *buff, size_t nbytes)
{
	char	   *result;
	size_t		i;
	int			in_word = false;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	for (i = 0; result[i] != '\0'; i++)
	{
		char		c;

		if (in_word)
			c = pg_ascii_tolower((unsigned char) result[i]);
		else
			c = pg_ascii_toupper((unsigned char) result[i]);
		result[i] = c;

		/* we don't trust isalnum() here */
		in_word = ((c >= 'A' && c <= 'Z') ||
				   (c >= 'a' && c <= 'z') ||
				   (c >= '0' && c <= '9'));
	}

	return result;
}
1973 
1974 /* convenience routines for when the input is null-terminated */
1975 
1976 static char *
1977 str_tolower_z(const char *buff, Oid collid)
1978 {
1979  return str_tolower(buff, strlen(buff), collid);
1980 }
1981 
1982 static char *
1983 str_toupper_z(const char *buff, Oid collid)
1984 {
1985  return str_toupper(buff, strlen(buff), collid);
1986 }
1987 
1988 static char *
1989 str_initcap_z(const char *buff, Oid collid)
1990 {
1991  return str_initcap(buff, strlen(buff), collid);
1992 }
1993 
/* asc_tolower() for a null-terminated input string */
static char *
asc_tolower_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_tolower(buff, nbytes);
}
1999 
/* asc_toupper() for a null-terminated input string */
static char *
asc_toupper_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_toupper(buff, nbytes);
}
2005 
2006 /* asc_initcap_z is not currently needed */
2007 
2008 
/* ----------
 * Skip TH / th in FROM_CHAR
 *
 * If S_THth is on for the suffix flags '_suf', advance 'ptr' by up to two
 * characters (each measured with pg_mblen), stopping early at the string
 * terminator.  Otherwise 'ptr' is left alone.
 *
 * 'ptr' is evaluated several times and is assigned to, so it must be a
 * side-effect-free lvalue.
 * ----------
 */
#define SKIP_THth(ptr, _suf) \
	do { \
		if (S_THth(_suf)) \
		{ \
			if (*(ptr)) (ptr) += pg_mblen(ptr); \
			if (*(ptr)) (ptr) += pg_mblen(ptr); \
		} \
	} while (0)
2023 
2024 
2025 #ifdef DEBUG_TO_FROM_CHAR
2026 /* -----------
2027  * DEBUG: Call for debug and for index checking; (Show ASCII char
2028  * and defined keyword for each used position
2029  * ----------
2030  */
2031 static void
2032 dump_index(const KeyWord *k, const int *index)
2033 {
2034  int i,
2035  count = 0,
2036  free_i = 0;
2037 
2038  elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");
2039 
2040  for (i = 0; i < KeyWord_INDEX_SIZE; i++)
2041  {
2042  if (index[i] != -1)
2043  {
2044  elog(DEBUG_elog_output, "\t%c: %s, ", i + 32, k[index[i]].name);
2045  count++;
2046  }
2047  else
2048  {
2049  free_i++;
2050  elog(DEBUG_elog_output, "\t(%d) %c %d", i, i + 32, index[i]);
2051  }
2052  }
2053  elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
2054  count, free_i);
2055 }
2056 #endif /* DEBUG */
2057 
2058 /* ----------
2059  * Return true if next format picture is not digit value
2060  * ----------
2061  */
2062 static bool
2064 {
2065  if (n->type == NODE_TYPE_END)
2066  return false;
2067 
2068  if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
2069  return true;
2070 
2071  /*
2072  * Next node
2073  */
2074  n++;
2075 
2076  /* end of format string is treated like a non-digit separator */
2077  if (n->type == NODE_TYPE_END)
2078  return true;
2079 
2080  if (n->type == NODE_TYPE_ACTION)
2081  {
2082  if (n->key->is_digit)
2083  return false;
2084 
2085  return true;
2086  }
2087  else if (n->character[1] == '\0' &&
2088  isdigit((unsigned char) n->character[0]))
2089  return false;
2090 
2091  return true; /* some non-digit input (separator) */
2092 }
2093 
2094 
/*
 * Expand a partially-specified year into a full year.
 *
 * Values below 1000 are shifted by a fixed offset chosen per range (see the
 * comments in the body); values of 1000 and above are returned unchanged.
 */
static int
adjust_partial_year_to_2020(int year)
{
	/*
	 * Adjust all dates toward 2020; this is effectively what happens when we
	 * assume '70' is 1970 and '69' is 2069.
	 */
	/* Force 0-69 into the 2000's */
	if (year < 70)
		return year + 2000;
	/* Force 70-99 into the 1900's */
	else if (year < 100)
		return year + 1900;
	/* Force 100-519 into the 2000's */
	else if (year < 520)
		return year + 2000;
	/* Force 520-999 into the 1000's */
	else if (year < 1000)
		return year + 1000;
	else
		return year;
}
2117 
2118 
/*
 * Return the number of leading whitespace characters (per isspace()) in
 * the null-terminated string 'str'.  The string itself is not modified.
 */
static int
strspace_len(char *str)
{
	int			len = 0;

	while (*str && isspace((unsigned char) *str))
	{
		str++;
		len++;
	}
	return len;
}
2131 
2132 /*
2133  * Set the date mode of a from-char conversion.
2134  *
2135  * Puke if the date mode has already been set, and the caller attempts to set
2136  * it to a conflicting mode.
2137  */
2138 static void
2140 {
2141  if (mode != FROM_CHAR_DATE_NONE)
2142  {
2143  if (tmfc->mode == FROM_CHAR_DATE_NONE)
2144  tmfc->mode = mode;
2145  else if (tmfc->mode != mode)
2146  ereport(ERROR,
2147  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2148  errmsg("invalid combination of date conventions"),
2149  errhint("Do not mix Gregorian and ISO week date "
2150  "conventions in a formatting template.")));
2151  }
2152 }
2153 
2154 /*
2155  * Set the integer pointed to by 'dest' to the given value.
2156  *
2157  * Puke if the destination integer has previously been set to some other
2158  * non-zero value.
2159  */
2160 static void
2161 from_char_set_int(int *dest, const int value, const FormatNode *node)
2162 {
2163  if (*dest != 0 && *dest != value)
2164  ereport(ERROR,
2165  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2166  errmsg("conflicting values for \"%s\" field in formatting string",
2167  node->key->name),
2168  errdetail("This value contradicts a previous setting for "
2169  "the same field type.")));
2170  *dest = value;
2171 }
2172 
/*
 * Read a single integer from the source string, into the int pointed to by
 * 'dest'. If 'dest' is NULL, the result is discarded.
 *
 * dest --> where to store the value (through from_char_set_int), or NULL
 * src	--> in/out cursor into the source string
 * len	--> field width in characters; asserted not to exceed
 *			DCH_MAX_ITEM_SIZ
 * node --> the format node being processed; supplies the FM suffix flag and
 *			the keyword name used in error messages
 *
 * In fixed-width mode (the node does not have the FM suffix), consume at most
 * 'len' characters. However, any leading whitespace isn't counted in 'len'.
 *
 * We use strtol() to recover the integer value from the source string, in
 * accordance with the given FormatNode.
 *
 * If the conversion completes successfully, src will have been advanced to
 * point at the character immediately following the last character used in the
 * conversion.
 *
 * Return the number of characters consumed, measured from the position of
 * *src on entry (so skipped leading whitespace is included).
 *
 * Errors are raised when the source is shorter than a fixed-width field,
 * when a fixed-width field is only partially numeric, when no characters
 * were consumed at all, and when the value does not fit in an int.
 *
 * Note that from_char_parse_int() provides a more convenient wrapper where
 * the length of the field is the same as the length of the format keyword (as
 * with DD and MI).
 */
static int
from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node)
{
	long		result;
	char		copy[DCH_MAX_ITEM_SIZ + 1];
	char	   *init = *src;	/* start position, kept for the final checks */
	int			used;

	/*
	 * Skip any whitespace before parsing the integer.
	 */
	*src += strspace_len(*src);

	Assert(len <= DCH_MAX_ITEM_SIZ);
	/* copy at most 'len' characters; 'used' receives strlcpy's return value */
	used = (int) strlcpy(copy, *src, len + 1);

	if (S_FM(node->suffix) || is_next_separator(node))
	{
		/*
		 * This node is in Fill Mode, or the next node is known to be a
		 * non-digit value, so we just slurp as many characters as we can get.
		 * Parsing starts at 'init' and strtol() writes the end position
		 * straight into *src.
		 */
		errno = 0;
		result = strtol(init, src, 10);
	}
	else
	{
		/*
		 * We need to pull exactly the number of characters given in 'len' out
		 * of the string, and convert those.
		 */
		char	   *last;

		if (used < len)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
					 errmsg("source string too short for \"%s\" formatting field",
							node->key->name),
					 errdetail("Field requires %d characters, but only %d "
							   "remain.",
							   len, used),
					 errhint("If your source string is not fixed-width, try "
							 "using the \"FM\" modifier.")));

		errno = 0;
		result = strtol(copy, &last, 10);
		/* from here on 'used' is the number of characters strtol() consumed */
		used = last - copy;

		if (used > 0 && used < len)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
					 errmsg("invalid value \"%s\" for \"%s\"",
							copy, node->key->name),
					 errdetail("Field requires %d characters, but only %d "
							   "could be parsed.", len, used),
					 errhint("If your source string is not fixed-width, try "
							 "using the \"FM\" modifier.")));

		*src += used;
	}

	/* the cursor did not move at all: nothing was consumed */
	if (*src == init)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
				 errmsg("invalid value \"%s\" for \"%s\"",
						copy, node->key->name),
				 errdetail("Value must be an integer.")));

	/* errno still reflects the strtol() call made above */
	if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
		ereport(ERROR,
				(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
				 errmsg("value for \"%s\" in source string is out of range",
						node->key->name),
				 errdetail("Value must be in the range %d to %d.",
						   INT_MIN, INT_MAX)));

	if (dest != NULL)
		from_char_set_int(dest, (int) result, node);
	return *src - init;
}
2273 
2274 /*
2275  * Call from_char_parse_int_len(), using the length of the format keyword as
2276  * the expected length of the field.
2277  *
2278  * Don't call this function if the field differs in length from the format
2279  * keyword (as with HH24; the keyword length is 4, but the field length is 2).
2280  * In such cases, call from_char_parse_int_len() instead to specify the
2281  * required length explicitly.
2282  */
2283 static int
2284 from_char_parse_int(int *dest, char **src, FormatNode *node)
2285 {
2286  return from_char_parse_int_len(dest, src, node->key->len, node);
2287 }
2288 
2289 /* ----------
2290  * Sequential search with to upper/lower conversion
2291  * ----------
2292  */
2293 static int
2294 seq_search(char *name, const char *const *array, int type, int max, int *len)
2295 {
2296  const char *p;
2297  const char *const *a;
2298  char *n;
2299  int last,
2300  i;
2301 
2302  *len = 0;
2303 
2304  if (!*name)
2305  return -1;
2306 
2307  /* set first char */
2308  if (type == ONE_UPPER || type == ALL_UPPER)
2309  *name = pg_toupper((unsigned char) *name);
2310  else if (type == ALL_LOWER)
2311  *name = pg_tolower((unsigned char) *name);
2312 
2313  for (last = 0, a = array; *a != NULL; a++)
2314  {
2315  /* compare first chars */
2316  if (*name != **a)
2317  continue;
2318 
2319  for (i = 1, p = *a + 1, n = name + 1;; n++, p++, i++)
2320  {
2321  /* search fragment (max) only */
2322  if (max && i == max)
2323  {
2324  *len = i;
2325  return a - array;
2326  }
2327  /* full size */
2328  if (*p == '\0')
2329  {
2330  *len = i;
2331  return a - array;
2332  }
2333  /* Not found in array 'a' */
2334  if (*n == '\0')
2335  break;
2336 
2337  /*
2338  * Convert (but convert new chars only)
2339  */
2340  if (i > last)
2341  {
2342  if (type == ONE_UPPER || type == ALL_LOWER)
2343  *n = pg_tolower((unsigned char) *n);
2344  else if (type == ALL_UPPER)
2345  *n = pg_toupper((unsigned char) *n);
2346  last = i;
2347  }
2348 
2349 #ifdef DEBUG_TO_FROM_CHAR
2350  elog(DEBUG_elog_output, "N: %c, P: %c, A: %s (%s)",
2351  *n, *p, *a, name);
2352 #endif
2353  if (*n != *p)
2354  break;
2355  }
2356  }
2357 
2358  return -1;
2359 }
2360 
2361 /*
2362  * Perform a sequential search in 'array' for text matching the first 'max'
2363  * characters of the source string.
2364  *
2365  * If a match is found, copy the array index of the match into the integer
2366  * pointed to by 'dest', advance 'src' to the end of the part of the string
2367  * which matched, and return the number of characters consumed.
2368  *
2369  * If the string doesn't match, throw an error.
2370  */
2371 static int
2372 from_char_seq_search(int *dest, char **src, const char *const *array, int type, int max,
2373  FormatNode *node)
2374 {
2375  int len;
2376 
2377  *dest = seq_search(*src, array, type, max, &len);
2378  if (len <= 0)
2379  {
2380  char copy[DCH_MAX_ITEM_SIZ + 1];
2381 
2382  Assert(max <= DCH_MAX_ITEM_SIZ);
2383  strlcpy(copy, *src, max + 1);
2384 
2385  ereport(ERROR,
2386  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2387  errmsg("invalid value \"%s\" for \"%s\"",
2388  copy, node->key->name),
2389  errdetail("The given value did not match any of the allowed "
2390  "values for this field.")));
2391  }
2392  *src += len;
2393  return len;
2394 }
2395 
2396 /* ----------
2397  * Process a TmToChar struct as denoted by a list of FormatNodes.
2398  * The formatted data is written to the string pointed to by 'out'.
2399  * ----------
2400  */
2401 static void
2402 DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
2403 {
2404  FormatNode *n;
2405  char *s;
2406  struct pg_tm *tm = &in->tm;
2407  int i;
2408 
2409  /* cache localized days and months */
2411 
2412  s = out;
2413  for (n = node; n->type != NODE_TYPE_END; n++)
2414  {
2415  if (n->type != NODE_TYPE_ACTION)
2416  {
2417  strcpy(s, n->character);
2418  s += strlen(s);
2419  continue;
2420  }
2421 
2422  switch (n->key->id)
2423  {
2424  case DCH_A_M:
2425  case DCH_P_M:
2426  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2427  ? P_M_STR : A_M_STR);
2428  s += strlen(s);
2429  break;
2430  case DCH_AM:
2431  case DCH_PM:
2432  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2433  ? PM_STR : AM_STR);
2434  s += strlen(s);
2435  break;
2436  case DCH_a_m:
2437  case DCH_p_m:
2438  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2439  ? p_m_STR : a_m_STR);
2440  s += strlen(s);
2441  break;
2442  case DCH_am:
2443  case DCH_pm:
2444  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2445  ? pm_STR : am_STR);
2446  s += strlen(s);
2447  break;
2448  case DCH_HH:
2449  case DCH_HH12:
2450 
2451  /*
2452  * display time as shown on a 12-hour clock, even for
2453  * intervals
2454  */
2455  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2456  tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ? HOURS_PER_DAY / 2 :
2457  tm->tm_hour % (HOURS_PER_DAY / 2));
2458  if (S_THth(n->suffix))
2459  str_numth(s, s, S_TH_TYPE(n->suffix));
2460  s += strlen(s);
2461  break;
2462  case DCH_HH24:
2463  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2464  tm->tm_hour);
2465  if (S_THth(n->suffix))
2466  str_numth(s, s, S_TH_TYPE(n->suffix));
2467  s += strlen(s);
2468  break;
2469  case DCH_MI:
2470  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
2471  tm->tm_min);
2472  if (S_THth(n->suffix))
2473  str_numth(s, s, S_TH_TYPE(n->suffix));
2474  s += strlen(s);
2475  break;
2476  case DCH_SS:
2477  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
2478  tm->tm_sec);
2479  if (S_THth(n->suffix))
2480  str_numth(s, s, S_TH_TYPE(n->suffix));
2481  s += strlen(s);
2482  break;
2483  case DCH_MS: /* millisecond */
2484  sprintf(s, "%03d", (int) (in->fsec / INT64CONST(1000)));
2485  if (S_THth(n->suffix))
2486  str_numth(s, s, S_TH_TYPE(n->suffix));
2487  s += strlen(s);
2488  break;
2489  case DCH_US: /* microsecond */
2490  sprintf(s, "%06d", (int) in->fsec);
2491  if (S_THth(n->suffix))
2492  str_numth(s, s, S_TH_TYPE(n->suffix));
2493  s += strlen(s);
2494  break;
2495  case DCH_SSSS:
2496  sprintf(s, "%d", tm->tm_hour * SECS_PER_HOUR +
2497  tm->tm_min * SECS_PER_MINUTE +
2498  tm->tm_sec);
2499  if (S_THth(n->suffix))
2500  str_numth(s, s, S_TH_TYPE(n->suffix));
2501  s += strlen(s);
2502  break;
2503  case DCH_tz:
2505  if (tmtcTzn(in))
2506  {
2507  /* We assume here that timezone names aren't localized */
2508  char *p = asc_tolower_z(tmtcTzn(in));
2509 
2510  strcpy(s, p);
2511  pfree(p);
2512  s += strlen(s);
2513  }
2514  break;
2515  case DCH_TZ:
2517  if (tmtcTzn(in))
2518  {
2519  strcpy(s, tmtcTzn(in));
2520  s += strlen(s);
2521  }
2522  break;
2523  case DCH_TZH:
2525  sprintf(s, "%c%02d",
2526  (tm->tm_gmtoff >= 0) ? '+' : '-',
2527  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2528  s += strlen(s);
2529  break;
2530  case DCH_TZM:
2532  sprintf(s, "%02d",
2533  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2534  s += strlen(s);
2535  break;
2536  case DCH_OF:
2538  sprintf(s, "%c%0*d",
2539  (tm->tm_gmtoff >= 0) ? '+' : '-',
2540  S_FM(n->suffix) ? 0 : 2,
2541  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2542  s += strlen(s);
2543  if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
2544  {
2545  sprintf(s, ":%02d",
2546  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2547  s += strlen(s);
2548  }
2549  break;
2550  case DCH_A_D:
2551  case DCH_B_C:
2553  strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
2554  s += strlen(s);
2555  break;
2556  case DCH_AD:
2557  case DCH_BC:
2559  strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
2560  s += strlen(s);
2561  break;
2562  case DCH_a_d:
2563  case DCH_b_c:
2565  strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
2566  s += strlen(s);
2567  break;
2568  case DCH_ad:
2569  case DCH_bc:
2571  strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
2572  s += strlen(s);
2573  break;
2574  case DCH_MONTH:
2576  if (!tm->tm_mon)
2577  break;
2578  if (S_TM(n->suffix))
2579  {
2580  char *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid);
2581 
2582  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2583  strcpy(s, str);
2584  else
2585  ereport(ERROR,
2586  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2587  errmsg("localized string format value too long")));
2588  }
2589  else
2590  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2591  asc_toupper_z(months_full[tm->tm_mon - 1]));
2592  s += strlen(s);
2593  break;
2594  case DCH_Month:
2596  if (!tm->tm_mon)
2597  break;
2598  if (S_TM(n->suffix))
2599  {
2600  char *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid);
2601 
2602  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2603  strcpy(s, str);
2604  else
2605  ereport(ERROR,
2606  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2607  errmsg("localized string format value too long")));
2608  }
2609  else
2610  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2611  months_full[tm->tm_mon - 1]);
2612  s += strlen(s);
2613  break;
2614  case DCH_month:
2616  if (!tm->tm_mon)
2617  break;
2618  if (S_TM(n->suffix))
2619  {
2620  char *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid);
2621 
2622  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2623  strcpy(s, str);
2624  else
2625  ereport(ERROR,
2626  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2627  errmsg("localized string format value too long")));
2628  }
2629  else
2630  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2631  asc_tolower_z(months_full[tm->tm_mon - 1]));
2632  s += strlen(s);
2633  break;
2634  case DCH_MON:
2636  if (!tm->tm_mon)
2637  break;
2638  if (S_TM(n->suffix))
2639  {
2640  char *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2641 
2642  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2643  strcpy(s, str);
2644  else
2645  ereport(ERROR,
2646  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2647  errmsg("localized string format value too long")));
2648  }
2649  else
2650  strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
2651  s += strlen(s);
2652  break;
2653  case DCH_Mon:
2655  if (!tm->tm_mon)
2656  break;
2657  if (S_TM(n->suffix))
2658  {
2659  char *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2660 
2661  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2662  strcpy(s, str);
2663  else
2664  ereport(ERROR,
2665  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2666  errmsg("localized string format value too long")));
2667  }
2668  else
2669  strcpy(s, months[tm->tm_mon - 1]);
2670  s += strlen(s);
2671  break;
2672  case DCH_mon:
2674  if (!tm->tm_mon)
2675  break;
2676  if (S_TM(n->suffix))
2677  {
2678  char *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2679 
2680  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2681  strcpy(s, str);
2682  else
2683  ereport(ERROR,
2684  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2685  errmsg("localized string format value too long")));
2686  }
2687  else
2688  strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
2689  s += strlen(s);
2690  break;
2691  case DCH_MM:
2692  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
2693  tm->tm_mon);
2694  if (S_THth(n->suffix))
2695  str_numth(s, s, S_TH_TYPE(n->suffix));
2696  s += strlen(s);
2697  break;
2698  case DCH_DAY:
2700  if (S_TM(n->suffix))
2701  {
2702  char *str = str_toupper_z(localized_full_days[tm->tm_wday], collid);
2703 
2704  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2705  strcpy(s, str);
2706  else
2707  ereport(ERROR,
2708  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2709  errmsg("localized string format value too long")));
2710  }
2711  else
2712  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2713  asc_toupper_z(days[tm->tm_wday]));
2714  s += strlen(s);
2715  break;
2716  case DCH_Day:
2718  if (S_TM(n->suffix))
2719  {
2720  char *str = str_initcap_z(localized_full_days[tm->tm_wday], collid);
2721 
2722  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2723  strcpy(s, str);
2724  else
2725  ereport(ERROR,
2726  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2727  errmsg("localized string format value too long")));
2728  }
2729  else
2730  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2731  days[tm->tm_wday]);
2732  s += strlen(s);
2733  break;
2734  case DCH_day:
2736  if (S_TM(n->suffix))
2737  {
2738  char *str = str_tolower_z(localized_full_days[tm->tm_wday], collid);
2739 
2740  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2741  strcpy(s, str);
2742  else
2743  ereport(ERROR,
2744  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2745  errmsg("localized string format value too long")));
2746  }
2747  else
2748  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2749  asc_tolower_z(days[tm->tm_wday]));
2750  s += strlen(s);
2751  break;
2752  case DCH_DY:
2754  if (S_TM(n->suffix))
2755  {
2756  char *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid);
2757 
2758  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2759  strcpy(s, str);
2760  else
2761  ereport(ERROR,
2762  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2763  errmsg("localized string format value too long")));
2764  }
2765  else
2766  strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
2767  s += strlen(s);
2768  break;
2769  case DCH_Dy:
2771  if (S_TM(n->suffix))
2772  {
2773  char *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid);
2774 
2775  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2776  strcpy(s, str);
2777  else
2778  ereport(ERROR,
2779  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2780  errmsg("localized string format value too long")));
2781  }
2782  else
2783  strcpy(s, days_short[tm->tm_wday]);
2784  s += strlen(s);
2785  break;
2786  case DCH_dy:
2788  if (S_TM(n->suffix))
2789  {
2790  char *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid);
2791 
2792  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2793  strcpy(s, str);
2794  else
2795  ereport(ERROR,
2796  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2797  errmsg("localized string format value too long")));
2798  }
2799  else
2800  strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
2801  s += strlen(s);
2802  break;
2803  case DCH_DDD:
2804  case DCH_IDDD:
2805  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
2806  (n->key->id == DCH_DDD) ?
2807  tm->tm_yday :
2808  date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday));
2809  if (S_THth(n->suffix))
2810  str_numth(s, s, S_TH_TYPE(n->suffix));
2811  s += strlen(s);
2812  break;
2813  case DCH_DD:
2814  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
2815  if (S_THth(n->suffix))
2816  str_numth(s, s, S_TH_TYPE(n->suffix));
2817  s += strlen(s);
2818  break;
2819  case DCH_D:
2821  sprintf(s, "%d", tm->tm_wday + 1);
2822  if (S_THth(n->suffix))
2823  str_numth(s, s, S_TH_TYPE(n->suffix));
2824  s += strlen(s);
2825  break;
2826  case DCH_ID:
2828  sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
2829  if (S_THth(n->suffix))
2830  str_numth(s, s, S_TH_TYPE(n->suffix));
2831  s += strlen(s);
2832  break;
2833  case DCH_WW:
2834  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
2835  (tm->tm_yday - 1) / 7 + 1);
2836  if (S_THth(n->suffix))
2837  str_numth(s, s, S_TH_TYPE(n->suffix));
2838  s += strlen(s);
2839  break;
2840  case DCH_IW:
2841  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
2842  date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday));
2843  if (S_THth(n->suffix))
2844  str_numth(s, s, S_TH_TYPE(n->suffix));
2845  s += strlen(s);
2846  break;
2847  case DCH_Q:
2848  if (!tm->tm_mon)
2849  break;
2850  sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
2851  if (S_THth(n->suffix))
2852  str_numth(s, s, S_TH_TYPE(n->suffix));
2853  s += strlen(s);
2854  break;
2855  case DCH_CC:
2856  if (is_interval) /* straight calculation */
2857  i = tm->tm_year / 100;
2858  else
2859  {
2860  if (tm->tm_year > 0)
2861  /* Century 20 == 1901 - 2000 */
2862  i = (tm->tm_year - 1) / 100 + 1;
2863  else
2864  /* Century 6BC == 600BC - 501BC */
2865  i = tm->tm_year / 100 - 1;
2866  }
2867  if (i <= 99 && i >= -99)
2868  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
2869  else
2870  sprintf(s, "%d", i);
2871  if (S_THth(n->suffix))
2872  str_numth(s, s, S_TH_TYPE(n->suffix));
2873  s += strlen(s);
2874  break;
2875  case DCH_Y_YYY:
2876  i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
2877  sprintf(s, "%d,%03d", i,
2878  ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
2879  if (S_THth(n->suffix))
2880  str_numth(s, s, S_TH_TYPE(n->suffix));
2881  s += strlen(s);
2882  break;
2883  case DCH_YYYY:
2884  case DCH_IYYY:
2885  sprintf(s, "%0*d",
2886  S_FM(n->suffix) ? 0 :
2887  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
2888  (n->key->id == DCH_YYYY ?
2889  ADJUST_YEAR(tm->tm_year, is_interval) :
2891  tm->tm_mon,
2892  tm->tm_mday),
2893  is_interval)));
2894  if (S_THth(n->suffix))
2895  str_numth(s, s, S_TH_TYPE(n->suffix));
2896  s += strlen(s);
2897  break;
2898  case DCH_YYY:
2899  case DCH_IYY:
2900  sprintf(s, "%0*d",
2901  S_FM(n->suffix) ? 0 :
2902  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
2903  (n->key->id == DCH_YYY ?
2904  ADJUST_YEAR(tm->tm_year, is_interval) :
2906  tm->tm_mon,
2907  tm->tm_mday),
2908  is_interval)) % 1000);
2909  if (S_THth(n->suffix))
2910  str_numth(s, s, S_TH_TYPE(n->suffix));
2911  s += strlen(s);
2912  break;
2913  case DCH_YY:
2914  case DCH_IY:
2915  sprintf(s, "%0*d",
2916  S_FM(n->suffix) ? 0 :
2917  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
2918  (n->key->id == DCH_YY ?
2919  ADJUST_YEAR(tm->tm_year, is_interval) :
2921  tm->tm_mon,
2922  tm->tm_mday),
2923  is_interval)) % 100);
2924  if (S_THth(n->suffix))
2925  str_numth(s, s, S_TH_TYPE(n->suffix));
2926  s += strlen(s);
2927  break;
2928  case DCH_Y:
2929  case DCH_I:
2930  sprintf(s, "%1d",
2931  (n->key->id == DCH_Y ?
2932  ADJUST_YEAR(tm->tm_year, is_interval) :
2934  tm->tm_mon,
2935  tm->tm_mday),
2936  is_interval)) % 10);
2937  if (S_THth(n->suffix))
2938  str_numth(s, s, S_TH_TYPE(n->suffix));
2939  s += strlen(s);
2940  break;
2941  case DCH_RM:
2942  if (!tm->tm_mon)
2943  break;
2944  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
2946  s += strlen(s);
2947  break;
2948  case DCH_rm:
2949  if (!tm->tm_mon)
2950  break;
2951  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
2953  s += strlen(s);
2954  break;
2955  case DCH_W:
2956  sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
2957  if (S_THth(n->suffix))
2958  str_numth(s, s, S_TH_TYPE(n->suffix));
2959  s += strlen(s);
2960  break;
2961  case DCH_J:
2962  sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
2963  if (S_THth(n->suffix))
2964  str_numth(s, s, S_TH_TYPE(n->suffix));
2965  s += strlen(s);
2966  break;
2967  }
2968  }
2969 
2970  *s = '\0';
2971 }
2972 
2973 /* ----------
2974  * Process a string as denoted by a list of FormatNodes.
2975  * The TmFromChar struct pointed to by 'out' is populated with the results.
2976  *
2977  * Note: we currently don't have any to_interval() function, so there
2978  * is no need here for INVALID_FOR_INTERVAL checks.
2979  * ----------
2980  */
2981 static void
2982 DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
2983 {
2984  FormatNode *n;
2985  char *s;
2986  int len,
2987  value;
2988  bool fx_mode = false;
2989 
2990  for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
2991  {
2992  if (n->type != NODE_TYPE_ACTION)
2993  {
2994  /*
2995  * Separator, so consume one character from input string. Notice
2996  * we don't insist that the consumed character match the format's
2997  * character.
2998  */
2999  s += pg_mblen(s);
3000  continue;
3001  }
3002 
3003  /* Ignore spaces before fields when not in FX (fixed width) mode */
3004  if (!fx_mode && n->key->id != DCH_FX)
3005  {
3006  while (*s != '\0' && isspace((unsigned char) *s))
3007  s++;
3008  }
3009 
3010  from_char_set_mode(out, n->key->date_mode);
3011 
3012  switch (n->key->id)
3013  {
3014  case DCH_FX:
3015  fx_mode = true;
3016  break;
3017  case DCH_A_M:
3018  case DCH_P_M:
3019  case DCH_a_m:
3020  case DCH_p_m:
3022  ALL_UPPER, n->key->len, n);
3023  from_char_set_int(&out->pm, value % 2, n);
3024  out->clock = CLOCK_12_HOUR;
3025  break;
3026  case DCH_AM:
3027  case DCH_PM:
3028  case DCH_am:
3029  case DCH_pm:
3030  from_char_seq_search(&value, &s, ampm_strings,
3031  ALL_UPPER, n->key->len, n);
3032  from_char_set_int(&out->pm, value % 2, n);
3033  out->clock = CLOCK_12_HOUR;
3034  break;
3035  case DCH_HH:
3036  case DCH_HH12:
3037  from_char_parse_int_len(&out->hh, &s, 2, n);
3038  out->clock = CLOCK_12_HOUR;
3039  SKIP_THth(s, n->suffix);
3040  break;
3041  case DCH_HH24:
3042  from_char_parse_int_len(&out->hh, &s, 2, n);
3043  SKIP_THth(s, n->suffix);
3044  break;
3045  case DCH_MI:
3046  from_char_parse_int(&out->mi, &s, n);
3047  SKIP_THth(s, n->suffix);
3048  break;
3049  case DCH_SS:
3050  from_char_parse_int(&out->ss, &s, n);
3051  SKIP_THth(s, n->suffix);
3052  break;
3053  case DCH_MS: /* millisecond */
3054  len = from_char_parse_int_len(&out->ms, &s, 3, n);
3055 
3056  /*
3057  * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
3058  */
3059  out->ms *= len == 1 ? 100 :
3060  len == 2 ? 10 : 1;
3061 
3062  SKIP_THth(s, n->suffix);
3063  break;
3064  case DCH_US: /* microsecond */
3065  len = from_char_parse_int_len(&out->us, &s, 6, n);
3066 
3067  out->us *= len == 1 ? 100000 :
3068  len == 2 ? 10000 :
3069  len == 3 ? 1000 :
3070  len == 4 ? 100 :
3071  len == 5 ? 10 : 1;
3072 
3073  SKIP_THth(s, n->suffix);
3074  break;
3075  case DCH_SSSS:
3076  from_char_parse_int(&out->ssss, &s, n);
3077  SKIP_THth(s, n->suffix);
3078  break;
3079  case DCH_tz:
3080  case DCH_TZ:
3081  case DCH_OF:
3082  ereport(ERROR,
3083  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3084  errmsg("formatting field \"%s\" is only supported in to_char",
3085  n->key->name)));
3086  break;
3087  case DCH_TZH:
3088  out->tzsign = *s == '-' ? -1 : +1;
3089 
3090  if (*s == '+' || *s == '-' || *s == ' ')
3091  s++;
3092 
3093  from_char_parse_int_len(&out->tzh, &s, 2, n);
3094  break;
3095  case DCH_TZM:
3096  /* assign positive timezone sign if TZH was not seen before */
3097  if (!out->tzsign)
3098  out->tzsign = +1;
3099  from_char_parse_int_len(&out->tzm, &s, 2, n);
3100  break;
3101  case DCH_A_D:
3102  case DCH_B_C:
3103  case DCH_a_d:
3104  case DCH_b_c:
3106  ALL_UPPER, n->key->len, n);
3107  from_char_set_int(&out->bc, value % 2, n);
3108  break;
3109  case DCH_AD:
3110  case DCH_BC:
3111  case DCH_ad:
3112  case DCH_bc:
3113  from_char_seq_search(&value, &s, adbc_strings,
3114  ALL_UPPER, n->key->len, n);
3115  from_char_set_int(&out->bc, value % 2, n);
3116  break;
3117  case DCH_MONTH:
3118  case DCH_Month:
3119  case DCH_month:
3121  MAX_MONTH_LEN, n);
3122  from_char_set_int(&out->mm, value + 1, n);
3123  break;
3124  case DCH_MON:
3125  case DCH_Mon:
3126  case DCH_mon:
3127  from_char_seq_search(&value, &s, months, ONE_UPPER,
3128  MAX_MON_LEN, n);
3129  from_char_set_int(&out->mm, value + 1, n);
3130  break;
3131  case DCH_MM:
3132  from_char_parse_int(&out->mm, &s, n);
3133  SKIP_THth(s, n->suffix);
3134  break;
3135  case DCH_DAY:
3136  case DCH_Day:
3137  case DCH_day:
3138  from_char_seq_search(&value, &s, days, ONE_UPPER,
3139  MAX_DAY_LEN, n);
3140  from_char_set_int(&out->d, value, n);
3141  out->d++;
3142  break;
3143  case DCH_DY:
3144  case DCH_Dy:
3145  case DCH_dy:
3146  from_char_seq_search(&value, &s, days, ONE_UPPER,
3147  MAX_DY_LEN, n);
3148  from_char_set_int(&out->d, value, n);
3149  out->d++;
3150  break;
3151  case DCH_DDD:
3152  from_char_parse_int(&out->ddd, &s, n);
3153  SKIP_THth(s, n->suffix);
3154  break;
3155  case DCH_IDDD:
3156  from_char_parse_int_len(&out->ddd, &s, 3, n);
3157  SKIP_THth(s, n->suffix);
3158  break;
3159  case DCH_DD:
3160  from_char_parse_int(&out->dd, &s, n);
3161  SKIP_THth(s, n->suffix);
3162  break;
3163  case DCH_D:
3164  from_char_parse_int(&out->d, &s, n);
3165  SKIP_THth(s, n->suffix);
3166  break;
3167  case DCH_ID:
3168  from_char_parse_int_len(&out->d, &s, 1, n);
3169  /* Shift numbering to match Gregorian where Sunday = 1 */
3170  if (++out->d > 7)
3171  out->d = 1;
3172  SKIP_THth(s, n->suffix);
3173  break;
3174  case DCH_WW:
3175  case DCH_IW:
3176  from_char_parse_int(&out->ww, &s, n);
3177  SKIP_THth(s, n->suffix);
3178  break;
3179  case DCH_Q:
3180 
3181  /*
3182  * We ignore 'Q' when converting to date because it is unclear
3183  * which date in the quarter to use, and some people specify
3184  * both quarter and month, so if it was honored it might
3185  * conflict with the supplied month. That is also why we don't
3186  * throw an error.
3187  *
3188  * We still parse the source string for an integer, but it
3189  * isn't stored anywhere in 'out'.
3190  */
3191  from_char_parse_int((int *) NULL, &s, n);
3192  SKIP_THth(s, n->suffix);
3193  break;
3194  case DCH_CC:
3195  from_char_parse_int(&out->cc, &s, n);
3196  SKIP_THth(s, n->suffix);
3197  break;
3198  case DCH_Y_YYY:
3199  {
3200  int matched,
3201  years,
3202  millennia,
3203  nch;
3204 
3205  matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
3206  if (matched < 2)
3207  ereport(ERROR,
3208  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3209  errmsg("invalid input string for \"Y,YYY\"")));
3210  years += (millennia * 1000);
3211  from_char_set_int(&out->year, years, n);
3212  out->yysz = 4;
3213  s += nch;
3214  SKIP_THth(s, n->suffix);
3215  }
3216  break;
3217  case DCH_YYYY:
3218  case DCH_IYYY:
3219  from_char_parse_int(&out->year, &s, n);
3220  out->yysz = 4;
3221  SKIP_THth(s, n->suffix);
3222  break;
3223  case DCH_YYY:
3224  case DCH_IYY:
3225  if (from_char_parse_int(&out->year, &s, n) < 4)
3226  out->year = adjust_partial_year_to_2020(out->year);
3227  out->yysz = 3;
3228  SKIP_THth(s, n->suffix);
3229  break;
3230  case DCH_YY:
3231  case DCH_IY:
3232  if (from_char_parse_int(&out->year, &s, n) < 4)
3233  out->year = adjust_partial_year_to_2020(out->year);
3234  out->yysz = 2;
3235  SKIP_THth(s, n->suffix);
3236  break;
3237  case DCH_Y:
3238  case DCH_I:
3239  if (from_char_parse_int(&out->year, &s, n) < 4)
3240  out->year = adjust_partial_year_to_2020(out->year);
3241  out->yysz = 1;
3242  SKIP_THth(s, n->suffix);
3243  break;
3244  case DCH_RM:
3246  ALL_UPPER, MAX_RM_LEN, n);
3247  from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, n);
3248  break;
3249  case DCH_rm:
3251  ALL_LOWER, MAX_RM_LEN, n);
3252  from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, n);
3253  break;
3254  case DCH_W:
3255  from_char_parse_int(&out->w, &s, n);
3256  SKIP_THth(s, n->suffix);
3257  break;
3258  case DCH_J:
3259  from_char_parse_int(&out->j, &s, n);
3260  SKIP_THth(s, n->suffix);
3261  break;
3262  }
3263  }
3264 }
3265 
3266 /* select a DCHCacheEntry to hold the given format picture */
3267 static DCHCacheEntry *
3268 DCH_cache_getnew(const char *str)
3269 {
3270  DCHCacheEntry *ent;
3271 
3272  /* counter overflow check - paranoia? */
3273  if (DCHCounter >= (INT_MAX - DCH_CACHE_ENTRIES))
3274  {
3275  DCHCounter = 0;
3276 
3277  for (ent = DCHCache; ent < (DCHCache + DCH_CACHE_ENTRIES); ent++)
3278  ent->age = (++DCHCounter);
3279  }
3280 
3281  /*
3282  * If cache is full, remove oldest entry (or recycle first not-valid one)
3283  */
3285  {
3286  DCHCacheEntry *old = DCHCache + 0;
3287 
3288 #ifdef DEBUG_TO_FROM_CHAR
3289  elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
3290 #endif
3291  if (old->valid)
3292  {
3293  for (ent = DCHCache + 1; ent < (DCHCache + DCH_CACHE_ENTRIES); ent++)
3294  {
3295  if (!ent->valid)
3296  {
3297  old = ent;
3298  break;
3299  }
3300  if (ent->age < old->age)
3301  old = ent;
3302  }
3303  }
3304 #ifdef DEBUG_TO_FROM_CHAR
3305  elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
3306 #endif
3307  old->valid = false;
3308  StrNCpy(old->str, str, DCH_CACHE_SIZE + 1);
3309  old->age = (++DCHCounter);
3310  /* caller is expected to fill format, then set valid */
3311  return old;
3312  }
3313  else
3314  {
3315 #ifdef DEBUG_TO_FROM_CHAR
3316  elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
3317 #endif
3318  ent = DCHCache + n_DCHCache;
3319  ent->valid = false;
3320  StrNCpy(ent->str, str, DCH_CACHE_SIZE + 1);
3321  ent->age = (++DCHCounter);
3322  /* caller is expected to fill format, then set valid */
3323  ++n_DCHCache;
3324  return ent;
3325  }
3326 }
3327 
3328 /* look for an existing DCHCacheEntry matching the given format picture */
3329 static DCHCacheEntry *
3330 DCH_cache_search(const char *str)
3331 {
3332  int i;
3333  DCHCacheEntry *ent;
3334 
3335  /* counter overflow check - paranoia? */
3336  if (DCHCounter >= (INT_MAX - DCH_CACHE_ENTRIES))
3337  {
3338  DCHCounter = 0;
3339 
3340  for (ent = DCHCache; ent < (DCHCache + DCH_CACHE_ENTRIES); ent++)
3341  ent->age = (++DCHCounter);
3342  }
3343 
3344  for (i = 0, ent = DCHCache; i < n_DCHCache; i++, ent++)
3345  {
3346  if (ent->valid && strcmp(ent->str, str) == 0)
3347  {
3348  ent->age = (++DCHCounter);
3349  return ent;
3350  }
3351  }
3352 
3353  return NULL;
3354 }
3355 
3356 /* Find or create a DCHCacheEntry for the given format picture */
3357 static DCHCacheEntry *
3358 DCH_cache_fetch(const char *str)
3359 {
3360  DCHCacheEntry *ent;
3361 
3362  if ((ent = DCH_cache_search(str)) == NULL)
3363  {
3364  /*
3365  * Not in the cache, must run parser and save a new format-picture to
3366  * the cache. Do not mark the cache entry valid until parsing
3367  * succeeds.
3368  */
3369  ent = DCH_cache_getnew(str);
3370 
3371  parse_format(ent->format, str, DCH_keywords,
3372  DCH_suff, DCH_index, DCH_TYPE, NULL);
3373 
3374  ent->valid = true;
3375  }
3376  return ent;
3377 }
3378 
3379 /*
3380  * Format a date/time or interval into a string according to fmt.
3381  * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char
3382  * for formatting.
3383  */
3384 static text *
3385 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
3386 {
3387  FormatNode *format;
3388  char *fmt_str,
3389  *result;
3390  bool incache;
3391  int fmt_len;
3392  text *res;
3393 
3394  /*
3395  * Convert fmt to C string
3396  */
3397  fmt_str = text_to_cstring(fmt);
3398  fmt_len = strlen(fmt_str);
3399 
3400  /*
3401  * Allocate workspace for result as C string
3402  */
3403  result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
3404  *result = '\0';
3405 
3406  if (fmt_len > DCH_CACHE_SIZE)
3407  {
3408  /*
3409  * Allocate new memory if format picture is bigger than static cache
3410  * and do not use cache (call parser always)
3411  */
3412  incache = false;
3413 
3414  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
3415 
3416  parse_format(format, fmt_str, DCH_keywords,
3417  DCH_suff, DCH_index, DCH_TYPE, NULL);
3418  }
3419  else
3420  {
3421  /*
3422  * Use cache buffers
3423  */
3424  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str);
3425 
3426  incache = true;
3427  format = ent->format;
3428  }
3429 
3430  /* The real work is here */
3431  DCH_to_char(format, is_interval, tmtc, result, collid);
3432 
3433  if (!incache)
3434  pfree(format);
3435 
3436  pfree(fmt_str);
3437 
3438  /* convert C-string result to TEXT format */
3439  res = cstring_to_text(result);
3440 
3441  pfree(result);
3442  return res;
3443 }
3444 
3445 /****************************************************************************
3446  * Public routines
3447  ***************************************************************************/
3448 
3449 /* -------------------
3450  * TIMESTAMP to_char()
3451  * -------------------
3452  */
3453 Datum
3455 {
3457  text *fmt = PG_GETARG_TEXT_PP(1),
3458  *res;
3459  TmToChar tmtc;
3460  struct pg_tm *tm;
3461  int thisdate;
3462 
3463  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
3464  PG_RETURN_NULL();
3465 
3466  ZERO_tmtc(&tmtc);
3467  tm = tmtcTm(&tmtc);
3468 
3469  if (timestamp2tm(dt, NULL, tm, &tmtcFsec(&tmtc), NULL, NULL) != 0)
3470  ereport(ERROR,
3471  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3472  errmsg("timestamp out of range")));
3473 
3474  thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
3475  tm->tm_wday = (thisdate + 1) % 7;
3476  tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
3477 
3478  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
3479  PG_RETURN_NULL();
3480 
3481  PG_RETURN_TEXT_P(res);
3482 }
3483 
3484 Datum
3486 {
3488  text *fmt = PG_GETARG_TEXT_PP(1),
3489  *res;
3490  TmToChar tmtc;
3491  int tz;
3492  struct pg_tm *tm;
3493  int thisdate;
3494 
3495  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
3496  PG_RETURN_NULL();
3497 
3498  ZERO_tmtc(&tmtc);
3499  tm = tmtcTm(&tmtc);
3500 
3501  if (timestamp2tm(dt, &tz, tm, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
3502  ereport(ERROR,
3503  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3504  errmsg("timestamp out of range")));
3505 
3506  thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
3507  tm->tm_wday = (thisdate + 1) % 7;
3508  tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
3509 
3510  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
3511  PG_RETURN_NULL();
3512 
3513  PG_RETURN_TEXT_P(res);
3514 }
3515 
3516 
3517 /* -------------------
3518  * INTERVAL to_char()
3519  * -------------------
3520  */
3521 Datum
3523 {
3524  Interval *it = PG_GETARG_INTERVAL_P(0);
3525  text *fmt = PG_GETARG_TEXT_PP(1),
3526  *res;
3527  TmToChar tmtc;
3528  struct pg_tm *tm;
3529 
3530  if (VARSIZE_ANY_EXHDR(fmt) <= 0)
3531  PG_RETURN_NULL();
3532 
3533  ZERO_tmtc(&tmtc);
3534  tm = tmtcTm(&tmtc);
3535 
3536  if (interval2tm(*it, tm, &tmtcFsec(&tmtc)) != 0)
3537  PG_RETURN_NULL();
3538 
3539  /* wday is meaningless, yday approximates the total span in days */
3540  tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday;
3541 
3542  if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
3543  PG_RETURN_NULL();
3544 
3545  PG_RETURN_TEXT_P(res);
3546 }
3547 
3548 /* ---------------------
3549  * TO_TIMESTAMP()
3550  *
3551  * Make Timestamp from date_str which is formatted at argument 'fmt'
3552  * ( to_timestamp is reverse to_char() )
3553  * ---------------------
3554  */
3555 Datum
3557 {
3558  text *date_txt = PG_GETARG_TEXT_PP(0);
3559  text *fmt = PG_GETARG_TEXT_PP(1);
3560  Timestamp result;
3561  int tz;
3562  struct pg_tm tm;
3563  fsec_t fsec;
3564 
3565  do_to_timestamp(date_txt, fmt, &tm, &fsec);
3566 
3567  /* Use the specified time zone, if any. */
3568  if (tm.tm_zone)
3569  {
3570  int dterr = DecodeTimezone((char *) tm.tm_zone, &tz);
3571 
3572  if (dterr)
3573  DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
3574  }
3575  else
3577 
3578  if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
3579  ereport(ERROR,
3580  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3581  errmsg("timestamp out of range")));
3582 
3583  PG_RETURN_TIMESTAMP(result);
3584 }
3585 
3586 /* ----------
3587  * TO_DATE
3588  * Make Date from date_str which is formated at argument 'fmt'
3589  * ----------
3590  */
3591 Datum
3593 {
3594  text *date_txt = PG_GETARG_TEXT_PP(0);
3595  text *fmt = PG_GETARG_TEXT_PP(1);
3596  DateADT result;
3597  struct pg_tm tm;
3598  fsec_t fsec;
3599 
3600  do_to_timestamp(date_txt, fmt, &tm, &fsec);
3601 
3602  /* Prevent overflow in Julian-day routines */
3603  if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
3604  ereport(ERROR,
3605  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3606  errmsg("date out of range: \"%s\"",
3607  text_to_cstring(date_txt))));
3608 
3609  result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
3610 
3611  /* Now check for just-out-of-range dates */
3612  if (!IS_VALID_DATE(result))
3613  ereport(ERROR,
3614  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3615  errmsg("date out of range: \"%s\"",
3616  text_to_cstring(date_txt))));
3617 
3618  PG_RETURN_DATEADT(result);
3619 }
3620 
3621 /*
3622  * do_to_timestamp: shared code for to_timestamp and to_date
3623  *
3624  * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm
3625  * and fractional seconds.
3626  *
3627  * We parse 'fmt' into a list of FormatNodes, which is then passed to
3628  * DCH_from_char to populate a TmFromChar with the parsed contents of
3629  * 'date_txt'.
3630  *
3631  * The TmFromChar is then analysed and converted into the final results in
3632  * struct 'tm' and 'fsec'.
3633  */
3634 static void
3635 do_to_timestamp(text *date_txt, text *fmt,
3636  struct pg_tm *tm, fsec_t *fsec)
3637 {
3638  FormatNode *format;
3639  TmFromChar tmfc;
3640  int fmt_len;
3641  char *date_str;
3642  int fmask;
3643 
3644  date_str = text_to_cstring(date_txt);
3645 
3646  ZERO_tmfc(&tmfc);
3647  ZERO_tm(tm);
3648  *fsec = 0;
3649  fmask = 0; /* bit mask for ValidateDate() */
3650 
3651  fmt_len = VARSIZE_ANY_EXHDR(fmt);
3652 
3653  if (fmt_len)
3654  {
3655  char *fmt_str;
3656  bool incache;
3657 
3658  fmt_str = text_to_cstring(fmt);
3659 
3660  if (fmt_len > DCH_CACHE_SIZE)
3661  {
3662  /*
3663  * Allocate new memory if format picture is bigger than static
3664  * cache and do not use cache (call parser always)
3665  */
3666  incache = false;
3667 
3668  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
3669 
3670  parse_format(format, fmt_str, DCH_keywords,
3671  DCH_suff, DCH_index, DCH_TYPE, NULL);
3672  }
3673  else
3674  {
3675  /*
3676  * Use cache buffers
3677  */
3678  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str);
3679 
3680  incache = true;
3681  format = ent->format;
3682  }
3683 
3684 #ifdef DEBUG_TO_FROM_CHAR
3685  /* dump_node(format, fmt_len); */
3686  /* dump_index(DCH_keywords, DCH_index); */
3687 #endif
3688 
3689  DCH_from_char(format, date_str, &tmfc);
3690 
3691  pfree(fmt_str);
3692  if (!incache)
3693  pfree(format);
3694  }
3695 
3696  DEBUG_TMFC(&tmfc);
3697 
3698  /*
3699  * Convert to_date/to_timestamp input fields to standard 'tm'
3700  */
3701  if (tmfc.ssss)
3702  {
3703  int x = tmfc.ssss;
3704 
3705  tm->tm_hour = x / SECS_PER_HOUR;
3706  x %= SECS_PER_HOUR;
3707  tm->tm_min = x / SECS_PER_MINUTE;
3708  x %= SECS_PER_MINUTE;
3709  tm->tm_sec = x;
3710  }
3711 
3712  if (tmfc.ss)
3713  tm->tm_sec = tmfc.ss;
3714  if (tmfc.mi)
3715  tm->tm_min = tmfc.mi;
3716  if (tmfc.hh)
3717  tm->tm_hour = tmfc.hh;
3718 
3719  if (tmfc.clock == CLOCK_12_HOUR)
3720  {
3721  if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
3722  ereport(ERROR,
3723  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3724  errmsg("hour \"%d\" is invalid for the 12-hour clock",
3725  tm->tm_hour),
3726  errhint("Use the 24-hour clock, or give an hour between 1 and 12.")));
3727 
3728  if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
3729  tm->tm_hour += HOURS_PER_DAY / 2;
3730  else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
3731  tm->tm_hour = 0;
3732  }
3733 
3734  if (tmfc.year)
3735  {
3736  /*
3737  * If CC and YY (or Y) are provided, use YY as 2 low-order digits for
3738  * the year in the given century. Keep in mind that the 21st century
3739  * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
3740  * 600BC to 501BC.
3741  */
3742  if (tmfc.cc && tmfc.yysz <= 2)
3743  {
3744  if (tmfc.bc)
3745  tmfc.cc = -tmfc.cc;
3746  tm->tm_year = tmfc.year % 100;
3747  if (tm->tm_year)
3748  {
3749  if (tmfc.cc >= 0)
3750  tm->tm_year += (tmfc.cc - 1) * 100;
3751  else
3752  tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
3753  }
3754  else
3755  {
3756  /* find century year for dates ending in "00" */
3757  tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
3758  }
3759  }
3760  else
3761  {
3762  /* If a 4-digit year is provided, we use that and ignore CC. */
3763  tm->tm_year = tmfc.year;
3764  if (tmfc.bc && tm->tm_year > 0)
3765  tm->tm_year = -(tm->tm_year - 1);
3766  }
3767  fmask |= DTK_M(YEAR);
3768  }
3769  else if (tmfc.cc)
3770  {
3771  /* use first year of century */
3772  if (tmfc.bc)
3773  tmfc.cc = -tmfc.cc;
3774  if (tmfc.cc >= 0)
3775  /* +1 because 21st century started in 2001 */
3776  tm->tm_year = (tmfc.cc - 1) * 100 + 1;
3777  else
3778  /* +1 because year == 599 is 600 BC */
3779  tm->tm_year = tmfc.cc * 100 + 1;
3780  fmask |= DTK_M(YEAR);
3781  }
3782 
3783  if (tmfc.j)
3784  {
3785  j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
3786  fmask |= DTK_DATE_M;
3787  }
3788 
3789  if (tmfc.ww)
3790  {
3791  if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
3792  {
3793  /*
3794  * If tmfc.d is not set, then the date is left at the beginning of
3795  * the ISO week (Monday).
3796  */
3797  if (tmfc.d)
3798  isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
3799  else
3800  isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
3801  fmask |= DTK_DATE_M;
3802  }
3803  else
3804  tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
3805  }
3806 
3807  if (tmfc.w)
3808  tmfc.dd = (tmfc.w - 1) * 7 + 1;
3809  if (tmfc.dd)
3810  {
3811  tm->tm_mday = tmfc.dd;
3812  fmask |= DTK_M(DAY);
3813  }
3814  if (tmfc.mm)
3815  {
3816  tm->tm_mon = tmfc.mm;
3817  fmask |= DTK_M(MONTH);
3818  }
3819 
3820  if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
3821  {
3822  /*
3823  * The month and day field have not been set, so we use the
3824  * day-of-year field to populate them. Depending on the date mode,
3825  * this field may be interpreted as a Gregorian day-of-year, or an ISO
3826  * week date day-of-year.
3827  */
3828 
3829  if (!tm->tm_year && !tmfc.bc)
3830  ereport(ERROR,
3831  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3832  errmsg("cannot calculate day of year without year information")));
3833 
3834  if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
3835  {
3836  int j0; /* zeroth day of the ISO year, in Julian */
3837 
3838  j0 = isoweek2j(tm->tm_year, 1) - 1;
3839 
3840  j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
3841  fmask |= DTK_DATE_M;
3842  }
3843  else
3844  {
3845  const int *y;
3846  int i;
3847 
3848  static const int ysum[2][13] = {
3849  {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
3850  {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}};
3851 
3852  y = ysum[isleap(tm->tm_year)];
3853 
3854  for (i = 1; i <= MONTHS_PER_YEAR; i++)
3855  {
3856  if (tmfc.ddd <= y[i])
3857  break;
3858  }
3859  if (tm->tm_mon <= 1)
3860  tm->tm_mon = i;
3861 
3862  if (tm->tm_mday <= 1)
3863  tm->tm_mday = tmfc.ddd - y[i - 1];
3864 
3865  fmask |= DTK_M(MONTH) | DTK_M(DAY);
3866  }
3867  }
3868 
3869  if (tmfc.ms)
3870  *fsec += tmfc.ms * 1000;
3871  if (tmfc.us)
3872  *fsec += tmfc.us;
3873 
3874  /* Range-check date fields according to bit mask computed above */
3875  if (fmask != 0)
3876  {
3877  /* We already dealt with AD/BC, so pass isjulian = true */
3878  int dterr = ValidateDate(fmask, true, false, false, tm);
3879 
3880  if (dterr != 0)
3881  {
3882  /*
3883  * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate
3884  * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an
3885  * irrelevant hint about datestyle.
3886  */
3887  DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp");
3888  }
3889  }
3890 
3891  /* Range-check time fields too */
3892  if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY ||
3893  tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR ||
3894  tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE ||
3895  *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC)
3896  DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp");
3897 
3898  /* Save parsed time-zone into tm->tm_zone if it was specified */
3899  if (tmfc.tzsign)
3900  {
3901  char *tz;
3902 
3903  if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR ||
3904  tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR)
3905  DateTimeParseError(DTERR_TZDISP_OVERFLOW, date_str, "timestamp");
3906 
3907  tz = psprintf("%c%02d:%02d",
3908  tmfc.tzsign > 0 ? '+' : '-', tmfc.tzh, tmfc.tzm);
3909 
3910  tm->tm_zone = tz;
3911  }
3912 
3913  DEBUG_TM(tm);
3914 
3915  pfree(date_str);
3916 }
3917 
3918 
3919 /**********************************************************************
3920  * the NUMBER version part
3921  *********************************************************************/
3922 
3923 
/*
 * Fill the first 'max' bytes of 'str' with the character 'c' and terminate
 * the string right after them.  The caller must provide room for max + 1
 * bytes.  Returns 'str'.
 */
static char *
fill_str(char *str, int c, int max)
{
	char	   *terminator = str + max;

	memset(str, c, max);
	*terminator = '\0';

	return str;
}
3931 
/*
 * zeroize_NUM: reset a number-format descriptor to its pristine state.
 *
 * _n is a pointer to the descriptor (it is applied to NUMDesc pointers in
 * this file); each of the nine fields listed below is set to zero.  The
 * argument is expanded once per field, so it must not have side effects.
 * The do { } while(0) wrapper lets the macro be used as a single statement.
 */
3932 #define zeroize_NUM(_n) \
3933 do { \
3934  (_n)->flag = 0; \
3935  (_n)->lsign = 0; \
3936  (_n)->pre = 0; \
3937  (_n)->post = 0; \
3938  (_n)->pre_lsign_num = 0; \
3939  (_n)->need_locale = 0; \
3940  (_n)->multi = 0; \
3941  (_n)->zero_start = 0; \
3942  (_n)->zero_end = 0; \
3943 } while(0)
3944 
3945 /* select a NUMCacheEntry to hold the given format picture */
3946 static NUMCacheEntry *
3947 NUM_cache_getnew(const char *str)
3948 {
3949  NUMCacheEntry *ent;
3950 
3951  /* counter overflow check - paranoia? */
3952  if (NUMCounter >= (INT_MAX - NUM_CACHE_ENTRIES))
3953  {
3954  NUMCounter = 0;
3955 
3956  for (ent = NUMCache; ent < (NUMCache + NUM_CACHE_ENTRIES); ent++)
3957  ent->age = (++NUMCounter);
3958  }
3959 
3960  /*
3961  * If cache is full, remove oldest entry (or recycle first not-valid one)
3962  */
3964  {
3965  NUMCacheEntry *old = NUMCache + 0;
3966 
3967 #ifdef DEBUG_TO_FROM_CHAR
3968  elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache);
3969 #endif
3970  if (old->valid)
3971  {
3972  for (ent = NUMCache + 1; ent < (NUMCache + NUM_CACHE_ENTRIES); ent++)
3973  {
3974  if (!ent->valid)
3975  {
3976  old = ent;
3977  break;
3978  }
3979  if (ent->age < old->age)
3980  old = ent;
3981  }
3982  }
3983 #ifdef DEBUG_TO_FROM_CHAR
3984  elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age);
3985 #endif
3986  old->valid = false;
3987  StrNCpy(old->str, str, NUM_CACHE_SIZE + 1);
3988  old->age = (++NUMCounter);
3989  /* caller is expected to fill format and Num, then set valid */
3990  return old;
3991  }
3992  else
3993  {
3994 #ifdef DEBUG_TO_FROM_CHAR
3995  elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache);
3996 #endif
3997  ent = NUMCache + n_NUMCache;
3998  ent->valid = false;
3999  StrNCpy(ent->str, str, NUM_CACHE_SIZE + 1);
4000  ent->age = (++NUMCounter);
4001  /* caller is expected to fill format and Num, then set valid */
4002  ++n_NUMCache;
4003  return ent;
4004  }
4005 }
4006 
4007 /* look for an existing NUMCacheEntry matching the given format picture */
4008 static NUMCacheEntry *
4009 NUM_cache_search(const char *str)
4010 {
4011  int i;
4012  NUMCacheEntry *ent;
4013 
4014  /* counter overflow check - paranoia? */
4015  if (NUMCounter >= (INT_MAX - NUM_CACHE_ENTRIES))
4016  {
4017  NUMCounter = 0;
4018 
4019  for (ent = NUMCache; ent < (NUMCache + NUM_CACHE_ENTRIES); ent++)
4020  ent->age = (++NUMCounter);
4021  }
4022 
4023  for (i = 0, ent = NUMCache; i < n_NUMCache; i++, ent++)
4024  {
4025  if (ent->valid && strcmp(ent->str, str) == 0)
4026  {
4027  ent->age = (++NUMCounter);
4028  return ent;
4029  }
4030  }
4031 
4032  return NULL;
4033 }
4034 
4035 /* Find or create a NUMCacheEntry for the given format picture */
4036 static NUMCacheEntry *
4037 NUM_cache_fetch(const char *str)
4038 {
4039  NUMCacheEntry *ent;
4040 
4041  if ((ent = NUM_cache_search(str)) == NULL)
4042  {
4043  /*
4044  * Not in the cache, must run parser and save a new format-picture to
4045  * the cache. Do not mark the cache entry valid until parsing
4046  * succeeds.
4047  */
4048  ent = NUM_cache_getnew(str);
4049 
4050  zeroize_NUM(&ent->Num);
4051 
4052  parse_format(ent->format, str, NUM_keywords,
4053  NULL, NUM_index, NUM_TYPE, &ent->Num);
4054 
4055  ent->valid = true;
4056  }
4057  return ent;
4058 }
4059 
4060 /* ----------
4061  * Cache routine for NUM to_char version
4062  * ----------
4063  */
4064 static FormatNode *
4065 NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
4066 {
4067  FormatNode *format = NULL;
4068  char *str;
4069 
4070  str = text_to_cstring(pars_str);
4071 
4072  if (len > NUM_CACHE_SIZE)
4073  {
4074  /*
4075  * Allocate new memory if format picture is bigger than static cache
4076  * and do not use cache (call parser always)
4077  */
4078  format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode));
4079 
4080  *shouldFree = true;
4081 
4082  zeroize_NUM(Num);
4083 
4084  parse_format(format, str, NUM_keywords,
4085  NULL, NUM_index, NUM_TYPE, Num);
4086  }
4087  else
4088  {
4089  /*
4090  * Use cache buffers
4091  */
4092  NUMCacheEntry *ent = NUM_cache_fetch(str);
4093 
4094  *shouldFree = false;
4095 
4096  format = ent->format;
4097 
4098  /*
4099  * Copy cache to used struct
4100  */
4101  Num->flag = ent->Num.flag;
4102  Num->lsign = ent->Num.lsign;
4103  Num->pre = ent->Num.pre;
4104  Num->post = ent->Num.post;
4105  Num->pre_lsign_num = ent->Num.pre_lsign_num;
4106  Num->need_locale = ent->Num.need_locale;
4107  Num->multi = ent->Num.multi;
4108  Num->zero_start = ent->Num.zero_start;
4109  Num->zero_end = ent->Num.zero_end;
4110  }
4111 
4112 #ifdef DEBUG_TO_FROM_CHAR
4113  /* dump_node(format, len); */
4114  dump_index(NUM_keywords, NUM_index);
4115 #endif
4116 
4117  pfree(str);
4118  return format;
4119 }
4120 
4121 
4122 static char *
4123 int_to_roman(int number)
4124 {
4125  int len = 0,
4126  num = 0;
4127  char *p = NULL,
4128  *result,
4129  numstr[12];
4130 
4131  result = (char *) palloc(16);
4132  *result = '\0';
4133 
4134  if (number > 3999 || number < 1)
4135  {
4136  fill_str(result, '#', 15);
4137  return result;
4138  }
4139  len = snprintf(numstr, sizeof(numstr), "%d", number);
4140 
4141  for (p = numstr; *p != '\0'; p++, --len)
4142  {
4143  num = *p - 49; /* 48 ascii + 1 */
4144  if (num < 0)
4145  continue;
4146 
4147  if (len > 3)
4148  {
4149  while (num-- != -1)
4150  strcat(result, "M");
4151  }
4152  else
4153  {
4154  if (len == 3)
4155  strcat(result, rm100[num]);
4156  else if (len == 2)
4157  strcat(result, rm10[num]);
4158  else if (len == 1)
4159  strcat(result, rm1[num]);
4160  }
4161  }
4162  return result;
4163 }
4164 
4165 
4166 
4167 /* ----------
4168  * Locale
4169  * ----------
4170  */
4171 static void
4173 {
4174  if (Np->Num->need_locale)
4175  {
4176  struct lconv *lconv;
4177 
4178  /*
4179  * Get locales
4180  */
4181  lconv = PGLC_localeconv();
4182 
4183  /*
4184  * Positive / Negative number sign
4185  */
4186  if (lconv->negative_sign && *lconv->negative_sign)
4187  Np->L_negative_sign = lconv->negative_sign;
4188  else
4189  Np->L_negative_sign = "-";
4190 
4191  if (lconv->positive_sign && *lconv->positive_sign)
4192  Np->L_positive_sign = lconv->positive_sign;
4193  else
4194  Np->L_positive_sign = "+";
4195 
4196  /*
4197  * Number decimal point
4198  */
4199  if (lconv->decimal_point && *lconv->decimal_point)
4200  Np->decimal = lconv->decimal_point;
4201 
4202  else
4203  Np->decimal = ".";
4204 
4205  if (!IS_LDECIMAL(Np->Num))
4206  Np->decimal = ".";
4207 
4208  /*
4209  * Number thousands separator
4210  *
4211  * Some locales (e.g. broken glibc pt_BR), have a comma for decimal,
4212  * but "" for thousands_sep, so we set the thousands_sep too.
4213  * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php
4214  */
4215  if (lconv->thousands_sep && *lconv->thousands_sep)
4216  Np->L_thousands_sep = lconv->thousands_sep;
4217  /* Make sure thousands separator doesn't match decimal point symbol. */
4218  else if (strcmp(Np->decimal, ",") !=0)
4219  Np->L_thousands_sep = ",";
4220  else
4221  Np->L_thousands_sep = ".";
4222 
4223  /*
4224  * Currency symbol
4225  */
4226  if (lconv->currency_symbol && *lconv->currency_symbol)
4227  Np->L_currency_symbol = lconv->currency_symbol;
4228  else
4229  Np->L_currency_symbol = " ";
4230  }
4231  else
4232  {
4233  /*
4234  * Default values
4235  */
4236  Np->L_negative_sign = "-";
4237  Np->L_positive_sign = "+";
4238  Np->decimal = ".";
4239 
4240  Np->L_thousands_sep = ",";
4241  Np->L_currency_symbol = " ";
4242  }
4243 }
4244 
/* ----------
 * Return pointer of last relevant number after decimal point
 *	12.0500 --> last relevant is '5'
 *	12.0000 --> last relevant is '.'
 * If there is no decimal point, return NULL (which will result in same
 * behavior as if FM hadn't been specified).
 *
 * 'num' must be a NUL-terminated string; the result points into it.
 * ----------
 */
static char *
get_last_relevant_decnum(char *num)
{
	char	   *result,
			   *p = strchr(num, '.');

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, "get_last_relevant_decnum()");
#endif

	if (!p)
		return NULL;

	/* Start at the decimal point and remember the last non-'0' after it. */
	result = p;

	while (*(++p))
	{
		if (*p != '0')
			result = p;
	}

	return result;
}
4276 
4277 /*
4278  * These macros are used in NUM_processor() and its subsidiary routines.
4279  * OVERLOAD_TEST: true if we've reached end of input string
4280  * AMOUNT_TEST(s): true if at least s bytes remain in string
 *
 * Both expand to expressions over a NUMProc pointer named "Np" and an
 * integer named "input_len", so they can only be used where variables with
 * exactly those names are in scope.  The end of input is taken to be
 * Np->inout + input_len; no NUL terminator is consulted.
4281  */
4282 #define OVERLOAD_TEST (Np->inout_p >= Np->inout + input_len)
4283 #define AMOUNT_TEST(s) (Np->inout_p <= Np->inout + (input_len - (s)))
4284 
4285 /* ----------
4286  * Number extraction for TO_NUMBER()
 *
 * Handles one numeric format position while reading input: starting at
 * Np->inout_p it may skip one blank, read a leading sign, copy one digit or
 * the decimal point into the number buffer (Np->number_p), and look for a
 * sign that trails the last digit.  The sign, once seen, is recorded in
 * the first byte of Np->number ('-' or '+'; ' ' means "none yet").
 *
 *	Np			processing state (input cursor, number buffer, counters)
 *	id			format node id; the debug output below names NUM_0, NUM_9
 *				and NUM_DEC as the expected values
 *	input_len	length of the input, as used by OVERLOAD_TEST/AMOUNT_TEST
 *
 * The function returns silently as soon as the end of input is reached.
4287  * ----------
4288  */
4289 static void
4290 NUM_numpart_from_char(NUMProc *Np, int id, int input_len)
4291 {
 /* set when this call consumed a digit or the decimal point */
4292  bool isread = false;
4293 
4294 #ifdef DEBUG_TO_FROM_CHAR
4295  elog(DEBUG_elog_output, " --- scan start --- id=%s",
4296  (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???");
4297 #endif
4298 
4299  if (OVERLOAD_TEST)
4300  return;
4301 
 /* tolerate (at most) one blank in front of this position */
4302  if (*Np->inout_p == ' ')
4303  Np->inout_p++;
4304 
4305  if (OVERLOAD_TEST)
4306  return;
4307 
4308  /*
4309  * read sign before number
 *
 * Only attempted while no sign has been recorded yet and no digit has
 * been read, and only for digit positions (NUM_0 / NUM_9).
4310  */
4311  if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) &&
4312  (Np->read_pre + Np->read_post) == 0)
4313  {
4314 #ifdef DEBUG_TO_FROM_CHAR
4315  elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s",
4316  *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign);
4317 #endif
4318 
4319  /*
4320  * locale sign
4321  */
4322  if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE)
4323  {
4324  int x = 0;
4325 
4326 #ifdef DEBUG_TO_FROM_CHAR
4327  elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p);
4328 #endif
 /* x is the byte length of the sign string being matched */
4329  if ((x = strlen(Np->L_negative_sign)) &&
4330  AMOUNT_TEST(x) &&
4331  strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
4332  {
4333  Np->inout_p += x;
4334  *Np->number = '-';
4335  }
4336  else if ((x = strlen(Np->L_positive_sign)) &&
4337  AMOUNT_TEST(x) &&
4338  strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
4339  {
4340  Np->inout_p += x;
4341  *Np->number = '+';
4342  }
4343  }
4344  else
4345  {
4346 #ifdef DEBUG_TO_FROM_CHAR
4347  elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p);
4348 #endif
4349 
4350  /*
4351  * simple + - < >
 *
 * '<' counts as a minus sign only when the format uses brackets.
4352  */
4353  if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) &&
4354  *Np->inout_p == '<'))
4355  {
4356  *Np->number = '-'; /* set - */
4357  Np->inout_p++;
4358  }
4359  else if (*Np->inout_p == '+')
4360  {
4361  *Np->number = '+'; /* set + */
4362  Np->inout_p++;
4363  }
4364  }
4365  }
4366 
4367  if (OVERLOAD_TEST)
4368  return;
4369 
4370 #ifdef DEBUG_TO_FROM_CHAR
4371  elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number);
4372 #endif
4373 
4374  /*
4375  * read digit or decimal point
4376  */
4377  if (isdigit((unsigned char) *Np->inout_p))
4378  {
 /*
 * After the decimal point, stop once read_post has reached
 * Num->post; the digit is left unconsumed.
 */
4379  if (Np->read_dec && Np->read_post == Np->Num->post)
4380  return;
4381 
4382  *Np->number_p = *Np->inout_p;
4383  Np->number_p++;
4384 
4385  if (Np->read_dec)
4386  Np->read_post++;
4387  else
4388  Np->read_pre++;
4389 
4390  isread = true;
4391 
4392 #ifdef DEBUG_TO_FROM_CHAR
4393  elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p);
4394 #endif
4395  }
4396  else if (IS_DECIMAL(Np->Num) && Np->read_dec == false)
4397  {
4398  /*
4399  * We need not test IS_LDECIMAL(Np->Num) explicitly here, because
4400  * Np->decimal is always just "." if we don't have a D format token.
4401  * So we just unconditionally match to Np->decimal.
4402  */
4403  int x = strlen(Np->decimal);
4404 
4405 #ifdef DEBUG_TO_FROM_CHAR
4406  elog(DEBUG_elog_output, "Try read decimal point (%c)",
4407  *Np->inout_p);
4408 #endif
4409  if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0)
4410  {
 /*
 * Advance by one less than the matched length; presumably the
 * caller steps over the final byte (the comments further down
 * say NUM_processor() does inout_p++).  The number buffer always
 * gets a plain '.', whatever the locale's decimal string is.
 */
4411  Np->inout_p += x - 1;
4412  *Np->number_p = '.';
4413  Np->number_p++;
4414  Np->read_dec = true;
4415  isread = true;
4416  }
4417  }
4418 
4419  if (OVERLOAD_TEST)
4420  return;
4421 
4422  /*
4423  * Read sign behind "last" number
4424  *
4425  * We need sign detection because determine exact position of post-sign is
4426  * difficult:
4427  *
4428  * FM9999.9999999S -> 123.001- 9.9S -> .5- FM9.999999MI ->
4429  * 5.01-
4430  */
4431  if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0)
4432  {
4433  /*
4434  * locale sign (NUM_S) is always anchored behind a last number, if: -
4435  * locale sign expected - last read char was NUM_0/9 or NUM_DEC - and
4436  * next char is not digit
4437  */
4438  if (IS_LSIGN(Np->Num) && isread &&
4439  (Np->inout_p + 1) < Np->inout + input_len &&
4440  !isdigit((unsigned char) *(Np->inout_p + 1)))
4441  {
4442  int x;
 /* remember the cursor, then peek one byte ahead for the sign */
4443  char *tmp = Np->inout_p++;
4444 
4445 #ifdef DEBUG_TO_FROM_CHAR
4446  elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p);
4447 #endif
4448  if ((x = strlen(Np->L_negative_sign)) &&
4449  AMOUNT_TEST(x) &&
4450  strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
4451  {
4452  Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
4453  *Np->number = '-';
4454  }
4455  else if ((x = strlen(Np->L_positive_sign)) &&
4456  AMOUNT_TEST(x) &&
4457  strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
4458  {
4459  Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
4460  *Np->number = '+';
4461  }
4462  if (*Np->number == ' ')
4463  /* no sign read */
4464  Np->inout_p = tmp;
4465  }
4466 
4467  /*
4468  * try read non-locale sign, it's happen only if format is not exact
4469  * and we cannot determine sign position of MI/PL/SG, an example:
4470  *
4471  * FM9.999999MI -> 5.01-
4472  *
4473  * if (.... && IS_LSIGN(Np->Num)==false) prevents read wrong formats
4474  * like to_number('1 -', '9S') where sign is not anchored to last
4475  * number.
4476  */
4477  else if (isread == false && IS_LSIGN(Np->Num) == false &&
4478  (IS_PLUS(Np->Num) || IS_MINUS(Np->Num)))
4479  {
4480 #ifdef DEBUG_TO_FROM_CHAR
4481  elog(DEBUG_elog_output, "Try read simple post-sign (%c)", *Np->inout_p);
4482 #endif
4483 
4484  /*
4485  * simple + -
 *
 * The cursor is deliberately not advanced here.
4486  */
4487  if (*Np->inout_p == '-' || *Np->inout_p == '+')
4488  /* NUM_processor() do inout_p++ */
4489  *Np->number = *Np->inout_p;
4490  }
4491  }
4492 }
4493 
/*
 * IS_PREDEC_SPACE: true when the leading '0' of the number should not be
 * printed as a digit.  _n is a NUMProc pointer.
 *
 * All of the following must hold: the format is not flagged IS_ZERO, the
 * number cursor is still at the first character (number == number_p), that
 * character is '0', and Num->post is non-zero (presumably the count of
 * digits after the decimal point -- confirm against NUMDesc).  This is what
 * makes '0.1' formatted with '9.9' come out as ' .1'.
 */
4494 #define IS_PREDEC_SPACE(_n) \
4495  (IS_ZERO((_n)->Num)==false && \
4496  (_n)->number == (_n)->number_p && \
4497  *(_n)->number == '0' && \
4498  (_n)->Num->post != 0)
4499 
4500 /* ----------
4501  * Add digit or sign to number-string
4502  * ----------
4503  */
4504 static void
4506 {
4507  int end;
4508 
4509  if (IS_ROMAN(Np->Num))
4510  return;
4511 
4512  /* Note: in this elog() output not set '\0' in 'inout' */
4513 
4514 #ifdef DEBUG_TO_FROM_CHAR
4515 
4516  /*
4517  * Np->num_curr is number of current item in format-picture, it is not
4518  * current position in inout!
4519  */
4520  elog(DEBUG_elog_output,
4521  "SIGN_WROTE: %d, CURRENT: %d, NUMBER_P: \"%s\", INOUT: \"%s\"",
4522  Np->sign_wrote,
4523  Np->num_curr,
4524  Np->number_p,
4525  Np->inout);
4526 #endif
4527  Np->num_in = false;
4528 
4529  /*
4530  * Write sign if real number will write to output Note: IS_PREDEC_SPACE()
4531  * handle "9.9" --> " .1"
4532  */
4533  if (Np->sign_wrote == false &&
4534  (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) &&
4535  (IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.')))
4536  {
4537  if (IS_LSIGN(Np->Num))
4538  {
4539  if (Np->Num->lsign == NUM_LSIGN_PRE)
4540  {
4541  if (Np->sign == '-')
4542  strcpy(Np->inout_p, Np->L_negative_sign);
4543  else
4544  strcpy(Np->inout_p, Np->L_positive_sign);
4545  Np->inout_p += strlen(Np->inout_p);
4546  Np->sign_wrote = true;
4547  }
4548  }
4549  else if (IS_BRACKET(Np->Num))
4550  {
4551  *Np->inout_p = Np->sign == '+' ? ' ' : '<';
4552  ++Np->inout_p;
4553  Np->sign_wrote = true;
4554  }
4555  else if (Np->sign == '+')
4556  {
4557  if (!IS_FILLMODE(Np->Num))
4558  {
4559  *Np->inout_p = ' '; /* Write + */
4560  ++Np->inout_p;
4561  }
4562  Np->sign_wrote = true;
4563  }
4564  else if (Np->sign == '-')
4565  { /* Write - */
4566  *Np->inout_p = '-';
4567  ++Np->inout_p;
4568  Np->sign_wrote = true;
4569  }
4570  }
4571 
4572 
4573  /*
4574  * digits / FM / Zero / Dec. point
4575  */
4576  if (id == NUM_9 || id == NUM_0 || id == NUM_D || id == NUM_DEC)
4577  {
4578  if (Np->num_curr < Np->out_pre_spaces &&
4579  (Np->Num->zero_start > Np->num_curr || !IS_ZERO(Np->Num)))
4580  {
4581  /*
4582  * Write blank space
4583  */
4584  if (!IS_FILLMODE(Np->Num))
4585  {
4586  *Np->inout_p = ' '; /* Write ' ' */
4587  ++Np->inout_p;
4588  }
4589  }
4590  else if (IS_ZERO(Np->Num) &&
4591  Np->num_curr < Np->out_pre_spaces &&
4592  Np->Num->zero_start <= Np->num_curr)
4593  {
4594  /*
4595  * Write ZERO
4596  */
4597  *Np->inout_p = '0'; /* Write '0' */
4598  ++Np->inout_p;
4599  Np->num_in = true;
4600  }
4601  else
4602  {
4603  /*
4604  * Write Decimal point
4605  */
4606  if (*Np->number_p == '.')
4607  {
4608  if (!Np->last_relevant || *Np->last_relevant != '.')
4609  {
4610  strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */
4611  Np->inout_p += strlen(Np->inout_p);
4612  }
4613 
4614  /*
4615  * Ora 'n' -- FM9.9 --> 'n.'
4616  */
4617  else if (IS_FILLMODE(Np->Num) &&
4618  Np->last_relevant && *Np->last_relevant == '.')
4619  {
4620  strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */
4621  Np->inout_p += strlen(Np->inout_p);
4622  }
4623  }
4624  else
4625  {
4626  /*
4627  * Write Digits
4628  */
4629  if (Np->last_relevant && Np->number_p > Np->last_relevant &&
4630  id != NUM_0)
4631  ;
4632 
4633  /*
4634  * '0.1' -- 9.9 --> ' .1'
4635  */
4636  else if (IS_PREDEC_SPACE(Np))
4637  {
4638  if (!IS_FILLMODE(Np->Num))
4639  {
4640  *Np->inout_p = ' ';
4641  ++Np->inout_p;
4642  }
4643 
4644  /*
4645  * '0' -- FM9.9 --> '0.'
4646  */
4647  else if (Np->last_relevant && *Np->last_relevant == '.')
4648  {
4649  *Np->inout_p = '0';
4650  ++Np->inout_p;
4651  }
4652  }
4653  else
4654  {
4655  *Np->inout_p = *Np->number_p; /* Write DIGIT */
4656  ++Np->inout_p;
4657  Np->num_in = true;
4658  }
4659  }
4660  /* do no exceed string length */
4661  if (*Np->number_p)
4662  ++Np->number_p;
4663  }
4664 
4665  end = Np->num_count + (Np->out_pre_spaces ? 1 : 0) + (IS_DECIMAL(Np->Num) ? 1 : 0);
4666 
4667  if (Np->last_relevant && Np->last_relevant == Np->number_p)
4668  end = Np->num_curr;
4669 
4670  if (Np->num_curr + 1 == end)
4671  {
4672  if (Np->sign_wrote == true && IS_BRACKET(Np->Num))
4673  {
4674  *Np->inout_p = Np->sign == '+' ? ' ' : '>';
4675  ++Np->inout_p;
4676  }
4677  else if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_POST)
4678  {
4679  if (Np->sign == '-')
4680  strcpy(Np->inout_p, Np->L_negative_sign);
4681  else
4682  strcpy(Np->inout_p, Np->L_positive_sign);
4683  Np->inout_p += strlen(Np->inout_p);
4684  }
4685  }
4686  }
4687 
4688  ++Np->num_curr;
4689 }
4690 
4691 /*
4692  * Skip over "n" input characters, but only if they aren't numeric data
4693  */
4694 static void
4695 NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len)
4696 {
4697  while (n-- > 0)
4698  {
4699  if (OVERLOAD_TEST)
4700  break; /* end of input */
4701  if (strchr("0123456789.,+-", *Np->inout_p) != NULL)
4702  break; /* it's a data character */
4703  Np->inout_p += pg_mblen(Np->inout_p);
4704  }
4705 }
4706 
/*
 * NUM_processor
 *
 * Walk the format-node array "node" and either build formatted output
 * (is_to_char true) or scan input text into a number string (is_to_char
 * false).
 *
 *	node		format nodes; the walk stops at NODE_TYPE_END (and, when
 *				reading, as soon as OVERLOAD_TEST reports no input left)
 *	Num			number description; modified here (zero_start, flag, lsign,
 *				pre, post, pre_lsign_num may all be changed)
 *	inout		to_char: buffer the result is written to;
 *				to_number: the text being scanned
 *	number		to_char: the number string to format;
 *				to_number: buffer receiving the scanned number, whose first
 *				char is reserved for the sign
 *	input_len	passed on to the from-char helpers; presumably also read by
 *				the OVERLOAD_TEST / AMOUNT_TEST macros, whose definitions are
 *				not visible here
 *	to_char_out_pre_spaces, sign
 *				copied into Np->out_pre_spaces / Np->sign for to_char only
 *	collid		not referenced in this function body
 *
 * Returns "inout" (NUL-terminated) for to_char and "number" for to_number.
 * Raises ERROR (feature not supported) when EEEE or RN is used for input.
 */
4707 static char *
4708 NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
4709  char *number, int input_len, int to_char_out_pre_spaces,
4710  int sign, bool is_to_char, Oid collid)
4711 {
4712  FormatNode *n;
4713  NUMProc _Np,
4714  *Np = &_Np;
4715  const char *pattern;
4716  int pattern_len;
4717 
4718  MemSet(Np, 0, sizeof(NUMProc));
4719 
4720  Np->Num = Num;
4721  Np->is_to_char = is_to_char;
4722  Np->number = number;
4723  Np->inout = inout;
4724  Np->last_relevant = NULL;
4725  Np->read_post = 0;
4726  Np->read_pre = 0;
4727  Np->read_dec = false;
4728 
 /*
  * NOTE(review): zero_start is decremented once per call when nonzero;
  * presumably a 1-based to 0-based adjustment -- confirm where it is set.
  */
4729  if (Np->Num->zero_start)
4730  --Np->Num->zero_start;
4731 
4732  if (IS_EEEE(Np->Num))
4733  {
4734  if (!Np->is_to_char)
4735  ereport(ERROR,
4736  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4737  errmsg("\"EEEE\" not supported for input")));
 /* EEEE output: the number string is copied to inout unchanged */
4738  return strcpy(inout, number);
4739  }
4740 
4741  /*
4742  * Roman correction
4743  */
4744  if (IS_ROMAN(Np->Num))
4745  {
4746  if (!Np->is_to_char)
4747  ereport(ERROR,
4748  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4749  errmsg("\"RN\" not supported for input")));
4750 
4751  Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post =
4752  Np->Num->pre = Np->out_pre_spaces = Np->sign = 0;
4753 
 /* drop every flag except FILLMODE (if it was set), then set ROMAN */
4754  if (IS_FILLMODE(Np->Num))
4755  {
4756  Np->Num->flag = 0;
4757  Np->Num->flag |= NUM_F_FILLMODE;
4758  }
4759  else
4760  Np->Num->flag = 0;
4761  Np->Num->flag |= NUM_F_ROMAN;
4762  }
4763 
4764  /*
4765  * Sign
4766  */
4767  if (is_to_char)
4768  {
4769  Np->sign = sign;
4770 
4771  /* MI/PL/SG - write sign itself and not in number */
4772  if (IS_PLUS(Np->Num) || IS_MINUS(Np->Num))
4773  {
4774  if (IS_PLUS(Np->Num) && IS_MINUS(Np->Num) == false)
4775  Np->sign_wrote = false; /* need sign */
4776  else
4777  Np->sign_wrote = true; /* needn't sign */
4778  }
4779  else
4780  {
4781  if (Np->sign != '-')
4782  {
4783  if (IS_BRACKET(Np->Num) && IS_FILLMODE(Np->Num))
4784  Np->Num->flag &= ~NUM_F_BRACKET;
4785  if (IS_MINUS(Np->Num))
4786  Np->Num->flag &= ~NUM_F_MINUS;
4787  }
4788  else if (Np->sign != '+' && IS_PLUS(Np->Num))
4789  Np->Num->flag &= ~NUM_F_PLUS;
4790 
4791  if (Np->sign == '+' && IS_FILLMODE(Np->Num) && IS_LSIGN(Np->Num) == false)
4792  Np->sign_wrote = true; /* needn't sign */
4793  else
4794  Np->sign_wrote = false; /* need sign */
4795 
4796  if (Np->Num->lsign == NUM_LSIGN_PRE && Np->Num->pre == Np->Num->pre_lsign_num)
4797  Np->Num->lsign = NUM_LSIGN_POST;
4798  }
4799  }
4800  else
4801  Np->sign = false;
4802 
4803  /*
4804  * Count
4805  */
4806  Np->num_count = Np->Num->post + Np->Num->pre - 1;
4807 
4808  if (is_to_char)
4809  {
4810  Np->out_pre_spaces = to_char_out_pre_spaces;
4811 
4812  if (IS_FILLMODE(Np->Num) && IS_DECIMAL(Np->Num))
4813  {
 /*
  * NOTE(review): as shown here Np->last_relevant is still NULL at this
  * point (it was cleared above and nothing assigns it), so the test
  * below can never be true.  The embedded listing numbers skip 4814,
  * so an assignment to last_relevant appears to have been lost in
  * extraction -- confirm against the upstream file.
  */
4815 
4816  /*
4817  * If any '0' specifiers are present, make sure we don't strip
4818  * those digits.
4819  */
4820  if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces)
4821  {
4822  char *last_zero;
4823 
4824  last_zero = Np->number + (Np->Num->zero_end - Np->out_pre_spaces);
4825  if (Np->last_relevant < last_zero)
4826  Np->last_relevant = last_zero;
4827  }
4828  }
4829 
4830  if (Np->sign_wrote == false && Np->out_pre_spaces == 0)
4831  ++Np->num_count;
4832  }
4833  else
4834  {
 /* to_number: start the result as " " -- slot 0 is kept for the sign */
4835  Np->out_pre_spaces = 0;
4836  *Np->number = ' '; /* sign space */
4837  *(Np->number + 1) = '\0';
4838  }
4839 
4840  Np->num_in = 0;
4841  Np->num_curr = 0;
4842 
4843 #ifdef DEBUG_TO_FROM_CHAR
4844  elog(DEBUG_elog_output,
4845  "\n\tSIGN: '%c'\n\tNUM: '%s'\n\tPRE: %d\n\tPOST: %d\n\tNUM_COUNT: %d\n\tNUM_PRE: %d\n\tSIGN_WROTE: %s\n\tZERO: %s\n\tZERO_START: %d\n\tZERO_END: %d\n\tLAST_RELEVANT: %s\n\tBRACKET: %s\n\tPLUS: %s\n\tMINUS: %s\n\tFILLMODE: %s\n\tROMAN: %s\n\tEEEE: %s",
4846  Np->sign,
4847  Np->number,
4848  Np->Num->pre,
4849  Np->Num->post,
4850  Np->num_count,
4851  Np->out_pre_spaces,
4852  Np->sign_wrote ? "Yes" : "No",
4853  IS_ZERO(Np->Num) ? "Yes" : "No",
4854  Np->Num->zero_start,
4855  Np->Num->zero_end,
4856  Np->last_relevant ? Np->last_relevant : "<not set>",
4857  IS_BRACKET(Np->Num) ? "Yes" : "No",
4858  IS_PLUS(Np->Num) ? "Yes" : "No",
4859  IS_MINUS(Np->Num) ? "Yes" : "No",
4860  IS_FILLMODE(Np->Num) ? "Yes" : "No",
4861  IS_ROMAN(Np->Num) ? "Yes" : "No",
4862  IS_EEEE(Np->Num) ? "Yes" : "No"
4863  );
4864 #endif
4865 
4866  /*
4867  * Locale
4868  */
4869  NUM_prepare_locale(Np);
4870 
4871  /*
4872  * Processor direct cycle
4873  */
4874  if (Np->is_to_char)
4875  Np->number_p = Np->number;
4876  else
4877  Np->number_p = Np->number + 1; /* first char is space for sign */
4878 
4879  for (n = node, Np->inout_p = Np->inout; n->type != NODE_TYPE_END; n++)
4880  {
4881  if (!Np->is_to_char)
4882  {
4883  /*
4884  * Check at least one byte remains to be scanned. (In actions
4885  * below, must use AMOUNT_TEST if we want to read more bytes than
4886  * that.)
4887  */
4888  if (OVERLOAD_TEST)
4889  break;
4890  }
4891 
4892  /*
4893  * Format pictures actions
4894  */
4895  if (n->type == NODE_TYPE_ACTION)
4896  {
4897  /*
4898  * Create/read digit/zero/blank/sign/special-case
4899  *
4900  * 'NUM_S' note: The locale sign is anchored to number and we
4901  * read/write it when we work with first or last number
4902  * (NUM_0/NUM_9). This is why NUM_S is missing in switch().
4903  *
4904  * Notice the "Np->inout_p++" at the bottom of the loop. This is
4905  * why most of the actions advance inout_p one less than you might
4906  * expect. In cases where we don't want that increment to happen,
4907  * a switch case ends with "continue" not "break".
4908  */
4909  switch (n->key->id)
4910  {
4911  case NUM_9:
4912  case NUM_0:
4913  case NUM_DEC:
4914  case NUM_D:
4915  if (Np->is_to_char)
4916  {
4917  NUM_numpart_to_char(Np, n->key->id);
4918  continue; /* for() */
4919  }
4920  else
4921  {
4922  NUM_numpart_from_char(Np, n->key->id, input_len);
4923  break; /* switch() case: */
4924  }
4925 
4926  case NUM_COMMA:
4927  if (Np->is_to_char)
4928  {
4929  if (!Np->num_in)
4930  {
4931  if (IS_FILLMODE(Np->Num))
4932  continue;
4933  else
4934  *Np->inout_p = ' ';
4935  }
4936  else
4937  *Np->inout_p = ',';
4938  }
4939  else
4940  {
4941  if (!Np->num_in)
4942  {
4943  if (IS_FILLMODE(Np->Num))
4944  continue;
4945  }
 /* only consume the input character if it really is a comma */
4946  if (*Np->inout_p != ',')
4947  continue;
4948  }
4949  break;
4950 
4951  case NUM_G:
4952  pattern = Np->L_thousands_sep;
4953  pattern_len = strlen(pattern);
4954  if (Np->is_to_char)
4955  {
4956  if (!Np->num_in)
4957  {
4958  if (IS_FILLMODE(Np->Num))
4959  continue;
4960  else
4961  {
4962  /* just in case there are MB chars */
4963  pattern_len = pg_mbstrlen(pattern);
4964  memset(Np->inout_p, ' ', pattern_len);
4965  Np->inout_p += pattern_len - 1;
4966  }
4967  }
4968  else
4969  {
4970  strcpy(Np->inout_p, pattern);
4971  Np->inout_p += pattern_len - 1;
4972  }
4973  }
4974  else
4975  {
4976  if (!Np->num_in)
4977  {
4978  if (IS_FILLMODE(Np->Num))
4979  continue;
4980  }
4981 
4982  /*
4983  * Because L_thousands_sep typically contains data
4984  * characters (either '.' or ','), we can't use
4985  * NUM_eat_non_data_chars here. Instead skip only if
4986  * the input matches L_thousands_sep.
4987  */
4988  if (AMOUNT_TEST(pattern_len) &&
4989  strncmp(Np->inout_p, pattern, pattern_len) == 0)
4990  Np->inout_p += pattern_len - 1;
4991  else
4992  continue;
4993  }
4994  break;
4995 
4996  case NUM_L:
4997  pattern = Np->L_currency_symbol;
4998  if (Np->is_to_char)
4999  {
5000  strcpy(Np->inout_p, pattern);
5001  Np->inout_p += strlen(pattern) - 1;
5002  }
5003  else
5004  {
5005  NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len);
5006  continue;
5007  }
5008  break;
5009 
5010  case NUM_RN:
 /* fill mode copies the string as is; otherwise pad to 15 columns */
5011  if (IS_FILLMODE(Np->Num))
5012  {
5013  strcpy(Np->inout_p, Np->number_p);
5014  Np->inout_p += strlen(Np->inout_p) - 1;
5015  }
5016  else
5017  {
5018  sprintf(Np->inout_p, "%15s", Np->number_p);
5019  Np->inout_p += strlen(Np->inout_p) - 1;
5020  }
5021  break;
5022 
5023  case NUM_rn:
5024  if (IS_FILLMODE(Np->Num))
5025  {
5026  strcpy(Np->inout_p, asc_tolower_z(Np->number_p));
5027  Np->inout_p += strlen(Np->inout_p) - 1;
5028  }
5029  else
5030  {
5031  sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p));
5032  Np->inout_p += strlen(Np->inout_p) - 1;
5033  }
5034  break;
5035 
5036  case NUM_th:
 /* no ordinal suffix for roman, '#' overflow, negative or decimal */
5037  if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
5038  Np->sign == '-' || IS_DECIMAL(Np->Num))
5039  continue;
5040 
5041  if (Np->is_to_char)
5042  {
5043  strcpy(Np->inout_p, get_th(Np->number, TH_LOWER));
5044  Np->inout_p += 1;
5045  }
5046  else
5047  {
5048  /* All variants of 'th' occupy 2 characters */
5049  NUM_eat_non_data_chars(Np, 2, input_len);
5050  continue;
5051  }
5052  break;
5053 
5054  case NUM_TH:
5055  if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
5056  Np->sign == '-' || IS_DECIMAL(Np->Num))
5057  continue;
5058 
5059  if (Np->is_to_char)
5060  {
5061  strcpy(Np->inout_p, get_th(Np->number, TH_UPPER));
5062  Np->inout_p += 1;
5063  }
5064  else
5065  {
5066  /* All variants of 'TH' occupy 2 characters */
5067  NUM_eat_non_data_chars(Np, 2, input_len);
5068  continue;
5069  }
5070  break;
5071 
5072  case NUM_MI:
5073  if (Np->is_to_char)
5074  {
5075  if (Np->sign == '-')
5076  *Np->inout_p = '-';
5077  else if (IS_FILLMODE(Np->Num))
5078  continue;
5079  else
5080  *Np->inout_p = ' ';
5081  }
5082  else
5083  {
 /* a '-' in the input is recorded in the reserved sign slot */
5084  if (*Np->inout_p == '-')
5085  *Np->number = '-';
5086  else
5087  {
5088  NUM_eat_non_data_chars(Np, 1, input_len);
5089  continue;
5090  }
5091  }
5092  break;
5093 
5094  case NUM_PL:
5095  if (Np->is_to_char)
5096  {
5097  if (Np->sign == '+')
5098  *Np->inout_p = '+';
5099  else if (IS_FILLMODE(Np->Num))
5100  continue;
5101  else
5102  *Np->inout_p = ' ';
5103  }
5104  else
5105  {
5106  if (*Np->inout_p == '+')
5107  *Np->number = '+';
5108  else
5109  {
5110  NUM_eat_non_data_chars(Np, 1, input_len);
5111  continue;
5112  }
5113  }
5114  break;
5115 
5116  case NUM_SG:
5117  if (Np->is_to_char)
5118  *Np->inout_p = Np->sign;
5119  else
5120  {
5121  if (*Np->inout_p == '-')
5122  *Np->number = '-';
5123  else if (*Np->inout_p == '+')
5124  *Np->number = '+';
5125  else
5126  {
5127  NUM_eat_non_data_chars(Np, 1, input_len);
5128  continue;
5129  }
5130  }
5131  break;
5132 
5133  default:
5134  continue;
5135  break;
5136  }
5137  }
5138  else
5139  {
5140  /*
5141  * In TO_CHAR, non-pattern characters in the format are copied to
5142  * the output. In TO_NUMBER, we skip one input character for each
5143  * non-pattern format character, whether or not it matches the
5144  * format character.
5145  */
5146  if (Np->is_to_char)
5147  {
5148  strcpy(Np->inout_p, n->character);
5149  Np->inout_p += strlen(Np->inout_p);
5150  }
5151  else
5152  {
5153  Np->inout_p += pg_mblen(Np->inout_p);
5154  }
5155  continue;
5156  }
5157  Np->inout_p++;
5158  }
5159 
5160  if (Np->is_to_char)
5161  {
5162  *Np->inout_p = '\0';
5163  return Np->inout;
5164  }
5165  else
5166  {
 /* drop a trailing '.', otherwise terminate at the write position */
5167  if (*(Np->number_p - 1) == '.')
5168  *(Np->number_p - 1) = '\0';
5169  else
5170  *Np->number_p = '\0';
5171 
5172  /*
5173  * Correction - precision of dec. number
5174  */
5175  Np->Num->post = Np->read_post;
5176 
5177 #ifdef DEBUG_TO_FROM_CHAR
5178  elog(DEBUG_elog_output, "TO_NUMBER (number): '%s'", Np->number);
5179 #endif
5180  return Np->number;
5181  }
5182 }
5183 
/* ----------
 * MACRO: common prologue shared by all NUM to_char() variants.
 *
 * Relies on "fmt", "result", "format", "Num" and "shouldFree" being declared
 * by the enclosing function.  An empty or oversized format picture makes the
 * enclosing function return an empty text value; otherwise the result buffer
 * is allocated with palloc0() and the format is obtained from NUM_cache().
 * ----------
 */
#define NUM_TOCHAR_prepare \
do { \
	int		fmt_len = VARSIZE_ANY_EXHDR(fmt); \
	\
	if (fmt_len <= 0 || fmt_len >= (INT_MAX - VARHDRSZ) / NUM_MAX_ITEM_SIZ) \
		PG_RETURN_TEXT_P(cstring_to_text("")); \
	\
	result = (text *) palloc0((fmt_len * NUM_MAX_ITEM_SIZ) + 1 + VARHDRSZ); \
	format = NUM_cache(fmt_len, &Num, fmt, &shouldFree); \
} while (0)
5197 
/* ----------
 * MACRO: common epilogue shared by all NUM to_char() variants.
 *
 * Relies on "format", "Num", "result", "numstr", "out_pre_spaces", "sign"
 * and "shouldFree" being declared by the enclosing function.  Runs
 * NUM_processor() in to_char mode, frees the format if asked to, and then
 * sets the length of the text result.
 * ----------
 */
#define NUM_TOCHAR_finish \
do { \
	int		out_len; \
	\
	NUM_processor(format, &Num, VARDATA(result), numstr, 0, out_pre_spaces, sign, true, PG_GET_COLLATION()); \
	\
	if (shouldFree) \
		pfree(format); \
	\
	/* \
	 * The processor leaves a null-terminated string in the data area; \
	 * turn that into a standard text value.  The allocation is usually \
	 * far larger than needed, but shrinking it is not worth a realloc. \
	 */ \
	out_len = strlen(VARDATA(result)); \
	SET_VARSIZE(result, out_len + VARHDRSZ); \
} while (0)
5219 
5220 /* -------------------
5221  * NUMERIC to_number() (convert string to numeric)
5222  * -------------------
5223  */
/*
 * Takes the format picture from argument 1, scans the input text with
 * NUM_processor() (is_to_char = false) into a palloc'd number string, and
 * passes that string on together with a typmod built from the precision
 * and scale that were read.  Returns SQL NULL when the format is empty or
 * too long for the buffer-size computation.
 *
 * NOTE(review): several lines of this function are absent from this view
 * (the embedded listing numbers skip 5225, 5227, 5256, 5258, 5264, 5266,
 * 5269 and 5272): the function-name line, the declaration of "value", and
 * the beginnings of the calls that build "result", "a", "b" and "x".
 * Restore them from the upstream file before relying on this text.
 */
5224 Datum
5226 {
5228  text *fmt = PG_GETARG_TEXT_PP(1);
5229  NUMDesc Num;
5230  Datum result;
5231  FormatNode *format;
5232  char *numstr;
5233  bool shouldFree;
5234  int len = 0;
5235  int scale,
5236  precision;
5237 
5238  len = VARSIZE_ANY_EXHDR(fmt);
5239 
 /* reject an empty format, or one whose buffer size would overflow int */
5240  if (len <= 0 || len >= INT_MAX / NUM_MAX_ITEM_SIZ)
5241  PG_RETURN_NULL();
5242 
5243  format = NUM_cache(len, &Num, fmt, &shouldFree);
5244 
5245  numstr = (char *) palloc((len * NUM_MAX_ITEM_SIZ) + 1);
5246 
 /* NOTE(review): "value" is used here but its declaration is not in view */
5247  NUM_processor(format, &Num, VARDATA_ANY(value), numstr,
5248  VARSIZE_ANY_EXHDR(value), 0, 0, false, PG_GET_COLLATION());
5249 
 /* Num.post was updated by NUM_processor() to the digits actually read */
5250  scale = Num.post;
5251  precision = Num.pre + Num.multi + scale;
5252 
5253  if (shouldFree)
5254  pfree(format);
5255 
 /*
  * NOTE(review): the start of this call is missing from this view; the
  * remaining arguments pass numstr and a typmod packed as
  * ((precision << 16) | scale) + VARHDRSZ.
  */
5257  CStringGetDatum(numstr),
5259  Int32GetDatum(((precision << 16) | scale) + VARHDRSZ));
5260 
5261  if (IS_MULTI(&Num))
5262  {
 /*
  * NOTE(review): the declarations/initialisers of "a" and "b" and the
  * calls combining them are only partly visible.  The fragments use the
  * constants 10 and -Num.multi and then combine "result" with "x".
  */
5263  Numeric x;
5265  Int32GetDatum(10)));
5267  Int32GetDatum(-Num.multi)));
5268 
5270  NumericGetDatum(a),
5271  NumericGetDatum(b)));
5273  result,
5274  NumericGetDatum(x));
5275  }
5276 
5277  pfree(numstr);
5278  return result;
5279 }
5280 
5281 /* ------------------
5282  * NUMERIC to_char()