PostgreSQL Source Code  git master
formatting.c
Go to the documentation of this file.
1 /* -----------------------------------------------------------------------
2  * formatting.c
3  *
4  * src/backend/utils/adt/formatting.c
5  *
6  *
7  * Portions Copyright (c) 1999-2018, PostgreSQL Global Development Group
8  *
9  *
10  * TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
11  *
12  * The PostgreSQL routines for a timestamp/int/float/numeric formatting,
13  * inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
14  *
15  *
16  * Cache & Memory:
17  * The routines use their own internal cache for format pictures.
18  *
19  * The cache uses a static buffer and is persistent across transactions. If
20  * the format picture is bigger than the cache buffer, the parser is called
21  * every time.
22  *
23  * NOTE for Number version:
24  * Everything in this version is implemented as keywords (=> suffixes
25  * are not used), because a format picture describes *one* item (number)
26  * only. This differs from the timestamp version, where each keyword can
27  * have a suffix.
28  *
29  * NOTE for Timestamp routines:
30  * In this module the POSIX 'struct tm' type is *not* used, but rather the
31  * PgSQL type, which has tm_mon based on one (*not* zero) and a year that
32  * is *not* based on 1900 but is the full year number.
33  * The module supports AD / BC / AM / PM.
34  *
35  * Supported types for to_char():
36  *
37  * Timestamp, Numeric, int4, int8, float4, float8
38  *
39  * Supported types for reverse conversion:
40  *
41  * Timestamp - to_timestamp()
42  * Date - to_date()
43  * Numeric - to_number()
44  *
45  *
46  * Karel Zak
47  *
48  * TODO
49  * - better number building (formatting) / parsing, now it isn't
50  * ideal code
51  * - use Assert()
52  * - add support for abstime
53  * - add support for roman number to standard number conversion
54  * - add support for number spelling
55  * - add support for string to string formatting (we must be better
56  * than Oracle :-),
57  * to_char('Hello', 'X X X X X') -> 'H e l l o'
58  *
59  * -----------------------------------------------------------------------
60  */
61 
62 #ifdef DEBUG_TO_FROM_CHAR
63 #define DEBUG_elog_output DEBUG3
64 #endif
65 
66 #include "postgres.h"
67 
68 #include <ctype.h>
69 #include <unistd.h>
70 #include <math.h>
71 #include <float.h>
72 #include <limits.h>
73 
74 /*
75  * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
76  * declare them in <wchar.h>.
77  */
78 #ifdef HAVE_WCHAR_H
79 #include <wchar.h>
80 #endif
81 #ifdef HAVE_WCTYPE_H
82 #include <wctype.h>
83 #endif
84 
85 #ifdef USE_ICU
86 #include <unicode/ustring.h>
87 #endif
88 
89 #include "catalog/pg_collation.h"
90 #include "mb/pg_wchar.h"
91 #include "utils/builtins.h"
92 #include "utils/date.h"
93 #include "utils/datetime.h"
94 #include "utils/formatting.h"
95 #include "utils/int8.h"
96 #include "utils/numeric.h"
97 #include "utils/pg_locale.h"
98 
/* ----------
 * Which family of routines a format picture belongs to
 * ----------
 */
#define DCH_TYPE				1	/* DATE-TIME version */
#define NUM_TYPE				2	/* NUMBER version */

/* ----------
 * KeyWord index
 *
 * The index tables are sized '~' - ' ' and are addressed by (char - ' ').
 * KeyWord_INDEX_FILTER() yields 1 only for characters strictly between
 * ' ' and '~' (so neither ' ' nor '~' itself is accepted) and 0 otherwise.
 * ----------
 */
#define KeyWord_INDEX_SIZE		('~' - ' ')
#define KeyWord_INDEX_FILTER(_c)	(((_c) > ' ' && (_c) < '~') ? 1 : 0)

/* ----------
 * Maximal length of one node
 * ----------
 */
#define DCH_MAX_ITEM_SIZ		12	/* max localized day name */
#define NUM_MAX_ITEM_SIZ		8	/* roman number (RN has 15 chars);
									 * NOTE(review): 15 exceeds this value,
									 * confirm how RN output is sized */

/* ----------
 * Buffer widths for float formatting (more is in float.c)
 * ----------
 */
#define MAXFLOATWIDTH			60
#define MAXDOUBLEWIDTH			500
126 
127 
/* ----------
 * Format parser structs
 * ----------
 */

/*
 * KeySuffix: one entry of a suffix table (see DCH_suff).
 *
 * The name is only ever read (it is compared with strncmp() by
 * suff_search()) and the tables are initialized from string literals, so it
 * is declared const, matching KeyWord.name.  A NULL name terminates a table.
 */
typedef struct
{
	const char *name;			/* suffix string */
	int			len,			/* suffix length */
				id,				/* used in node->suffix */
				type;			/* prefix / postfix */
} KeySuffix;
139 
/* ----------
 * FromCharDateMode
 * ----------
 *
 * This value is used to nominate one of several distinct (and mutually
 * exclusive) date conventions that a keyword can belong to.
 */
typedef enum
{
	FROM_CHAR_DATE_NONE = 0,	/* Value does not affect date mode. */
	FROM_CHAR_DATE_GREGORIAN,	/* Gregorian (day, month, year) style date */
	FROM_CHAR_DATE_ISOWEEK		/* ISO 8601 week date */
} FromCharDateMode;

/*
 * KeyWord: one entry of a keyword table (DCH_keywords / NUM_keywords).
 *
 * The date-time table initializes all five members in the order below; the
 * number table supplies only the first three, leaving is_digit and date_mode
 * implicitly zero (false / FROM_CHAR_DATE_NONE).
 */
typedef struct
{
	const char *name;			/* keyword text; NULL ends a table */
	int			len;			/* number of chars of name that are compared */
	int			id;				/* DCH_xxx / NUM_xxx code of the keyword */
	bool		is_digit;		/* NOTE(review): presumably "keyword stands
								 * for a numeric field" - confirm with users */
	FromCharDateMode date_mode; /* date convention the keyword belongs to */
} KeyWord;
162 
163 typedef struct
164 {
165  int type; /* NODE_TYPE_XXX, see below */
166  const KeyWord *key; /* if type is ACTION */
167  char character[MAX_MULTIBYTE_CHAR_LEN + 1]; /* if type is CHAR */
168  int suffix; /* keyword prefix/suffix code, if any */
169 } FormatNode;
170 
171 #define NODE_TYPE_END 1
172 #define NODE_TYPE_ACTION 2
173 #define NODE_TYPE_CHAR 3
174 
175 #define SUFFTYPE_PREFIX 1
176 #define SUFFTYPE_POSTFIX 2
177 
178 #define CLOCK_24_HOUR 0
179 #define CLOCK_12_HOUR 1
180 
181 
/* ----------
 * Full month names, January first; the list ends with NULL
 * ----------
 */
static const char *const months_full[] = {
	"January",
	"February",
	"March",
	"April",
	"May",
	"June",
	"July",
	"August",
	"September",
	"October",
	"November",
	"December",
	NULL
};

/* Three-letter day names, Sunday first; the list ends with NULL */
static const char *const days_short[] = {
	"Sun",
	"Mon",
	"Tue",
	"Wed",
	"Thu",
	"Fri",
	"Sat",
	NULL
};
194 
/* ----------
 * AD / BC
 * ----------
 * There is no year 0 AD; years go straight from 1 BC to 1 AD.  For a
 * non-interval value, a positive year is returned unchanged and a year <= 0
 * is turned into the positive number 1 - year (0 -> 1, -1 -> 2, ...).
 * For interval years, the year is returned as-is.
 */
#define ADJUST_YEAR(year, is_interval)	\
	((is_interval) ? (year) : ((year) > 0 ? (year) : (1 - (year))))

/* AD spellings */
#define A_D_STR		"A.D."
#define a_d_STR		"a.d."
#define AD_STR		"AD"
#define ad_STR		"ad"

/* BC spellings */
#define B_C_STR		"B.C."
#define b_c_STR		"b.c."
#define BC_STR		"BC"
#define bc_STR		"bc"

/*
 * AD / BC strings for seq_search.
 *
 * Two variants are provided: the standard form without periods and the long
 * form with periods.
 *
 * In both arrays every AD spelling sits at an even index and every BC
 * spelling at an odd index, so (index of the match) % 2 is the BC flag.
 */
static const char *const adbc_strings[] = {
	ad_STR, bc_STR,
	AD_STR, BC_STR,
	NULL
};

static const char *const adbc_strings_long[] = {
	a_d_STR, b_c_STR,
	A_D_STR, B_C_STR,
	NULL
};
226 
/* ----------
 * AM / PM
 * ----------
 */

/* AM spellings */
#define A_M_STR		"A.M."
#define a_m_STR		"a.m."
#define AM_STR		"AM"
#define am_STR		"am"

/* PM spellings */
#define P_M_STR		"P.M."
#define p_m_STR		"p.m."
#define PM_STR		"PM"
#define pm_STR		"pm"

/*
 * AM / PM strings for seq_search.
 *
 * Two variants are provided: the standard form without periods and the long
 * form with periods.
 *
 * In both arrays every AM spelling sits at an even index and every PM
 * spelling at an odd index, so (index of the match) % 2 is the PM flag.
 */
static const char *const ampm_strings[] = {
	am_STR, pm_STR,
	AM_STR, PM_STR,
	NULL
};

static const char *const ampm_strings_long[] = {
	a_m_STR, p_m_STR,
	A_M_STR, P_M_STR,
	NULL
};
253 
/* ----------
 * Months as roman numerals
 *
 * Listed from XII down to I: seq_search (in FROM_CHAR) must try 'VIII'
 * before 'V', so longer numerals have to come ahead of their prefixes.
 * ----------
 */
static const char *const rm_months_upper[] = {
	"XII", "XI", "X", "IX", "VIII", "VII",
	"VI", "V", "IV", "III", "II", "I",
	NULL
};

static const char *const rm_months_lower[] = {
	"xii", "xi", "x", "ix", "viii", "vii",
	"vi", "v", "iv", "iii", "ii", "i",
	NULL
};

/* ----------
 * Roman numbers: spellings of 1..9 units, tens and hundreds
 * ----------
 */
static const char *const rm1[] = {
	"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX",
	NULL
};
static const char *const rm10[] = {
	"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC",
	NULL
};
static const char *const rm100[] = {
	"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM",
	NULL
};

/* ----------
 * Ordinal postfixes, upper and lower case
 * ----------
 */
static const char *const numTH[] = {
	"ST", "ND", "RD", "TH",
	NULL
};
static const char *const numth[] = {
	"st", "nd", "rd", "th",
	NULL
};
280 
/* ----------
 * Flags & Options:
 * ----------
 */

/* Letter-case selectors */
#define ONE_UPPER		1		/* Name */
#define ALL_UPPER		2		/* NAME */
#define ALL_LOWER		3		/* name */

#define FULL_SIZ		0

/* Lengths of the longest month / day / roman-month spellings */
#define MAX_MONTH_LEN	9
#define MAX_MON_LEN		3
#define MAX_DAY_LEN		9
#define MAX_DY_LEN		3
#define MAX_RM_LEN		4

/* Case of the ordinal postfix (see numTH / numth) */
#define TH_UPPER		1
#define TH_LOWER		2
299 
/* ----------
 * Number description struct
 *
 * Summary of a number format picture; the members are filled in while the
 * picture's nodes are examined.
 * ----------
 */
typedef struct
{
	int			pre;			/* (count) numbers before decimal */
	int			post;			/* (count) numbers after decimal */
	int			lsign;			/* want locales sign */
	int			flag;			/* number parameters */
	int			pre_lsign_num;	/* tmp value for lsign */
	int			multi;			/* multiplier for 'V' */
	int			zero_start;		/* position of first zero */
	int			zero_end;		/* position of last zero */
	int			need_locale;	/* needs it locale */
} NUMDesc;
316 
/* ----------
 * Flags for NUMBER version (bits of NUMDesc.flag; bit 0 is unused)
 * ----------
 */
#define NUM_F_DECIMAL		0x0002
#define NUM_F_LDECIMAL		0x0004
#define NUM_F_ZERO			0x0008
#define NUM_F_BLANK			0x0010
#define NUM_F_FILLMODE		0x0020
#define NUM_F_LSIGN			0x0040
#define NUM_F_BRACKET		0x0080
#define NUM_F_MINUS			0x0100
#define NUM_F_PLUS			0x0200
#define NUM_F_ROMAN			0x0400
#define NUM_F_MULTI			0x0800
#define NUM_F_PLUS_POST		0x1000
#define NUM_F_MINUS_POST	0x2000
#define NUM_F_EEEE			0x4000

/* Values of NUMDesc.lsign */
#define NUM_LSIGN_PRE	(-1)
#define NUM_LSIGN_POST	1
#define NUM_LSIGN_NONE	0

/* ----------
 * Tests
 *
 * Each takes a pointer to a struct with a "flag" member and yields the
 * masked bit itself (non-zero when set), not a normalized 0/1.
 * ----------
 */
#define IS_DECIMAL(_f)	((_f)->flag & NUM_F_DECIMAL)
#define IS_LDECIMAL(_f) ((_f)->flag & NUM_F_LDECIMAL)
#define IS_ZERO(_f)		((_f)->flag & NUM_F_ZERO)
#define IS_BLANK(_f)	((_f)->flag & NUM_F_BLANK)
#define IS_FILLMODE(_f) ((_f)->flag & NUM_F_FILLMODE)
#define IS_BRACKET(_f)	((_f)->flag & NUM_F_BRACKET)
#define IS_MINUS(_f)	((_f)->flag & NUM_F_MINUS)
#define IS_LSIGN(_f)	((_f)->flag & NUM_F_LSIGN)
#define IS_PLUS(_f)		((_f)->flag & NUM_F_PLUS)
#define IS_ROMAN(_f)	((_f)->flag & NUM_F_ROMAN)
#define IS_MULTI(_f)	((_f)->flag & NUM_F_MULTI)
#define IS_EEEE(_f)		((_f)->flag & NUM_F_EEEE)
356 
/* ----------
 * Format picture cache
 *
 * Datetime format pictures are cached when they are at most DCH_CACHE_SIZE
 * bytes long, number format pictures when they are at most NUM_CACHE_SIZE
 * bytes long.
 *
 * To keep things simple every cache entry has the same fixed size, large
 * enough for the worst case of one FormatNode per byte of picture string.
 *
 * DCH_CACHE_ENTRIES and NUM_CACHE_ENTRIES cap the number of entries held
 * in the respective cache.
 * ----------
 */
#define DCH_CACHE_SIZE		128
#define DCH_CACHE_ENTRIES	20
#define NUM_CACHE_SIZE		64
#define NUM_CACHE_ENTRIES	20
374 
375 typedef struct
376 {
378  char str[DCH_CACHE_SIZE + 1];
379  bool valid;
380  int age;
381 } DCHCacheEntry;
382 
383 typedef struct
384 {
386  char str[NUM_CACHE_SIZE + 1];
387  bool valid;
388  int age;
390 } NUMCacheEntry;
391 
392 /* global cache for date/time format pictures */
394 static int n_DCHCache = 0; /* current number of entries */
395 static int DCHCounter = 0; /* aging-event counter */
396 
397 /* global cache for number format pictures */
399 static int n_NUMCache = 0; /* current number of entries */
400 static int NUMCounter = 0; /* aging-event counter */
401 
402 /* ----------
403  * For char->date/time conversion
404  * ----------
405  */
406 typedef struct
407 {
409  int hh,
410  pm,
411  mi,
412  ss,
413  ssss,
414  d, /* stored as 1-7, Sunday = 1, 0 means missing */
415  dd,
416  ddd,
417  mm,
418  ms,
419  year,
420  bc,
421  ww,
422  w,
423  cc,
424  j,
425  us,
426  yysz, /* is it YY or YYYY ? */
427  clock, /* 12 or 24 hour clock? */
428  tzsign, /* +1, -1 or 0 if timezone info is absent */
429  tzh,
430  tzm;
431 } TmFromChar;
432 
433 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
434 
/* ----------
 * Debug
 *
 * With DEBUG_TO_FROM_CHAR defined, DEBUG_TMFC() and DEBUG_TM() dump the
 * members of the pointed-to struct through elog() at DEBUG_elog_output
 * level; otherwise both expand to nothing.
 *
 * Neither macro carries its own trailing semicolon: the caller writes
 * "DEBUG_TMFC(p);", which keeps both variants usable as a single statement
 * (for instance as the body of an if that has an else).
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR
#define DEBUG_TMFC(_X) \
		elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d", \
			(_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
			(_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
			(_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
			(_X)->yysz, (_X)->clock)
#define DEBUG_TM(_X) \
		elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nisdst %d\nmon %d\n",\
			(_X)->tm_sec, (_X)->tm_year,\
			(_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\
			(_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
#else
#define DEBUG_TMFC(_X)
#define DEBUG_TM(_X)
#endif
455 
456 /* ----------
457  * Datetime to char conversion
458  * ----------
459  */
460 typedef struct TmToChar
461 {
462  struct pg_tm tm; /* classic 'tm' struct */
463  fsec_t fsec; /* fractional seconds */
464  const char *tzn; /* timezone */
465 } TmToChar;
466 
/* Accessors for the members of a TmToChar, given a pointer to it */
#define tmtcTm(_X)		(&(_X)->tm)
#define tmtcTzn(_X)		((_X)->tzn)
#define tmtcFsec(_X)	((_X)->fsec)

/*
 * Reset a broken-down time: every numeric member becomes 0, except tm_mday
 * and tm_mon which become 1, and tm_zone becomes NULL.
 */
#define ZERO_tm(_X) \
do { \
	(_X)->tm_sec = 0; \
	(_X)->tm_min = 0; \
	(_X)->tm_hour = 0; \
	(_X)->tm_mday = 1; \
	(_X)->tm_mon = 1; \
	(_X)->tm_year = 0; \
	(_X)->tm_wday = 0; \
	(_X)->tm_yday = 0; \
	(_X)->tm_isdst = 0; \
	(_X)->tm_zone = NULL; \
} while(0)

/* Reset a whole TmToChar: its tm as above, plus fsec = 0 and tzn = NULL */
#define ZERO_tmtc(_X) \
do { \
	ZERO_tm(tmtcTm(_X)); \
	tmtcFsec(_X) = 0; \
	tmtcTzn(_X) = NULL; \
} while(0)
485 
/*
 * Raise an error when the value being formatted is an interval.
 * to_char(time) reaches to_char() as an interval as well, so the check
 * really covers both the interval and time data types.
 *
 * The macro expands in place and reads a variable named is_interval from
 * the enclosing scope.
 */
#define INVALID_FOR_INTERVAL  \
do { \
	if (is_interval) \
	{ \
		ereport(ERROR, \
				(errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
				 errmsg("invalid format specification for an interval value"), \
				 errhint("Intervals are not tied to specific calendar dates."))); \
	} \
} while(0)
498 
499 /*****************************************************************************
500  * KeyWord definitions
501  *****************************************************************************/
502 
/* ----------
 * Suffixes: one bit per suffix, OR-ed together into a node's suffix code
 * ----------
 */
#define DCH_S_FM	0x01
#define DCH_S_TH	0x02
#define DCH_S_th	0x04
#define DCH_S_SP	0x08
#define DCH_S_TM	0x10

/* ----------
 * Suffix tests: each yields exactly 1 or 0
 * ----------
 */

/* either case of the ordinal suffix is present */
#define S_THth(_s)	(((_s) & (DCH_S_TH | DCH_S_th)) ? 1 : 0)
#define S_TH(_s)	(((_s) & DCH_S_TH) ? 1 : 0)
#define S_th(_s)	(((_s) & DCH_S_th) ? 1 : 0)
/* TH_UPPER when the upper-case suffix is set, TH_LOWER in every other case */
#define S_TH_TYPE(_s)	(((_s) & DCH_S_TH) ? TH_UPPER : TH_LOWER)

/* Oracle toggles FM behavior, we don't; see docs. */
#define S_FM(_s)	(((_s) & DCH_S_FM) ? 1 : 0)
#define S_SP(_s)	(((_s) & DCH_S_SP) ? 1 : 0)
#define S_TM(_s)	(((_s) & DCH_S_TM) ? 1 : 0)
526 
527 /* ----------
528  * Suffixes definition for DATE-TIME TO/FROM CHAR
529  * ----------
530  */
531 #define TM_SUFFIX_LEN 2
532 
533 static const KeySuffix DCH_suff[] = {
534  {"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
535  {"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
536  {"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX},
537  {"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
538  {"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
539  {"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
540  {"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
541  /* last */
542  {NULL, 0, 0, 0}
543 };
544 
545 
546 /* ----------
547  * Format-pictures (KeyWord).
548  *
549  * The KeyWord field; alphabetic sorted, *BUT* strings alike is sorted
550  * complicated -to-> easy:
551  *
552  * (example: "DDD","DD","Day","D" )
553  *
554  * (this specific sort needs the algorithm for sequential search for strings,
555  * which not has exact end; -> How keyword is in "HH12blabla" ? - "HH"
556  * or "HH12"? You must first try "HH12", because "HH" is in string, but
557  * it is not good.
558  *
559  * (!)
560  * - Position for the keyword is similar as position in the enum DCH/NUM_poz.
561  * (!)
562  *
563  * For fast search is used the 'int index[]', index is ascii table from position
564  * 32 (' ') to 126 (~), in this index is DCH_ / NUM_ enums for each ASCII
565  * position or -1 if char is not used in the KeyWord. Search example for
566  * string "MM":
567  * 1) see in index to index['M' - 32],
568  * 2) take keywords position (enum DCH_MI) from index
569  * 3) run sequential search in keywords[] from this position
570  *
571  * ----------
572  */
573 
574 typedef enum
575 {
590  DCH_FX, /* global suffix */
671 
672  /* last */
674 } DCH_poz;
675 
676 typedef enum
677 {
714 
715  /* last */
717 } NUM_poz;
718 
719 /* ----------
720  * KeyWords for DATE-TIME version
 *
 * Entries that share a first character are contiguous (the trailing letter
 * comments mark where each group starts).  Within a group, a keyword that
 * is a prefix of another one comes after it ("DDD", "DD", ..., "D"),
 * because index_seq_search() returns the first entry whose name matches
 * the start of the input.
 *
 * Several lower-case spellings carry the same id as their upper-case
 * counterpart (e.g. "cc" -> DCH_CC); others have an id of their own
 * (e.g. "day" -> DCH_day).
 *
 * The table is terminated by an entry whose name is NULL.
721  * ----------
722  */
723 static const KeyWord DCH_keywords[] = {
724 /* name, len, id, is_digit, date_mode */
725  {"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE}, /* A */
726  {"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
727  {"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
728  {"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
729  {"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE}, /* B */
730  {"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
731  {"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* C */
732  {"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE}, /* D */
733  {"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
734  {"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
735  {"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
736  {"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
737  {"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
738  {"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
739  {"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE}, /* F */
740  {"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* H */
741  {"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
742  {"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
743  {"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* I */
744  {"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
745  {"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
746  {"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
747  {"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
748  {"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
749  {"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
750  {"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
751  {"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* M */
752  {"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
753  {"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
754  {"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
755  {"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
756  {"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
757  {"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
758  {"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* O */
759  {"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE}, /* P */
760  {"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
761  {"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
762  {"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
763  {"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* S */
764  {"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
765  {"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* T */
766  {"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
767  {"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
768  {"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* U */
769  {"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* W */
770  {"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
771  {"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* Y */
772  {"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
773  {"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
774  {"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
775  {"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
776  {"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE}, /* a */
777  {"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
778  {"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
779  {"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
780  {"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE}, /* b */
781  {"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
782  {"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* c */
783  {"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE}, /* d */
784  {"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
785  {"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
786  {"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
787  {"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
788  {"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE}, /* f */
789  {"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* h */
790  {"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
791  {"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
792  {"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* i */
793  {"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
794  {"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
795  {"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
796  {"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
797  {"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
798  {"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
799  {"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
800  {"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* m */
801  {"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
802  {"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
803  {"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
804  {"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
805  {"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE}, /* p */
806  {"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
807  {"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
808  {"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
809  {"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* s */
810  {"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
811  {"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE}, /* t */
812  {"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* u */
813  {"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* w */
814  {"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
815  {"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* y */
816  {"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
817  {"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
818  {"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
819  {"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
820 
821  /* last */
822  {NULL, 0, 0, 0, 0}
823 };
824 
825 /* ----------
826  * KeyWords for NUMBER version
827  *
828  * The is_digit and date_mode fields are not relevant here, so each
 * initializer gives only name, len and id; the omitted members are
 * implicitly zero (false / FROM_CHAR_DATE_NONE).
 *
 * As in the date-time table, entries sharing a first character are
 * contiguous and a keyword that is a prefix of another comes after it
 * ("SG", "SP", then "S").  Most lower-case spellings reuse the upper-case
 * id; "rn" and "th" have ids of their own (NUM_rn, NUM_th).
829  * ----------
830  */
831 static const KeyWord NUM_keywords[] = {
832 /* name, len, id is in Index */
833  {",", 1, NUM_COMMA}, /* , */
834  {".", 1, NUM_DEC}, /* . */
835  {"0", 1, NUM_0}, /* 0 */
836  {"9", 1, NUM_9}, /* 9 */
837  {"B", 1, NUM_B}, /* B */
838  {"C", 1, NUM_C}, /* C */
839  {"D", 1, NUM_D}, /* D */
840  {"EEEE", 4, NUM_E}, /* E */
841  {"FM", 2, NUM_FM}, /* F */
842  {"G", 1, NUM_G}, /* G */
843  {"L", 1, NUM_L}, /* L */
844  {"MI", 2, NUM_MI}, /* M */
845  {"PL", 2, NUM_PL}, /* P */
846  {"PR", 2, NUM_PR},
847  {"RN", 2, NUM_RN}, /* R */
848  {"SG", 2, NUM_SG}, /* S */
849  {"SP", 2, NUM_SP},
850  {"S", 1, NUM_S},
851  {"TH", 2, NUM_TH}, /* T */
852  {"V", 1, NUM_V}, /* V */
853  {"b", 1, NUM_B}, /* b */
854  {"c", 1, NUM_C}, /* c */
855  {"d", 1, NUM_D}, /* d */
856  {"eeee", 4, NUM_E}, /* e */
857  {"fm", 2, NUM_FM}, /* f */
858  {"g", 1, NUM_G}, /* g */
859  {"l", 1, NUM_L}, /* l */
860  {"mi", 2, NUM_MI}, /* m */
861  {"pl", 2, NUM_PL}, /* p */
862  {"pr", 2, NUM_PR},
863  {"rn", 2, NUM_rn}, /* r */
864  {"sg", 2, NUM_SG}, /* s */
865  {"sp", 2, NUM_SP},
866  {"s", 1, NUM_S},
867  {"th", 2, NUM_th}, /* t */
868  {"v", 1, NUM_V}, /* v */
869 
870  /* last */
871  {NULL, 0, 0}
872 };
873 
874 
875 /* ----------
876  * KeyWords index for DATE-TIME version
877  * ----------
878  */
879 static const int DCH_index[KeyWord_INDEX_SIZE] = {
880 /*
881 0 1 2 3 4 5 6 7 8 9
882 */
883  /*---- first 0..31 chars are skipped ----*/
884 
885  -1, -1, -1, -1, -1, -1, -1, -1,
886  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
887  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
888  -1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,
889  DCH_FX, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF,
891  -1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,
892  DCH_day, -1, DCH_fx, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,
893  -1, -1, DCH_p_m, DCH_q, DCH_rm, DCH_ssss, DCH_tz, DCH_us, -1, DCH_ww,
894  -1, DCH_y_yyy, -1, -1, -1, -1
895 
896  /*---- chars over 126 are skipped ----*/
897 };
898 
899 /* ----------
900  * KeyWords index for NUMBER version
 *
 * Slot i belongs to the character ' ' + i (index_seq_search() addresses
 * the table with *str - ' ').  A slot holds a NUM_xxx position into
 * NUM_keywords[], or -1 when no keyword starts with that character.
 * The first row has 8 values (slots 0..7), each following row has 10 and
 * the last row has 6, for KeyWord_INDEX_SIZE values in total.
901  * ----------
902  */
903 static const int NUM_index[KeyWord_INDEX_SIZE] = {
904 /*
905 0 1 2 3 4 5 6 7 8 9
906 */
907  /*---- first 0..31 chars are skipped ----*/
908 
909  -1, -1, -1, -1, -1, -1, -1, -1,
910  -1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,
911  -1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,
912  -1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E,
913  NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,
914  NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,
915  -1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,
916  NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi,
917  -1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,
918  -1, -1, -1, -1, -1, -1
919 
920  /*---- chars over 126 are skipped ----*/
921 };
922 
923 /* ----------
924  * Number processor struct
925  * ----------
926  */
927 typedef struct NUMProc
928 {
930  NUMDesc *Num; /* number description */
931 
932  int sign, /* '-' or '+' */
933  sign_wrote, /* was sign write */
934  num_count, /* number of write digits */
935  num_in, /* is inside number */
936  num_curr, /* current position in number */
937  out_pre_spaces, /* spaces before first digit */
938 
939  read_dec, /* to_number - was read dec. point */
940  read_post, /* to_number - number of dec. digit */
941  read_pre; /* to_number - number non-dec. digit */
942 
943  char *number, /* string with number */
944  *number_p, /* pointer to current number position */
945  *inout, /* in / out buffer */
946  *inout_p, /* pointer to current inout position */
947  *last_relevant, /* last relevant number after decimal point */
948 
949  *L_negative_sign, /* Locale */
950  *L_positive_sign,
951  *decimal,
952  *L_thousands_sep,
953  *L_currency_symbol;
954 } NUMProc;
955 
956 
957 /* ----------
958  * Functions
 *
 * Forward declarations of the file-local (static) routines, so that they
 * can be used before their definitions appear.  The two dump_* helpers
 * are declared only when DEBUG_TO_FROM_CHAR is defined.
959  * ----------
960  */
961 static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
962  const int *index);
963 static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
964 static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
965 static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
966  const KeySuffix *suf, const int *index, int ver, NUMDesc *Num);
967 
968 static void DCH_to_char(FormatNode *node, bool is_interval,
969  TmToChar *in, char *out, Oid collid);
970 static void DCH_from_char(FormatNode *node, char *in, TmFromChar *out);
971 
972 #ifdef DEBUG_TO_FROM_CHAR
973 static void dump_index(const KeyWord *k, const int *index);
974 static void dump_node(FormatNode *node, int max);
975 #endif
976 
977 static const char *get_th(char *num, int type);
978 static char *str_numth(char *dest, char *num, int type);
979 static int adjust_partial_year_to_2020(int year);
980 static int strspace_len(char *str);
981 static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode);
982 static void from_char_set_int(int *dest, const int value, const FormatNode *node);
983 static int from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node);
984 static int from_char_parse_int(int *dest, char **src, FormatNode *node);
985 static int seq_search(char *name, const char *const *array, int type, int max, int *len);
986 static int from_char_seq_search(int *dest, char **src, const char *const *array, int type, int max, FormatNode *node);
987 static void do_to_timestamp(text *date_txt, text *fmt,
988  struct pg_tm *tm, fsec_t *fsec);
989 static char *fill_str(char *str, int c, int max);
990 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
991 static char *int_to_roman(int number);
992 static void NUM_prepare_locale(NUMProc *Np);
993 static char *get_last_relevant_decnum(char *num);
994 static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
995 static void NUM_numpart_to_char(NUMProc *Np, int id);
996 static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
997  char *number, int input_len, int to_char_out_pre_spaces,
998  int sign, bool is_to_char, Oid collid);
999 static DCHCacheEntry *DCH_cache_getnew(const char *str);
1000 static DCHCacheEntry *DCH_cache_search(const char *str);
1001 static DCHCacheEntry *DCH_cache_fetch(const char *str);
1002 static NUMCacheEntry *NUM_cache_getnew(const char *str);
1003 static NUMCacheEntry *NUM_cache_search(const char *str);
1004 static NUMCacheEntry *NUM_cache_fetch(const char *str);
1005 
1006 
1007 /* ----------
1008  * Fast sequential search, use index for data selection which
1009  * go to seq. cycle (it is very fast for unwanted strings)
1010  * (can't be used binary search in format parsing)
1011  * ----------
1012  */
1013 static const KeyWord *
1014 index_seq_search(const char *str, const KeyWord *kw, const int *index)
1015 {
1016  int poz;
1017 
1018  if (!KeyWord_INDEX_FILTER(*str))
1019  return NULL;
1020 
1021  if ((poz = *(index + (*str - ' '))) > -1)
1022  {
1023  const KeyWord *k = kw + poz;
1024 
1025  do
1026  {
1027  if (strncmp(str, k->name, k->len) == 0)
1028  return k;
1029  k++;
1030  if (!k->name)
1031  return NULL;
1032  } while (*str == *k->name);
1033  }
1034  return NULL;
1035 }
1036 
1037 static const KeySuffix *
1038 suff_search(const char *str, const KeySuffix *suf, int type)
1039 {
1040  const KeySuffix *s;
1041 
1042  for (s = suf; s->name != NULL; s++)
1043  {
1044  if (s->type != type)
1045  continue;
1046 
1047  if (strncmp(str, s->name, s->len) == 0)
1048  return s;
1049  }
1050  return NULL;
1051 }
1052 
1053 /* ----------
1054  * Prepare NUMDesc (number description struct) via FormatNode struct
1055  * ----------
1056  */
1057 static void
1059 {
1060  if (n->type != NODE_TYPE_ACTION)
1061  return;
1062 
1063  if (IS_EEEE(num) && n->key->id != NUM_E)
1064  ereport(ERROR,
1065  (errcode(ERRCODE_SYNTAX_ERROR),
1066  errmsg("\"EEEE\" must be the last pattern used")));
1067 
1068  switch (n->key->id)
1069  {
1070  case NUM_9:
1071  if (IS_BRACKET(num))
1072  ereport(ERROR,
1073  (errcode(ERRCODE_SYNTAX_ERROR),
1074  errmsg("\"9\" must be ahead of \"PR\"")));
1075  if (IS_MULTI(num))
1076  {
1077  ++num->multi;
1078  break;
1079  }
1080  if (IS_DECIMAL(num))
1081  ++num->post;
1082  else
1083  ++num->pre;
1084  break;
1085 
1086  case NUM_0:
1087  if (IS_BRACKET(num))
1088  ereport(ERROR,
1089  (errcode(ERRCODE_SYNTAX_ERROR),
1090  errmsg("\"0\" must be ahead of \"PR\"")));
1091  if (!IS_ZERO(num) && !IS_DECIMAL(num))
1092  {
1093  num->flag |= NUM_F_ZERO;
1094  num->zero_start = num->pre + 1;
1095  }
1096  if (!IS_DECIMAL(num))
1097  ++num->pre;
1098  else
1099  ++num->post;
1100 
1101  num->zero_end = num->pre + num->post;
1102  break;
1103 
1104  case NUM_B:
1105  if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
1106  num->flag |= NUM_F_BLANK;
1107  break;
1108 
1109  case NUM_D:
1110  num->flag |= NUM_F_LDECIMAL;
1111  num->need_locale = true;
1112  /* FALLTHROUGH */
1113  case NUM_DEC:
1114  if (IS_DECIMAL(num))
1115  ereport(ERROR,
1116  (errcode(ERRCODE_SYNTAX_ERROR),
1117  errmsg("multiple decimal points")));
1118  if (IS_MULTI(num))
1119  ereport(ERROR,
1120  (errcode(ERRCODE_SYNTAX_ERROR),
1121  errmsg("cannot use \"V\" and decimal point together")));
1122  num->flag |= NUM_F_DECIMAL;
1123  break;
1124 
1125  case NUM_FM:
1126  num->flag |= NUM_F_FILLMODE;
1127  break;
1128 
1129  case NUM_S:
1130  if (IS_LSIGN(num))
1131  ereport(ERROR,
1132  (errcode(ERRCODE_SYNTAX_ERROR),
1133  errmsg("cannot use \"S\" twice")));
1134  if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
1135  ereport(ERROR,
1136  (errcode(ERRCODE_SYNTAX_ERROR),
1137  errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
1138  if (!IS_DECIMAL(num))
1139  {
1140  num->lsign = NUM_LSIGN_PRE;
1141  num->pre_lsign_num = num->pre;
1142  num->need_locale = true;
1143  num->flag |= NUM_F_LSIGN;
1144  }
1145  else if (num->lsign == NUM_LSIGN_NONE)
1146  {
1147  num->lsign = NUM_LSIGN_POST;
1148  num->need_locale = true;
1149  num->flag |= NUM_F_LSIGN;
1150  }
1151  break;
1152 
1153  case NUM_MI:
1154  if (IS_LSIGN(num))
1155  ereport(ERROR,
1156  (errcode(ERRCODE_SYNTAX_ERROR),
1157  errmsg("cannot use \"S\" and \"MI\" together")));
1158  num->flag |= NUM_F_MINUS;
1159  if (IS_DECIMAL(num))
1160  num->flag |= NUM_F_MINUS_POST;
1161  break;
1162 
1163  case NUM_PL:
1164  if (IS_LSIGN(num))
1165  ereport(ERROR,
1166  (errcode(ERRCODE_SYNTAX_ERROR),
1167  errmsg("cannot use \"S\" and \"PL\" together")));
1168  num->flag |= NUM_F_PLUS;
1169  if (IS_DECIMAL(num))
1170  num->flag |= NUM_F_PLUS_POST;
1171  break;
1172 
1173  case NUM_SG:
1174  if (IS_LSIGN(num))
1175  ereport(ERROR,
1176  (errcode(ERRCODE_SYNTAX_ERROR),
1177  errmsg("cannot use \"S\" and \"SG\" together")));
1178  num->flag |= NUM_F_MINUS;
1179  num->flag |= NUM_F_PLUS;
1180  break;
1181 
1182  case NUM_PR:
1183  if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
1184  ereport(ERROR,
1185  (errcode(ERRCODE_SYNTAX_ERROR),
1186  errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
1187  num->flag |= NUM_F_BRACKET;
1188  break;
1189 
1190  case NUM_rn:
1191  case NUM_RN:
1192  num->flag |= NUM_F_ROMAN;
1193  break;
1194 
1195  case NUM_L:
1196  case NUM_G:
1197  num->need_locale = true;
1198  break;
1199 
1200  case NUM_V:
1201  if (IS_DECIMAL(num))
1202  ereport(ERROR,
1203  (errcode(ERRCODE_SYNTAX_ERROR),
1204  errmsg("cannot use \"V\" and decimal point together")));
1205  num->flag |= NUM_F_MULTI;
1206  break;
1207 
1208  case NUM_E:
1209  if (IS_EEEE(num))
1210  ereport(ERROR,
1211  (errcode(ERRCODE_SYNTAX_ERROR),
1212  errmsg("cannot use \"EEEE\" twice")));
1213  if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
1214  IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
1215  IS_ROMAN(num) || IS_MULTI(num))
1216  ereport(ERROR,
1217  (errcode(ERRCODE_SYNTAX_ERROR),
1218  errmsg("\"EEEE\" is incompatible with other formats"),
1219  errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
1220  num->flag |= NUM_F_EEEE;
1221  break;
1222  }
1223 }
1224 
1225 /* ----------
1226  * Format parser, search small keywords and keyword's suffixes, and make
1227  * format-node tree.
1228  *
1229  * for DATE-TIME & NUMBER version
1230  * ----------
1231  */
1232 static void
1233 parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1234  const KeySuffix *suf, const int *index, int ver, NUMDesc *Num)
1235 {
1236  FormatNode *n;
1237 
1238 #ifdef DEBUG_TO_FROM_CHAR
1239  elog(DEBUG_elog_output, "to_char/number(): run parser");
1240 #endif
1241 
1242  n = node;
1243 
1244  while (*str)
1245  {
1246  int suffix = 0;
1247  const KeySuffix *s;
1248 
1249  /*
1250  * Prefix
1251  */
1252  if (ver == DCH_TYPE &&
1253  (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
1254  {
1255  suffix |= s->id;
1256  if (s->len)
1257  str += s->len;
1258  }
1259 
1260  /*
1261  * Keyword
1262  */
1263  if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
1264  {
1265  n->type = NODE_TYPE_ACTION;
1266  n->suffix = suffix;
1267  if (n->key->len)
1268  str += n->key->len;
1269 
1270  /*
1271  * NUM version: Prepare global NUMDesc struct
1272  */
1273  if (ver == NUM_TYPE)
1274  NUMDesc_prepare(Num, n);
1275 
1276  /*
1277  * Postfix
1278  */
1279  if (ver == DCH_TYPE && *str &&
1280  (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
1281  {
1282  n->suffix |= s->id;
1283  if (s->len)
1284  str += s->len;
1285  }
1286 
1287  n++;
1288  }
1289  else if (*str)
1290  {
1291  int chlen;
1292 
1293  /*
1294  * Process double-quoted literal string, if any
1295  */
1296  if (*str == '"')
1297  {
1298  str++;
1299  while (*str)
1300  {
1301  if (*str == '"')
1302  {
1303  str++;
1304  break;
1305  }
1306  /* backslash quotes the next character, if any */
1307  if (*str == '\\' && *(str + 1))
1308  str++;
1309  chlen = pg_mblen(str);
1310  n->type = NODE_TYPE_CHAR;
1311  memcpy(n->character, str, chlen);
1312  n->character[chlen] = '\0';
1313  n->key = NULL;
1314  n->suffix = 0;
1315  n++;
1316  str += chlen;
1317  }
1318  }
1319  else
1320  {
1321  /*
1322  * Outside double-quoted strings, backslash is only special if
1323  * it immediately precedes a double quote.
1324  */
1325  if (*str == '\\' && *(str + 1) == '"')
1326  str++;
1327  chlen = pg_mblen(str);
1328  n->type = NODE_TYPE_CHAR;
1329  memcpy(n->character, str, chlen);
1330  n->character[chlen] = '\0';
1331  n->key = NULL;
1332  n->suffix = 0;
1333  n++;
1334  str += chlen;
1335  }
1336  }
1337  }
1338 
1339  n->type = NODE_TYPE_END;
1340  n->suffix = 0;
1341 }
1342 
/* ----------
 * DEBUG: Dump the FormatNode Tree (debug)
 *
 * Logs one line per node, for at most max + 1 nodes, and stops early when
 * the END node is reached.  Only compiled with DEBUG_TO_FROM_CHAR defined.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR

#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
#define DUMP_FM(_suf) (S_FM(_suf) ? "FM" : " ")

static void
dump_node(FormatNode *node, int max)
{
	int			pos;

	elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");

	for (pos = 0; pos <= max; pos++)
	{
		FormatNode *cur = &node[pos];

		if (cur->type == NODE_TYPE_END)
		{
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", pos);
			return;
		}

		if (cur->type == NODE_TYPE_ACTION)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
				 pos, cur->key->name,
				 DUMP_THth(cur->suffix), DUMP_FM(cur->suffix));
		else if (cur->type == NODE_TYPE_CHAR)
			elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
				 pos, cur->character);
		else
			elog(DEBUG_elog_output, "%d:\t unknown NODE!", pos);
	}
}
#endif							/* DEBUG */
1378 
1379 /*****************************************************************************
1380  * Private utils
1381  *****************************************************************************/
1382 
1383 /* ----------
1384  * Return ST/ND/RD/TH for simple (1..9) numbers
1385  * type --> 0 upper, 1 lower
1386  * ----------
1387  */
1388 static const char *
1389 get_th(char *num, int type)
1390 {
1391  int len = strlen(num),
1392  last,
1393  seclast;
1394 
1395  last = *(num + (len - 1));
1396  if (!isdigit((unsigned char) last))
1397  ereport(ERROR,
1398  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1399  errmsg("\"%s\" is not a number", num)));
1400 
1401  /*
1402  * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
1403  * 'ST/st', 'ND/nd', 'RD/rd', respectively
1404  */
1405  if ((len > 1) && ((seclast = num[len - 2]) == '1'))
1406  last = 0;
1407 
1408  switch (last)
1409  {
1410  case '1':
1411  if (type == TH_UPPER)
1412  return numTH[0];
1413  return numth[0];
1414  case '2':
1415  if (type == TH_UPPER)
1416  return numTH[1];
1417  return numth[1];
1418  case '3':
1419  if (type == TH_UPPER)
1420  return numTH[2];
1421  return numth[2];
1422  default:
1423  if (type == TH_UPPER)
1424  return numTH[3];
1425  return numth[3];
1426  }
1427 }
1428 
/* ----------
 * Convert string-number to ordinal string-number
 *
 * Copies 'num' into 'dest' (unless they are the same pointer) and appends
 * the suffix chosen by get_th(num, type).  'dest' must have room for the
 * number plus the suffix; no length check is made here.  Returns 'dest'.
 * ----------
 */
static char *
str_numth(char *dest, char *num, int type)
{
	const char *ordinal;

	if (dest != num)
		strcpy(dest, num);

	ordinal = get_th(num, type);
	strcat(dest, ordinal);

	return dest;
}
1442 
1443 /*****************************************************************************
1444  * upper/lower/initcap functions
1445  *****************************************************************************/
1446 
1447 #ifdef USE_ICU
1448 
1449 typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
1450  const UChar *src, int32_t srcLength,
1451  const char *locale,
1452  UErrorCode *pErrorCode);
1453 
1454 static int32_t
1455 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
1456  UChar **buff_dest, UChar *buff_source, int32_t len_source)
1457 {
1458  UErrorCode status;
1459  int32_t len_dest;
1460 
1461  len_dest = len_source; /* try first with same length */
1462  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1463  status = U_ZERO_ERROR;
1464  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1465  mylocale->info.icu.locale, &status);
1466  if (status == U_BUFFER_OVERFLOW_ERROR)
1467  {
1468  /* try again with adjusted length */
1469  pfree(*buff_dest);
1470  *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1471  status = U_ZERO_ERROR;
1472  len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1473  mylocale->info.icu.locale, &status);
1474  }
1475  if (U_FAILURE(status))
1476  ereport(ERROR,
1477  (errmsg("case conversion failed: %s", u_errorName(status))));
1478  return len_dest;
1479 }
1480 
1481 static int32_t
1482 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
1483  const UChar *src, int32_t srcLength,
1484  const char *locale,
1485  UErrorCode *pErrorCode)
1486 {
1487  return u_strToTitle(dest, destCapacity, src, srcLength,
1488  NULL, locale, pErrorCode);
1489 }
1490 
1491 #endif /* USE_ICU */
1492 
1493 /*
1494  * If the system provides the needed functions for wide-character manipulation
1495  * (which are all standardized by C99), then we implement upper/lower/initcap
1496  * using wide-character functions, if necessary. Otherwise we use the
1497  * traditional <ctype.h> functions, which of course will not work as desired
1498  * in multibyte character sets. Note that in either case we are effectively
1499  * assuming that the database character encoding matches the encoding implied
1500  * by LC_CTYPE.
1501  *
1502  * If the system provides locale_t and associated functions (which are
1503  * standardized by Open Group's XBD), we can support collations that are
1504  * neither default nor C. The code is written to handle both combinations
1505  * of have-wide-characters and have-locale_t, though it's rather unlikely
1506  * a platform would have the latter without the former.
1507  */
1508 
1509 /*
1510  * collation-aware, wide-character-aware lower function
1511  *
1512  * We pass the number of bytes so we can pass varlena and char*
1513  * to this function. The result is a palloc'd, null-terminated string.
1514  */
1515 char *
1516 str_tolower(const char *buff, size_t nbytes, Oid collid)
1517 {
1518  char *result;
1519 
1520  if (!buff)
1521  return NULL;
1522 
1523  /* C/POSIX collations use this path regardless of database encoding */
1524  if (lc_ctype_is_c(collid))
1525  {
1526  result = asc_tolower(buff, nbytes);
1527  }
1528  else
1529  {
1530  pg_locale_t mylocale = 0;
1531 
1532  if (collid != DEFAULT_COLLATION_OID)
1533  {
1534  if (!OidIsValid(collid))
1535  {
1536  /*
1537  * This typically means that the parser could not resolve a
1538  * conflict of implicit collations, so report it that way.
1539  */
1540  ereport(ERROR,
1541  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1542  errmsg("could not determine which collation to use for lower() function"),
1543  errhint("Use the COLLATE clause to set the collation explicitly.")));
1544  }
1545  mylocale = pg_newlocale_from_collation(collid);
1546  }
1547 
1548 #ifdef USE_ICU
1549  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1550  {
1551  int32_t len_uchar;
1552  int32_t len_conv;
1553  UChar *buff_uchar;
1554  UChar *buff_conv;
1555 
1556  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1557  len_conv = icu_convert_case(u_strToLower, mylocale,
1558  &buff_conv, buff_uchar, len_uchar);
1559  icu_from_uchar(&result, buff_conv, len_conv);
1560  pfree(buff_uchar);
1561  }
1562  else
1563 #endif
1564  {
1566  {
1567  wchar_t *workspace;
1568  size_t curr_char;
1569  size_t result_size;
1570 
1571  /* Overflow paranoia */
1572  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1573  ereport(ERROR,
1574  (errcode(ERRCODE_OUT_OF_MEMORY),
1575  errmsg("out of memory")));
1576 
1577  /* Output workspace cannot have more codes than input bytes */
1578  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1579 
1580  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1581 
1582  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1583  {
1584 #ifdef HAVE_LOCALE_T
1585  if (mylocale)
1586  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1587  else
1588 #endif
1589  workspace[curr_char] = towlower(workspace[curr_char]);
1590  }
1591 
1592  /*
1593  * Make result large enough; case change might change number
1594  * of bytes
1595  */
1596  result_size = curr_char * pg_database_encoding_max_length() + 1;
1597  result = palloc(result_size);
1598 
1599  wchar2char(result, workspace, result_size, mylocale);
1600  pfree(workspace);
1601  }
1602  else
1603  {
1604  char *p;
1605 
1606  result = pnstrdup(buff, nbytes);
1607 
1608  /*
1609  * Note: we assume that tolower_l() will not be so broken as
1610  * to need an isupper_l() guard test. When using the default
1611  * collation, we apply the traditional Postgres behavior that
1612  * forces ASCII-style treatment of I/i, but in non-default
1613  * collations you get exactly what the collation says.
1614  */
1615  for (p = result; *p; p++)
1616  {
1617 #ifdef HAVE_LOCALE_T
1618  if (mylocale)
1619  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1620  else
1621 #endif
1622  *p = pg_tolower((unsigned char) *p);
1623  }
1624  }
1625  }
1626  }
1627 
1628  return result;
1629 }
1630 
1631 /*
1632  * collation-aware, wide-character-aware upper function
1633  *
1634  * We pass the number of bytes so we can pass varlena and char*
1635  * to this function. The result is a palloc'd, null-terminated string.
1636  */
1637 char *
1638 str_toupper(const char *buff, size_t nbytes, Oid collid)
1639 {
1640  char *result;
1641 
1642  if (!buff)
1643  return NULL;
1644 
1645  /* C/POSIX collations use this path regardless of database encoding */
1646  if (lc_ctype_is_c(collid))
1647  {
1648  result = asc_toupper(buff, nbytes);
1649  }
1650  else
1651  {
1652  pg_locale_t mylocale = 0;
1653 
1654  if (collid != DEFAULT_COLLATION_OID)
1655  {
1656  if (!OidIsValid(collid))
1657  {
1658  /*
1659  * This typically means that the parser could not resolve a
1660  * conflict of implicit collations, so report it that way.
1661  */
1662  ereport(ERROR,
1663  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1664  errmsg("could not determine which collation to use for upper() function"),
1665  errhint("Use the COLLATE clause to set the collation explicitly.")));
1666  }
1667  mylocale = pg_newlocale_from_collation(collid);
1668  }
1669 
1670 #ifdef USE_ICU
1671  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1672  {
1673  int32_t len_uchar,
1674  len_conv;
1675  UChar *buff_uchar;
1676  UChar *buff_conv;
1677 
1678  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1679  len_conv = icu_convert_case(u_strToUpper, mylocale,
1680  &buff_conv, buff_uchar, len_uchar);
1681  icu_from_uchar(&result, buff_conv, len_conv);
1682  pfree(buff_uchar);
1683  }
1684  else
1685 #endif
1686  {
1688  {
1689  wchar_t *workspace;
1690  size_t curr_char;
1691  size_t result_size;
1692 
1693  /* Overflow paranoia */
1694  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1695  ereport(ERROR,
1696  (errcode(ERRCODE_OUT_OF_MEMORY),
1697  errmsg("out of memory")));
1698 
1699  /* Output workspace cannot have more codes than input bytes */
1700  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1701 
1702  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1703 
1704  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1705  {
1706 #ifdef HAVE_LOCALE_T
1707  if (mylocale)
1708  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1709  else
1710 #endif
1711  workspace[curr_char] = towupper(workspace[curr_char]);
1712  }
1713 
1714  /*
1715  * Make result large enough; case change might change number
1716  * of bytes
1717  */
1718  result_size = curr_char * pg_database_encoding_max_length() + 1;
1719  result = palloc(result_size);
1720 
1721  wchar2char(result, workspace, result_size, mylocale);
1722  pfree(workspace);
1723  }
1724  else
1725  {
1726  char *p;
1727 
1728  result = pnstrdup(buff, nbytes);
1729 
1730  /*
1731  * Note: we assume that toupper_l() will not be so broken as
1732  * to need an islower_l() guard test. When using the default
1733  * collation, we apply the traditional Postgres behavior that
1734  * forces ASCII-style treatment of I/i, but in non-default
1735  * collations you get exactly what the collation says.
1736  */
1737  for (p = result; *p; p++)
1738  {
1739 #ifdef HAVE_LOCALE_T
1740  if (mylocale)
1741  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1742  else
1743 #endif
1744  *p = pg_toupper((unsigned char) *p);
1745  }
1746  }
1747  }
1748  }
1749 
1750  return result;
1751 }
1752 
1753 /*
1754  * collation-aware, wide-character-aware initcap function
1755  *
1756  * We pass the number of bytes so we can pass varlena and char*
1757  * to this function. The result is a palloc'd, null-terminated string.
1758  */
1759 char *
1760 str_initcap(const char *buff, size_t nbytes, Oid collid)
1761 {
1762  char *result;
1763  int wasalnum = false;
1764 
1765  if (!buff)
1766  return NULL;
1767 
1768  /* C/POSIX collations use this path regardless of database encoding */
1769  if (lc_ctype_is_c(collid))
1770  {
1771  result = asc_initcap(buff, nbytes);
1772  }
1773  else
1774  {
1775  pg_locale_t mylocale = 0;
1776 
1777  if (collid != DEFAULT_COLLATION_OID)
1778  {
1779  if (!OidIsValid(collid))
1780  {
1781  /*
1782  * This typically means that the parser could not resolve a
1783  * conflict of implicit collations, so report it that way.
1784  */
1785  ereport(ERROR,
1786  (errcode(ERRCODE_INDETERMINATE_COLLATION),
1787  errmsg("could not determine which collation to use for initcap() function"),
1788  errhint("Use the COLLATE clause to set the collation explicitly.")));
1789  }
1790  mylocale = pg_newlocale_from_collation(collid);
1791  }
1792 
1793 #ifdef USE_ICU
1794  if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
1795  {
1796  int32_t len_uchar,
1797  len_conv;
1798  UChar *buff_uchar;
1799  UChar *buff_conv;
1800 
1801  len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1802  len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
1803  &buff_conv, buff_uchar, len_uchar);
1804  icu_from_uchar(&result, buff_conv, len_conv);
1805  pfree(buff_uchar);
1806  }
1807  else
1808 #endif
1809  {
1811  {
1812  wchar_t *workspace;
1813  size_t curr_char;
1814  size_t result_size;
1815 
1816  /* Overflow paranoia */
1817  if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1818  ereport(ERROR,
1819  (errcode(ERRCODE_OUT_OF_MEMORY),
1820  errmsg("out of memory")));
1821 
1822  /* Output workspace cannot have more codes than input bytes */
1823  workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1824 
1825  char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1826 
1827  for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1828  {
1829 #ifdef HAVE_LOCALE_T
1830  if (mylocale)
1831  {
1832  if (wasalnum)
1833  workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1834  else
1835  workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1836  wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
1837  }
1838  else
1839 #endif
1840  {
1841  if (wasalnum)
1842  workspace[curr_char] = towlower(workspace[curr_char]);
1843  else
1844  workspace[curr_char] = towupper(workspace[curr_char]);
1845  wasalnum = iswalnum(workspace[curr_char]);
1846  }
1847  }
1848 
1849  /*
1850  * Make result large enough; case change might change number
1851  * of bytes
1852  */
1853  result_size = curr_char * pg_database_encoding_max_length() + 1;
1854  result = palloc(result_size);
1855 
1856  wchar2char(result, workspace, result_size, mylocale);
1857  pfree(workspace);
1858  }
1859  else
1860  {
1861  char *p;
1862 
1863  result = pnstrdup(buff, nbytes);
1864 
1865  /*
1866  * Note: we assume that toupper_l()/tolower_l() will not be so
1867  * broken as to need guard tests. When using the default
1868  * collation, we apply the traditional Postgres behavior that
1869  * forces ASCII-style treatment of I/i, but in non-default
1870  * collations you get exactly what the collation says.
1871  */
1872  for (p = result; *p; p++)
1873  {
1874 #ifdef HAVE_LOCALE_T
1875  if (mylocale)
1876  {
1877  if (wasalnum)
1878  *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1879  else
1880  *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1881  wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
1882  }
1883  else
1884 #endif
1885  {
1886  if (wasalnum)
1887  *p = pg_tolower((unsigned char) *p);
1888  else
1889  *p = pg_toupper((unsigned char) *p);
1890  wasalnum = isalnum((unsigned char) *p);
1891  }
1892  }
1893  }
1894  }
1895  }
1896 
1897  return result;
1898 }
1899 
/*
 * ASCII-only lower function
 *
 * The byte count is passed explicitly so both varlena data and plain char*
 * can be handled.  The input is duplicated with pnstrdup() and every byte
 * up to the terminator is mapped through pg_ascii_tolower(); the copy is
 * returned.  A NULL input yields NULL.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *copy;
	size_t		i;

	if (buff == NULL)
		return NULL;

	copy = pnstrdup(buff, nbytes);

	for (i = 0; copy[i] != '\0'; i++)
		copy[i] = pg_ascii_tolower((unsigned char) copy[i]);

	return copy;
}
1922 
/*
 * ASCII-only upper function
 *
 * The byte count is passed explicitly so both varlena data and plain char*
 * can be handled.  The input is duplicated with pnstrdup() and every byte
 * up to the terminator is mapped through pg_ascii_toupper(); the copy is
 * returned.  A NULL input yields NULL.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *copy;
	size_t		i;

	if (buff == NULL)
		return NULL;

	copy = pnstrdup(buff, nbytes);

	for (i = 0; copy[i] != '\0'; i++)
		copy[i] = pg_ascii_toupper((unsigned char) copy[i]);

	return copy;
}
1945 
1946 /*
1947  * ASCII-only initcap function
1948  *
1949  * We pass the number of bytes so we can pass varlena and char*
1950  * to this function. The result is a palloc'd, null-terminated string.
1951  */
1952 char *
1953 asc_initcap(const char *buff, size_t nbytes)
1954 {
1955  char *result;
1956  char *p;
1957  int wasalnum = false;
1958 
1959  if (!buff)
1960  return NULL;
1961 
1962  result = pnstrdup(buff, nbytes);
1963 
1964  for (p = result; *p; p++)
1965  {
1966  char c;
1967 
1968  if (wasalnum)
1969  *p = c = pg_ascii_tolower((unsigned char) *p);
1970  else
1971  *p = c = pg_ascii_toupper((unsigned char) *p);
1972  /* we don't trust isalnum() here */
1973  wasalnum = ((c >= 'A' && c <= 'Z') ||
1974  (c >= 'a' && c <= 'z') ||
1975  (c >= '0' && c <= '9'));
1976  }
1977 
1978  return result;
1979 }
1980 
1981 /* convenience routines for when the input is null-terminated */
1982 
1983 static char *
1984 str_tolower_z(const char *buff, Oid collid)
1985 {
1986  return str_tolower(buff, strlen(buff), collid);
1987 }
1988 
1989 static char *
1990 str_toupper_z(const char *buff, Oid collid)
1991 {
1992  return str_toupper(buff, strlen(buff), collid);
1993 }
1994 
1995 static char *
1996 str_initcap_z(const char *buff, Oid collid)
1997 {
1998  return str_initcap(buff, strlen(buff), collid);
1999 }
2000 
/* asc_tolower() for a null-terminated input: the length comes from strlen() */
static char *
asc_tolower_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_tolower(buff, nbytes);
}
2006 
/* asc_toupper() for a null-terminated input: the length comes from strlen() */
static char *
asc_toupper_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_toupper(buff, nbytes);
}
2012 
2013 /* asc_initcap_z is not currently needed */
2014 
2015 
/* ----------
 * Skip TH / th in FROM_CHAR
 *
 * If S_THth is on for the suffix, advance 'ptr' over up to two characters
 * (each measured with pg_mblen), stopping at the string terminator.
 * 'ptr' is evaluated several times, so it must be a plain lvalue.
 * ----------
 */
#define SKIP_THth(ptr, _suf) \
	do { \
		if (!S_THth(_suf)) \
			break; \
		if (*(ptr) == '\0') \
			break; \
		(ptr) += pg_mblen(ptr); \
		if (*(ptr) != '\0') \
			(ptr) += pg_mblen(ptr); \
	} while (0)
2030 
2031 
#ifdef DEBUG_TO_FROM_CHAR
/* -----------
 * DEBUG: Call for debug and for index checking; (Show ASCII char
 * and defined keyword for each used position
 *
 * Walks all KeyWord_INDEX_SIZE slots of 'index', logs each one, and
 * finishes with a count of used (!= -1) and free (== -1) slots.
 * ----------
 */
static void
dump_index(const KeyWord *k, const int *index)
{
	int			slot;
	int			used_slots = 0;
	int			free_slots = 0;

	elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");

	for (slot = 0; slot < KeyWord_INDEX_SIZE; slot++)
	{
		if (index[slot] == -1)
		{
			free_slots++;
			elog(DEBUG_elog_output, "\t(%d) %c %d", slot, slot + 32, index[slot]);
		}
		else
		{
			elog(DEBUG_elog_output, "\t%c: %s, ", slot + 32, k[index[slot]].name);
			used_slots++;
		}
	}

	elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
		 used_slots, free_slots);
}
#endif							/* DEBUG */
2064 
2065 /* ----------
2066  * Return true if next format picture is not digit value
2067  * ----------
2068  */
2069 static bool
2071 {
2072  if (n->type == NODE_TYPE_END)
2073  return false;
2074 
2075  if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
2076  return true;
2077 
2078  /*
2079  * Next node
2080  */
2081  n++;
2082 
2083  /* end of format string is treated like a non-digit separator */
2084  if (n->type == NODE_TYPE_END)
2085  return true;
2086 
2087  if (n->type == NODE_TYPE_ACTION)
2088  {
2089  if (n->key->is_digit)
2090  return false;
2091 
2092  return true;
2093  }
2094  else if (n->character[1] == '\0' &&
2095  isdigit((unsigned char) n->character[0]))
2096  return false;
2097 
2098  return true; /* some non-digit input (separator) */
2099 }
2100 
2101 
/*
 * Expand a year given with fewer than four digits to a full year.
 *
 * Values below 1000 are moved into a nearby century as listed below;
 * values of 1000 and above are returned unchanged.
 */
static int
adjust_partial_year_to_2020(int year)
{
	/*
	 * Adjust all dates toward 2020; this is effectively what happens when we
	 * assume '70' is 1970 and '69' is 2069.
	 */
	/* Force 0-69 into the 2000's */
	if (year < 70)
		return year + 2000;
	/* Force 70-99 into the 1900's */
	else if (year < 100)
		return year + 1900;
	/* Force 100-519 into the 2000's */
	else if (year < 520)
		return year + 2000;
	/* Force 520-999 into the 1000's */
	else if (year < 1000)
		return year + 1000;
	else
		return year;
}
2124 
2125 
/*
 * Return the number of leading whitespace characters (per isspace()) in
 * 'str'.  The string itself is not modified.
 */
static int
strspace_len(char *str)
{
	int			len = 0;

	while (*str && isspace((unsigned char) *str))
	{
		str++;
		len++;
	}
	return len;
}
2138 
2139 /*
2140  * Set the date mode of a from-char conversion.
2141  *
2142  * Puke if the date mode has already been set, and the caller attempts to set
2143  * it to a conflicting mode.
2144  */
2145 static void
2147 {
2148  if (mode != FROM_CHAR_DATE_NONE)
2149  {
2150  if (tmfc->mode == FROM_CHAR_DATE_NONE)
2151  tmfc->mode = mode;
2152  else if (tmfc->mode != mode)
2153  ereport(ERROR,
2154  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2155  errmsg("invalid combination of date conventions"),
2156  errhint("Do not mix Gregorian and ISO week date "
2157  "conventions in a formatting template.")));
2158  }
2159 }
2160 
2161 /*
2162  * Set the integer pointed to by 'dest' to the given value.
2163  *
2164  * Puke if the destination integer has previously been set to some other
2165  * non-zero value.
2166  */
2167 static void
2168 from_char_set_int(int *dest, const int value, const FormatNode *node)
2169 {
2170  if (*dest != 0 && *dest != value)
2171  ereport(ERROR,
2172  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2173  errmsg("conflicting values for \"%s\" field in formatting string",
2174  node->key->name),
2175  errdetail("This value contradicts a previous setting for "
2176  "the same field type.")));
2177  *dest = value;
2178 }
2179 
2180 /*
2181  * Read a single integer from the source string, into the int pointed to by
2182  * 'dest'. If 'dest' is NULL, the result is discarded.
2183  *
2184  * In fixed-width mode (the node does not have the FM suffix), consume at most
2185  * 'len' characters. However, any leading whitespace isn't counted in 'len'.
2186  *
2187  * We use strtol() to recover the integer value from the source string, in
2188  * accordance with the given FormatNode.
2189  *
2190  * If the conversion completes successfully, src will have been advanced to
2191  * point at the character immediately following the last character used in the
2192  * conversion.
2193  *
2194  * Return the number of characters consumed.
2195  *
2196  * Note that from_char_parse_int() provides a more convenient wrapper where
2197  * the length of the field is the same as the length of the format keyword (as
2198  * with DD and MI).
2199  */
2200 static int
2201 from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node)
2202 {
2203  long result;
2204  char copy[DCH_MAX_ITEM_SIZ + 1];
2205  char *init = *src;
2206  int used;
2207 
2208  /*
2209  * Skip any whitespace before parsing the integer.
2210  */
2211  *src += strspace_len(*src);
2212 
2213  Assert(len <= DCH_MAX_ITEM_SIZ);
2214  used = (int) strlcpy(copy, *src, len + 1);
2215 
2216  if (S_FM(node->suffix) || is_next_separator(node))
2217  {
2218  /*
2219  * This node is in Fill Mode, or the next node is known to be a
2220  * non-digit value, so we just slurp as many characters as we can get.
2221  */
2222  errno = 0;
2223  result = strtol(init, src, 10);
2224  }
2225  else
2226  {
2227  /*
2228  * We need to pull exactly the number of characters given in 'len' out
2229  * of the string, and convert those.
2230  */
2231  char *last;
2232 
2233  if (used < len)
2234  ereport(ERROR,
2235  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2236  errmsg("source string too short for \"%s\" formatting field",
2237  node->key->name),
2238  errdetail("Field requires %d characters, but only %d "
2239  "remain.",
2240  len, used),
2241  errhint("If your source string is not fixed-width, try "
2242  "using the \"FM\" modifier.")));
2243 
2244  errno = 0;
2245  result = strtol(copy, &last, 10);
2246  used = last - copy;
2247 
2248  if (used > 0 && used < len)
2249  ereport(ERROR,
2250  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2251  errmsg("invalid value \"%s\" for \"%s\"",
2252  copy, node->key->name),
2253  errdetail("Field requires %d characters, but only %d "
2254  "could be parsed.", len, used),
2255  errhint("If your source string is not fixed-width, try "
2256  "using the \"FM\" modifier.")));
2257 
2258  *src += used;
2259  }
2260 
2261  if (*src == init)
2262  ereport(ERROR,
2263  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2264  errmsg("invalid value \"%s\" for \"%s\"",
2265  copy, node->key->name),
2266  errdetail("Value must be an integer.")));
2267 
2268  if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
2269  ereport(ERROR,
2270  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2271  errmsg("value for \"%s\" in source string is out of range",
2272  node->key->name),
2273  errdetail("Value must be in the range %d to %d.",
2274  INT_MIN, INT_MAX)));
2275 
2276  if (dest != NULL)
2277  from_char_set_int(dest, (int) result, node);
2278  return *src - init;
2279 }
2280 
2281 /*
2282  * Call from_char_parse_int_len(), using the length of the format keyword as
2283  * the expected length of the field.
2284  *
2285  * Don't call this function if the field differs in length from the format
2286  * keyword (as with HH24; the keyword length is 4, but the field length is 2).
2287  * In such cases, call from_char_parse_int_len() instead to specify the
2288  * required length explicitly.
2289  */
2290 static int
2291 from_char_parse_int(int *dest, char **src, FormatNode *node)
2292 {
2293  return from_char_parse_int_len(dest, src, node->key->len, node);
2294 }
2295 
2296 /* ----------
2297  * Sequential search with to upper/lower conversion
2298  * ----------
2299  */
2300 static int
2301 seq_search(char *name, const char *const *array, int type, int max, int *len)
2302 {
2303  const char *p;
2304  const char *const *a;
2305  char *n;
2306  int last,
2307  i;
2308 
2309  *len = 0;
2310 
2311  if (!*name)
2312  return -1;
2313 
2314  /* set first char */
2315  if (type == ONE_UPPER || type == ALL_UPPER)
2316  *name = pg_toupper((unsigned char) *name);
2317  else if (type == ALL_LOWER)
2318  *name = pg_tolower((unsigned char) *name);
2319 
2320  for (last = 0, a = array; *a != NULL; a++)
2321  {
2322  /* compare first chars */
2323  if (*name != **a)
2324  continue;
2325 
2326  for (i = 1, p = *a + 1, n = name + 1;; n++, p++, i++)
2327  {
2328  /* search fragment (max) only */
2329  if (max && i == max)
2330  {
2331  *len = i;
2332  return a - array;
2333  }
2334  /* full size */
2335  if (*p == '\0')
2336  {
2337  *len = i;
2338  return a - array;
2339  }
2340  /* Not found in array 'a' */
2341  if (*n == '\0')
2342  break;
2343 
2344  /*
2345  * Convert (but convert new chars only)
2346  */
2347  if (i > last)
2348  {
2349  if (type == ONE_UPPER || type == ALL_LOWER)
2350  *n = pg_tolower((unsigned char) *n);
2351  else if (type == ALL_UPPER)
2352  *n = pg_toupper((unsigned char) *n);
2353  last = i;
2354  }
2355 
2356 #ifdef DEBUG_TO_FROM_CHAR
2357  elog(DEBUG_elog_output, "N: %c, P: %c, A: %s (%s)",
2358  *n, *p, *a, name);
2359 #endif
2360  if (*n != *p)
2361  break;
2362  }
2363  }
2364 
2365  return -1;
2366 }
2367 
2368 /*
2369  * Perform a sequential search in 'array' for text matching the first 'max'
2370  * characters of the source string.
2371  *
2372  * If a match is found, copy the array index of the match into the integer
2373  * pointed to by 'dest', advance 'src' to the end of the part of the string
2374  * which matched, and return the number of characters consumed.
2375  *
2376  * If the string doesn't match, throw an error.
2377  */
2378 static int
2379 from_char_seq_search(int *dest, char **src, const char *const *array, int type, int max,
2380  FormatNode *node)
2381 {
2382  int len;
2383 
2384  *dest = seq_search(*src, array, type, max, &len);
2385  if (len <= 0)
2386  {
2387  char copy[DCH_MAX_ITEM_SIZ + 1];
2388 
2389  Assert(max <= DCH_MAX_ITEM_SIZ);
2390  strlcpy(copy, *src, max + 1);
2391 
2392  ereport(ERROR,
2393  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2394  errmsg("invalid value \"%s\" for \"%s\"",
2395  copy, node->key->name),
2396  errdetail("The given value did not match any of the allowed "
2397  "values for this field.")));
2398  }
2399  *src += len;
2400  return len;
2401 }
2402 
2403 /* ----------
2404  * Process a TmToChar struct as denoted by a list of FormatNodes.
2405  * The formatted data is written to the string pointed to by 'out'.
2406  * ----------
2407  */
2408 static void
2409 DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
2410 {
2411  FormatNode *n;
2412  char *s;
2413  struct pg_tm *tm = &in->tm;
2414  int i;
2415 
2416  /* cache localized days and months */
2418 
2419  s = out;
2420  for (n = node; n->type != NODE_TYPE_END; n++)
2421  {
2422  if (n->type != NODE_TYPE_ACTION)
2423  {
2424  strcpy(s, n->character);
2425  s += strlen(s);
2426  continue;
2427  }
2428 
2429  switch (n->key->id)
2430  {
2431  case DCH_A_M:
2432  case DCH_P_M:
2433  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2434  ? P_M_STR : A_M_STR);
2435  s += strlen(s);
2436  break;
2437  case DCH_AM:
2438  case DCH_PM:
2439  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2440  ? PM_STR : AM_STR);
2441  s += strlen(s);
2442  break;
2443  case DCH_a_m:
2444  case DCH_p_m:
2445  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2446  ? p_m_STR : a_m_STR);
2447  s += strlen(s);
2448  break;
2449  case DCH_am:
2450  case DCH_pm:
2451  strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2452  ? pm_STR : am_STR);
2453  s += strlen(s);
2454  break;
2455  case DCH_HH:
2456  case DCH_HH12:
2457 
2458  /*
2459  * display time as shown on a 12-hour clock, even for
2460  * intervals
2461  */
2462  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2463  tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ? HOURS_PER_DAY / 2 :
2464  tm->tm_hour % (HOURS_PER_DAY / 2));
2465  if (S_THth(n->suffix))
2466  str_numth(s, s, S_TH_TYPE(n->suffix));
2467  s += strlen(s);
2468  break;
2469  case DCH_HH24:
2470  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2471  tm->tm_hour);
2472  if (S_THth(n->suffix))
2473  str_numth(s, s, S_TH_TYPE(n->suffix));
2474  s += strlen(s);
2475  break;
2476  case DCH_MI:
2477  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
2478  tm->tm_min);
2479  if (S_THth(n->suffix))
2480  str_numth(s, s, S_TH_TYPE(n->suffix));
2481  s += strlen(s);
2482  break;
2483  case DCH_SS:
2484  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
2485  tm->tm_sec);
2486  if (S_THth(n->suffix))
2487  str_numth(s, s, S_TH_TYPE(n->suffix));
2488  s += strlen(s);
2489  break;
2490  case DCH_MS: /* millisecond */
2491  sprintf(s, "%03d", (int) (in->fsec / INT64CONST(1000)));
2492  if (S_THth(n->suffix))
2493  str_numth(s, s, S_TH_TYPE(n->suffix));
2494  s += strlen(s);
2495  break;
2496  case DCH_US: /* microsecond */
2497  sprintf(s, "%06d", (int) in->fsec);
2498  if (S_THth(n->suffix))
2499  str_numth(s, s, S_TH_TYPE(n->suffix));
2500  s += strlen(s);
2501  break;
2502  case DCH_SSSS:
2503  sprintf(s, "%d", tm->tm_hour * SECS_PER_HOUR +
2504  tm->tm_min * SECS_PER_MINUTE +
2505  tm->tm_sec);
2506  if (S_THth(n->suffix))
2507  str_numth(s, s, S_TH_TYPE(n->suffix));
2508  s += strlen(s);
2509  break;
2510  case DCH_tz:
2512  if (tmtcTzn(in))
2513  {
2514  /* We assume here that timezone names aren't localized */
2515  char *p = asc_tolower_z(tmtcTzn(in));
2516 
2517  strcpy(s, p);
2518  pfree(p);
2519  s += strlen(s);
2520  }
2521  break;
2522  case DCH_TZ:
2524  if (tmtcTzn(in))
2525  {
2526  strcpy(s, tmtcTzn(in));
2527  s += strlen(s);
2528  }
2529  break;
2530  case DCH_TZH:
2532  sprintf(s, "%c%02d",
2533  (tm->tm_gmtoff >= 0) ? '+' : '-',
2534  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2535  s += strlen(s);
2536  break;
2537  case DCH_TZM:
2539  sprintf(s, "%02d",
2540  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2541  s += strlen(s);
2542  break;
2543  case DCH_OF:
2545  sprintf(s, "%c%0*d",
2546  (tm->tm_gmtoff >= 0) ? '+' : '-',
2547  S_FM(n->suffix) ? 0 : 2,
2548  abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2549  s += strlen(s);
2550  if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
2551  {
2552  sprintf(s, ":%02d",
2553  (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2554  s += strlen(s);
2555  }
2556  break;
2557  case DCH_A_D:
2558  case DCH_B_C:
2560  strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
2561  s += strlen(s);
2562  break;
2563  case DCH_AD:
2564  case DCH_BC:
2566  strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
2567  s += strlen(s);
2568  break;
2569  case DCH_a_d:
2570  case DCH_b_c:
2572  strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
2573  s += strlen(s);
2574  break;
2575  case DCH_ad:
2576  case DCH_bc:
2578  strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
2579  s += strlen(s);
2580  break;
2581  case DCH_MONTH:
2583  if (!tm->tm_mon)
2584  break;
2585  if (S_TM(n->suffix))
2586  {
2587  char *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid);
2588 
2589  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2590  strcpy(s, str);
2591  else
2592  ereport(ERROR,
2593  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2594  errmsg("localized string format value too long")));
2595  }
2596  else
2597  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2598  asc_toupper_z(months_full[tm->tm_mon - 1]));
2599  s += strlen(s);
2600  break;
2601  case DCH_Month:
2603  if (!tm->tm_mon)
2604  break;
2605  if (S_TM(n->suffix))
2606  {
2607  char *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid);
2608 
2609  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2610  strcpy(s, str);
2611  else
2612  ereport(ERROR,
2613  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2614  errmsg("localized string format value too long")));
2615  }
2616  else
2617  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2618  months_full[tm->tm_mon - 1]);
2619  s += strlen(s);
2620  break;
2621  case DCH_month:
2623  if (!tm->tm_mon)
2624  break;
2625  if (S_TM(n->suffix))
2626  {
2627  char *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid);
2628 
2629  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2630  strcpy(s, str);
2631  else
2632  ereport(ERROR,
2633  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2634  errmsg("localized string format value too long")));
2635  }
2636  else
2637  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2638  asc_tolower_z(months_full[tm->tm_mon - 1]));
2639  s += strlen(s);
2640  break;
2641  case DCH_MON:
2643  if (!tm->tm_mon)
2644  break;
2645  if (S_TM(n->suffix))
2646  {
2647  char *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2648 
2649  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2650  strcpy(s, str);
2651  else
2652  ereport(ERROR,
2653  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2654  errmsg("localized string format value too long")));
2655  }
2656  else
2657  strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
2658  s += strlen(s);
2659  break;
2660  case DCH_Mon:
2662  if (!tm->tm_mon)
2663  break;
2664  if (S_TM(n->suffix))
2665  {
2666  char *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2667 
2668  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2669  strcpy(s, str);
2670  else
2671  ereport(ERROR,
2672  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2673  errmsg("localized string format value too long")));
2674  }
2675  else
2676  strcpy(s, months[tm->tm_mon - 1]);
2677  s += strlen(s);
2678  break;
2679  case DCH_mon:
2681  if (!tm->tm_mon)
2682  break;
2683  if (S_TM(n->suffix))
2684  {
2685  char *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2686 
2687  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2688  strcpy(s, str);
2689  else
2690  ereport(ERROR,
2691  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2692  errmsg("localized string format value too long")));
2693  }
2694  else
2695  strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
2696  s += strlen(s);
2697  break;
2698  case DCH_MM:
2699  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
2700  tm->tm_mon);
2701  if (S_THth(n->suffix))
2702  str_numth(s, s, S_TH_TYPE(n->suffix));
2703  s += strlen(s);
2704  break;
2705  case DCH_DAY:
2707  if (S_TM(n->suffix))
2708  {
2709  char *str = str_toupper_z(localized_full_days[tm->tm_wday], collid);
2710 
2711  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2712  strcpy(s, str);
2713  else
2714  ereport(ERROR,
2715  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2716  errmsg("localized string format value too long")));
2717  }
2718  else
2719  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2720  asc_toupper_z(days[tm->tm_wday]));
2721  s += strlen(s);
2722  break;
2723  case DCH_Day:
2725  if (S_TM(n->suffix))
2726  {
2727  char *str = str_initcap_z(localized_full_days[tm->tm_wday], collid);
2728 
2729  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2730  strcpy(s, str);
2731  else
2732  ereport(ERROR,
2733  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2734  errmsg("localized string format value too long")));
2735  }
2736  else
2737  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2738  days[tm->tm_wday]);
2739  s += strlen(s);
2740  break;
2741  case DCH_day:
2743  if (S_TM(n->suffix))
2744  {
2745  char *str = str_tolower_z(localized_full_days[tm->tm_wday], collid);
2746 
2747  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2748  strcpy(s, str);
2749  else
2750  ereport(ERROR,
2751  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2752  errmsg("localized string format value too long")));
2753  }
2754  else
2755  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2756  asc_tolower_z(days[tm->tm_wday]));
2757  s += strlen(s);
2758  break;
2759  case DCH_DY:
2761  if (S_TM(n->suffix))
2762  {
2763  char *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid);
2764 
2765  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2766  strcpy(s, str);
2767  else
2768  ereport(ERROR,
2769  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2770  errmsg("localized string format value too long")));
2771  }
2772  else
2773  strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
2774  s += strlen(s);
2775  break;
2776  case DCH_Dy:
2778  if (S_TM(n->suffix))
2779  {
2780  char *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid);
2781 
2782  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2783  strcpy(s, str);
2784  else
2785  ereport(ERROR,
2786  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2787  errmsg("localized string format value too long")));
2788  }
2789  else
2790  strcpy(s, days_short[tm->tm_wday]);
2791  s += strlen(s);
2792  break;
2793  case DCH_dy:
2795  if (S_TM(n->suffix))
2796  {
2797  char *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid);
2798 
2799  if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2800  strcpy(s, str);
2801  else
2802  ereport(ERROR,
2803  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2804  errmsg("localized string format value too long")));
2805  }
2806  else
2807  strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
2808  s += strlen(s);
2809  break;
2810  case DCH_DDD:
2811  case DCH_IDDD:
2812  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
2813  (n->key->id == DCH_DDD) ?
2814  tm->tm_yday :
2815  date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday));
2816  if (S_THth(n->suffix))
2817  str_numth(s, s, S_TH_TYPE(n->suffix));
2818  s += strlen(s);
2819  break;
2820  case DCH_DD:
2821  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
2822  if (S_THth(n->suffix))
2823  str_numth(s, s, S_TH_TYPE(n->suffix));
2824  s += strlen(s);
2825  break;
2826  case DCH_D:
2828  sprintf(s, "%d", tm->tm_wday + 1);
2829  if (S_THth(n->suffix))
2830  str_numth(s, s, S_TH_TYPE(n->suffix));
2831  s += strlen(s);
2832  break;
2833  case DCH_ID:
2835  sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
2836  if (S_THth(n->suffix))
2837  str_numth(s, s, S_TH_TYPE(n->suffix));
2838  s += strlen(s);
2839  break;
2840  case DCH_WW:
2841  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
2842  (tm->tm_yday - 1) / 7 + 1);
2843  if (S_THth(n->suffix))
2844  str_numth(s, s, S_TH_TYPE(n->suffix));
2845  s += strlen(s);
2846  break;
2847  case DCH_IW:
2848  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
2849  date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday));
2850  if (S_THth(n->suffix))
2851  str_numth(s, s, S_TH_TYPE(n->suffix));
2852  s += strlen(s);
2853  break;
2854  case DCH_Q:
2855  if (!tm->tm_mon)
2856  break;
2857  sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
2858  if (S_THth(n->suffix))
2859  str_numth(s, s, S_TH_TYPE(n->suffix));
2860  s += strlen(s);
2861  break;
2862  case DCH_CC:
2863  if (is_interval) /* straight calculation */
2864  i = tm->tm_year / 100;
2865  else
2866  {
2867  if (tm->tm_year > 0)
2868  /* Century 20 == 1901 - 2000 */
2869  i = (tm->tm_year - 1) / 100 + 1;
2870  else
2871  /* Century 6BC == 600BC - 501BC */
2872  i = tm->tm_year / 100 - 1;
2873  }
2874  if (i <= 99 && i >= -99)
2875  sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
2876  else
2877  sprintf(s, "%d", i);
2878  if (S_THth(n->suffix))
2879  str_numth(s, s, S_TH_TYPE(n->suffix));
2880  s += strlen(s);
2881  break;
2882  case DCH_Y_YYY:
2883  i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
2884  sprintf(s, "%d,%03d", i,
2885  ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
2886  if (S_THth(n->suffix))
2887  str_numth(s, s, S_TH_TYPE(n->suffix));
2888  s += strlen(s);
2889  break;
2890  case DCH_YYYY:
2891  case DCH_IYYY:
2892  sprintf(s, "%0*d",
2893  S_FM(n->suffix) ? 0 :
2894  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
2895  (n->key->id == DCH_YYYY ?
2896  ADJUST_YEAR(tm->tm_year, is_interval) :
2898  tm->tm_mon,
2899  tm->tm_mday),
2900  is_interval)));
2901  if (S_THth(n->suffix))
2902  str_numth(s, s, S_TH_TYPE(n->suffix));
2903  s += strlen(s);
2904  break;
2905  case DCH_YYY:
2906  case DCH_IYY:
2907  sprintf(s, "%0*d",
2908  S_FM(n->suffix) ? 0 :
2909  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
2910  (n->key->id == DCH_YYY ?
2911  ADJUST_YEAR(tm->tm_year, is_interval) :
2913  tm->tm_mon,
2914  tm->tm_mday),
2915  is_interval)) % 1000);
2916  if (S_THth(n->suffix))
2917  str_numth(s, s, S_TH_TYPE(n->suffix));
2918  s += strlen(s);
2919  break;
2920  case DCH_YY:
2921  case DCH_IY:
2922  sprintf(s, "%0*d",
2923  S_FM(n->suffix) ? 0 :
2924  (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
2925  (n->key->id == DCH_YY ?
2926  ADJUST_YEAR(tm->tm_year, is_interval) :
2928  tm->tm_mon,
2929  tm->tm_mday),
2930  is_interval)) % 100);
2931  if (S_THth(n->suffix))
2932  str_numth(s, s, S_TH_TYPE(n->suffix));
2933  s += strlen(s);
2934  break;
2935  case DCH_Y:
2936  case DCH_I:
2937  sprintf(s, "%1d",
2938  (n->key->id == DCH_Y ?
2939  ADJUST_YEAR(tm->tm_year, is_interval) :
2941  tm->tm_mon,
2942  tm->tm_mday),
2943  is_interval)) % 10);
2944  if (S_THth(n->suffix))
2945  str_numth(s, s, S_TH_TYPE(n->suffix));
2946  s += strlen(s);
2947  break;
2948  case DCH_RM:
2949  if (!tm->tm_mon)
2950  break;
2951  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
2953  s += strlen(s);
2954  break;
2955  case DCH_rm:
2956  if (!tm->tm_mon)
2957  break;
2958  sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
2960  s += strlen(s);
2961  break;
2962  case DCH_W:
2963  sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
2964  if (S_THth(n->suffix))
2965  str_numth(s, s, S_TH_TYPE(n->suffix));
2966  s += strlen(s);
2967  break;
2968  case DCH_J:
2969  sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
2970  if (S_THth(n->suffix))
2971  str_numth(s, s, S_TH_TYPE(n->suffix));
2972  s += strlen(s);
2973  break;
2974  }
2975  }
2976 
2977  *s = '\0';
2978 }
2979 
2980 /* ----------
2981  * Process a string as denoted by a list of FormatNodes.
2982  * The TmFromChar struct pointed to by 'out' is populated with the results.
2983  *
2984  * Note: we currently don't have any to_interval() function, so there
2985  * is no need here for INVALID_FOR_INTERVAL checks.
2986  * ----------
2987  */
2988 static void
2989 DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
2990 {
2991  FormatNode *n;
2992  char *s;
2993  int len,
2994  value;
2995  bool fx_mode = false;
2996 
2997  for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
2998  {
2999  if (n->type != NODE_TYPE_ACTION)
3000  {
3001  /*
3002  * Separator, so consume one character from input string. Notice
3003  * we don't insist that the consumed character match the format's
3004  * character.
3005  */
3006  s += pg_mblen(s);
3007  continue;
3008  }
3009 
3010  /* Ignore spaces before fields when not in FX (fixed width) mode */
3011  if (!fx_mode && n->key->id != DCH_FX)
3012  {
3013  while (*s != '\0' && isspace((unsigned char) *s))
3014  s++;
3015  }
3016 
3017  from_char_set_mode(out, n->key->date_mode);
3018 
3019  switch (n->key->id)
3020  {
3021  case DCH_FX:
3022  fx_mode = true;
3023  break;
3024  case DCH_A_M:
3025  case DCH_P_M:
3026  case DCH_a_m:
3027  case DCH_p_m:
3029  ALL_UPPER, n->key->len, n);
3030  from_char_set_int(&out->pm, value % 2, n);
3031  out->clock = CLOCK_12_HOUR;
3032  break;
3033  case DCH_AM:
3034  case DCH_PM:
3035  case DCH_am:
3036  case DCH_pm:
3037  from_char_seq_search(&value, &s, ampm_strings,
3038  ALL_UPPER, n->key->len, n);
3039  from_char_set_int(&out->pm, value % 2, n);
3040  out->clock = CLOCK_12_HOUR;
3041  break;
3042  case DCH_HH:
3043  case DCH_HH12:
3044  from_char_parse_int_len(&out->hh, &s, 2, n);
3045  out->clock = CLOCK_12_HOUR;
3046  SKIP_THth(s, n->suffix);
3047  break;
3048  case DCH_HH24:
3049  from_char_parse_int_len(&out->hh, &s, 2, n);
3050  SKIP_THth(s, n->suffix);
3051  break;
3052  case DCH_MI:
3053  from_char_parse_int(&out->mi, &s, n);
3054  SKIP_THth(s, n->suffix);
3055  break;
3056  case DCH_SS:
3057  from_char_parse_int(&out->ss, &s, n);
3058  SKIP_THth(s, n->suffix);
3059  break;
3060  case DCH_MS: /* millisecond */
3061  len = from_char_parse_int_len(&out->ms, &s, 3, n);
3062 
3063  /*
3064  * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
3065  */
3066  out->ms *= len == 1 ? 100 :
3067  len == 2 ? 10 : 1;
3068 
3069  SKIP_THth(s, n->suffix);
3070  break;
3071  case DCH_US: /* microsecond */
3072  len = from_char_parse_int_len(&out->us, &s, 6, n);
3073 
3074  out->us *= len == 1 ? 100000 :
3075  len == 2 ? 10000 :
3076  len == 3 ? 1000 :
3077  len == 4 ? 100 :
3078  len == 5 ? 10 : 1;
3079 
3080  SKIP_THth(s, n->suffix);
3081  break;
3082  case DCH_SSSS:
3083  from_char_parse_int(&out->ssss, &s, n);
3084  SKIP_THth(s, n->suffix);
3085  break;
3086  case DCH_tz:
3087  case DCH_TZ:
3088  case DCH_OF:
3089  ereport(ERROR,
3090  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3091  errmsg("formatting field \"%s\" is only supported in to_char",
3092  n->key->name)));
3093  break;
3094  case DCH_TZH:
3095  out->tzsign = *s == '-' ? -1 : +1;
3096 
3097  if (*s == '+' || *s == '-' || *s == ' ')
3098  s++;
3099 
3100  from_char_parse_int_len(&out->tzh, &s, 2, n);
3101  break;
3102  case DCH_TZM:
3103  /* assign positive timezone sign if TZH was not seen before */
3104  if (!out->tzsign)
3105  out->tzsign = +1;
3106  from_char_parse_int_len(&out->tzm, &s, 2, n);
3107  break;
3108  case DCH_A_D:
3109  case DCH_B_C:
3110  case DCH_a_d:
3111  case DCH_b_c:
3113  ALL_UPPER, n->key->len, n);
3114  from_char_set_int(&out->bc, value % 2, n);
3115  break;
3116  case DCH_AD:
3117  case DCH_BC:
3118  case DCH_ad:
3119  case DCH_bc:
3120  from_char_seq_search(&value, &s, adbc_strings,
3121  ALL_UPPER, n->key->len, n);
3122  from_char_set_int(&out->bc, value % 2, n);
3123  break;
3124  case DCH_MONTH:
3125  case DCH_Month:
3126  case DCH_month:
3128  MAX_MONTH_LEN, n);
3129  from_char_set_int(&out->mm, value + 1, n);
3130  break;
3131  case DCH_MON:
3132  case DCH_Mon:
3133  case DCH_mon:
3134  from_char_seq_search(&value, &s, months, ONE_UPPER,
3135  MAX_MON_LEN, n);
3136  from_char_set_int(&out->mm, value + 1, n);
3137  break;
3138  case DCH_MM:
3139  from_char_parse_int(&out->mm, &s, n);
3140  SKIP_THth(s, n->suffix);
3141  break;
3142  case DCH_DAY:
3143  case DCH_Day:
3144  case DCH_day:
3145  from_char_seq_search(&value, &s, days, ONE_UPPER,
3146  MAX_DAY_LEN, n);
3147  from_char_set_int(&out->d, value, n);
3148  out->d++;
3149  break;
3150  case DCH_DY:
3151  case DCH_Dy:
3152  case DCH_dy:
3153  from_char_seq_search(&value, &s, days, ONE_UPPER,
3154  MAX_DY_LEN, n);
3155  from_char_set_int(&out->d, value, n);
3156  out->d++;
3157  break;
3158  case DCH_DDD:
3159  from_char_parse_int(&out->ddd, &s, n);
3160  SKIP_THth(s, n->suffix);
3161  break;
3162  case DCH_IDDD:
3163  from_char_parse_int_len(&out->ddd, &s, 3, n);
3164  SKIP_THth(s, n->suffix);
3165  break;
3166  case DCH_DD:
3167  from_char_parse_int(&out->dd, &s, n);
3168  SKIP_THth(s, n->suffix);
3169  break;
3170  case DCH_D:
3171  from_char_parse_int(&out->d, &s, n);
3172  SKIP_THth(s, n->suffix);
3173  break;
3174  case DCH_ID:
3175  from_char_parse_int_len(&out->d, &s, 1, n);
3176  /* Shift numbering to match Gregorian where Sunday = 1 */
3177  if (++out->d > 7)
3178  out->d = 1;
3179  SKIP_THth(s, n->suffix);
3180  break;
3181  case DCH_WW:
3182  case DCH_IW:
3183  from_char_parse_int(&out->ww, &s, n);
3184  SKIP_THth(s, n->suffix);
3185  break;
3186  case DCH_Q:
3187 
3188  /*
3189  * We ignore 'Q' when converting to date because it is unclear
3190  * which date in the quarter to use, and some people specify
3191  * both quarter and month, so if it was honored it might
3192  * conflict with the supplied month. That is also why we don't
3193  * throw an error.
3194  *
3195  * We still parse the source string for an integer, but it
3196  * isn't stored anywhere in 'out'.
3197  */
3198  from_char_parse_int((int *) NULL, &s, n);
3199  SKIP_THth(s, n->suffix);
3200  break;
3201  case DCH_CC:
3202  from_char_parse_int(&out->cc, &s, n);
3203  SKIP_THth(s, n->suffix);
3204  break;
3205  case DCH_Y_YYY:
3206  {
3207  int matched,
3208  years,
3209  millennia,
3210  nch;
3211 
3212  matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
3213  if (matched < 2)
3214  ereport(ERROR,
3215  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3216  errmsg("invalid input string for \"Y,YYY\"")));
3217  years += (millennia * 1000);
3218  from_char_set_int(&out->year, years, n);
3219  out->yysz = 4;
3220  s += nch;
3221  SKIP_THth(s, n->suffix);
3222  }
3223  break;
3224  case DCH_YYYY:
3225  case DCH_IYYY:
3226  from_char_parse_int(&out->year, &s, n);
3227  out->yysz = 4;
3228  SKIP_THth(s, n->suffix);
3229  break;
3230  case DCH_YYY:
3231  case DCH_IYY:
3232  if (from_char_parse_int(&out->year, &s, n) < 4)
3233  out->year = adjust_partial_year_to_2020(out->year);
3234  out->yysz = 3;
3235  SKIP_THth(s, n->suffix);
3236  break;
3237  case DCH_YY:
3238  case DCH_IY:
3239  if (from_char_parse_int(&out->year, &s, n) < 4)
3240  out->year = adjust_partial_year_to_2020(out->year);
3241  out->yysz = 2;
3242  SKIP_THth(s, n->suffix);
3243  break;
3244  case DCH_Y:
3245  case DCH_I:
3246  if (from_char_parse_int(&out->year, &s, n) < 4)
3247  out->year = adjust_partial_year_to_2020(out->year);
3248  out->yysz = 1;
3249  SKIP_THth(s, n->suffix);
3250  break;
3251  case DCH_RM:
3253  ALL_UPPER, MAX_RM_LEN, n);
3254  from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, n);
3255  break;
3256  case DCH_rm:
3258  ALL_LOWER, MAX_RM_LEN, n);
3259  from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, n);
3260  break;
3261  case DCH_W:
3262  from_char_parse_int(&out->w, &s, n);
3263  SKIP_THth(s, n->suffix);
3264  break;
3265  case DCH_J:
3266  from_char_parse_int(&out->j, &s, n);
3267  SKIP_THth(s, n->suffix);
3268  break;
3269  }
3270  }
3271 }
3272 
3273 /* select a DCHCacheEntry to hold the given format picture */
3274 static DCHCacheEntry *
3275 DCH_cache_getnew(const char *str)
3276 {
3277  DCHCacheEntry *ent;
3278 
3279  /* counter overflow check - paranoia? */
3280  if (DCHCounter >= (INT_MAX - DCH_CACHE_ENTRIES))
3281  {
3282  DCHCounter = 0;
3283 
3284  for (ent = DCHCache; ent < (DCHCache + DCH_CACHE_ENTRIES); ent++)
3285  ent->age = (++DCHCounter);
3286  }
3287 
3288  /*
3289  * If cache is full, remove oldest entry (or recycle first not-valid one)
3290  */
3292  {
3293  DCHCacheEntry *old = DCHCache + 0;
3294 
3295 #ifdef DEBUG_TO_FROM_CHAR
3296  elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
3297 #endif
3298  if (old->valid)
3299  {
3300  for (ent = DCHCache + 1; ent < (DCHCache + DCH_CACHE_ENTRIES); ent++)
3301  {
3302  if (!ent->valid)
3303  {
3304  old = ent;
3305  break;
3306  }
3307  if (ent->age < old->age)
3308  old = ent;
3309  }
3310  }
3311 #ifdef DEBUG_TO_FROM_CHAR
3312  elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
3313 #endif
3314  old->valid = false;
3315  StrNCpy(old->str, str, DCH_CACHE_SIZE + 1);
3316  old->age = (++DCHCounter);
3317  /* caller is expected to fill format, then set valid */
3318  return old;
3319  }
3320  else
3321  {
3322 #ifdef DEBUG_TO_FROM_CHAR
3323  elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
3324 #endif
3325  ent = DCHCache + n_DCHCache;
3326  ent->valid = false;
3327  StrNCpy(ent->str, str, DCH_CACHE_SIZE + 1);
3328  ent->age = (++DCHCounter);
3329  /* caller is expected to fill format, then set valid */
3330  ++n_DCHCache;
3331  return ent;
3332  }
3333 }
3334 
3335 /* look for an existing DCHCacheEntry matching the given format picture */
3336 static DCHCacheEntry *
3337 DCH_cache_search(const char *str)
3338 {
3339  int i;
3340  DCHCacheEntry *ent;
3341 
3342  /* counter overflow check - paranoia? */
3343  if (DCHCounter >= (INT_MAX - DCH_CACHE_ENTRIES))
3344  {
3345  DCHCounter = 0;
3346 
3347  for (ent = DCHCache; ent < (DCHCache + DCH_CACHE_ENTRIES); ent++)
3348  ent->age = (++DCHCounter);
3349  }
3350 
3351  for (i = 0, ent = DCHCache; i < n_DCHCache; i++, ent++)
3352  {
3353  if (ent->valid && strcmp(ent->str, str) == 0)
3354  {
3355  ent->age = (++DCHCounter);
3356  return ent;
3357  }
3358  }
3359 
3360  return NULL;
3361 }
3362 
3363 /* Find or create a DCHCacheEntry for the given format picture */
3364 static DCHCacheEntry *
3365 DCH_cache_fetch(const char *str)
3366 {
3367  DCHCacheEntry *ent;
3368 
3369  if ((ent = DCH_cache_search(str)) == NULL)
3370  {
3371  /*
3372  * Not in the cache, must run parser and save a new format-picture to
3373  * the cache. Do not mark the cache entry valid until parsing
3374  * succeeds.
3375  */
3376  ent = DCH_cache_getnew(str);
3377 
3378  parse_format(ent->format, str, DCH_keywords,
3379  DCH_suff, DCH_index, DCH_TYPE, NULL);
3380 
3381  ent->valid = true;
3382  }
3383  return ent;
3384 }
3385 
3386 /*
3387  * Format a date/time or interval into a string according to fmt.
3388  * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char
3389  * for formatting.
3390  */
3391 static text *
3392 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
3393 {
3394  FormatNode *format;
3395  char *fmt_str,
3396  *result;
3397  bool incache;
3398  int fmt_len;
3399  text *res;
3400 
3401  /*
3402  * Convert fmt to C string
3403  */
3404  fmt_str = text_to_cstring(fmt);
3405  fmt_len = strlen(fmt_str);
3406 
3407  /*
3408  * Allocate workspace for result as C string
3409  */
3410  result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
3411  *result = '\0';
3412 
3413  if (fmt_len > DCH_CACHE_SIZE)
3414  {
3415  /*
3416  * Allocate new memory if format picture is bigger than static cache
3417  * and do not use cache (call parser always)
3418  */
3419  incache = false;
3420 
3421  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
3422 
3423  parse_format(format, fmt_str, DCH_keywords,
3424  DCH_suff, DCH_index, DCH_TYPE, NULL);
3425  }
3426  else
3427  {
3428  /*
3429  * Use cache buffers
3430  */
3431  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str);
3432 
3433  incache = true;
3434  format = ent->format;
3435  }
3436 
3437  /* The real work is here */
3438  DCH_to_char(format, is_interval, tmtc, result, collid);
3439 
3440  if (!incache)
3441  pfree(format);
3442 
3443  pfree(fmt_str);
3444 
3445  /* convert C-string result to TEXT format */
3446  res = cstring_to_text(result);
3447 
3448  pfree(result);
3449  return res;
3450 }
3451 
3452 /****************************************************************************
3453  * Public routines
3454  ***************************************************************************/
3455 
3456 /* -------------------
3457  * TIMESTAMP to_char()
 *
 * SQL-callable entry point (returns Datum).  Formats the timestamp 'dt'
 * according to the format picture passed as argument 1.
 *
 * Returns SQL NULL when the format text is empty, when
 * TIMESTAMP_NOT_FINITE(dt) holds, or when datetime_to_char_body() returns
 * NULL.  Raises "timestamp out of range" if timestamp2tm() fails.
 *
 * NOTE(review): the line carrying the function name/arguments and the
 * declaration of 'dt' are absent from this listing (the numbering jumps
 * 3460 -> 3462 -> 3464); restore them from the upstream file before
 * compiling.
3458  * -------------------
3459  */
3460 Datum
3462 {
3464  text *fmt = PG_GETARG_TEXT_PP(1),
3465  *res;
3466  TmToChar tmtc;
3467  struct pg_tm *tm;
3468  int thisdate;
3469 
 /* empty format picture or non-finite timestamp: nothing to format */
3470  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
3471  PG_RETURN_NULL();
3472 
3473  ZERO_tmtc(&tmtc);
3474  tm = tmtcTm(&tmtc);
3475 
 /* no time zone or zone-name output is requested here (NULL arguments) */
3476  if (timestamp2tm(dt, NULL, tm, &tmtcFsec(&tmtc), NULL, NULL) != 0)
3477  ereport(ERROR,
3478  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3479  errmsg("timestamp out of range")));
3480 
 /*
  * Fill in weekday and day-of-year from date2j() day numbers; tm_yday is
  * 1-based (the day count since Jan 1 of the same year, plus one).
  */
3481  thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
3482  tm->tm_wday = (thisdate + 1) % 7;
3483  tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
3484 
3485  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
3486  PG_RETURN_NULL();
3487 
3488  PG_RETURN_TEXT_P(res);
3489 }
3490 
/*
 * to_char() variant that also captures time zone information: unlike the
 * variant above, timestamp2tm() is given &tz and &tmtcTzn(&tmtc) so the
 * zone offset and zone name are filled in alongside the broken-down time.
 *
 * Returns SQL NULL for an empty format picture, when
 * TIMESTAMP_NOT_FINITE(dt) holds, or when datetime_to_char_body() returns
 * NULL; raises "timestamp out of range" if timestamp2tm() fails.
 *
 * NOTE(review): the function name/argument line and the declaration of
 * 'dt' are absent from this listing (numbering jumps 3491 -> 3493 -> 3495).
 */
3491 Datum
3493 {
3495  text *fmt = PG_GETARG_TEXT_PP(1),
3496  *res;
3497  TmToChar tmtc;
3498  int tz;
3499  struct pg_tm *tm;
3500  int thisdate;
3501 
3502  if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
3503  PG_RETURN_NULL();
3504 
3505  ZERO_tmtc(&tmtc);
3506  tm = tmtcTm(&tmtc);
3507 
3508  if (timestamp2tm(dt, &tz, tm, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
3509  ereport(ERROR,
3510  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3511  errmsg("timestamp out of range")));
3512 
 /* weekday and 1-based day-of-year, derived from date2j() day numbers */
3513  thisdate = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
3514  tm->tm_wday = (thisdate + 1) % 7;
3515  tm->tm_yday = thisdate - date2j(tm->tm_year, 1, 1) + 1;
3516 
3517  if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
3518  PG_RETURN_NULL();
3519 
3520  PG_RETURN_TEXT_P(res);
3521 }
3522 
3523 
3524 /* -------------------
3525  * INTERVAL to_char()
 *
 * SQL-callable entry point (returns Datum).  Formats the interval in
 * argument 0 according to the format picture in argument 1, calling
 * datetime_to_char_body() with is_interval = true.
 *
 * Returns SQL NULL when the format text is empty, when interval2tm()
 * reports failure, or when datetime_to_char_body() returns NULL.
 *
 * NOTE(review): the line carrying the function name/arguments is absent
 * from this listing (numbering jumps 3528 -> 3530).
3526  * -------------------
3527  */
3528 Datum
3530 {
3531  Interval *it = PG_GETARG_INTERVAL_P(0);
3532  text *fmt = PG_GETARG_TEXT_PP(1),
3533  *res;
3534  TmToChar tmtc;
3535  struct pg_tm *tm;
3536 
3537  if (VARSIZE_ANY_EXHDR(fmt) <= 0)
3538  PG_RETURN_NULL();
3539 
3540  ZERO_tmtc(&tmtc);
3541  tm = tmtcTm(&tmtc);
3542 
 /* a conversion failure is reported as NULL rather than as an error */
3543  if (interval2tm(*it, tm, &tmtcFsec(&tmtc)) != 0)
3544  PG_RETURN_NULL();
3545 
3546  /* wday is meaningless, yday approximates the total span in days */
3547  tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday;
3548 
3549  if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
3550  PG_RETURN_NULL();
3551 
3552  PG_RETURN_TEXT_P(res);
3553 }
3554 
3555 /* ---------------------
3556  * TO_TIMESTAMP()
3557  *
3558  * Make a Timestamp from the date string in argument 0, which is
3559  * formatted as described by argument 'fmt' (to_timestamp is the reverse
 * of to_char()).
 *
 * do_to_timestamp() does the parsing.  If it stored a zone string in
 * tm.tm_zone, that string is decoded with DecodeTimezone() and a decode
 * failure is reported through DateTimeParseError().  tm2timestamp()
 * failure raises "timestamp out of range".
 *
 * NOTE(review): two lines are absent from this listing: the function
 * name/argument line (3563) and the statement belonging to the 'else'
 * below (3583).  As listed, the 'else' would bind to the following 'if'
 * and 'tz' would be left unset when no zone was parsed; restore the
 * upstream lines before compiling.
3560  * ---------------------
3561  */
3562 Datum
3564 {
3565  text *date_txt = PG_GETARG_TEXT_PP(0);
3566  text *fmt = PG_GETARG_TEXT_PP(1);
3567  Timestamp result;
3568  int tz;
3569  struct pg_tm tm;
3570  fsec_t fsec;
3571 
3572  do_to_timestamp(date_txt, fmt, &tm, &fsec);
3573 
3574  /* Use the specified time zone, if any. */
3575  if (tm.tm_zone)
3576  {
3577  int dterr = DecodeTimezone((char *) tm.tm_zone, &tz);
3578 
3579  if (dterr)
3580  DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
3581  }
3582  else
3584 
3585  if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
3586  ereport(ERROR,
3587  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3588  errmsg("timestamp out of range")));
3589 
3590  PG_RETURN_TIMESTAMP(result);
3591 }
3592 
3593 /* ----------
3594  * TO_DATE
3595  * Make a Date from the date string in argument 0, which is formatted
 * as described by argument 'fmt'.
 *
 * do_to_timestamp() does the parsing; only the year/month/day fields of
 * the result are used here.  The date is range-checked twice: once with
 * IS_VALID_JULIAN() before calling date2j(), and once with IS_VALID_DATE()
 * on the computed value.  Both failures raise "date out of range".
 *
 * NOTE(review): the line carrying the function name/arguments is absent
 * from this listing (numbering jumps 3598 -> 3600).
3596  * ----------
3597  */
3598 Datum
3600 {
3601  text *date_txt = PG_GETARG_TEXT_PP(0);
3602  text *fmt = PG_GETARG_TEXT_PP(1);
3603  DateADT result;
3604  struct pg_tm tm;
3605  fsec_t fsec;
3606 
3607  do_to_timestamp(date_txt, fmt, &tm, &fsec);
3608 
3609  /* Prevent overflow in Julian-day routines */
3610  if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
3611  ereport(ERROR,
3612  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3613  errmsg("date out of range: \"%s\"",
3614  text_to_cstring(date_txt))));
3615 
 /* store the date as an offset from POSTGRES_EPOCH_JDATE */
3616  result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
3617 
3618  /* Now check for just-out-of-range dates */
3619  if (!IS_VALID_DATE(result))
3620  ereport(ERROR,
3621  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3622  errmsg("date out of range: \"%s\"",
3623  text_to_cstring(date_txt))));
3624 
3625  PG_RETURN_DATEADT(result);
3626 }
3627 
3628 /*
3629  * do_to_timestamp: shared code for to_timestamp and to_date
3630  *
3631  * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm
3632  * and fractional seconds.
3633  *
3634  * We parse 'fmt' into a list of FormatNodes, which is then passed to
3635  * DCH_from_char to populate a TmFromChar with the parsed contents of
3636  * 'date_txt'.
3637  *
3638  * The TmFromChar is then analysed and converted into the final results in
3639  * struct 'tm' and 'fsec'.
 *
 * Parameters:
 *	date_txt - the input text to parse
 *	fmt      - the format picture; if it is empty, DCH_from_char is not
 *	           called and all parsed fields stay zero
 *	tm       - output; zeroed with ZERO_tm() first, then filled in.  If a
 *	           time zone sign was parsed, tm->tm_zone points to a palloc'd
 *	           string built with the "%c%02d:%02d" format
 *	fsec     - output; set to ms * 1000 + us
 *
 * Errors are raised with ereport() / DateTimeParseError() for an hour that
 * is invalid on the 12-hour clock, a day-of-year without year information,
 * and out-of-range date, time or time zone fields.
3640  */
3641 static void
3642 do_to_timestamp(text *date_txt, text *fmt,
3643  struct pg_tm *tm, fsec_t *fsec)
3644 {
3645  FormatNode *format;
3646  TmFromChar tmfc;
3647  int fmt_len;
3648  char *date_str;
3649  int fmask;
3650 
3651  date_str = text_to_cstring(date_txt);
3652 
3653  ZERO_tmfc(&tmfc);
3654  ZERO_tm(tm);
3655  *fsec = 0;
3656  fmask = 0; /* bit mask for ValidateDate() */
3657 
3658  fmt_len = VARSIZE_ANY_EXHDR(fmt);
3659 
3660  if (fmt_len)
3661  {
3662  char *fmt_str;
3663  bool incache;
3664 
3665  fmt_str = text_to_cstring(fmt);
3666 
3667  if (fmt_len > DCH_CACHE_SIZE)
3668  {
3669  /*
3670  * Allocate new memory if format picture is bigger than static
3671  * cache and do not use cache (call parser always)
3672  */
3673  incache = false;
3674 
3675  format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
3676 
3677  parse_format(format, fmt_str, DCH_keywords,
3678  DCH_suff, DCH_index, DCH_TYPE, NULL);
3679  }
3680  else
3681  {
3682  /*
3683  * Use cache buffers
3684  */
3685  DCHCacheEntry *ent = DCH_cache_fetch(fmt_str);
3686 
3687  incache = true;
3688  format = ent->format;
3689  }
3690 
3691 #ifdef DEBUG_TO_FROM_CHAR
3692  /* dump_node(format, fmt_len); */
3693  /* dump_index(DCH_keywords, DCH_index); */
3694 #endif
3695 
3696  DCH_from_char(format, date_str, &tmfc);
3697 
 /* only a privately allocated node array is freed; the cache keeps its own */
3698  pfree(fmt_str);
3699  if (!incache)
3700  pfree(format);
3701  }
3702 
3703  DEBUG_TMFC(&tmfc);
3704 
3705  /*
3706  * Convert to_date/to_timestamp input fields to standard 'tm'
3707  */
3708  if (tmfc.ssss)
3709  {
 /* split the parsed SSSS value into hour, minute and second */
3710  int x = tmfc.ssss;
3711 
3712  tm->tm_hour = x / SECS_PER_HOUR;
3713  x %= SECS_PER_HOUR;
3714  tm->tm_min = x / SECS_PER_MINUTE;
3715  x %= SECS_PER_MINUTE;
3716  tm->tm_sec = x;
3717  }
3718 
 /* non-zero explicit SS/MI/HH fields override the values derived above */
3719  if (tmfc.ss)
3720  tm->tm_sec = tmfc.ss;
3721  if (tmfc.mi)
3722  tm->tm_min = tmfc.mi;
3723  if (tmfc.hh)
3724  tm->tm_hour = tmfc.hh;
3725 
3726  if (tmfc.clock == CLOCK_12_HOUR)
3727  {
3728  if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
3729  ereport(ERROR,
3730  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3731  errmsg("hour \"%d\" is invalid for the 12-hour clock",
3732  tm->tm_hour),
3733  errhint("Use the 24-hour clock, or give an hour between 1 and 12.")));
3734 
 /*
  * With the PM flag set, hours below HOURS_PER_DAY / 2 are shifted up by
  * that amount; without it, an hour equal to HOURS_PER_DAY / 2 becomes 0.
  */
3735  if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
3736  tm->tm_hour += HOURS_PER_DAY / 2;
3737  else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
3738  tm->tm_hour = 0;
3739  }
3740 
3741  if (tmfc.year)
3742  {
3743  /*
3744  * If CC and YY (or Y) are provided, use YY as 2 low-order digits for
3745  * the year in the given century. Keep in mind that the 21st century
3746  * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
3747  * 600BC to 501BC.
3748  */
3749  if (tmfc.cc && tmfc.yysz <= 2)
3750  {
3751  if (tmfc.bc)
3752  tmfc.cc = -tmfc.cc;
3753  tm->tm_year = tmfc.year % 100;
3754  if (tm->tm_year)
3755  {
3756  if (tmfc.cc >= 0)
3757  tm->tm_year += (tmfc.cc - 1) * 100;
3758  else
3759  tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
3760  }
3761  else
3762  {
3763  /* find century year for dates ending in "00" */
3764  tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
3765  }
3766  }
3767  else
3768  {
3769  /* If a 4-digit year is provided, we use that and ignore CC. */
3770  tm->tm_year = tmfc.year;
 /* with the BC flag, positive year N is stored as -(N - 1) */
3771  if (tmfc.bc && tm->tm_year > 0)
3772  tm->tm_year = -(tm->tm_year - 1);
3773  }
3774  fmask |= DTK_M(YEAR);
3775  }
3776  else if (tmfc.cc)
3777  {
3778  /* use first year of century */
3779  if (tmfc.bc)
3780  tmfc.cc = -tmfc.cc;
3781  if (tmfc.cc >= 0)
3782  /* +1 because 21st century started in 2001 */
3783  tm->tm_year = (tmfc.cc - 1) * 100 + 1;
3784  else
3785  /* +1 because year == 599 is 600 BC */
3786  tm->tm_year = tmfc.cc * 100 + 1;
3787  fmask |= DTK_M(YEAR);
3788  }
3789 
 /* a parsed J value overwrites year, month and day set so far */
3790  if (tmfc.j)
3791  {
3792  j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
3793  fmask |= DTK_DATE_M;
3794  }
3795 
3796  if (tmfc.ww)
3797  {
3798  if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
3799  {
3800  /*
3801  * If tmfc.d is not set, then the date is left at the beginning of
3802  * the ISO week (Monday).
3803  */
3804  if (tmfc.d)
3805  isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
3806  else
3807  isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
3808  fmask |= DTK_DATE_M;
3809  }
3810  else
 /* outside ISO-week mode: week N maps to day-of-year (N - 1) * 7 + 1 */
3811  tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
3812  }
3813 
 /* a parsed W value maps to day-of-month (W - 1) * 7 + 1 */
3814  if (tmfc.w)
3815  tmfc.dd = (tmfc.w - 1) * 7 + 1;
3816  if (tmfc.dd)
3817  {
3818  tm->tm_mday = tmfc.dd;
3819  fmask |= DTK_M(DAY);
3820  }
3821  if (tmfc.mm)
3822  {
3823  tm->tm_mon = tmfc.mm;
3824  fmask |= DTK_M(MONTH);
3825  }
3826 
3827  if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
3828  {
3829  /*
3830  * The month and day field have not been set, so we use the
3831  * day-of-year field to populate them. Depending on the date mode,
3832  * this field may be interpreted as a Gregorian day-of-year, or an ISO
3833  * week date day-of-year.
3834  */
3835 
3836  if (!tm->tm_year && !tmfc.bc)
3837  ereport(ERROR,
3838  (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3839  errmsg("cannot calculate day of year without year information")));
3840 
3841  if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
3842  {
3843  int j0; /* zeroth day of the ISO year, in Julian */
3844 
3845  j0 = isoweek2j(tm->tm_year, 1) - 1;
3846 
3847  j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
3848  fmask |= DTK_DATE_M;
3849  }
3850  else
3851  {
3852  const int *y;
3853  int i;
3854 
 /* cumulative day counts at each month end; row 1 is used when isleap() is true */
3855  static const int ysum[2][13] = {
3856  {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
3857  {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}};
3858 
3859  y = ysum[isleap(tm->tm_year)];
3860 
 /* find the first month whose cumulative day count reaches ddd */
3861  for (i = 1; i <= MONTHS_PER_YEAR; i++)
3862  {
3863  if (tmfc.ddd <= y[i])
3864  break;
3865  }
3866  if (tm->tm_mon <= 1)
3867  tm->tm_mon = i;
3868 
3869  if (tm->tm_mday <= 1)
3870  tm->tm_mday = tmfc.ddd - y[i - 1];
3871 
3872  fmask |= DTK_M(MONTH) | DTK_M(DAY);
3873  }
3874  }
3875 
3876  if (tmfc.ms)
3877  *fsec += tmfc.ms * 1000;
3878  if (tmfc.us)
3879  *fsec += tmfc.us;
3880 
3881  /* Range-check date fields according to bit mask computed above */
3882  if (fmask != 0)
3883  {
3884  /* We already dealt with AD/BC, so pass isjulian = true */
3885  int dterr = ValidateDate(fmask, true, false, false, tm);
3886 
3887  if (dterr != 0)
3888  {
3889  /*
3890  * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate
3891  * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an
3892  * irrelevant hint about datestyle.
3893  */
3894  DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp");
3895  }
3896  }
3897 
3898  /* Range-check time fields too */
3899  if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY ||
3900  tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR ||
3901  tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE ||
3902  *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC)
3903  DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp");
3904 
3905  /* Save parsed time-zone into tm->tm_zone if it was specified */
3906  if (tmfc.tzsign)
3907  {
3908  char *tz;
3909 
3910  if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR ||
3911  tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR)
3912  DateTimeParseError(DTERR_TZDISP_OVERFLOW, date_str, "timestamp");
3913 
 /*
  * 7 bytes hold sign, "HH:MM" and the terminating NUL as long as tzh
  * prints in two digits; snprintf() bounds the write in any case.
  */
3914  tz = palloc(7);
3915 
3916  snprintf(tz, 7, "%c%02d:%02d",
3917  tmfc.tzsign > 0 ? '+' : '-', tmfc.tzh, tmfc.tzm);
3918 
3919  tm->tm_zone = tz;
3920  }
3921 
3922  DEBUG_TM(tm);
3923 
3924  pfree(date_str);
3925 }
3926 
3927 
3928 /**********************************************************************
3929  * the NUMBER version part
3930  *********************************************************************/
3931 
3932 
/*
 * fill_str
 *
 * Store 'max' copies of the byte 'c' at the start of 'str' and terminate
 * the string right after them.  The buffer must therefore have room for
 * max + 1 bytes.  Returns 'str'.
 */
static char *
fill_str(char *str, int c, int max)
{
    char       *terminator = (char *) memset(str, c, max) + max;

    *terminator = '\0';
    return str;
}
3940 
/*
 * zeroize_NUM
 *
 * Reset each of the nine number-description fields of the struct that _n
 * points to.  The argument is expanded once per field, so it must not have
 * side effects.
 */
#define zeroize_NUM(_n) \
do { \
    (_n)->pre = 0; \
    (_n)->post = 0; \
    (_n)->lsign = 0; \
    (_n)->flag = 0; \
    (_n)->pre_lsign_num = 0; \
    (_n)->multi = 0; \
    (_n)->zero_start = 0; \
    (_n)->zero_end = 0; \
    (_n)->need_locale = 0; \
} while(0)
3953 
3954 /*
 * select a NUMCacheEntry to hold the given format picture
 *
 * Either an existing slot is recycled (the first not-valid entry found, or
 * else the entry with the smallest age) or the next unused slot at index
 * n_NUMCache is taken.  In both cases the entry is returned with
 * valid = false, 'str' copied into ent->str and a fresh age stamp; the
 * caller is expected to fill format and Num, then set valid.
 *
 * NOTE(review): the line carrying the 'if' condition that chooses between
 * the two branches is absent from this listing (numbering jumps
 * 3971 -> 3973); judging by the comment and the debug message it tests
 * whether the cache is full.  Restore it from the upstream file.
 */
3955 static NUMCacheEntry *
3956 NUM_cache_getnew(const char *str)
3957 {
3958  NUMCacheEntry *ent;
3959 
3960  /* counter overflow check - paranoia? */
3961  if (NUMCounter >= (INT_MAX - NUM_CACHE_ENTRIES))
3962  {
 /* restart the age counter and renumber every entry in slot order */
3963  NUMCounter = 0;
3964 
3965  for (ent = NUMCache; ent < (NUMCache + NUM_CACHE_ENTRIES); ent++)
3966  ent->age = (++NUMCounter);
3967  }
3968 
3969  /*
3970  * If cache is full, remove oldest entry (or recycle first not-valid one)
3971  */
3973  {
3974  NUMCacheEntry *old = NUMCache + 0;
3975 
3976 #ifdef DEBUG_TO_FROM_CHAR
3977  elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache);
3978 #endif
 /* if slot 0 is itself not valid it is reused without scanning further */
3979  if (old->valid)
3980  {
3981  for (ent = NUMCache + 1; ent < (NUMCache + NUM_CACHE_ENTRIES); ent++)
3982  {
3983  if (!ent->valid)
3984  {
3985  old = ent;
3986  break;
3987  }
3988  if (ent->age < old->age)
3989  old = ent;
3990  }
3991  }
3992 #ifdef DEBUG_TO_FROM_CHAR
3993  elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age);
3994 #endif
3995  old->valid = false;
3996  StrNCpy(old->str, str, NUM_CACHE_SIZE + 1);
3997  old->age = (++NUMCounter);
3998  /* caller is expected to fill format and Num, then set valid */
3999  return old;
4000  }
4001  else
4002  {
4003 #ifdef DEBUG_TO_FROM_CHAR
4004  elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache);
4005 #endif
4006  ent = NUMCache + n_NUMCache;
4007  ent->valid = false;
4008  StrNCpy(ent->str, str, NUM_CACHE_SIZE + 1);
4009  ent->age = (++NUMCounter);
4010  /* caller is expected to fill format and Num, then set valid */
4011  ++n_NUMCache;
4012  return ent;
4013  }
4014 }
4015 
4016 /* look for an existing NUMCacheEntry matching the given format picture */
4017 static NUMCacheEntry *
4018 NUM_cache_search(const char *str)
4019 {
4020  int i;
4021  NUMCacheEntry *ent;
4022 
4023  /* counter overflow check - paranoia? */
4024  if (NUMCounter >= (INT_MAX - NUM_CACHE_ENTRIES))
4025  {
4026  NUMCounter = 0;
4027 
4028  for (ent = NUMCache; ent < (NUMCache + NUM_CACHE_ENTRIES); ent++)
4029  ent->age = (++NUMCounter);
4030  }
4031 
4032  for (i = 0, ent = NUMCache; i < n_NUMCache; i++, ent++)
4033  {
4034  if (ent->valid && strcmp(ent->str, str) == 0)
4035  {
4036  ent->age = (++NUMCounter);
4037  return ent;
4038  }
4039  }
4040 
4041  return NULL;
4042 }
4043 
4044 /* Find or create a NUMCacheEntry for the given format picture */
4045 static NUMCacheEntry *
4046 NUM_cache_fetch(const char *str)
4047 {
4048  NUMCacheEntry *ent;
4049 
4050  if ((ent = NUM_cache_search(str)) == NULL)
4051  {
4052  /*
4053  * Not in the cache, must run parser and save a new format-picture to
4054  * the cache. Do not mark the cache entry valid until parsing
4055  * succeeds.
4056  */
4057  ent = NUM_cache_getnew(str);
4058 
4059  zeroize_NUM(&ent->Num);
4060 
4061  parse_format(ent->format, str, NUM_keywords,
4062  NULL, NUM_index, NUM_TYPE, &ent->Num);
4063 
4064  ent->valid = true;
4065  }
4066  return ent;
4067 }
4068 
4069 /* ----------
4070  * Cache routine for NUM to_char version
4071  * ----------
4072  */
4073 static FormatNode *
4074 NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
4075 {
4076  FormatNode *format = NULL;
4077  char *str;
4078 
4079  str = text_to_cstring(pars_str);
4080 
4081  if (len > NUM_CACHE_SIZE)
4082  {
4083  /*
4084  * Allocate new memory if format picture is bigger than static cache
4085  * and do not use cache (call parser always)
4086  */
4087  format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode));
4088 
4089  *shouldFree = true;
4090 
4091  zeroize_NUM(Num);
4092 
4093  parse_format(format, str, NUM_keywords,
4094  NULL, NUM_index, NUM_TYPE, Num);
4095  }
4096  else
4097  {
4098  /*
4099  * Use cache buffers
4100  */
4101  NUMCacheEntry *ent = NUM_cache_fetch(str);
4102 
4103  *shouldFree = false;
4104 
4105  format = ent->format;
4106 
4107  /*
4108  * Copy cache to used struct
4109  */
4110  Num->flag = ent->Num.flag;
4111  Num->lsign = ent->Num.lsign;
4112  Num->pre = ent->Num.pre;
4113  Num->post = ent->Num.post;
4114  Num->pre_lsign_num = ent->Num.pre_lsign_num;
4115  Num->need_locale = ent->Num.need_locale;
4116  Num->multi = ent->Num.multi;
4117  Num->zero_start = ent->Num.zero_start;
4118  Num->zero_end = ent->Num.zero_end;
4119  }
4120 
4121 #ifdef DEBUG_TO_FROM_CHAR
4122  /* dump_node(format, len); */
4123  dump_index(NUM_keywords, NUM_index);
4124 #endif
4125 
4126  pfree(str);
4127  return format;
4128 }
4129 
4130 
4131 static char *
4132 int_to_roman(int number)
4133 {
4134  int len = 0,
4135  num = 0;
4136  char *p = NULL,
4137  *result,
4138  numstr[5];
4139 
4140  result = (char *) palloc(16);
4141  *result = '\0';
4142 
4143  if (number > 3999 || number < 1)
4144  {
4145  fill_str(result, '#', 15);
4146  return result;
4147  }
4148  len = snprintf(numstr, sizeof(numstr), "%d", number);
4149 
4150  for (p = numstr; *p != '\0'; p++, --len)
4151  {
4152  num = *p - 49; /* 48 ascii + 1 */
4153  if (num < 0)
4154  continue;
4155 
4156  if (len > 3)
4157  {
4158  while (num-- != -1)
4159  strcat(result, "M");
4160  }
4161  else
4162  {
4163  if (len == 3)
4164  strcat(result, rm100[num]);
4165  else if (len == 2)
4166  strcat(result, rm10[num]);
4167  else if (len == 1)
4168  strcat(result, rm1[num]);
4169  }
4170  }
4171  return result;
4172 }
4173 
4174 
4175 
4176 /* ----------
4177  * Locale
 *
 * Fill in the strings Np uses for signs, decimal point, thousands
 * separator and currency symbol.
 *
 * When Np->Num->need_locale is set, the values come from
 * PGLC_localeconv(), with a built-in fallback for each entry that is NULL
 * or empty.  Otherwise fixed defaults are used: "-", "+", ".", "," and " ".
 *
 * NOTE(review): the line carrying the function name/arguments is absent
 * from this listing (numbering jumps 4180 -> 4182).
4178  * ----------
4179  */
4180 static void
4182 {
4183  if (Np->Num->need_locale)
4184  {
4185  struct lconv *lconv;
4186 
4187  /*
4188  * Get locales
4189  */
4190  lconv = PGLC_localeconv();
4191 
4192  /*
4193  * Positive / Negative number sign
4194  */
4195  if (lconv->negative_sign && *lconv->negative_sign)
4196  Np->L_negative_sign = lconv->negative_sign;
4197  else
4198  Np->L_negative_sign = "-";
4199 
4200  if (lconv->positive_sign && *lconv->positive_sign)
4201  Np->L_positive_sign = lconv->positive_sign;
4202  else
4203  Np->L_positive_sign = "+";
4204 
4205  /*
4206  * Number decimal point
4207  */
4208  if (lconv->decimal_point && *lconv->decimal_point)
4209  Np->decimal = lconv->decimal_point;
4210 
4211  else
4212  Np->decimal = ".";
4213 
 /* the locale's decimal point is kept only when IS_LDECIMAL() holds */
4214  if (!IS_LDECIMAL(Np->Num))
4215  Np->decimal = ".";
4216 
4217  /*
4218  * Number thousands separator
4219  *
4220  * Some locales (e.g. broken glibc pt_BR), have a comma for decimal,
4221  * but "" for thousands_sep, so we set the thousands_sep too.
4222  * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php
4223  */
4224  if (lconv->thousands_sep && *lconv->thousands_sep)
4225  Np->L_thousands_sep = lconv->thousands_sep;
4226  /* Make sure thousands separator doesn't match decimal point symbol. */
4227  else if (strcmp(Np->decimal, ",") !=0)
4228  Np->L_thousands_sep = ",";
4229  else
4230  Np->L_thousands_sep = ".";
4231 
4232  /*
4233  * Currency symbol
4234  */
4235  if (lconv->currency_symbol && *lconv->currency_symbol)
4236  Np->L_currency_symbol = lconv->currency_symbol;
4237  else
4238  Np->L_currency_symbol = " ";
4239  }
4240  else
4241  {
4242  /*
4243  * Default values
4244  */
4245  Np->L_negative_sign = "-";
4246  Np->L_positive_sign = "+";
4247  Np->decimal = ".";
4248 
4249  Np->L_thousands_sep = ",";
4250  Np->L_currency_symbol = " ";
4251  }
4252 }
4253 
4254 /* ----------
4255  * Return pointer of last relevant number after decimal point
4256  * 12.0500 --> last relevant is '5'
4257  * 12.0000 --> last relevant is '.'
4258  * If there is no decimal point, return NULL (which will result in same
4259  * behavior as if FM hadn't been specified).
 *
 * The returned pointer points into 'num' itself; nothing is allocated or
 * modified.
 *
 * NOTE(review): the line carrying the function name/arguments is absent
 * from this listing (numbering jumps 4262 -> 4264).
4260  * ----------
4261  */
4262 static char *
4264 {
4265  char *result,
4266  *p = strchr(num, '.');
4267 
4268 #ifdef DEBUG_TO_FROM_CHAR
4269  elog(DEBUG_elog_output, "get_last_relevant_decnum()");
4270 #endif
4271 
4272  if (!p)
4273  return NULL;
4274 
 /* start at the '.', then remember the last character that is not '0' */
4275  result = p;
4276 
4277  while (*(++p))
4278  {
4279  if (*p != '0')
4280  result = p;
4281  }
4282 
4283  return result;
4284 }
4285 
4286 /*
4287  * These macros are used in NUM_processor() and its subsidiary routines.
4288  * OVERLOAD_TEST: true if we've reached end of input string
4289  * AMOUNT_TEST(s): true if at least s bytes remain in string
 *
 * Neither macro takes the state as an argument: both expand to expressions
 * that use the identifiers 'Np' and 'input_len', so they can only be used
 * where variables with those names are in scope.
4290  */
4291 #define OVERLOAD_TEST (Np->inout_p >= Np->inout + input_len)
4292 #define AMOUNT_TEST(s) (Np->inout_p <= Np->inout + (input_len - (s)))
4293 
4294 /* ----------
4295  * Number extraction for TO_NUMBER()
 *
 * Consume at most one numeric item from the input at Np->inout_p: an
 * optional leading sign, then one digit or the decimal point, then
 * possibly a trailing sign.
 *
 * A digit is appended to the string at Np->number_p as is; a decimal point
 * is appended as '.'.  A sign is stored in *Np->number as '-' or '+'.
 * Np->read_pre, Np->read_post and Np->read_dec track what has been read.
 *
 * Np        - processing state
 * id        - id of the current format node (the debug output names
 *             NUM_0, NUM_9 and NUM_DEC)
 * input_len - input length, used by OVERLOAD_TEST / AMOUNT_TEST
 *
 * The function returns early whenever the end of input is reached.
4296  * ----------
4297  */
4298 static void
4299 NUM_numpart_from_char(NUMProc *Np, int id, int input_len)
4300 {
4301  bool isread = false;
4302 
4303 #ifdef DEBUG_TO_FROM_CHAR
4304  elog(DEBUG_elog_output, " --- scan start --- id=%s",
4305  (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???");
4306 #endif
4307 
4308  if (OVERLOAD_TEST)
4309  return;
4310 
 /* skip a single blank, if present */
4311  if (*Np->inout_p == ' ')
4312  Np->inout_p++;
4313 
4314  if (OVERLOAD_TEST)
4315  return;
4316 
4317  /*
4318  * read sign before number
 *
 * Only attempted while no sign is recorded yet (*Np->number is still ' ')
 * and no digit has been read.
4319  */
4320  if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) &&
4321  (Np->read_pre + Np->read_post) == 0)
4322  {
4323 #ifdef DEBUG_TO_FROM_CHAR
4324  elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s",
4325  *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign);
4326 #endif
4327 
4328  /*
4329  * locale sign
4330  */
4331  if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE)
4332  {
4333  int x = 0;
4334 
4335 #ifdef DEBUG_TO_FROM_CHAR
4336  elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p);
4337 #endif
4338  if ((x = strlen(Np->L_negative_sign)) &&
4339  AMOUNT_TEST(x) &&
4340  strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
4341  {
4342  Np->inout_p += x;
4343  *Np->number = '-';
4344  }
4345  else if ((x = strlen(Np->L_positive_sign)) &&
4346  AMOUNT_TEST(x) &&
4347  strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
4348  {
4349  Np->inout_p += x;
4350  *Np->number = '+';
4351  }
4352  }
4353  else
4354  {
4355 #ifdef DEBUG_TO_FROM_CHAR
4356  elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p);
4357 #endif
4358 
4359  /*
4360  * simple + - < >
4361  */
4362  if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) &&
4363  *Np->inout_p == '<'))
4364  {
4365  *Np->number = '-'; /* set - */
4366  Np->inout_p++;
4367  }
4368  else if (*Np->inout_p == '+')
4369  {
4370  *Np->number = '+'; /* set + */
4371  Np->inout_p++;
4372  }
4373  }
4374  }
4375 
4376  if (OVERLOAD_TEST)
4377  return;
4378 
4379 #ifdef DEBUG_TO_FROM_CHAR
4380  elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number);
4381 #endif
4382 
4383  /*
4384  * read digit or decimal point
4385  */
4386  if (isdigit((unsigned char) *Np->inout_p))
4387  {
 /* all post-decimal digits the format allows are already read: stop */
4388  if (Np->read_dec && Np->read_post == Np->Num->post)
4389  return;
4390 
4391  *Np->number_p = *Np->inout_p;
4392  Np->number_p++;
4393 
4394  if (Np->read_dec)
4395  Np->read_post++;
4396  else
4397  Np->read_pre++;
4398 
4399  isread = true;
4400 
4401 #ifdef DEBUG_TO_FROM_CHAR
4402  elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p);
4403 #endif
4404  }
4405  else if (IS_DECIMAL(Np->Num) && Np->read_dec == false)
4406  {
4407  /*
4408  * We need not test IS_LDECIMAL(Np->Num) explicitly here, because
4409  * Np->decimal is always just "." if we don't have a D format token.
4410  * So we just unconditionally match to Np->decimal.
4411  */
4412  int x = strlen(Np->decimal);
4413 
4414 #ifdef DEBUG_TO_FROM_CHAR
4415  elog(DEBUG_elog_output, "Try read decimal point (%c)",
4416  *Np->inout_p);
4417 #endif
4418  if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0)
4419  {
 /*
  * Advance by x - 1 only; presumably the caller steps over the last
  * byte, as the comments on the post-sign code below say -- verify.
  */
4420  Np->inout_p += x - 1;
4421  *Np->number_p = '.';
4422  Np->number_p++;
4423  Np->read_dec = true;
4424  isread = true;
4425  }
4426  }
4427 
4428  if (OVERLOAD_TEST)
4429  return;
4430 
4431  /*
4432  * Read sign behind "last" number
4433  *
4434  * We need sign detection because determine exact position of post-sign is
4435  * difficult:
4436  *
4437  * FM9999.9999999S -> 123.001- 9.9S -> .5- FM9.999999MI ->
4438  * 5.01-
4439  */
4440  if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0)
4441  {
4442  /*
4443  * locale sign (NUM_S) is always anchored behind a last number, if: -
4444  * locale sign expected - last read char was NUM_0/9 or NUM_DEC - and
4445  * next char is not digit
4446  */
4447  if (IS_LSIGN(Np->Num) && isread &&
4448  (Np->inout_p + 1) < Np->inout + input_len &&
4449  !isdigit((unsigned char) *(Np->inout_p + 1)))
4450  {
4451  int x;
 /* remember the position so it can be restored if no sign matches */
4452  char *tmp = Np->inout_p++;
4453 
4454 #ifdef DEBUG_TO_FROM_CHAR
4455  elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p);
4456 #endif
4457  if ((x = strlen(Np->L_negative_sign)) &&
4458  AMOUNT_TEST(x) &&
4459  strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
4460  {
4461  Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
4462  *Np->number = '-';
4463  }
4464  else if ((x = strlen(Np->L_positive_sign)) &&
4465  AMOUNT_TEST(x) &&
4466  strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
4467  {
4468  Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
4469  *Np->number = '+';
4470  }
4471  if (*Np->number == ' ')
4472  /* no sign read */
4473  Np->inout_p = tmp;
4474  }
4475 
4476  /*
4477  * try read non-locale sign, it's happen only if format is not exact
4478  * and we cannot determine sign position of MI/PL/SG, an example:
4479  *
4480  * FM9.999999MI -> 5.01-
4481  *
4482  * if (.... && IS_LSIGN(Np->Num)==false) prevents read wrong formats
4483  * like to_number('1 -', '9S') where sign is not anchored to last
4484  * number.
4485  */
4486  else if (isread == false && IS_LSIGN(Np->Num) == false &&
4487  (IS_PLUS(Np->Num) || IS_MINUS(Np->Num)))
4488  {
4489 #ifdef DEBUG_TO_FROM_CHAR
4490  elog(DEBUG_elog_output, "Try read simple post-sign (%c)", *Np->inout_p);
4491 #endif
4492 
4493  /*
4494  * simple + -
4495  */
4496  if (*Np->inout_p == '-' || *Np->inout_p == '+')
4497  /* NUM_processor() do inout_p++ */
4498  *Np->number = *Np->inout_p;
4499  }
4500  }
4501 }
4502 
/*
 * IS_PREDEC_SPACE(_n): true when all of the following hold for the state
 * _n: IS_ZERO() is false for its format, number_p still points at the
 * first character of number, that character is '0', and the format has
 * post-decimal digits (Num->post != 0).  The argument is expanded several
 * times, so it must not have side effects.
 */
4503 #define IS_PREDEC_SPACE(_n) \
4504  (IS_ZERO((_n)->Num)==false && \
4505  (_n)->number == (_n)->number_p && \
4506  *(_n)->number == '0' && \
4507  (_n)->Num->post != 0)
4508 
4509 /* ----------
4510  * Add digit or sign to number-string
 *
 * Write the output for one numeric format item at Np->inout_p.  This may
 * emit, in order: a leading sign (locale sign, '<' for bracket formats,
 * '-' or a blank); then a blank, a '0', the decimal point string or the
 * next digit from Np->number_p; and, on the last item, a closing '>' or a
 * trailing locale sign.  Np->num_curr is incremented on every call that is
 * not cut short by the roman-numeral check.
 *
 * Does nothing when IS_ROMAN() holds for the format.
 *
 * NOTE(review): the line carrying the function name/arguments is absent
 * from this listing (numbering jumps 4513 -> 4515); the body uses the
 * identifiers 'Np' and 'id'.
4511  * ----------
4512  */
4513 static void
4515 {
4516  int end;
4517 
4518  if (IS_ROMAN(Np->Num))
4519  return;
4520 
4521  /* Note: in this elog() output not set '\0' in 'inout' */
4522 
4523 #ifdef DEBUG_TO_FROM_CHAR
4524 
4525  /*
4526  * Np->num_curr is number of current item in format-picture, it is not
4527  * current position in inout!
4528  */
4529  elog(DEBUG_elog_output,
4530  "SIGN_WROTE: %d, CURRENT: %d, NUMBER_P: \"%s\", INOUT: \"%s\"",
4531  Np->sign_wrote,
4532  Np->num_curr,
4533  Np->number_p,
4534  Np->inout);
4535 #endif
4536  Np->num_in = false;
4537 
4538  /*
4539  * Write sign if real number will write to output Note: IS_PREDEC_SPACE()
4540  * handle "9.9" --> " .1"
4541  */
4542  if (Np->sign_wrote == false &&
4543  (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) &&
4544  (IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.')))
4545  {
4546  if (IS_LSIGN(Np->Num))
4547  {
 /* a post-positioned locale sign is written at the end, see below */
4548  if (Np->Num->lsign == NUM_LSIGN_PRE)
4549  {
4550  if (Np->sign == '-')
4551  strcpy(Np->inout_p, Np->L_negative_sign);
4552  else
4553  strcpy(Np->inout_p, Np->L_positive_sign);
4554  Np->inout_p += strlen(Np->inout_p);
4555  Np->sign_wrote = true;
4556  }
4557  }
4558  else if (IS_BRACKET(Np->Num))
4559  {
4560  *Np->inout_p = Np->sign == '+' ? ' ' : '<';
4561  ++Np->inout_p;
4562  Np->sign_wrote = true;
4563  }
4564  else if (Np->sign == '+')
4565  {
 /* a positive sign is shown as a blank, and dropped in fill mode */
4566  if (!IS_FILLMODE(Np->Num))
4567  {
4568  *Np->inout_p = ' '; /* Write + */
4569  ++Np->inout_p;
4570  }
4571  Np->sign_wrote = true;
4572  }
4573  else if (Np->sign == '-')
4574  { /* Write - */
4575  *Np->inout_p = '-';
4576  ++Np->inout_p;
4577  Np->sign_wrote = true;
4578  }
4579  }
4580 
4581 
4582  /*
4583  * digits / FM / Zero / Dec. point
4584  */
4585  if (id == NUM_9 || id == NUM_0 || id == NUM_D || id == NUM_DEC)
4586  {
4587  if (Np->num_curr < Np->out_pre_spaces &&
4588  (Np->Num->zero_start > Np->num_curr || !IS_ZERO(Np->Num)))
4589  {
4590  /*
4591  * Write blank space
4592  */
4593  if (!IS_FILLMODE(Np->Num))
4594  {
4595  *Np->inout_p = ' '; /* Write ' ' */
4596  ++Np->inout_p;
4597  }
4598  }
4599  else if (IS_ZERO(Np->Num) &&
4600  Np->num_curr < Np->out_pre_spaces &&
4601  Np->Num->zero_start <= Np->num_curr)
4602  {
4603  /*
4604  * Write ZERO
4605  */
4606  *Np->inout_p = '0'; /* Write '0' */
4607  ++Np->inout_p;
4608  Np->num_in = true;
4609  }
4610  else
4611  {
4612  /*
4613  * Write Decimal point
4614  */
4615  if (*Np->number_p == '.')
4616  {
4617  if (!Np->last_relevant || *Np->last_relevant != '.')
4618  {
4619  strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */
4620  Np->inout_p += strlen(Np->inout_p);
4621  }
4622 
4623  /*
4624  * Ora 'n' -- FM9.9 --> 'n.'
4625  */
4626  else if (IS_FILLMODE(Np->Num) &&
4627  Np->last_relevant && *Np->last_relevant == '.')
4628  {
4629  strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */
4630  Np->inout_p += strlen(Np->inout_p);
4631  }
4632  }
4633  else
4634  {
4635  /*
4636  * Write Digits
 *
 * Digits located after last_relevant are not written, unless the
 * format item is NUM_0.
4637  */
4638  if (Np->last_relevant && Np->number_p > Np->last_relevant &&
4639  id != NUM_0)
4640  ;
4641 
4642  /*
4643  * '0.1' -- 9.9 --> ' .1'
4644  */
4645  else if (IS_PREDEC_SPACE(Np))
4646  {
4647  if (!IS_FILLMODE(Np->Num))
4648  {
4649  *Np->inout_p = ' ';
4650  ++Np->inout_p;
4651  }
4652 
4653  /*
4654  * '0' -- FM9.9 --> '0.'
4655  */
4656  else if (Np->last_relevant && *Np->last_relevant == '.')
4657  {
4658  *Np->inout_p = '0';
4659  ++Np->inout_p;
4660  }
4661  }
4662  else
4663  {
4664  *Np->inout_p = *Np->number_p; /* Write DIGIT */
4665  ++Np->inout_p;
4666  Np->num_in = true;
4667  }
4668  }
4669  /* do no exceed string length */
4670  if (*Np->number_p)
4671  ++Np->number_p;
4672  }
4673 
 /*
  * 'end' is the item count at which the closing bracket or trailing
  * locale sign is due; it is pulled forward to the current item when the
  * last relevant digit has just been passed.
  */
4674  end = Np->num_count + (Np->out_pre_spaces ? 1 : 0) + (IS_DECIMAL(Np->Num) ? 1 : 0);
4675 
4676  if (Np->last_relevant && Np->last_relevant == Np->number_p)
4677  end = Np->num_curr;
4678 
4679  if (Np->num_curr + 1 == end)
4680  {
4681  if (Np->sign_wrote == true && IS_BRACKET(Np->Num))
4682  {
4683  *Np->inout_p = Np->sign == '+' ? ' ' : '>';
4684  ++Np->inout_p;
4685  }
4686  else if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_POST)
4687  {
4688  if (Np->sign == '-')
4689  strcpy(Np->inout_p, Np->L_negative_sign);
4690  else
4691  strcpy(Np->inout_p, Np->L_positive_sign);
4692  Np->inout_p += strlen(Np->inout_p);
4693  }
4694  }
4695  }
4696 
4697  ++Np->num_curr;
4698 }
4699 
4700 /*
4701  * Skip over "n" input characters, but only if they aren't numeric data
4702  */
4703 static void
4704 NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len)
4705 {
4706  while (n-- > 0)
4707  {
4708  if (OVERLOAD_TEST)
4709  break; /* end of input */
4710  if (strchr("0123456789.,+-", *Np->inout_p) != NULL)
4711  break; /* it's a data character */
4712  Np->inout_p += pg_mblen(Np->inout_p);
4713  }
4714 }
4715 
/*
 * NUM_processor
 *
 * Walk the parsed format-node array "node" and, depending on "is_to_char",
 * either build formatted output or parse numeric input.
 *
 * node:       format nodes; processing stops at the NODE_TYPE_END node.
 * Num:        format description.  It is modified in place: zero_start is
 *             decremented when nonzero, flag/lsign may be adjusted, and in
 *             the input direction post is overwritten with the number of
 *             digits recorded in read_post.
 * inout:      to_char: output buffer.  to_number: the input text.
 * number:     to_char: the digit string to format.  to_number: destination
 *             buffer; its first byte is reserved for the sign.
 * input_len:  handed to the input-direction helpers; the visible to_number
 *             caller passes the byte length of the input text and the
 *             to_char macro passes 0.
 * to_char_out_pre_spaces, sign:
 *             copied into the working state only when is_to_char is true.
 * is_to_char: true for output formatting, false for input parsing.
 * collid:     not referenced anywhere in this function body.
 *
 * Returns "inout" (NUL-terminated) in the to_char direction and "number"
 * (NUL-terminated) in the input direction.  Raises an ERROR when an EEEE or
 * RN format is used in the input direction.
 */
4716 static char *
4717 NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
4718  char *number, int input_len, int to_char_out_pre_spaces,
4719  int sign, bool is_to_char, Oid collid)
4720 {
4721  FormatNode *n;
4722  NUMProc _Np,
4723  *Np = &_Np;
4724  const char *pattern;
4725  int pattern_len;
4726 
4727  MemSet(Np, 0, sizeof(NUMProc));
4728 
4729  Np->Num = Num;
4730  Np->is_to_char = is_to_char;
4731  Np->number = number;
4732  Np->inout = inout;
4733  Np->last_relevant = NULL;
4734  Np->read_post = 0;
4735  Np->read_pre = 0;
4736  Np->read_dec = false;
4737 
/* Note: this writes through to the caller's NUMDesc. */
4738  if (Np->Num->zero_start)
4739  --Np->Num->zero_start;
4740 
/*
 * EEEE: rejected for input; for output the number string is copied to the
 * output buffer unchanged and we are done.
 */
4741  if (IS_EEEE(Np->Num))
4742  {
4743  if (!Np->is_to_char)
4744  ereport(ERROR,
4745  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4746  errmsg("\"EEEE\" not supported for input")));
4747  return strcpy(inout, number);
4748  }
4749 
4750  /*
4751  * Roman correction
4752  */
4753  if (IS_ROMAN(Np->Num))
4754  {
4755  if (!Np->is_to_char)
4756  ereport(ERROR,
4757  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4758  errmsg("\"RN\" not supported for input")));
4759 
/* Zero all digit-count and sign bookkeeping for the Roman case. */
4760  Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post =
4761  Np->Num->pre = Np->out_pre_spaces = Np->sign = 0;
4762 
/* Drop every flag except FILLMODE (if it was set), then set ROMAN. */
4763  if (IS_FILLMODE(Np->Num))
4764  {
4765  Np->Num->flag = 0;
4766  Np->Num->flag |= NUM_F_FILLMODE;
4767  }
4768  else
4769  Np->Num->flag = 0;
4770  Np->Num->flag |= NUM_F_ROMAN;
4771  }
4772 
4773  /*
4774  * Sign
4775  */
4776  if (is_to_char)
4777  {
4778  Np->sign = sign;
4779 
4780  /* MI/PL/SG - write sign itself and not in number */
4781  if (IS_PLUS(Np->Num) || IS_MINUS(Np->Num))
4782  {
4783  if (IS_PLUS(Np->Num) && IS_MINUS(Np->Num) == false)
4784  Np->sign_wrote = false; /* need sign */
4785  else
4786  Np->sign_wrote = true; /* needn't sign */
4787  }
4788  else
4789  {
/*
 * Neither the PLUS nor the MINUS flag is set on this path, so the
 * IS_MINUS / IS_PLUS tests inside this branch cannot be true here.
 */
4790  if (Np->sign != '-')
4791  {
4792  if (IS_BRACKET(Np->Num) && IS_FILLMODE(Np->Num))
4793  Np->Num->flag &= ~NUM_F_BRACKET;
4794  if (IS_MINUS(Np->Num))
4795  Np->Num->flag &= ~NUM_F_MINUS;
4796  }
4797  else if (Np->sign != '+' && IS_PLUS(Np->Num))
4798  Np->Num->flag &= ~NUM_F_PLUS;
4799 
4800  if (Np->sign == '+' && IS_FILLMODE(Np->Num) && IS_LSIGN(Np->Num) == false)
4801  Np->sign_wrote = true; /* needn't sign */
4802  else
4803  Np->sign_wrote = false; /* need sign */
4804 
/*
 * A leading locale sign is switched to a trailing one when pre equals
 * pre_lsign_num.
 */
4805  if (Np->Num->lsign == NUM_LSIGN_PRE && Np->Num->pre == Np->Num->pre_lsign_num)
4806  Np->Num->lsign = NUM_LSIGN_POST;
4807  }
4808  }
4809  else
4810  Np->sign = false;
4811 
4812  /*
4813  * Count
4814  */
4815  Np->num_count = Np->Num->post + Np->Num->pre - 1;
4816 
4817  if (is_to_char)
4818  {
4819  Np->out_pre_spaces = to_char_out_pre_spaces;
4820 
4821  if (IS_FILLMODE(Np->Num) && IS_DECIMAL(Np->Num))
4822  {
/*
 * NOTE(review): in this function last_relevant has only been assigned NULL
 * before this point, so as written the test below can never succeed.  The
 * listing skips from line 4822 to 4824, so a statement that computes
 * last_relevant from the number string appears to be missing here --
 * confirm against the upstream file.
 */
4824 
4825  /*
4826  * If any '0' specifiers are present, make sure we don't strip
4827  * those digits.
4828  */
4829  if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces)
4830  {
4831  char *last_zero;
4832 
4833  last_zero = Np->number + (Np->Num->zero_end - Np->out_pre_spaces);
4834  if (Np->last_relevant < last_zero)
4835  Np->last_relevant = last_zero;
4836  }
4837  }
4838 
/* One extra position is counted when a sign is still owed and there is no leading padding. */
4839  if (Np->sign_wrote == false && Np->out_pre_spaces == 0)
4840  ++Np->num_count;
4841  }
4842  else
4843  {
/* Input direction: start "number" as a one-blank string; the blank is the sign slot. */
4844  Np->out_pre_spaces = 0;
4845  *Np->number = ' '; /* sign space */
4846  *(Np->number + 1) = '\0';
4847  }
4848 
4849  Np->num_in = 0;
4850  Np->num_curr = 0;
4851 
4852 #ifdef DEBUG_TO_FROM_CHAR
4853  elog(DEBUG_elog_output,
4854  "\n\tSIGN: '%c'\n\tNUM: '%s'\n\tPRE: %d\n\tPOST: %d\n\tNUM_COUNT: %d\n\tNUM_PRE: %d\n\tSIGN_WROTE: %s\n\tZERO: %s\n\tZERO_START: %d\n\tZERO_END: %d\n\tLAST_RELEVANT: %s\n\tBRACKET: %s\n\tPLUS: %s\n\tMINUS: %s\n\tFILLMODE: %s\n\tROMAN: %s\n\tEEEE: %s",
4855  Np->sign,
4856  Np->number,
4857  Np->Num->pre,
4858  Np->Num->post,
4859  Np->num_count,
4860  Np->out_pre_spaces,
4861  Np->sign_wrote ? "Yes" : "No",
4862  IS_ZERO(Np->Num) ? "Yes" : "No",
4863  Np->Num->zero_start,
4864  Np->Num->zero_end,
4865  Np->last_relevant ? Np->last_relevant : "<not set>",
4866  IS_BRACKET(Np->Num) ? "Yes" : "No",
4867  IS_PLUS(Np->Num) ? "Yes" : "No",
4868  IS_MINUS(Np->Num) ? "Yes" : "No",
4869  IS_FILLMODE(Np->Num) ? "Yes" : "No",
4870  IS_ROMAN(Np->Num) ? "Yes" : "No",
4871  IS_EEEE(Np->Num) ? "Yes" : "No"
4872  );
4873 #endif
4874 
4875  /*
4876  * Locale
4877  */
4878  NUM_prepare_locale(Np);
4879 
4880  /*
4881  * Processor direct cycle
4882  */
4883  if (Np->is_to_char)
4884  Np->number_p = Np->number;
4885  else
4886  Np->number_p = Np->number + 1; /* first char is space for sign */
4887 
4888  for (n = node, Np->inout_p = Np->inout; n->type != NODE_TYPE_END; n++)
4889  {
4890  if (!Np->is_to_char)
4891  {
4892  /*
4893  * Check at least one byte remains to be scanned. (In actions
4894  * below, must use AMOUNT_TEST if we want to read more bytes than
4895  * that.)
4896  */
4897  if (OVERLOAD_TEST)
4898  break;
4899  }
4900 
4901  /*
4902  * Format pictures actions
4903  */
4904  if (n->type == NODE_TYPE_ACTION)
4905  {
4906  /*
4907  * Create/read digit/zero/blank/sign/special-case
4908  *
4909  * 'NUM_S' note: The locale sign is anchored to number and we
4910  * read/write it when we work with first or last number
4911  * (NUM_0/NUM_9). This is why NUM_S is missing in switch().
4912  *
4913  * Notice the "Np->inout_p++" at the bottom of the loop. This is
4914  * why most of the actions advance inout_p one less than you might
4915  * expect. In cases where we don't want that increment to happen,
4916  * a switch case ends with "continue" not "break".
4917  */
4918  switch (n->key->id)
4919  {
4920  case NUM_9:
4921  case NUM_0:
4922  case NUM_DEC:
4923  case NUM_D:
4924  if (Np->is_to_char)
4925  {
4926  NUM_numpart_to_char(Np, n->key->id);
4927  continue; /* for() */
4928  }
4929  else
4930  {
4931  NUM_numpart_from_char(Np, n->key->id, input_len);
4932  break; /* switch() case: */
4933  }
4934 
4935  case NUM_COMMA:
4936  if (Np->is_to_char)
4937  {
/* Before any digit has been written: nothing in fill mode, else a blank. */
4938  if (!Np->num_in)
4939  {
4940  if (IS_FILLMODE(Np->Num))
4941  continue;
4942  else
4943  *Np->inout_p = ' ';
4944  }
4945  else
4946  *Np->inout_p = ',';
4947  }
4948  else
4949  {
4950  if (!Np->num_in)
4951  {
4952  if (IS_FILLMODE(Np->Num))
4953  continue;
4954  }
/* Only consume an input byte if it really is a comma. */
4955  if (*Np->inout_p != ',')
4956  continue;
4957  }
4958  break;
4959 
4960  case NUM_G:
4961  pattern = Np->L_thousands_sep;
4962  pattern_len = strlen(pattern);
4963  if (Np->is_to_char)
4964  {
4965  if (!Np->num_in)
4966  {
4967  if (IS_FILLMODE(Np->Num))
4968  continue;
4969  else
4970  {
4971  /* just in case there are MB chars */
4972  pattern_len = pg_mbstrlen(pattern);
4973  memset(Np->inout_p, ' ', pattern_len);
4974  Np->inout_p += pattern_len - 1;
4975  }
4976  }
4977  else
4978  {
4979  strcpy(Np->inout_p, pattern);
4980  Np->inout_p += pattern_len - 1;
4981  }
4982  }
4983  else
4984  {
4985  if (!Np->num_in)
4986  {
4987  if (IS_FILLMODE(Np->Num))
4988  continue;
4989  }
4990 
4991  /*
4992  * Because L_thousands_sep typically contains data
4993  * characters (either '.' or ','), we can't use
4994  * NUM_eat_non_data_chars here. Instead skip only if
4995  * the input matches L_thousands_sep.
4996  */
4997  if (AMOUNT_TEST(pattern_len) &&
4998  strncmp(Np->inout_p, pattern, pattern_len) == 0)
4999  Np->inout_p += pattern_len - 1;
5000  else
5001  continue;
5002  }
5003  break;
5004 
5005  case NUM_L:
5006  pattern = Np->L_currency_symbol;
5007  if (Np->is_to_char)
5008  {
5009  strcpy(Np->inout_p, pattern);
5010  Np->inout_p += strlen(pattern) - 1;
5011  }
5012  else
5013  {
/* Skip as many input characters as the currency symbol has, unless they look like data. */
5014  NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len);
5015  continue;
5016  }
5017  break;
5018 
/* Roman numerals: copied as-is in fill mode, otherwise right-justified in a 15-character field. */
5019  case NUM_RN:
5020  if (IS_FILLMODE(Np->Num))
5021  {
5022  strcpy(Np->inout_p, Np->number_p);
5023  Np->inout_p += strlen(Np->inout_p) - 1;
5024  }
5025  else
5026  {
5027  sprintf(Np->inout_p, "%15s", Np->number_p);
5028  Np->inout_p += strlen(Np->inout_p) - 1;
5029  }
5030  break;
5031 
/* Same as NUM_RN, but the numeral is passed through asc_tolower_z() first. */
5032  case NUM_rn:
5033  if (IS_FILLMODE(Np->Num))
5034  {
5035  strcpy(Np->inout_p, asc_tolower_z(Np->number_p));
5036  Np->inout_p += strlen(Np->inout_p) - 1;
5037  }
5038  else
5039  {
5040  sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p));
5041  Np->inout_p += strlen(Np->inout_p) - 1;
5042  }
5043  break;
5044 
5045  case NUM_th:
/* The suffix is skipped for Roman output, a '#' number, negative values and formats with a decimal point. */
5046  if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
5047  Np->sign == '-' || IS_DECIMAL(Np->Num))
5048  continue;
5049 
5050  if (Np->is_to_char)
5051  {
5052  strcpy(Np->inout_p, get_th(Np->number, TH_LOWER));
5053  Np->inout_p += 1;
5054  }
5055  else
5056  {
5057  /* All variants of 'th' occupy 2 characters */
5058  NUM_eat_non_data_chars(Np, 2, input_len);
5059  continue;
5060  }
5061  break;
5062 
5063  case NUM_TH:
5064  if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
5065  Np->sign == '-' || IS_DECIMAL(Np->Num))
5066  continue;
5067 
5068  if (Np->is_to_char)
5069  {
5070  strcpy(Np->inout_p, get_th(Np->number, TH_UPPER));
5071  Np->inout_p += 1;
5072  }
5073  else
5074  {
5075  /* All variants of 'TH' occupy 2 characters */
5076  NUM_eat_non_data_chars(Np, 2, input_len);
5077  continue;
5078  }
5079  break;
5080 
5081  case NUM_MI:
5082  if (Np->is_to_char)
5083  {
5084  if (Np->sign == '-')
5085  *Np->inout_p = '-';
5086  else if (IS_FILLMODE(Np->Num))
5087  continue;
5088  else
5089  *Np->inout_p = ' ';
5090  }
5091  else
5092  {
/* A '-' in the input is recorded in the sign slot of "number". */
5093  if (*Np->inout_p == '-')
5094  *Np->number = '-';
5095  else
5096  {
5097  NUM_eat_non_data_chars(Np, 1, input_len);
5098  continue;
5099  }
5100  }
5101  break;
5102 
5103  case NUM_PL:
5104  if (Np->is_to_char)
5105  {
5106  if (Np->sign == '+')
5107  *Np->inout_p = '+';
5108  else if (IS_FILLMODE(Np->Num))
5109  continue;
5110  else
5111  *Np->inout_p = ' ';
5112  }
5113  else
5114  {
5115  if (*Np->inout_p == '+')
5116  *Np->number = '+';
5117  else
5118  {
5119  NUM_eat_non_data_chars(Np, 1, input_len);
5120  continue;
5121  }
5122  }
5123  break;
5124 
5125  case NUM_SG:
5126  if (Np->is_to_char)
5127  *Np->inout_p = Np->sign;
5128  else
5129  {
5130  if (*Np->inout_p == '-')
5131  *Np->number = '-';
5132  else if (*Np->inout_p == '+')
5133  *Np->number = '+';
5134  else
5135  {
5136  NUM_eat_non_data_chars(Np, 1, input_len);
5137  continue;
5138  }
5139  }
5140  break;
5141 
/* Any other action: move to the next node without touching inout_p. */
5142  default:
5143  continue;
5144  break;
5145  }
5146  }
5147  else
5148  {
5149  /*
5150  * In TO_CHAR, non-pattern characters in the format are copied to
5151  * the output. In TO_NUMBER, we skip one input character for each
5152  * non-pattern format character, whether or not it matches the
5153  * format character.
5154  */
5155  if (Np->is_to_char)
5156  {
5157  strcpy(Np->inout_p, n->character);
5158  Np->inout_p += strlen(Np->inout_p);
5159  }
5160  else
5161  {
5162  Np->inout_p += pg_mblen(Np->inout_p);
5163  }
5164  continue;
5165  }
/* Reached only by cases that ended with "break": step past the last byte written or consumed. */
5166  Np->inout_p++;
5167  }
5168 
5169  if (Np->is_to_char)
5170  {
5171  *Np->inout_p = '\0';
5172  return Np->inout;
5173  }
5174  else
5175  {
/* Terminate the collected number; a trailing '.' is overwritten by the terminator. */
5176  if (*(Np->number_p - 1) == '.')
5177  *(Np->number_p - 1) = '\0';
5178  else
5179  *Np->number_p = '\0';
5180 
5181  /*
5182  * Correction - precision of dec. number
5183  */
5184  Np->Num->post = Np->read_post;
5185 
5186 #ifdef DEBUG_TO_FROM_CHAR
5187  elog(DEBUG_elog_output, "TO_NUMBER (number): '%s'", Np->number);
5188 #endif
5189  return Np->number;
5190  }
5191 }
5192 
/* ----------
 * MACRO: shared prologue for the numeric to_char() variants.
 *
 * Uses the caller's "fmt", "result", "format", "Num" and "shouldFree"
 * variables.  When the format text is empty, or so long that the result
 * buffer size could not be computed safely, the calling function returns an
 * empty text.  Otherwise a zero-filled result buffer is allocated and the
 * parsed format is obtained from NUM_cache().
 * ----------
 */
#define NUM_TOCHAR_prepare \
do { \
	int			fmt_len = VARSIZE_ANY_EXHDR(fmt); \
	\
	if (!(fmt_len > 0 && fmt_len < (INT_MAX-VARHDRSZ)/NUM_MAX_ITEM_SIZ)) \
		PG_RETURN_TEXT_P(cstring_to_text("")); \
	result = (text *) palloc0((fmt_len * NUM_MAX_ITEM_SIZ) + 1 + VARHDRSZ); \
	format = NUM_cache(fmt_len, &Num, fmt, &shouldFree); \
} while (0)
5206 
/* ----------
 * MACRO: shared epilogue for the numeric to_char() variants.
 *
 * Runs NUM_processor() in the to_char direction on the caller's "numstr",
 * "out_pre_spaces" and "sign", writing into the data area of "result";
 * releases "format" when "shouldFree" is set; then stamps "result" with its
 * actual length.
 * ----------
 */
#define NUM_TOCHAR_finish \
do { \
	int			out_len; \
	\
	NUM_processor(format, &Num, VARDATA(result), numstr, 0, out_pre_spaces, sign, true, PG_GET_COLLATION()); \
	\
	if (shouldFree) \
		pfree(format); \
	\
	/* \
	 * The processor left a NUL-terminated string in the result's data \
	 * area; turn that into a proper text value.  The allocation is \
	 * usually far larger than required, but shrinking it with a realloc \
	 * is not worth the trouble. \
	 */ \
	out_len = strlen(VARDATA(result)); \
	SET_VARSIZE(result, out_len + VARHDRSZ); \
} while (0)
5228 
5229 /* -------------------
5230  * NUMERIC to_number() (convert string to numeric)
5231  * -------------------
5232  */
5233 Datum
5235 {
5237  text *fmt = PG_GETARG_TEXT_PP(1);
5238  NUMDesc Num;
5239  Datum result;
5240  FormatNode *format;
5241  char *numstr;
5242  bool shouldFree;
5243  int len = 0;
5244  int scale,
5245  precision;
5246 
5247  len = VARSIZE_ANY_EXHDR(fmt);
5248 
5249  if (len <= 0 || len >= INT_MAX / NUM_MAX_ITEM_SIZ)
5250  PG_RETURN_NULL();
5251 
5252  format = NUM_cache(len, &Num, fmt, &shouldFree);
5253 
5254  numstr = (char *) palloc((len * NUM_MAX_ITEM_SIZ) + 1);
5255 
5256  NUM_processor(format, &Num, VARDATA_ANY(value), numstr,
5257  VARSIZE_ANY_EXHDR(value), 0, 0, false, PG_GET_COLLATION());
5258 
5259  scale = Num.post;
5260  precision = Num.pre + Num.multi + scale;
5261 
5262  if (shouldFree)
5263  pfree(format);
5264 
5266  CStringGetDatum(numstr),
5268  Int32GetDatum(((precision << 16) | scale) + VARHDRSZ));
5269 
5270  if (IS_MULTI(&Num))
5271  {
5272  Numeric x;
5274  Int32GetDatum(10)));
5276  Int32GetDatum(-Num.multi)));
5277 
5279  NumericGetDatum(a),