PostgreSQL Source Code  git master
tzparser.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * tzparser.c
4  * Functions for parsing timezone offset files
5  *
6  * Note: this code is invoked from the check_hook for the GUC variable
7  * timezone_abbreviations. Therefore, it should report problems using
8  * GUC_check_errmsg() and related functions, and try to avoid throwing
9  * elog(ERROR). This is not completely bulletproof at present --- in
10  * particular out-of-memory will throw an error. Could probably fix with
11  * PG_TRY if necessary.
12  *
13  *
14  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
15  * Portions Copyright (c) 1994, Regents of the University of California
16  *
17  * IDENTIFICATION
18  * src/backend/utils/misc/tzparser.c
19  *
20  *-------------------------------------------------------------------------
21  */
22 
23 #include "postgres.h"
24 
25 #include <ctype.h>
26 
27 #include "miscadmin.h"
28 #include "storage/fd.h"
29 #include "utils/datetime.h"
30 #include "utils/guc.h"
31 #include "utils/memutils.h"
32 #include "utils/tzparser.h"
33 
34 
/* Delimiter set passed to strtok_r() when splitting a line into tokens */
35 #define WHITESPACE " \t\n\r"
36 
/* Forward declarations of the file-local helpers defined below */
37 static bool validateTzEntry(tzEntry *tzentry);
38 static bool splitTzLine(const char *filename, int lineno,
39  char *line, tzEntry *tzentry);
40 static int addToArray(tzEntry **base, int *arraysize, int n,
41  tzEntry *entry, bool override);
42 static int ParseTzFile(const char *filename, int depth,
43  tzEntry **base, int *arraysize, int n);
44 
45 
46 /*
47  * Apply additional validation checks to a tzEntry
48  *
49  * Returns true if OK, else false
50  */
51 static bool
53 {
54  unsigned char *p;
55 
56  /*
57  * Check restrictions imposed by datetktbl storage format (see datetime.c)
58  */
59  if (strlen(tzentry->abbrev) > TOKMAXLEN)
60  {
61  GUC_check_errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d",
62  tzentry->abbrev, TOKMAXLEN,
63  tzentry->filename, tzentry->lineno);
64  return false;
65  }
66 
67  /*
68  * Sanity-check the offset: shouldn't exceed 14 hours
69  */
70  if (tzentry->offset > 14 * SECS_PER_HOUR ||
71  tzentry->offset < -14 * SECS_PER_HOUR)
72  {
73  GUC_check_errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d",
74  tzentry->offset,
75  tzentry->filename, tzentry->lineno);
76  return false;
77  }
78 
79  /*
80  * Convert abbrev to lowercase (must match datetime.c's conversion)
81  */
82  for (p = (unsigned char *) tzentry->abbrev; *p; p++)
83  *p = pg_tolower(*p);
84 
85  return true;
86 }
87 
88 /*
89  * Attempt to parse the line as a timezone abbrev spec
90  *
91  * Valid formats are:
92  * name zone
93  * name offset dst
94  *
95  * Returns true if OK, else false; data is stored in *tzentry
96  */
97 static bool
98 splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
99 {
100  char *brkl;
101  char *abbrev;
102  char *offset;
103  char *offset_endptr;
104  char *remain;
105  char *is_dst;
106 
107  tzentry->lineno = lineno;
108  tzentry->filename = filename;
109 
110  abbrev = strtok_r(line, WHITESPACE, &brkl);
111  if (!abbrev)
112  {
113  GUC_check_errmsg("missing time zone abbreviation in time zone file \"%s\", line %d",
114  filename, lineno);
115  return false;
116  }
117  tzentry->abbrev = pstrdup(abbrev);
118 
119  offset = strtok_r(NULL, WHITESPACE, &brkl);
120  if (!offset)
121  {
122  GUC_check_errmsg("missing time zone offset in time zone file \"%s\", line %d",
123  filename, lineno);
124  return false;
125  }
126 
127  /* We assume zone names don't begin with a digit or sign */
128  if (isdigit((unsigned char) *offset) || *offset == '+' || *offset == '-')
129  {
130  tzentry->zone = NULL;
131  tzentry->offset = strtol(offset, &offset_endptr, 10);
132  if (offset_endptr == offset || *offset_endptr != '\0')
133  {
134  GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d",
135  filename, lineno);
136  return false;
137  }
138 
139  is_dst = strtok_r(NULL, WHITESPACE, &brkl);
140  if (is_dst && pg_strcasecmp(is_dst, "D") == 0)
141  {
142  tzentry->is_dst = true;
143  remain = strtok_r(NULL, WHITESPACE, &brkl);
144  }
145  else
146  {
147  /* there was no 'D' dst specifier */
148  tzentry->is_dst = false;
149  remain = is_dst;
150  }
151  }
152  else
153  {
154  /*
155  * Assume entry is a zone name. We do not try to validate it by
156  * looking up the zone, because that would force loading of a lot of
157  * zones that probably will never be used in the current session.
158  */
159  tzentry->zone = pstrdup(offset);
160  tzentry->offset = 0 * SECS_PER_HOUR;
161  tzentry->is_dst = false;
162  remain = strtok_r(NULL, WHITESPACE, &brkl);
163  }
164 
165  if (!remain) /* no more non-whitespace chars */
166  return true;
167 
168  if (remain[0] != '#') /* must be a comment */
169  {
170  GUC_check_errmsg("invalid syntax in time zone file \"%s\", line %d",
171  filename, lineno);
172  return false;
173  }
174  return true;
175 }
176 
177 /*
178  * Insert entry into sorted array
179  *
180  * *base: base address of array (changeable if must enlarge array)
181  * *arraysize: allocated length of array (changeable if must enlarge array)
182  * n: current number of valid elements in array
183  * entry: new data to insert
184  * override: true if OK to override
185  *
186  * Returns the new array length (new value for n), or -1 if error
187  */
188 static int
189 addToArray(tzEntry **base, int *arraysize, int n,
190  tzEntry *entry, bool override)
191 {
192  tzEntry *arrayptr;
193  int low;
194  int high;
195 
196  /*
197  * Search the array for a duplicate; as a useful side effect, the array is
198  * maintained in sorted order. We use strcmp() to ensure we match the
199  * sort order datetime.c expects.
200  */
201  arrayptr = *base;
202  low = 0;
203  high = n - 1;
204  while (low <= high)
205  {
206  int mid = (low + high) >> 1;
207  tzEntry *midptr = arrayptr + mid;
208  int cmp;
209 
210  cmp = strcmp(entry->abbrev, midptr->abbrev);
211  if (cmp < 0)
212  high = mid - 1;
213  else if (cmp > 0)
214  low = mid + 1;
215  else
216  {
217  /*
218  * Found a duplicate entry; complain unless it's the same.
219  */
220  if ((midptr->zone == NULL && entry->zone == NULL &&
221  midptr->offset == entry->offset &&
222  midptr->is_dst == entry->is_dst) ||
223  (midptr->zone != NULL && entry->zone != NULL &&
224  strcmp(midptr->zone, entry->zone) == 0))
225  {
226  /* return unchanged array */
227  return n;
228  }
229  if (override)
230  {
231  /* same abbrev but something is different, override */
232  midptr->zone = entry->zone;
233  midptr->offset = entry->offset;
234  midptr->is_dst = entry->is_dst;
235  return n;
236  }
237  /* same abbrev but something is different, complain */
238  GUC_check_errmsg("time zone abbreviation \"%s\" is multiply defined",
239  entry->abbrev);
240  GUC_check_errdetail("Entry in time zone file \"%s\", line %d, conflicts with entry in file \"%s\", line %d.",
241  midptr->filename, midptr->lineno,
242  entry->filename, entry->lineno);
243  return -1;
244  }
245  }
246 
247  /*
248  * No match, insert at position "low".
249  */
250  if (n >= *arraysize)
251  {
252  *arraysize *= 2;
253  *base = (tzEntry *) repalloc(*base, *arraysize * sizeof(tzEntry));
254  }
255 
256  arrayptr = *base + low;
257 
258  memmove(arrayptr + 1, arrayptr, (n - low) * sizeof(tzEntry));
259 
260  memcpy(arrayptr, entry, sizeof(tzEntry));
261 
262  return n + 1;
263 }
264 
265 /*
266  * Parse a single timezone abbrev file --- can recurse to handle @INCLUDE
267  *
268  * filename: user-specified file name (does not include path)
269  * depth: current recursion depth
270  * *base: array for results (changeable if must enlarge array)
271  * *arraysize: allocated length of array (changeable if must enlarge array)
272  * n: current number of valid elements in array
273  *
274  * Returns the new array length (new value for n), or -1 if error
275  */
276 static int
277 ParseTzFile(const char *filename, int depth,
278  tzEntry **base, int *arraysize, int n)
279 {
280  char share_path[MAXPGPATH];
281  char file_path[MAXPGPATH];
282  FILE *tzFile;
283  char tzbuf[1024];
284  char *line;
285  tzEntry tzentry;
286  int lineno = 0;
287  bool override = false;
288  const char *p;
289 
290  /*
291  * We enforce that the filename is all alpha characters. This may be
292  * overly restrictive, but we don't want to allow access to anything
293  * outside the timezonesets directory, so for instance '/' *must* be
294  * rejected.
295  */
296  for (p = filename; *p; p++)
297  {
298  if (!isalpha((unsigned char) *p))
299  {
300  /* at level 0, just use guc.c's regular "invalid value" message */
301  if (depth > 0)
302  GUC_check_errmsg("invalid time zone file name \"%s\"",
303  filename);
304  return -1;
305  }
306  }
307 
308  /*
309  * The maximal recursion depth is a pretty arbitrary setting. It is hard
310  * to imagine that someone needs more than 3 levels so stick with this
311  * conservative setting until someone complains.
312  */
313  if (depth > 3)
314  {
315  GUC_check_errmsg("time zone file recursion limit exceeded in file \"%s\"",
316  filename);
317  return -1;
318  }
319 
321  snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s",
323  tzFile = AllocateFile(file_path, "r");
324  if (!tzFile)
325  {
326  /*
327  * Check to see if the problem is not the filename but the directory.
328  * This is worth troubling over because if the installation share/
329  * directory is missing or unreadable, this is likely to be the first
330  * place we notice a problem during postmaster startup.
331  */
332  int save_errno = errno;
333  DIR *tzdir;
334 
335  snprintf(file_path, sizeof(file_path), "%s/timezonesets",
336  share_path);
337  tzdir = AllocateDir(file_path);
338  if (tzdir == NULL)
339  {
340  GUC_check_errmsg("could not open directory \"%s\": %m",
341  file_path);
342  GUC_check_errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
343  my_exec_path);
344  return -1;
345  }
346  FreeDir(tzdir);
347  errno = save_errno;
348 
349  /*
350  * otherwise, if file doesn't exist and it's level 0, guc.c's
351  * complaint is enough
352  */
353  if (errno != ENOENT || depth > 0)
354  GUC_check_errmsg("could not read time zone file \"%s\": %m",
355  filename);
356 
357  return -1;
358  }
359 
360  while (!feof(tzFile))
361  {
362  lineno++;
363  if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL)
364  {
365  if (ferror(tzFile))
366  {
367  GUC_check_errmsg("could not read time zone file \"%s\": %m",
368  filename);
369  n = -1;
370  break;
371  }
372  /* else we're at EOF after all */
373  break;
374  }
375  if (strlen(tzbuf) == sizeof(tzbuf) - 1)
376  {
377  /* the line is too long for tzbuf */
378  GUC_check_errmsg("line is too long in time zone file \"%s\", line %d",
379  filename, lineno);
380  n = -1;
381  break;
382  }
383 
384  /* skip over whitespace */
385  line = tzbuf;
386  while (*line && isspace((unsigned char) *line))
387  line++;
388 
389  if (*line == '\0') /* empty line */
390  continue;
391  if (*line == '#') /* comment line */
392  continue;
393 
394  if (pg_strncasecmp(line, "@INCLUDE", strlen("@INCLUDE")) == 0)
395  {
396  /* pstrdup so we can use filename in result data structure */
397  char *includeFile = pstrdup(line + strlen("@INCLUDE"));
398  char *brki;
399 
400  includeFile = strtok_r(includeFile, WHITESPACE, &brki);
401  if (!includeFile || !*includeFile)
402  {
403  GUC_check_errmsg("@INCLUDE without file name in time zone file \"%s\", line %d",
404  filename, lineno);
405  n = -1;
406  break;
407  }
408  n = ParseTzFile(includeFile, depth + 1,
409  base, arraysize, n);
410  if (n < 0)
411  break;
412  continue;
413  }
414 
415  if (pg_strncasecmp(line, "@OVERRIDE", strlen("@OVERRIDE")) == 0)
416  {
417  override = true;
418  continue;
419  }
420 
421  if (!splitTzLine(filename, lineno, line, &tzentry))
422  {
423  n = -1;
424  break;
425  }
426  if (!validateTzEntry(&tzentry))
427  {
428  n = -1;
429  break;
430  }
431  n = addToArray(base, arraysize, n, &tzentry, override);
432  if (n < 0)
433  break;
434  }
435 
436  FreeFile(tzFile);
437 
438  return n;
439 }
440 
441 /*
442  * load_tzoffsets --- read and parse the specified timezone offset file
443  *
444  * On success, return a filled-in TimeZoneAbbrevTable, which must have been
445  * guc_malloc'd not palloc'd. On failure, return NULL, using GUC_check_errmsg
446  * and friends to give details of the problem.
447  */
450 {
451  TimeZoneAbbrevTable *result = NULL;
452  MemoryContext tmpContext;
453  MemoryContext oldContext;
454  tzEntry *array;
455  int arraysize;
456  int n;
457 
458  /*
459  * Create a temp memory context to work in. This makes it easy to clean
460  * up afterwards.
461  */
463  "TZParserMemory",
465  oldContext = MemoryContextSwitchTo(tmpContext);
466 
467  /* Initialize array at a reasonable size */
468  arraysize = 128;
469  array = (tzEntry *) palloc(arraysize * sizeof(tzEntry));
470 
471  /* Parse the file(s) */
472  n = ParseTzFile(filename, 0, &array, &arraysize, 0);
473 
474  /* If no errors so far, let datetime.c allocate memory & convert format */
475  if (n >= 0)
476  {
477  result = ConvertTimeZoneAbbrevs(array, n);
478  if (!result)
479  GUC_check_errmsg("out of memory");
480  }
481 
482  /* Clean up */
483  MemoryContextSwitchTo(oldContext);
484  MemoryContextDelete(tmpContext);
485 
486  return result;
487 }
TimeZoneAbbrevTable * ConvertTimeZoneAbbrevs(struct tzEntry *abbrevs, int n)
Definition: datetime.c:4873
#define SECS_PER_HOUR
Definition: timestamp.h:127
int FreeDir(DIR *dir)
Definition: fd.c:2984
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2606
int FreeFile(FILE *file)
Definition: fd.c:2804
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2866
char my_exec_path[MAXPGPATH]
Definition: globals.c:80
#define GUC_check_errmsg
Definition: guc.h:472
#define GUC_check_errdetail
Definition: guc.h:476
#define GUC_check_errhint
Definition: guc.h:480
#define TOKMAXLEN
Definition: datetime.h:204
static char * share_path
Definition: initdb.c:135
char * pstrdup(const char *in)
Definition: mcxt.c:1696
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454
void * palloc(Size size)
Definition: mcxt.c:1317
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_SMALL_SIZES
Definition: memutils.h:170
#define MAXPGPATH
static char * filename
Definition: pg_dumpall.c:119
void get_share_path(const char *my_exec_path, char *ret_path)
Definition: path.c:825
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
#define snprintf
Definition: port.h:238
unsigned char pg_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:122
int pg_strncasecmp(const char *s1, const char *s2, size_t n)
Definition: pgstrcasecmp.c:69
MemoryContextSwitchTo(old_ctx)
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:743
Definition: dirent.c:26
const char * filename
Definition: tzparser.h:33
int lineno
Definition: tzparser.h:32
char * zone
Definition: tzparser.h:27
int offset
Definition: tzparser.h:29
char * abbrev
Definition: tzparser.h:26
bool is_dst
Definition: tzparser.h:30
static bool validateTzEntry(tzEntry *tzentry)
Definition: tzparser.c:52
static bool splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
Definition: tzparser.c:98
TimeZoneAbbrevTable * load_tzoffsets(const char *filename)
Definition: tzparser.c:449
static int addToArray(tzEntry **base, int *arraysize, int n, tzEntry *entry, bool override)
Definition: tzparser.c:189
#define WHITESPACE
Definition: tzparser.c:35
static int ParseTzFile(const char *filename, int depth, tzEntry **base, int *arraysize, int n)
Definition: tzparser.c:277
#define strtok_r
Definition: win32_port.h:421