PostgreSQL Source Code  git master
tzparser.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * tzparser.c
4  * Functions for parsing timezone offset files
5  *
6  * Note: this code is invoked from the check_hook for the GUC variable
7  * timezone_abbreviations. Therefore, it should report problems using
8  * GUC_check_errmsg() and related functions, and try to avoid throwing
9  * elog(ERROR). This is not completely bulletproof at present --- in
10  * particular out-of-memory will throw an error. Could probably fix with
11  * PG_TRY if necessary.
12  *
13  *
14  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
15  * Portions Copyright (c) 1994, Regents of the University of California
16  *
17  * IDENTIFICATION
18  * src/backend/utils/misc/tzparser.c
19  *
20  *-------------------------------------------------------------------------
21  */
22 
#include "postgres.h"

#include <ctype.h>
#include <limits.h>

#include "miscadmin.h"
#include "storage/fd.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/tzparser.h"
32 
33 
/*
 * Characters treated as field separators when a line of a timezone
 * abbreviation file is tokenized with strtok().
 */
34 #define WHITESPACE " \t\n\r"
35 
/* Forward declarations of the file-local helpers defined below */
36 static bool validateTzEntry(tzEntry *tzentry);
37 static bool splitTzLine(const char *filename, int lineno,
38  char *line, tzEntry *tzentry);
39 static int addToArray(tzEntry **base, int *arraysize, int n,
40  tzEntry *entry, bool override);
41 static int ParseTzFile(const char *filename, int depth,
42  tzEntry **base, int *arraysize, int n);
43 
44 
45 /*
46  * Apply additional validation checks to a tzEntry
47  *
48  * Returns true if OK, else false
49  */
50 static bool
52 {
53  unsigned char *p;
54 
55  /*
56  * Check restrictions imposed by datetktbl storage format (see datetime.c)
57  */
58  if (strlen(tzentry->abbrev) > TOKMAXLEN)
59  {
60  GUC_check_errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d",
61  tzentry->abbrev, TOKMAXLEN,
62  tzentry->filename, tzentry->lineno);
63  return false;
64  }
65 
66  /*
67  * Sanity-check the offset: shouldn't exceed 14 hours
68  */
69  if (tzentry->offset > 14 * 60 * 60 ||
70  tzentry->offset < -14 * 60 * 60)
71  {
72  GUC_check_errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d",
73  tzentry->offset,
74  tzentry->filename, tzentry->lineno);
75  return false;
76  }
77 
78  /*
79  * Convert abbrev to lowercase (must match datetime.c's conversion)
80  */
81  for (p = (unsigned char *) tzentry->abbrev; *p; p++)
82  *p = pg_tolower(*p);
83 
84  return true;
85 }
86 
87 /*
88  * Attempt to parse the line as a timezone abbrev spec
89  *
90  * Valid formats are:
91  * name zone
92  * name offset dst
93  *
94  * Returns true if OK, else false; data is stored in *tzentry
95  */
96 static bool
97 splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
98 {
99  char *abbrev;
100  char *offset;
101  char *offset_endptr;
102  char *remain;
103  char *is_dst;
104 
105  tzentry->lineno = lineno;
106  tzentry->filename = filename;
107 
108  abbrev = strtok(line, WHITESPACE);
109  if (!abbrev)
110  {
111  GUC_check_errmsg("missing time zone abbreviation in time zone file \"%s\", line %d",
112  filename, lineno);
113  return false;
114  }
115  tzentry->abbrev = pstrdup(abbrev);
116 
117  offset = strtok(NULL, WHITESPACE);
118  if (!offset)
119  {
120  GUC_check_errmsg("missing time zone offset in time zone file \"%s\", line %d",
121  filename, lineno);
122  return false;
123  }
124 
125  /* We assume zone names don't begin with a digit or sign */
126  if (isdigit((unsigned char) *offset) || *offset == '+' || *offset == '-')
127  {
128  tzentry->zone = NULL;
129  tzentry->offset = strtol(offset, &offset_endptr, 10);
130  if (offset_endptr == offset || *offset_endptr != '\0')
131  {
132  GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d",
133  filename, lineno);
134  return false;
135  }
136 
137  is_dst = strtok(NULL, WHITESPACE);
138  if (is_dst && pg_strcasecmp(is_dst, "D") == 0)
139  {
140  tzentry->is_dst = true;
141  remain = strtok(NULL, WHITESPACE);
142  }
143  else
144  {
145  /* there was no 'D' dst specifier */
146  tzentry->is_dst = false;
147  remain = is_dst;
148  }
149  }
150  else
151  {
152  /*
153  * Assume entry is a zone name. We do not try to validate it by
154  * looking up the zone, because that would force loading of a lot of
155  * zones that probably will never be used in the current session.
156  */
157  tzentry->zone = pstrdup(offset);
158  tzentry->offset = 0;
159  tzentry->is_dst = false;
160  remain = strtok(NULL, WHITESPACE);
161  }
162 
163  if (!remain) /* no more non-whitespace chars */
164  return true;
165 
166  if (remain[0] != '#') /* must be a comment */
167  {
168  GUC_check_errmsg("invalid syntax in time zone file \"%s\", line %d",
169  filename, lineno);
170  return false;
171  }
172  return true;
173 }
174 
175 /*
176  * Insert entry into sorted array
177  *
178  * *base: base address of array (changeable if must enlarge array)
179  * *arraysize: allocated length of array (changeable if must enlarge array)
180  * n: current number of valid elements in array
181  * entry: new data to insert
182  * override: true if OK to override
183  *
184  * Returns the new array length (new value for n), or -1 if error
185  */
186 static int
187 addToArray(tzEntry **base, int *arraysize, int n,
188  tzEntry *entry, bool override)
189 {
190  tzEntry *arrayptr;
191  int low;
192  int high;
193 
194  /*
195  * Search the array for a duplicate; as a useful side effect, the array is
196  * maintained in sorted order. We use strcmp() to ensure we match the
197  * sort order datetime.c expects.
198  */
199  arrayptr = *base;
200  low = 0;
201  high = n - 1;
202  while (low <= high)
203  {
204  int mid = (low + high) >> 1;
205  tzEntry *midptr = arrayptr + mid;
206  int cmp;
207 
208  cmp = strcmp(entry->abbrev, midptr->abbrev);
209  if (cmp < 0)
210  high = mid - 1;
211  else if (cmp > 0)
212  low = mid + 1;
213  else
214  {
215  /*
216  * Found a duplicate entry; complain unless it's the same.
217  */
218  if ((midptr->zone == NULL && entry->zone == NULL &&
219  midptr->offset == entry->offset &&
220  midptr->is_dst == entry->is_dst) ||
221  (midptr->zone != NULL && entry->zone != NULL &&
222  strcmp(midptr->zone, entry->zone) == 0))
223  {
224  /* return unchanged array */
225  return n;
226  }
227  if (override)
228  {
229  /* same abbrev but something is different, override */
230  midptr->zone = entry->zone;
231  midptr->offset = entry->offset;
232  midptr->is_dst = entry->is_dst;
233  return n;
234  }
235  /* same abbrev but something is different, complain */
236  GUC_check_errmsg("time zone abbreviation \"%s\" is multiply defined",
237  entry->abbrev);
238  GUC_check_errdetail("Entry in time zone file \"%s\", line %d, conflicts with entry in file \"%s\", line %d.",
239  midptr->filename, midptr->lineno,
240  entry->filename, entry->lineno);
241  return -1;
242  }
243  }
244 
245  /*
246  * No match, insert at position "low".
247  */
248  if (n >= *arraysize)
249  {
250  *arraysize *= 2;
251  *base = (tzEntry *) repalloc(*base, *arraysize * sizeof(tzEntry));
252  }
253 
254  arrayptr = *base + low;
255 
256  memmove(arrayptr + 1, arrayptr, (n - low) * sizeof(tzEntry));
257 
258  memcpy(arrayptr, entry, sizeof(tzEntry));
259 
260  return n + 1;
261 }
262 
263 /*
264  * Parse a single timezone abbrev file --- can recurse to handle @INCLUDE
265  *
266  * filename: user-specified file name (does not include path)
267  * depth: current recursion depth
268  * *base: array for results (changeable if must enlarge array)
269  * *arraysize: allocated length of array (changeable if must enlarge array)
270  * n: current number of valid elements in array
271  *
272  * Returns the new array length (new value for n), or -1 if error
273  */
274 static int
275 ParseTzFile(const char *filename, int depth,
276  tzEntry **base, int *arraysize, int n)
277 {
278  char share_path[MAXPGPATH];
279  char file_path[MAXPGPATH];
280  FILE *tzFile;
281  char tzbuf[1024];
282  char *line;
283  tzEntry tzentry;
284  int lineno = 0;
285  bool override = false;
286  const char *p;
287 
288  /*
289  * We enforce that the filename is all alpha characters. This may be
290  * overly restrictive, but we don't want to allow access to anything
291  * outside the timezonesets directory, so for instance '/' *must* be
292  * rejected.
293  */
294  for (p = filename; *p; p++)
295  {
296  if (!isalpha((unsigned char) *p))
297  {
298  /* at level 0, just use guc.c's regular "invalid value" message */
299  if (depth > 0)
300  GUC_check_errmsg("invalid time zone file name \"%s\"",
301  filename);
302  return -1;
303  }
304  }
305 
306  /*
307  * The maximal recursion depth is a pretty arbitrary setting. It is hard
308  * to imagine that someone needs more than 3 levels so stick with this
309  * conservative setting until someone complains.
310  */
311  if (depth > 3)
312  {
313  GUC_check_errmsg("time zone file recursion limit exceeded in file \"%s\"",
314  filename);
315  return -1;
316  }
317 
319  snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s",
321  tzFile = AllocateFile(file_path, "r");
322  if (!tzFile)
323  {
324  /*
325  * Check to see if the problem is not the filename but the directory.
326  * This is worth troubling over because if the installation share/
327  * directory is missing or unreadable, this is likely to be the first
328  * place we notice a problem during postmaster startup.
329  */
330  int save_errno = errno;
331  DIR *tzdir;
332 
333  snprintf(file_path, sizeof(file_path), "%s/timezonesets",
334  share_path);
335  tzdir = AllocateDir(file_path);
336  if (tzdir == NULL)
337  {
338  GUC_check_errmsg("could not open directory \"%s\": %m",
339  file_path);
340  GUC_check_errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
341  my_exec_path);
342  return -1;
343  }
344  FreeDir(tzdir);
345  errno = save_errno;
346 
347  /*
348  * otherwise, if file doesn't exist and it's level 0, guc.c's
349  * complaint is enough
350  */
351  if (errno != ENOENT || depth > 0)
352  GUC_check_errmsg("could not read time zone file \"%s\": %m",
353  filename);
354 
355  return -1;
356  }
357 
358  while (!feof(tzFile))
359  {
360  lineno++;
361  if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL)
362  {
363  if (ferror(tzFile))
364  {
365  GUC_check_errmsg("could not read time zone file \"%s\": %m",
366  filename);
367  n = -1;
368  break;
369  }
370  /* else we're at EOF after all */
371  break;
372  }
373  if (strlen(tzbuf) == sizeof(tzbuf) - 1)
374  {
375  /* the line is too long for tzbuf */
376  GUC_check_errmsg("line is too long in time zone file \"%s\", line %d",
377  filename, lineno);
378  n = -1;
379  break;
380  }
381 
382  /* skip over whitespace */
383  line = tzbuf;
384  while (*line && isspace((unsigned char) *line))
385  line++;
386 
387  if (*line == '\0') /* empty line */
388  continue;
389  if (*line == '#') /* comment line */
390  continue;
391 
392  if (pg_strncasecmp(line, "@INCLUDE", strlen("@INCLUDE")) == 0)
393  {
394  /* pstrdup so we can use filename in result data structure */
395  char *includeFile = pstrdup(line + strlen("@INCLUDE"));
396 
397  includeFile = strtok(includeFile, WHITESPACE);
398  if (!includeFile || !*includeFile)
399  {
400  GUC_check_errmsg("@INCLUDE without file name in time zone file \"%s\", line %d",
401  filename, lineno);
402  n = -1;
403  break;
404  }
405  n = ParseTzFile(includeFile, depth + 1,
406  base, arraysize, n);
407  if (n < 0)
408  break;
409  continue;
410  }
411 
412  if (pg_strncasecmp(line, "@OVERRIDE", strlen("@OVERRIDE")) == 0)
413  {
414  override = true;
415  continue;
416  }
417 
418  if (!splitTzLine(filename, lineno, line, &tzentry))
419  {
420  n = -1;
421  break;
422  }
423  if (!validateTzEntry(&tzentry))
424  {
425  n = -1;
426  break;
427  }
428  n = addToArray(base, arraysize, n, &tzentry, override);
429  if (n < 0)
430  break;
431  }
432 
433  FreeFile(tzFile);
434 
435  return n;
436 }
437 
438 /*
439  * load_tzoffsets --- read and parse the specified timezone offset file
440  *
441  * On success, return a filled-in TimeZoneAbbrevTable, which must have been
442  * guc_malloc'd not palloc'd. On failure, return NULL, using GUC_check_errmsg
443  * and friends to give details of the problem.
444  */
447 {
448  TimeZoneAbbrevTable *result = NULL;
449  MemoryContext tmpContext;
450  MemoryContext oldContext;
451  tzEntry *array;
452  int arraysize;
453  int n;
454 
455  /*
456  * Create a temp memory context to work in. This makes it easy to clean
457  * up afterwards.
458  */
460  "TZParserMemory",
462  oldContext = MemoryContextSwitchTo(tmpContext);
463 
464  /* Initialize array at a reasonable size */
465  arraysize = 128;
466  array = (tzEntry *) palloc(arraysize * sizeof(tzEntry));
467 
468  /* Parse the file(s) */
469  n = ParseTzFile(filename, 0, &array, &arraysize, 0);
470 
471  /* If no errors so far, let datetime.c allocate memory & convert format */
472  if (n >= 0)
473  {
474  result = ConvertTimeZoneAbbrevs(array, n);
475  if (!result)
476  GUC_check_errmsg("out of memory");
477  }
478 
479  /* Clean up */
480  MemoryContextSwitchTo(oldContext);
481  MemoryContextDelete(tmpContext);
482 
483  return result;
484 }
TimeZoneAbbrevTable * ConvertTimeZoneAbbrevs(struct tzEntry *abbrevs, int n)
Definition: datetime.c:4748
int FreeDir(DIR *dir)
Definition: fd.c:2858
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2480
int FreeFile(FILE *file)
Definition: fd.c:2678
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2740
char my_exec_path[MAXPGPATH]
Definition: globals.c:76
#define GUC_check_errmsg
Definition: guc.h:430
#define GUC_check_errdetail
Definition: guc.h:434
#define GUC_check_errhint
Definition: guc.h:438
#define TOKMAXLEN
Definition: datetime.h:204
static char * share_path
Definition: initdb.c:134
char * pstrdup(const char *in)
Definition: mcxt.c:1644
MemoryContext CurrentMemoryContext
Definition: mcxt.c:135
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1476
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:403
void * palloc(Size size)
Definition: mcxt.c:1226
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_SMALL_SIZES
Definition: memutils.h:163
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:138
#define MAXPGPATH
static char * filename
Definition: pg_dumpall.c:119
void get_share_path(const char *my_exec_path, char *ret_path)
Definition: path.c:825
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
#define snprintf
Definition: port.h:238
unsigned char pg_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:122
int pg_strncasecmp(const char *s1, const char *s2, size_t n)
Definition: pgstrcasecmp.c:69
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:743
Definition: dirent.c:26
const char * filename
Definition: tzparser.h:33
int lineno
Definition: tzparser.h:32
char * zone
Definition: tzparser.h:27
int offset
Definition: tzparser.h:29
char * abbrev
Definition: tzparser.h:26
bool is_dst
Definition: tzparser.h:30
static bool validateTzEntry(tzEntry *tzentry)
Definition: tzparser.c:51
static bool splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
Definition: tzparser.c:97
TimeZoneAbbrevTable * load_tzoffsets(const char *filename)
Definition: tzparser.c:446
static int addToArray(tzEntry **base, int *arraysize, int n, tzEntry *entry, bool override)
Definition: tzparser.c:187
#define WHITESPACE
Definition: tzparser.c:34
static int ParseTzFile(const char *filename, int depth, tzEntry **base, int *arraysize, int n)
Definition: tzparser.c:275