PostgreSQL Source Code  git master
tzparser.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * tzparser.c
4  * Functions for parsing timezone offset files
5  *
6  * Note: this code is invoked from the check_hook for the GUC variable
7  * timezone_abbreviations. Therefore, it should report problems using
8  * GUC_check_errmsg() and related functions, and try to avoid throwing
9  * elog(ERROR). This is not completely bulletproof at present --- in
10  * particular out-of-memory will throw an error. Could probably fix with
11  * PG_TRY if necessary.
12  *
13  *
14  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
15  * Portions Copyright (c) 1994, Regents of the University of California
16  *
17  * IDENTIFICATION
18  * src/backend/utils/misc/tzparser.c
19  *
20  *-------------------------------------------------------------------------
21  */
22 
23 #include "postgres.h"
24 
25 #include <ctype.h>
26 
27 #include "miscadmin.h"
28 #include "storage/fd.h"
29 #include "utils/datetime.h"
30 #include "utils/guc.h"
31 #include "utils/memutils.h"
32 #include "utils/tzparser.h"
33 
34 
/* Delimiter set passed to strtok() when splitting a line into tokens */
35 #define WHITESPACE " \t\n\r"
36 
/* Forward declarations of the file-local helpers defined below */
37 static bool validateTzEntry(tzEntry *tzentry);
38 static bool splitTzLine(const char *filename, int lineno,
39  char *line, tzEntry *tzentry);
40 static int addToArray(tzEntry **base, int *arraysize, int n,
41  tzEntry *entry, bool override);
42 static int ParseTzFile(const char *filename, int depth,
43  tzEntry **base, int *arraysize, int n);
44 
45 
46 /*
47  * Apply additional validation checks to a tzEntry
48  *
49  * Returns true if OK, else false
50  */
51 static bool
53 {
54  unsigned char *p;
55 
56  /*
57  * Check restrictions imposed by datetktbl storage format (see datetime.c)
58  */
59  if (strlen(tzentry->abbrev) > TOKMAXLEN)
60  {
61  GUC_check_errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d",
62  tzentry->abbrev, TOKMAXLEN,
63  tzentry->filename, tzentry->lineno);
64  return false;
65  }
66 
67  /*
68  * Sanity-check the offset: shouldn't exceed 14 hours
69  */
70  if (tzentry->offset > 14 * SECS_PER_HOUR ||
71  tzentry->offset < -14 * SECS_PER_HOUR)
72  {
73  GUC_check_errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d",
74  tzentry->offset,
75  tzentry->filename, tzentry->lineno);
76  return false;
77  }
78 
79  /*
80  * Convert abbrev to lowercase (must match datetime.c's conversion)
81  */
82  for (p = (unsigned char *) tzentry->abbrev; *p; p++)
83  *p = pg_tolower(*p);
84 
85  return true;
86 }
87 
88 /*
89  * Attempt to parse the line as a timezone abbrev spec
90  *
91  * Valid formats are:
92  * name zone
93  * name offset dst
94  *
95  * Returns true if OK, else false; data is stored in *tzentry
96  */
97 static bool
98 splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
99 {
100  char *abbrev;
101  char *offset;
102  char *offset_endptr;
103  char *remain;
104  char *is_dst;
105 
106  tzentry->lineno = lineno;
107  tzentry->filename = filename;
108 
109  abbrev = strtok(line, WHITESPACE);
110  if (!abbrev)
111  {
112  GUC_check_errmsg("missing time zone abbreviation in time zone file \"%s\", line %d",
113  filename, lineno);
114  return false;
115  }
116  tzentry->abbrev = pstrdup(abbrev);
117 
118  offset = strtok(NULL, WHITESPACE);
119  if (!offset)
120  {
121  GUC_check_errmsg("missing time zone offset in time zone file \"%s\", line %d",
122  filename, lineno);
123  return false;
124  }
125 
126  /* We assume zone names don't begin with a digit or sign */
127  if (isdigit((unsigned char) *offset) || *offset == '+' || *offset == '-')
128  {
129  tzentry->zone = NULL;
130  tzentry->offset = strtol(offset, &offset_endptr, 10);
131  if (offset_endptr == offset || *offset_endptr != '\0')
132  {
133  GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d",
134  filename, lineno);
135  return false;
136  }
137 
138  is_dst = strtok(NULL, WHITESPACE);
139  if (is_dst && pg_strcasecmp(is_dst, "D") == 0)
140  {
141  tzentry->is_dst = true;
142  remain = strtok(NULL, WHITESPACE);
143  }
144  else
145  {
146  /* there was no 'D' dst specifier */
147  tzentry->is_dst = false;
148  remain = is_dst;
149  }
150  }
151  else
152  {
153  /*
154  * Assume entry is a zone name. We do not try to validate it by
155  * looking up the zone, because that would force loading of a lot of
156  * zones that probably will never be used in the current session.
157  */
158  tzentry->zone = pstrdup(offset);
159  tzentry->offset = 0 * SECS_PER_HOUR;
160  tzentry->is_dst = false;
161  remain = strtok(NULL, WHITESPACE);
162  }
163 
164  if (!remain) /* no more non-whitespace chars */
165  return true;
166 
167  if (remain[0] != '#') /* must be a comment */
168  {
169  GUC_check_errmsg("invalid syntax in time zone file \"%s\", line %d",
170  filename, lineno);
171  return false;
172  }
173  return true;
174 }
175 
176 /*
177  * Insert entry into sorted array
178  *
179  * *base: base address of array (changeable if must enlarge array)
180  * *arraysize: allocated length of array (changeable if must enlarge array)
181  * n: current number of valid elements in array
182  * entry: new data to insert
183  * override: true if OK to override
184  *
185  * Returns the new array length (new value for n), or -1 if error
186  */
187 static int
188 addToArray(tzEntry **base, int *arraysize, int n,
189  tzEntry *entry, bool override)
190 {
191  tzEntry *arrayptr;
192  int low;
193  int high;
194 
195  /*
196  * Search the array for a duplicate; as a useful side effect, the array is
197  * maintained in sorted order. We use strcmp() to ensure we match the
198  * sort order datetime.c expects.
199  */
200  arrayptr = *base;
201  low = 0;
202  high = n - 1;
203  while (low <= high)
204  {
205  int mid = (low + high) >> 1;
206  tzEntry *midptr = arrayptr + mid;
207  int cmp;
208 
209  cmp = strcmp(entry->abbrev, midptr->abbrev);
210  if (cmp < 0)
211  high = mid - 1;
212  else if (cmp > 0)
213  low = mid + 1;
214  else
215  {
216  /*
217  * Found a duplicate entry; complain unless it's the same.
218  */
219  if ((midptr->zone == NULL && entry->zone == NULL &&
220  midptr->offset == entry->offset &&
221  midptr->is_dst == entry->is_dst) ||
222  (midptr->zone != NULL && entry->zone != NULL &&
223  strcmp(midptr->zone, entry->zone) == 0))
224  {
225  /* return unchanged array */
226  return n;
227  }
228  if (override)
229  {
230  /* same abbrev but something is different, override */
231  midptr->zone = entry->zone;
232  midptr->offset = entry->offset;
233  midptr->is_dst = entry->is_dst;
234  return n;
235  }
236  /* same abbrev but something is different, complain */
237  GUC_check_errmsg("time zone abbreviation \"%s\" is multiply defined",
238  entry->abbrev);
239  GUC_check_errdetail("Entry in time zone file \"%s\", line %d, conflicts with entry in file \"%s\", line %d.",
240  midptr->filename, midptr->lineno,
241  entry->filename, entry->lineno);
242  return -1;
243  }
244  }
245 
246  /*
247  * No match, insert at position "low".
248  */
249  if (n >= *arraysize)
250  {
251  *arraysize *= 2;
252  *base = (tzEntry *) repalloc(*base, *arraysize * sizeof(tzEntry));
253  }
254 
255  arrayptr = *base + low;
256 
257  memmove(arrayptr + 1, arrayptr, (n - low) * sizeof(tzEntry));
258 
259  memcpy(arrayptr, entry, sizeof(tzEntry));
260 
261  return n + 1;
262 }
263 
264 /*
265  * Parse a single timezone abbrev file --- can recurse to handle @INCLUDE
266  *
267  * filename: user-specified file name (does not include path)
268  * depth: current recursion depth
269  * *base: array for results (changeable if must enlarge array)
270  * *arraysize: allocated length of array (changeable if must enlarge array)
271  * n: current number of valid elements in array
272  *
273  * Returns the new array length (new value for n), or -1 if error
274  */
275 static int
276 ParseTzFile(const char *filename, int depth,
277  tzEntry **base, int *arraysize, int n)
278 {
279  char share_path[MAXPGPATH];
280  char file_path[MAXPGPATH];
281  FILE *tzFile;
282  char tzbuf[1024];
283  char *line;
284  tzEntry tzentry;
285  int lineno = 0;
286  bool override = false;
287  const char *p;
288 
289  /*
290  * We enforce that the filename is all alpha characters. This may be
291  * overly restrictive, but we don't want to allow access to anything
292  * outside the timezonesets directory, so for instance '/' *must* be
293  * rejected.
294  */
295  for (p = filename; *p; p++)
296  {
297  if (!isalpha((unsigned char) *p))
298  {
299  /* at level 0, just use guc.c's regular "invalid value" message */
300  if (depth > 0)
301  GUC_check_errmsg("invalid time zone file name \"%s\"",
302  filename);
303  return -1;
304  }
305  }
306 
307  /*
308  * The maximal recursion depth is a pretty arbitrary setting. It is hard
309  * to imagine that someone needs more than 3 levels so stick with this
310  * conservative setting until someone complains.
311  */
312  if (depth > 3)
313  {
314  GUC_check_errmsg("time zone file recursion limit exceeded in file \"%s\"",
315  filename);
316  return -1;
317  }
318 
320  snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s",
322  tzFile = AllocateFile(file_path, "r");
323  if (!tzFile)
324  {
325  /*
326  * Check to see if the problem is not the filename but the directory.
327  * This is worth troubling over because if the installation share/
328  * directory is missing or unreadable, this is likely to be the first
329  * place we notice a problem during postmaster startup.
330  */
331  int save_errno = errno;
332  DIR *tzdir;
333 
334  snprintf(file_path, sizeof(file_path), "%s/timezonesets",
335  share_path);
336  tzdir = AllocateDir(file_path);
337  if (tzdir == NULL)
338  {
339  GUC_check_errmsg("could not open directory \"%s\": %m",
340  file_path);
341  GUC_check_errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
342  my_exec_path);
343  return -1;
344  }
345  FreeDir(tzdir);
346  errno = save_errno;
347 
348  /*
349  * otherwise, if file doesn't exist and it's level 0, guc.c's
350  * complaint is enough
351  */
352  if (errno != ENOENT || depth > 0)
353  GUC_check_errmsg("could not read time zone file \"%s\": %m",
354  filename);
355 
356  return -1;
357  }
358 
359  while (!feof(tzFile))
360  {
361  lineno++;
362  if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL)
363  {
364  if (ferror(tzFile))
365  {
366  GUC_check_errmsg("could not read time zone file \"%s\": %m",
367  filename);
368  n = -1;
369  break;
370  }
371  /* else we're at EOF after all */
372  break;
373  }
374  if (strlen(tzbuf) == sizeof(tzbuf) - 1)
375  {
376  /* the line is too long for tzbuf */
377  GUC_check_errmsg("line is too long in time zone file \"%s\", line %d",
378  filename, lineno);
379  n = -1;
380  break;
381  }
382 
383  /* skip over whitespace */
384  line = tzbuf;
385  while (*line && isspace((unsigned char) *line))
386  line++;
387 
388  if (*line == '\0') /* empty line */
389  continue;
390  if (*line == '#') /* comment line */
391  continue;
392 
393  if (pg_strncasecmp(line, "@INCLUDE", strlen("@INCLUDE")) == 0)
394  {
395  /* pstrdup so we can use filename in result data structure */
396  char *includeFile = pstrdup(line + strlen("@INCLUDE"));
397 
398  includeFile = strtok(includeFile, WHITESPACE);
399  if (!includeFile || !*includeFile)
400  {
401  GUC_check_errmsg("@INCLUDE without file name in time zone file \"%s\", line %d",
402  filename, lineno);
403  n = -1;
404  break;
405  }
406  n = ParseTzFile(includeFile, depth + 1,
407  base, arraysize, n);
408  if (n < 0)
409  break;
410  continue;
411  }
412 
413  if (pg_strncasecmp(line, "@OVERRIDE", strlen("@OVERRIDE")) == 0)
414  {
415  override = true;
416  continue;
417  }
418 
419  if (!splitTzLine(filename, lineno, line, &tzentry))
420  {
421  n = -1;
422  break;
423  }
424  if (!validateTzEntry(&tzentry))
425  {
426  n = -1;
427  break;
428  }
429  n = addToArray(base, arraysize, n, &tzentry, override);
430  if (n < 0)
431  break;
432  }
433 
434  FreeFile(tzFile);
435 
436  return n;
437 }
438 
439 /*
440  * load_tzoffsets --- read and parse the specified timezone offset file
441  *
442  * On success, return a filled-in TimeZoneAbbrevTable, which must have been
443  * guc_malloc'd not palloc'd. On failure, return NULL, using GUC_check_errmsg
444  * and friends to give details of the problem.
445  */
448 {
449  TimeZoneAbbrevTable *result = NULL;
450  MemoryContext tmpContext;
451  MemoryContext oldContext;
452  tzEntry *array;
453  int arraysize;
454  int n;
455 
456  /*
457  * Create a temp memory context to work in. This makes it easy to clean
458  * up afterwards.
459  */
461  "TZParserMemory",
463  oldContext = MemoryContextSwitchTo(tmpContext);
464 
465  /* Initialize array at a reasonable size */
466  arraysize = 128;
467  array = (tzEntry *) palloc(arraysize * sizeof(tzEntry));
468 
469  /* Parse the file(s) */
470  n = ParseTzFile(filename, 0, &array, &arraysize, 0);
471 
472  /* If no errors so far, let datetime.c allocate memory & convert format */
473  if (n >= 0)
474  {
475  result = ConvertTimeZoneAbbrevs(array, n);
476  if (!result)
477  GUC_check_errmsg("out of memory");
478  }
479 
480  /* Clean up */
481  MemoryContextSwitchTo(oldContext);
482  MemoryContextDelete(tmpContext);
483 
484  return result;
485 }
TimeZoneAbbrevTable * ConvertTimeZoneAbbrevs(struct tzEntry *abbrevs, int n)
Definition: datetime.c:4862
#define SECS_PER_HOUR
Definition: timestamp.h:127
int FreeDir(DIR *dir)
Definition: fd.c:2961
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2583
int FreeFile(FILE *file)
Definition: fd.c:2781
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2843
char my_exec_path[MAXPGPATH]
Definition: globals.c:78
#define GUC_check_errmsg
Definition: guc.h:443
#define GUC_check_errdetail
Definition: guc.h:447
#define GUC_check_errhint
Definition: guc.h:451
#define TOKMAXLEN
Definition: datetime.h:204
static char * share_path
Definition: initdb.c:135
char * pstrdup(const char *in)
Definition: mcxt.c:1683
MemoryContext CurrentMemoryContext
Definition: mcxt.c:131
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1528
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:442
void * palloc(Size size)
Definition: mcxt.c:1304
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_SMALL_SIZES
Definition: memutils.h:163
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
#define MAXPGPATH
static char * filename
Definition: pg_dumpall.c:121
void get_share_path(const char *my_exec_path, char *ret_path)
Definition: path.c:824
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
#define snprintf
Definition: port.h:238
unsigned char pg_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:122
int pg_strncasecmp(const char *s1, const char *s2, size_t n)
Definition: pgstrcasecmp.c:69
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:743
Definition: dirent.c:26
const char * filename
Definition: tzparser.h:33
int lineno
Definition: tzparser.h:32
char * zone
Definition: tzparser.h:27
int offset
Definition: tzparser.h:29
char * abbrev
Definition: tzparser.h:26
bool is_dst
Definition: tzparser.h:30
static bool validateTzEntry(tzEntry *tzentry)
Definition: tzparser.c:52
static bool splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
Definition: tzparser.c:98
TimeZoneAbbrevTable * load_tzoffsets(const char *filename)
Definition: tzparser.c:447
static int addToArray(tzEntry **base, int *arraysize, int n, tzEntry *entry, bool override)
Definition: tzparser.c:188
#define WHITESPACE
Definition: tzparser.c:35
static int ParseTzFile(const char *filename, int depth, tzEntry **base, int *arraysize, int n)
Definition: tzparser.c:276