PostgreSQL Source Code git master
tzparser.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * tzparser.c
4 * Functions for parsing timezone offset files
5 *
6 * Note: this code is invoked from the check_hook for the GUC variable
7 * timezone_abbreviations. Therefore, it should report problems using
8 * GUC_check_errmsg() and related functions, and try to avoid throwing
9 * elog(ERROR). This is not completely bulletproof at present --- in
10 * particular out-of-memory will throw an error. Could probably fix with
11 * PG_TRY if necessary.
12 *
13 *
14 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
15 * Portions Copyright (c) 1994, Regents of the University of California
16 *
17 * IDENTIFICATION
18 * src/backend/utils/misc/tzparser.c
19 *
20 *-------------------------------------------------------------------------
21 */
22
23#include "postgres.h"
24
25#include <ctype.h>
26
27#include "miscadmin.h"
28#include "storage/fd.h"
29#include "utils/datetime.h"
30#include "utils/guc.h"
31#include "utils/memutils.h"
32#include "utils/tzparser.h"
33
34
35#define WHITESPACE " \t\n\r"
36
/*
 * Forward declarations for the file-local helpers defined below.
 * ParseTzFile() drives the others: it calls splitTzLine(),
 * validateTzEntry() and addToArray() for each entry line, and recurses
 * into itself for @INCLUDE directives.
 */
37static bool validateTzEntry(tzEntry *tzentry);
38static bool splitTzLine(const char *filename, int lineno,
39 char *line, tzEntry *tzentry);
40static int addToArray(tzEntry **base, int *arraysize, int n,
41 tzEntry *entry, bool override);
42static int ParseTzFile(const char *filename, int depth,
43 tzEntry **base, int *arraysize, int n);
44
45
46/*
47 * Apply additional validation checks to a tzEntry
48 *
49 * Returns true if OK, else false
50 */
51static bool
53{
54 unsigned char *p;
55
56 /*
57 * Check restrictions imposed by datetktbl storage format (see datetime.c)
58 */
59 if (strlen(tzentry->abbrev) > TOKMAXLEN)
60 {
61 GUC_check_errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d",
62 tzentry->abbrev, TOKMAXLEN,
63 tzentry->filename, tzentry->lineno);
64 return false;
65 }
66
67 /*
68 * Sanity-check the offset: shouldn't exceed 14 hours
69 */
70 if (tzentry->offset > 14 * SECS_PER_HOUR ||
71 tzentry->offset < -14 * SECS_PER_HOUR)
72 {
73 GUC_check_errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d",
74 tzentry->offset,
75 tzentry->filename, tzentry->lineno);
76 return false;
77 }
78
79 /*
80 * Convert abbrev to lowercase (must match datetime.c's conversion)
81 */
82 for (p = (unsigned char *) tzentry->abbrev; *p; p++)
83 *p = pg_tolower(*p);
84
85 return true;
86}
87
88/*
89 * Attempt to parse the line as a timezone abbrev spec
90 *
91 * Valid formats are:
92 * name zone
93 * name offset dst
94 *
95 * Returns true if OK, else false; data is stored in *tzentry
96 */
97static bool
98splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
99{
100 char *brkl;
101 char *abbrev;
102 char *offset;
103 char *offset_endptr;
104 char *remain;
105 char *is_dst;
106
107 tzentry->lineno = lineno;
108 tzentry->filename = filename;
109
110 abbrev = strtok_r(line, WHITESPACE, &brkl);
111 if (!abbrev)
112 {
113 GUC_check_errmsg("missing time zone abbreviation in time zone file \"%s\", line %d",
114 filename, lineno);
115 return false;
116 }
117 tzentry->abbrev = pstrdup(abbrev);
118
119 offset = strtok_r(NULL, WHITESPACE, &brkl);
120 if (!offset)
121 {
122 GUC_check_errmsg("missing time zone offset in time zone file \"%s\", line %d",
123 filename, lineno);
124 return false;
125 }
126
127 /* We assume zone names don't begin with a digit or sign */
128 if (isdigit((unsigned char) *offset) || *offset == '+' || *offset == '-')
129 {
130 tzentry->zone = NULL;
131 tzentry->offset = strtol(offset, &offset_endptr, 10);
132 if (offset_endptr == offset || *offset_endptr != '\0')
133 {
134 GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d",
135 filename, lineno);
136 return false;
137 }
138
139 is_dst = strtok_r(NULL, WHITESPACE, &brkl);
140 if (is_dst && pg_strcasecmp(is_dst, "D") == 0)
141 {
142 tzentry->is_dst = true;
143 remain = strtok_r(NULL, WHITESPACE, &brkl);
144 }
145 else
146 {
147 /* there was no 'D' dst specifier */
148 tzentry->is_dst = false;
149 remain = is_dst;
150 }
151 }
152 else
153 {
154 /*
155 * Assume entry is a zone name. We do not try to validate it by
156 * looking up the zone, because that would force loading of a lot of
157 * zones that probably will never be used in the current session.
158 */
159 tzentry->zone = pstrdup(offset);
160 tzentry->offset = 0 * SECS_PER_HOUR;
161 tzentry->is_dst = false;
162 remain = strtok_r(NULL, WHITESPACE, &brkl);
163 }
164
165 if (!remain) /* no more non-whitespace chars */
166 return true;
167
168 if (remain[0] != '#') /* must be a comment */
169 {
170 GUC_check_errmsg("invalid syntax in time zone file \"%s\", line %d",
171 filename, lineno);
172 return false;
173 }
174 return true;
175}
176
177/*
178 * Insert entry into sorted array
179 *
180 * *base: base address of array (changeable if must enlarge array)
181 * *arraysize: allocated length of array (changeable if must enlarge array)
182 * n: current number of valid elements in array
183 * entry: new data to insert
184 * override: true if OK to override
185 *
186 * Returns the new array length (new value for n), or -1 if error
187 */
188static int
189addToArray(tzEntry **base, int *arraysize, int n,
190 tzEntry *entry, bool override)
191{
192 tzEntry *arrayptr;
193 int low;
194 int high;
195
196 /*
197 * Search the array for a duplicate; as a useful side effect, the array is
198 * maintained in sorted order. We use strcmp() to ensure we match the
199 * sort order datetime.c expects.
200 */
201 arrayptr = *base;
202 low = 0;
203 high = n - 1;
204 while (low <= high)
205 {
206 int mid = (low + high) >> 1;
207 tzEntry *midptr = arrayptr + mid;
208 int cmp;
209
210 cmp = strcmp(entry->abbrev, midptr->abbrev);
211 if (cmp < 0)
212 high = mid - 1;
213 else if (cmp > 0)
214 low = mid + 1;
215 else
216 {
217 /*
218 * Found a duplicate entry; complain unless it's the same.
219 */
220 if ((midptr->zone == NULL && entry->zone == NULL &&
221 midptr->offset == entry->offset &&
222 midptr->is_dst == entry->is_dst) ||
223 (midptr->zone != NULL && entry->zone != NULL &&
224 strcmp(midptr->zone, entry->zone) == 0))
225 {
226 /* return unchanged array */
227 return n;
228 }
229 if (override)
230 {
231 /* same abbrev but something is different, override */
232 midptr->zone = entry->zone;
233 midptr->offset = entry->offset;
234 midptr->is_dst = entry->is_dst;
235 return n;
236 }
237 /* same abbrev but something is different, complain */
238 GUC_check_errmsg("time zone abbreviation \"%s\" is multiply defined",
239 entry->abbrev);
240 GUC_check_errdetail("Entry in time zone file \"%s\", line %d, conflicts with entry in file \"%s\", line %d.",
241 midptr->filename, midptr->lineno,
242 entry->filename, entry->lineno);
243 return -1;
244 }
245 }
246
247 /*
248 * No match, insert at position "low".
249 */
250 if (n >= *arraysize)
251 {
252 *arraysize *= 2;
253 *base = (tzEntry *) repalloc(*base, *arraysize * sizeof(tzEntry));
254 }
255
256 arrayptr = *base + low;
257
258 memmove(arrayptr + 1, arrayptr, (n - low) * sizeof(tzEntry));
259
260 memcpy(arrayptr, entry, sizeof(tzEntry));
261
262 return n + 1;
263}
264
265/*
266 * Parse a single timezone abbrev file --- can recurse to handle @INCLUDE
267 *
268 * filename: user-specified file name (does not include path)
269 * depth: current recursion depth
270 * *base: array for results (changeable if must enlarge array)
271 * *arraysize: allocated length of array (changeable if must enlarge array)
272 * n: current number of valid elements in array
273 *
274 * Returns the new array length (new value for n), or -1 if error
275 */
276static int
277ParseTzFile(const char *filename, int depth,
278 tzEntry **base, int *arraysize, int n)
279{
280 char share_path[MAXPGPATH];
281 char file_path[MAXPGPATH];
282 FILE *tzFile;
283 char tzbuf[1024];
284 char *line;
285 tzEntry tzentry;
286 int lineno = 0;
287 bool override = false;
288 const char *p;
289
290 /*
291 * We enforce that the filename is all alpha characters. This may be
292 * overly restrictive, but we don't want to allow access to anything
293 * outside the timezonesets directory, so for instance '/' *must* be
294 * rejected.
295 */
296 for (p = filename; *p; p++)
297 {
298 if (!isalpha((unsigned char) *p))
299 {
300 /* at level 0, just use guc.c's regular "invalid value" message */
301 if (depth > 0)
302 GUC_check_errmsg("invalid time zone file name \"%s\"",
303 filename);
304 return -1;
305 }
306 }
307
308 /*
309 * The maximal recursion depth is a pretty arbitrary setting. It is hard
310 * to imagine that someone needs more than 3 levels so stick with this
311 * conservative setting until someone complains.
312 */
313 if (depth > 3)
314 {
315 GUC_check_errmsg("time zone file recursion limit exceeded in file \"%s\"",
316 filename);
317 return -1;
318 }
319
321 snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s",
323 tzFile = AllocateFile(file_path, "r");
324 if (!tzFile)
325 {
326 /*
327 * Check to see if the problem is not the filename but the directory.
328 * This is worth troubling over because if the installation share/
329 * directory is missing or unreadable, this is likely to be the first
330 * place we notice a problem during postmaster startup.
331 */
332 int save_errno = errno;
333 DIR *tzdir;
334
335 snprintf(file_path, sizeof(file_path), "%s/timezonesets",
336 share_path);
337 tzdir = AllocateDir(file_path);
338 if (tzdir == NULL)
339 {
340 GUC_check_errmsg("could not open directory \"%s\": %m",
341 file_path);
342 GUC_check_errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
344 return -1;
345 }
346 FreeDir(tzdir);
347 errno = save_errno;
348
349 /*
350 * otherwise, if file doesn't exist and it's level 0, guc.c's
351 * complaint is enough
352 */
353 if (errno != ENOENT || depth > 0)
354 GUC_check_errmsg("could not read time zone file \"%s\": %m",
355 filename);
356
357 return -1;
358 }
359
360 while (!feof(tzFile))
361 {
362 lineno++;
363 if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL)
364 {
365 if (ferror(tzFile))
366 {
367 GUC_check_errmsg("could not read time zone file \"%s\": %m",
368 filename);
369 n = -1;
370 break;
371 }
372 /* else we're at EOF after all */
373 break;
374 }
375 if (strlen(tzbuf) == sizeof(tzbuf) - 1)
376 {
377 /* the line is too long for tzbuf */
378 GUC_check_errmsg("line is too long in time zone file \"%s\", line %d",
379 filename, lineno);
380 n = -1;
381 break;
382 }
383
384 /* skip over whitespace */
385 line = tzbuf;
386 while (*line && isspace((unsigned char) *line))
387 line++;
388
389 if (*line == '\0') /* empty line */
390 continue;
391 if (*line == '#') /* comment line */
392 continue;
393
394 if (pg_strncasecmp(line, "@INCLUDE", strlen("@INCLUDE")) == 0)
395 {
396 /* pstrdup so we can use filename in result data structure */
397 char *includeFile = pstrdup(line + strlen("@INCLUDE"));
398 char *brki;
399
400 includeFile = strtok_r(includeFile, WHITESPACE, &brki);
401 if (!includeFile || !*includeFile)
402 {
403 GUC_check_errmsg("@INCLUDE without file name in time zone file \"%s\", line %d",
404 filename, lineno);
405 n = -1;
406 break;
407 }
408 n = ParseTzFile(includeFile, depth + 1,
409 base, arraysize, n);
410 if (n < 0)
411 break;
412 continue;
413 }
414
415 if (pg_strncasecmp(line, "@OVERRIDE", strlen("@OVERRIDE")) == 0)
416 {
417 override = true;
418 continue;
419 }
420
421 if (!splitTzLine(filename, lineno, line, &tzentry))
422 {
423 n = -1;
424 break;
425 }
426 if (!validateTzEntry(&tzentry))
427 {
428 n = -1;
429 break;
430 }
431 n = addToArray(base, arraysize, n, &tzentry, override);
432 if (n < 0)
433 break;
434 }
435
436 FreeFile(tzFile);
437
438 return n;
439}
440
441/*
442 * load_tzoffsets --- read and parse the specified timezone offset file
443 *
444 * On success, return a filled-in TimeZoneAbbrevTable, which must have been
445 * guc_malloc'd not palloc'd. On failure, return NULL, using GUC_check_errmsg
446 * and friends to give details of the problem.
447 */
450{
451 TimeZoneAbbrevTable *result = NULL;
452 MemoryContext tmpContext;
453 MemoryContext oldContext;
454 tzEntry *array;
455 int arraysize;
456 int n;
457
458 /*
459 * Create a temp memory context to work in. This makes it easy to clean
460 * up afterwards.
461 */
463 "TZParserMemory",
465 oldContext = MemoryContextSwitchTo(tmpContext);
466
467 /* Initialize array at a reasonable size */
468 arraysize = 128;
469 array = (tzEntry *) palloc(arraysize * sizeof(tzEntry));
470
471 /* Parse the file(s) */
472 n = ParseTzFile(filename, 0, &array, &arraysize, 0);
473
474 /* If no errors so far, let datetime.c allocate memory & convert format */
475 if (n >= 0)
476 {
477 result = ConvertTimeZoneAbbrevs(array, n);
478 if (!result)
479 GUC_check_errmsg("out of memory");
480 }
481
482 /* Clean up */
483 MemoryContextSwitchTo(oldContext);
484 MemoryContextDelete(tmpContext);
485
486 return result;
487}
TimeZoneAbbrevTable * ConvertTimeZoneAbbrevs(struct tzEntry *abbrevs, int n)
Definition: datetime.c:4989
#define SECS_PER_HOUR
Definition: timestamp.h:127
int FreeDir(DIR *dir)
Definition: fd.c:2983
int FreeFile(FILE *file)
Definition: fd.c:2803
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2865
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2605
char my_exec_path[MAXPGPATH]
Definition: globals.c:80
#define GUC_check_errmsg
Definition: guc.h:476
#define GUC_check_errdetail
Definition: guc.h:480
#define GUC_check_errhint
Definition: guc.h:484
#define TOKMAXLEN
Definition: datetime.h:204
static char * share_path
Definition: initdb.c:135
char * pstrdup(const char *in)
Definition: mcxt.c:1696
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc(Size size)
Definition: mcxt.c:1317
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_SMALL_SIZES
Definition: memutils.h:170
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
#define MAXPGPATH
static char * filename
Definition: pg_dumpall.c:119
void get_share_path(const char *my_exec_path, char *ret_path)
Definition: path.c:902
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
#define snprintf
Definition: port.h:239
unsigned char pg_tolower(unsigned char ch)
Definition: pgstrcasecmp.c:122
int pg_strncasecmp(const char *s1, const char *s2, size_t n)
Definition: pgstrcasecmp.c:69
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:743
Definition: dirent.c:26
const char * filename
Definition: tzparser.h:33
int lineno
Definition: tzparser.h:32
char * zone
Definition: tzparser.h:27
int offset
Definition: tzparser.h:29
char * abbrev
Definition: tzparser.h:26
bool is_dst
Definition: tzparser.h:30
static bool validateTzEntry(tzEntry *tzentry)
Definition: tzparser.c:52
static bool splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
Definition: tzparser.c:98
static int addToArray(tzEntry **base, int *arraysize, int n, tzEntry *entry, bool override)
Definition: tzparser.c:189
TimeZoneAbbrevTable * load_tzoffsets(const char *filename)
Definition: tzparser.c:449
#define WHITESPACE
Definition: tzparser.c:35
static int ParseTzFile(const char *filename, int depth, tzEntry **base, int *arraysize, int n)
Definition: tzparser.c:277
#define strtok_r
Definition: win32_port.h:411