PostgreSQL Source Code git master
Loading...
Searching...
No Matches
compression.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 * compression.c
 *
 * Shared code for compression methods and specifications.
 *
 * A compression specification specifies the parameters that should be used
 * when performing compression with a specific algorithm.  The simplest
 * possible compression specification is an integer, which sets the
 * compression level.
 *
 * Otherwise, a compression specification is a comma-separated list of items,
 * each having the form keyword or keyword=value.
 *
 * Currently, the supported keywords are "level", "long", and "workers".
 *
 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *    src/common/compression.c
 *-------------------------------------------------------------------------
 */
23
24#ifndef FRONTEND
25#include "postgres.h"
26#else
27#include "postgres_fe.h"
28#endif
29
30#ifdef USE_ZSTD
31#include <zstd.h>
32#endif
33#ifdef HAVE_LIBZ
34#include <zlib.h>
35#endif
36
37#include "common/compression.h"
38
39static int expect_integer_value(char *keyword, char *value,
41static bool expect_boolean_value(char *keyword, char *value,
43
44/*
45 * Look up a compression algorithm by archive file extension. Returns true and
46 * sets *algorithm if the extension is recognized. Otherwise returns false.
47 */
48bool
50{
51 size_t fname_len = strlen(fname);
52
53 if (fname_len >= 4 &&
54 strcmp(fname + fname_len - 4, ".tar") == 0)
55 *algorithm = PG_COMPRESSION_NONE;
56 else if (fname_len >= 4 &&
57 strcmp(fname + fname_len - 4, ".tgz") == 0)
58 *algorithm = PG_COMPRESSION_GZIP;
59 else if (fname_len >= 7 &&
60 strcmp(fname + fname_len - 7, ".tar.gz") == 0)
61 *algorithm = PG_COMPRESSION_GZIP;
62 else if (fname_len >= 8 &&
63 strcmp(fname + fname_len - 8, ".tar.lz4") == 0)
64 *algorithm = PG_COMPRESSION_LZ4;
65 else if (fname_len >= 8 &&
66 strcmp(fname + fname_len - 8, ".tar.zst") == 0)
67 *algorithm = PG_COMPRESSION_ZSTD;
68 else
69 return false;
70
71 return true;
72}
73
74/*
75 * Look up a compression algorithm by name. Returns true and sets *algorithm
76 * if the name is recognized. Otherwise returns false.
77 */
78bool
80{
81 if (strcmp(name, "none") == 0)
82 *algorithm = PG_COMPRESSION_NONE;
83 else if (strcmp(name, "gzip") == 0)
84 *algorithm = PG_COMPRESSION_GZIP;
85 else if (strcmp(name, "lz4") == 0)
86 *algorithm = PG_COMPRESSION_LZ4;
87 else if (strcmp(name, "zstd") == 0)
88 *algorithm = PG_COMPRESSION_ZSTD;
89 else
90 return false;
91 return true;
92}
93
94/*
95 * Get the human-readable name corresponding to a particular compression
96 * algorithm.
97 */
98const char *
100{
101 switch (algorithm)
102 {
104 return "none";
106 return "gzip";
108 return "lz4";
110 return "zstd";
111 /* no default, to provoke compiler warnings if values are added */
112 }
113 Assert(false);
114 return "???"; /* placate compiler */
115}
116
117/*
118 * Parse a compression specification for a specified algorithm.
119 *
120 * See the file header comments for a brief description of what a compression
121 * specification is expected to look like.
122 *
123 * On return, all fields of the result object will be initialized.
124 * In particular, result->parse_error will be NULL if no errors occurred
125 * during parsing, and will otherwise contain an appropriate error message.
126 * The caller may free this error message string using pfree, if desired.
127 * Note, however, even if there's no parse error, the string might not make
128 * sense: e.g. for gzip, level=12 is not sensible, but it does parse OK.
129 *
130 * The compression level is assigned by default if not directly specified
131 * by the specification.
132 *
133 * Use validate_compress_specification() to find out whether a compression
134 * specification is semantically sensible.
135 */
136void
139{
140 int bare_level;
141 char *bare_level_endp;
142
143 /* Initial setup of result object. */
144 result->algorithm = algorithm;
145 result->options = 0;
146 result->parse_error = NULL;
147
148 /*
149 * Assign a default level depending on the compression method. This may
150 * be enforced later.
151 */
152 switch (result->algorithm)
153 {
155 result->level = 0;
156 break;
158#ifdef USE_LZ4
159 result->level = 0; /* fast compression mode */
160#else
161 result->parse_error =
162 psprintf(_("this build does not support compression with %s"),
163 "LZ4");
164#endif
165 break;
167#ifdef USE_ZSTD
168 result->level = ZSTD_CLEVEL_DEFAULT;
169#else
170 result->parse_error =
171 psprintf(_("this build does not support compression with %s"),
172 "ZSTD");
173#endif
174 break;
176#ifdef HAVE_LIBZ
178#else
179 result->parse_error =
180 psprintf(_("this build does not support compression with %s"),
181 "gzip");
182#endif
183 break;
184 }
185
186 /* If there is no specification, we're done already. */
187 if (specification == NULL)
188 return;
189
190 /* As a special case, the specification can be a bare integer. */
193 {
194 result->level = bare_level;
195 return;
196 }
197
198 /* Look for comma-separated keyword or keyword=value entries. */
199 while (1)
200 {
201 char *kwstart;
202 char *kwend;
203 char *vstart;
204 char *vend;
205 int kwlen;
206 int vlen;
207 bool has_value;
208 char *keyword;
209 char *value;
210
211 /* Figure start, end, and length of next keyword and any value. */
213 while (*kwend != '\0' && *kwend != ',' && *kwend != '=')
214 ++kwend;
215 kwlen = kwend - kwstart;
216 if (*kwend != '=')
217 {
218 vstart = vend = NULL;
219 vlen = 0;
220 has_value = false;
221 }
222 else
223 {
224 vstart = vend = kwend + 1;
225 while (*vend != '\0' && *vend != ',')
226 ++vend;
227 vlen = vend - vstart;
228 has_value = true;
229 }
230
231 /* Reject empty keyword. */
232 if (kwlen == 0)
233 {
234 result->parse_error =
235 pstrdup(_("found empty string where a compression option was expected"));
236 break;
237 }
238
239 /* Extract keyword and value as separate C strings. */
240 keyword = palloc(kwlen + 1);
241 memcpy(keyword, kwstart, kwlen);
242 keyword[kwlen] = '\0';
243 if (!has_value)
244 value = NULL;
245 else
246 {
247 value = palloc(vlen + 1);
249 value[vlen] = '\0';
250 }
251
252 /* Handle whatever keyword we found. */
253 if (strcmp(keyword, "level") == 0)
254 {
255 result->level = expect_integer_value(keyword, value, result);
256
257 /*
258 * No need to set a flag in "options", there is a default level
259 * set at least thanks to the logic above.
260 */
261 }
262 else if (strcmp(keyword, "workers") == 0)
263 {
264 result->workers = expect_integer_value(keyword, value, result);
266 }
267 else if (strcmp(keyword, "long") == 0)
268 {
269 result->long_distance = expect_boolean_value(keyword, value, result);
271 }
272 else
273 result->parse_error =
274 psprintf(_("unrecognized compression option: \"%s\""), keyword);
275
276 /* Release memory, just to be tidy. */
277 pfree(keyword);
278 if (value != NULL)
279 pfree(value);
280
281 /*
282 * If we got an error or have reached the end of the string, stop.
283 *
284 * If there is no value, then the end of the keyword might have been
285 * the end of the string. If there is a value, then the end of the
286 * keyword cannot have been the end of the string, but the end of the
287 * value might have been.
288 */
289 if (result->parse_error != NULL ||
290 (vend == NULL ? *kwend == '\0' : *vend == '\0'))
291 break;
292
293 /* Advance to next entry and loop around. */
294 specification = vend == NULL ? kwend + 1 : vend + 1;
295 }
296}
297
298/*
299 * Parse 'value' as an integer and return the result.
300 *
301 * If parsing fails, set result->parse_error to an appropriate message
302 * and return -1.
303 */
304static int
306{
307 int ivalue;
308 char *ivalue_endp;
309
310 if (value == NULL)
311 {
312 result->parse_error =
313 psprintf(_("compression option \"%s\" requires a value"),
314 keyword);
315 return -1;
316 }
317
319 if (ivalue_endp == value || *ivalue_endp != '\0')
320 {
321 result->parse_error =
322 psprintf(_("value for compression option \"%s\" must be an integer"),
323 keyword);
324 return -1;
325 }
326 return ivalue;
327}
328
329/*
330 * Parse 'value' as a boolean and return the result.
331 *
332 * If parsing fails, set result->parse_error to an appropriate message
333 * and return -1. The caller must check result->parse_error to determine if
334 * the call was successful.
335 *
336 * Valid values are: yes, no, on, off, 1, 0.
337 *
338 * Inspired by ParseVariableBool().
339 */
340static bool
342{
343 if (value == NULL)
344 return true;
345
346 if (pg_strcasecmp(value, "yes") == 0)
347 return true;
348 if (pg_strcasecmp(value, "on") == 0)
349 return true;
350 if (pg_strcasecmp(value, "1") == 0)
351 return true;
352
353 if (pg_strcasecmp(value, "no") == 0)
354 return false;
355 if (pg_strcasecmp(value, "off") == 0)
356 return false;
357 if (pg_strcasecmp(value, "0") == 0)
358 return false;
359
360 result->parse_error =
361 psprintf(_("value for compression option \"%s\" must be a Boolean value"),
362 keyword);
363 return false;
364}
365
366/*
367 * Returns NULL if the compression specification string was syntactically
368 * valid and semantically sensible. Otherwise, returns an error message.
369 *
370 * Does not test whether this build of PostgreSQL supports the requested
371 * compression method.
372 */
373char *
375{
376 int min_level = 1;
377 int max_level = 1;
378 int default_level = 0;
379
380 /* If it didn't even parse OK, it's definitely no good. */
381 if (spec->parse_error != NULL)
382 return spec->parse_error;
383
384 /*
385 * Check that the algorithm expects a compression level and it is within
386 * the legal range for the algorithm.
387 */
388 switch (spec->algorithm)
389 {
391 max_level = 9;
392#ifdef HAVE_LIBZ
394#endif
395 break;
397 max_level = 12;
398 default_level = 0; /* fast mode */
399 break;
401#ifdef USE_ZSTD
405#endif
406 break;
408 if (spec->level != 0)
409 return psprintf(_("compression algorithm \"%s\" does not accept a compression level"),
411 break;
412 }
413
414 if ((spec->level < min_level || spec->level > max_level) &&
415 spec->level != default_level)
416 return psprintf(_("compression algorithm \"%s\" expects a compression level between %d and %d (default at %d)"),
419
420 /*
421 * Of the compression algorithms that we currently support, only zstd
422 * allows parallel workers.
423 */
424 if ((spec->options & PG_COMPRESSION_OPTION_WORKERS) != 0 &&
425 (spec->algorithm != PG_COMPRESSION_ZSTD))
426 {
427 return psprintf(_("compression algorithm \"%s\" does not accept a worker count"),
429 }
430
431 /*
432 * Of the compression algorithms that we currently support, only zstd
433 * supports long-distance mode.
434 */
435 if ((spec->options & PG_COMPRESSION_OPTION_LONG_DISTANCE) != 0 &&
436 (spec->algorithm != PG_COMPRESSION_ZSTD))
437 {
438 return psprintf(_("compression algorithm \"%s\" does not support long-distance mode"),
440 }
441
442 return NULL;
443}
444
#ifdef FRONTEND

/*
 * Basic parsing of a value specified through a command-line option, commonly
 * -Z/--compress.
 *
 * The parsing consists of a METHOD:DETAIL string fed later to
 * parse_compress_specification().  This only extracts METHOD and DETAIL.
 * If only an integer is found, the method is implied by the value specified.
 *
 * option:    the raw option text.
 * algorithm: output; always set to a newly allocated string.
 * detail:    output; set to a newly allocated string, or to NULL when the
 *            option carries no detail.
 *
 * Neither the algorithm name nor the detail is validated here.
 */
void
parse_compress_options(const char *option, char **algorithm, char **detail)
{
    const char *sep;
    char       *endp;
    long        result;

    /*
     * Check whether the compression specification consists of a bare integer.
     *
     * For backward-compatibility, assume "none" if the integer found is zero
     * and "gzip" otherwise.
     *
     * An empty option string also takes this path: strtol() consumes nothing,
     * returns zero and leaves endp at the terminator, so it is treated as
     * "none".
     */
    result = strtol(option, &endp, 10);
    if (*endp == '\0')
    {
        if (result == 0)
        {
            *algorithm = pstrdup("none");
            *detail = NULL;
        }
        else
        {
            *algorithm = pstrdup("gzip");
            *detail = pstrdup(option);
        }
        return;
    }

    /*
     * Check whether there is a compression detail following the algorithm
     * name.
     */
    sep = strchr(option, ':');
    if (sep == NULL)
    {
        *algorithm = pstrdup(option);
        *detail = NULL;
    }
    else
    {
        char       *alg;

        /* Copy everything before the first ':' as the algorithm name. */
        alg = palloc((sep - option) + 1);
        memcpy(alg, option, sep - option);
        alg[sep - option] = '\0';

        *algorithm = alg;
        *detail = pstrdup(sep + 1);
    }
}
#endif                          /* FRONTEND */
#define Assert(condition)
Definition c.h:945
bool parse_tar_compress_algorithm(const char *fname, pg_compress_algorithm *algorithm)
Definition compression.c:49
static bool expect_boolean_value(char *keyword, char *value, pg_compress_specification *result)
const char * get_compress_algorithm_name(pg_compress_algorithm algorithm)
Definition compression.c:99
char * validate_compress_specification(pg_compress_specification *spec)
bool parse_compress_algorithm(char *name, pg_compress_algorithm *algorithm)
Definition compression.c:79
static int expect_integer_value(char *keyword, char *value, pg_compress_specification *result)
void parse_compress_specification(pg_compress_algorithm algorithm, char *specification, pg_compress_specification *result)
#define PG_COMPRESSION_OPTION_WORKERS
Definition compression.h:29
pg_compress_algorithm
Definition compression.h:22
@ PG_COMPRESSION_GZIP
Definition compression.h:24
@ PG_COMPRESSION_LZ4
Definition compression.h:25
@ PG_COMPRESSION_NONE
Definition compression.h:23
@ PG_COMPRESSION_ZSTD
Definition compression.h:26
#define PG_COMPRESSION_OPTION_LONG_DISTANCE
Definition compression.h:30
void parse_compress_options(const char *option, char **algorithm, char **detail)
#define _(x)
Definition elog.c:95
static struct @174 value
char * pstrdup(const char *in)
Definition mcxt.c:1781
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
int pg_strcasecmp(const char *s1, const char *s2)
static int fb(int x)
char * psprintf(const char *fmt,...)
Definition psprintf.c:43
pg_compress_algorithm algorithm
Definition compression.h:34
const char * name