PostgreSQL Source Code git master
compression.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * compression.c
4 *
5 * Shared code for compression methods and specifications.
6 *
7 * A compression specification specifies the parameters that should be used
8 * when performing compression with a specific algorithm. The simplest
9 * possible compression specification is an integer, which sets the
10 * compression level.
11 *
12 * Otherwise, a compression specification is a comma-separated list of items,
13 * each having the form keyword or keyword=value.
14 *
15 * Currently, the supported keywords are "level", "long", and "workers".
16 *
17 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
18 *
19 * IDENTIFICATION
20 * src/common/compression.c
21 *-------------------------------------------------------------------------
22 */
23
#ifndef FRONTEND
#include "postgres.h"
#else
#include "postgres_fe.h"
#endif

#include <errno.h>
#include <limits.h>

#ifdef USE_ZSTD
#include <zstd.h>
#endif
#ifdef HAVE_LIBZ
#include <zlib.h>
#endif

#include "common/compression.h"
38
39static int expect_integer_value(char *keyword, char *value,
41static bool expect_boolean_value(char *keyword, char *value,
43
44/*
45 * Look up a compression algorithm by name. Returns true and sets *algorithm
46 * if the name is recognized. Otherwise returns false.
47 */
48bool
50{
51 if (strcmp(name, "none") == 0)
52 *algorithm = PG_COMPRESSION_NONE;
53 else if (strcmp(name, "gzip") == 0)
54 *algorithm = PG_COMPRESSION_GZIP;
55 else if (strcmp(name, "lz4") == 0)
56 *algorithm = PG_COMPRESSION_LZ4;
57 else if (strcmp(name, "zstd") == 0)
58 *algorithm = PG_COMPRESSION_ZSTD;
59 else
60 return false;
61 return true;
62}
63
64/*
65 * Get the human-readable name corresponding to a particular compression
66 * algorithm.
67 */
68const char *
70{
71 switch (algorithm)
72 {
74 return "none";
76 return "gzip";
78 return "lz4";
80 return "zstd";
81 /* no default, to provoke compiler warnings if values are added */
82 }
83 Assert(false);
84 return "???"; /* placate compiler */
85}
86
87/*
88 * Parse a compression specification for a specified algorithm.
89 *
90 * See the file header comments for a brief description of what a compression
91 * specification is expected to look like.
92 *
93 * On return, all fields of the result object will be initialized.
94 * In particular, result->parse_error will be NULL if no errors occurred
95 * during parsing, and will otherwise contain an appropriate error message.
96 * The caller may free this error message string using pfree, if desired.
97 * Note, however, even if there's no parse error, the string might not make
98 * sense: e.g. for gzip, level=12 is not sensible, but it does parse OK.
99 *
100 * The compression level is assigned by default if not directly specified
101 * by the specification.
102 *
103 * Use validate_compress_specification() to find out whether a compression
104 * specification is semantically sensible.
105 */
106void
109{
110 int bare_level;
111 char *bare_level_endp;
112
113 /* Initial setup of result object. */
114 result->algorithm = algorithm;
115 result->options = 0;
116 result->parse_error = NULL;
117
118 /*
119 * Assign a default level depending on the compression method. This may
120 * be enforced later.
121 */
122 switch (result->algorithm)
123 {
125 result->level = 0;
126 break;
128#ifdef USE_LZ4
129 result->level = 0; /* fast compression mode */
130#else
131 result->parse_error =
132 psprintf(_("this build does not support compression with %s"),
133 "LZ4");
134#endif
135 break;
137#ifdef USE_ZSTD
138 result->level = ZSTD_CLEVEL_DEFAULT;
139#else
140 result->parse_error =
141 psprintf(_("this build does not support compression with %s"),
142 "ZSTD");
143#endif
144 break;
146#ifdef HAVE_LIBZ
147 result->level = Z_DEFAULT_COMPRESSION;
148#else
149 result->parse_error =
150 psprintf(_("this build does not support compression with %s"),
151 "gzip");
152#endif
153 break;
154 }
155
156 /* If there is no specification, we're done already. */
157 if (specification == NULL)
158 return;
159
160 /* As a special case, the specification can be a bare integer. */
161 bare_level = strtol(specification, &bare_level_endp, 10);
162 if (specification != bare_level_endp && *bare_level_endp == '\0')
163 {
164 result->level = bare_level;
165 return;
166 }
167
168 /* Look for comma-separated keyword or keyword=value entries. */
169 while (1)
170 {
171 char *kwstart;
172 char *kwend;
173 char *vstart;
174 char *vend;
175 int kwlen;
176 int vlen;
177 bool has_value;
178 char *keyword;
179 char *value;
180
181 /* Figure start, end, and length of next keyword and any value. */
182 kwstart = kwend = specification;
183 while (*kwend != '\0' && *kwend != ',' && *kwend != '=')
184 ++kwend;
185 kwlen = kwend - kwstart;
186 if (*kwend != '=')
187 {
188 vstart = vend = NULL;
189 vlen = 0;
190 has_value = false;
191 }
192 else
193 {
194 vstart = vend = kwend + 1;
195 while (*vend != '\0' && *vend != ',')
196 ++vend;
197 vlen = vend - vstart;
198 has_value = true;
199 }
200
201 /* Reject empty keyword. */
202 if (kwlen == 0)
203 {
204 result->parse_error =
205 pstrdup(_("found empty string where a compression option was expected"));
206 break;
207 }
208
209 /* Extract keyword and value as separate C strings. */
210 keyword = palloc(kwlen + 1);
211 memcpy(keyword, kwstart, kwlen);
212 keyword[kwlen] = '\0';
213 if (!has_value)
214 value = NULL;
215 else
216 {
217 value = palloc(vlen + 1);
218 memcpy(value, vstart, vlen);
219 value[vlen] = '\0';
220 }
221
222 /* Handle whatever keyword we found. */
223 if (strcmp(keyword, "level") == 0)
224 {
225 result->level = expect_integer_value(keyword, value, result);
226
227 /*
228 * No need to set a flag in "options", there is a default level
229 * set at least thanks to the logic above.
230 */
231 }
232 else if (strcmp(keyword, "workers") == 0)
233 {
234 result->workers = expect_integer_value(keyword, value, result);
236 }
237 else if (strcmp(keyword, "long") == 0)
238 {
239 result->long_distance = expect_boolean_value(keyword, value, result);
241 }
242 else
243 result->parse_error =
244 psprintf(_("unrecognized compression option: \"%s\""), keyword);
245
246 /* Release memory, just to be tidy. */
247 pfree(keyword);
248 if (value != NULL)
249 pfree(value);
250
251 /*
252 * If we got an error or have reached the end of the string, stop.
253 *
254 * If there is no value, then the end of the keyword might have been
255 * the end of the string. If there is a value, then the end of the
256 * keyword cannot have been the end of the string, but the end of the
257 * value might have been.
258 */
259 if (result->parse_error != NULL ||
260 (vend == NULL ? *kwend == '\0' : *vend == '\0'))
261 break;
262
263 /* Advance to next entry and loop around. */
264 specification = vend == NULL ? kwend + 1 : vend + 1;
265 }
266}
267
268/*
269 * Parse 'value' as an integer and return the result.
270 *
271 * If parsing fails, set result->parse_error to an appropriate message
272 * and return -1.
273 */
274static int
276{
277 int ivalue;
278 char *ivalue_endp;
279
280 if (value == NULL)
281 {
282 result->parse_error =
283 psprintf(_("compression option \"%s\" requires a value"),
284 keyword);
285 return -1;
286 }
287
288 ivalue = strtol(value, &ivalue_endp, 10);
289 if (ivalue_endp == value || *ivalue_endp != '\0')
290 {
291 result->parse_error =
292 psprintf(_("value for compression option \"%s\" must be an integer"),
293 keyword);
294 return -1;
295 }
296 return ivalue;
297}
298
299/*
300 * Parse 'value' as a boolean and return the result.
301 *
302 * If parsing fails, set result->parse_error to an appropriate message
303 * and return -1. The caller must check result->parse_error to determine if
304 * the call was successful.
305 *
306 * Valid values are: yes, no, on, off, 1, 0.
307 *
308 * Inspired by ParseVariableBool().
309 */
310static bool
312{
313 if (value == NULL)
314 return true;
315
316 if (pg_strcasecmp(value, "yes") == 0)
317 return true;
318 if (pg_strcasecmp(value, "on") == 0)
319 return true;
320 if (pg_strcasecmp(value, "1") == 0)
321 return true;
322
323 if (pg_strcasecmp(value, "no") == 0)
324 return false;
325 if (pg_strcasecmp(value, "off") == 0)
326 return false;
327 if (pg_strcasecmp(value, "0") == 0)
328 return false;
329
330 result->parse_error =
331 psprintf(_("value for compression option \"%s\" must be a Boolean value"),
332 keyword);
333 return false;
334}
335
336/*
337 * Returns NULL if the compression specification string was syntactically
338 * valid and semantically sensible. Otherwise, returns an error message.
339 *
340 * Does not test whether this build of PostgreSQL supports the requested
341 * compression method.
342 */
343char *
345{
346 int min_level = 1;
347 int max_level = 1;
348 int default_level = 0;
349
350 /* If it didn't even parse OK, it's definitely no good. */
351 if (spec->parse_error != NULL)
352 return spec->parse_error;
353
354 /*
355 * Check that the algorithm expects a compression level and it is within
356 * the legal range for the algorithm.
357 */
358 switch (spec->algorithm)
359 {
361 max_level = 9;
362#ifdef HAVE_LIBZ
363 default_level = Z_DEFAULT_COMPRESSION;
364#endif
365 break;
367 max_level = 12;
368 default_level = 0; /* fast mode */
369 break;
371#ifdef USE_ZSTD
372 max_level = ZSTD_maxCLevel();
373 min_level = ZSTD_minCLevel();
374 default_level = ZSTD_CLEVEL_DEFAULT;
375#endif
376 break;
378 if (spec->level != 0)
379 return psprintf(_("compression algorithm \"%s\" does not accept a compression level"),
381 break;
382 }
383
384 if ((spec->level < min_level || spec->level > max_level) &&
385 spec->level != default_level)
386 return psprintf(_("compression algorithm \"%s\" expects a compression level between %d and %d (default at %d)"),
388 min_level, max_level, default_level);
389
390 /*
391 * Of the compression algorithms that we currently support, only zstd
392 * allows parallel workers.
393 */
394 if ((spec->options & PG_COMPRESSION_OPTION_WORKERS) != 0 &&
396 {
397 return psprintf(_("compression algorithm \"%s\" does not accept a worker count"),
399 }
400
401 /*
402 * Of the compression algorithms that we currently support, only zstd
403 * supports long-distance mode.
404 */
405 if ((spec->options & PG_COMPRESSION_OPTION_LONG_DISTANCE) != 0 &&
407 {
408 return psprintf(_("compression algorithm \"%s\" does not support long-distance mode"),
410 }
411
412 return NULL;
413}
414
415#ifdef FRONTEND
416
/*
 * Basic parsing of a value specified through a command-line option, commonly
 * -Z/--compress.
 *
 * The parsing consists of a METHOD:DETAIL string fed later to
 * parse_compress_specification(). This only extracts METHOD and DETAIL.
 * If only an integer is found, the method is implied by the value specified.
 *
 * On return, *algorithm points to a newly allocated string and *detail
 * points to a newly allocated string or is NULL when there is no detail.
 * No validation of the algorithm name is done here.
 */
void
parse_compress_options(const char *option, char **algorithm, char **detail)
{
	char	   *sep;
	char	   *endp;
	long		result;

	/*
	 * Check whether the compression specification consists of a bare integer.
	 *
	 * For backward-compatibility, assume "none" if the integer found is zero
	 * and "gzip" otherwise.
	 *
	 * strtol leaves endp equal to option when it consumed no digits, so
	 * requiring endp != option keeps an empty string from being mistaken for
	 * the integer zero; it falls through and is handed back as an (empty)
	 * algorithm name for the caller to reject.
	 */
	result = strtol(option, &endp, 10);
	if (endp != option && *endp == '\0')
	{
		if (result == 0)
		{
			*algorithm = pstrdup("none");
			*detail = NULL;
		}
		else
		{
			*algorithm = pstrdup("gzip");
			*detail = pstrdup(option);
		}
		return;
	}

	/*
	 * Check whether there is a compression detail following the algorithm
	 * name.
	 */
	sep = strchr(option, ':');
	if (sep == NULL)
	{
		*algorithm = pstrdup(option);
		*detail = NULL;
	}
	else
	{
		size_t		alglen = sep - option;
		char	   *alg;

		/* Copy the part before the first ':' as the algorithm name. */
		alg = palloc(alglen + 1);
		memcpy(alg, option, alglen);
		alg[alglen] = '\0';

		*algorithm = alg;
		*detail = pstrdup(sep + 1);
	}
}
476#endif /* FRONTEND */
static bool expect_boolean_value(char *keyword, char *value, pg_compress_specification *result)
Definition: compression.c:311
const char * get_compress_algorithm_name(pg_compress_algorithm algorithm)
Definition: compression.c:69
char * validate_compress_specification(pg_compress_specification *spec)
Definition: compression.c:344
bool parse_compress_algorithm(char *name, pg_compress_algorithm *algorithm)
Definition: compression.c:49
static int expect_integer_value(char *keyword, char *value, pg_compress_specification *result)
Definition: compression.c:275
void parse_compress_specification(pg_compress_algorithm algorithm, char *specification, pg_compress_specification *result)
Definition: compression.c:107
#define PG_COMPRESSION_OPTION_WORKERS
Definition: compression.h:29
pg_compress_algorithm
Definition: compression.h:22
@ PG_COMPRESSION_GZIP
Definition: compression.h:24
@ PG_COMPRESSION_LZ4
Definition: compression.h:25
@ PG_COMPRESSION_NONE
Definition: compression.h:23
@ PG_COMPRESSION_ZSTD
Definition: compression.h:26
#define PG_COMPRESSION_OPTION_LONG_DISTANCE
Definition: compression.h:30
void parse_compress_options(const char *option, char **algorithm, char **detail)
#define _(x)
Definition: elog.c:90
Assert(PointerIsAligned(start, uint64))
static struct @165 value
char * pstrdup(const char *in)
Definition: mcxt.c:1699
void pfree(void *pointer)
Definition: mcxt.c:1524
void * palloc(Size size)
Definition: mcxt.c:1317
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
char * psprintf(const char *fmt,...)
Definition: psprintf.c:43
pg_compress_algorithm algorithm
Definition: compression.h:34
const char * name