PostgreSQL Source Code  git master
compression.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * compression.c
4  *
5  * Shared code for compression methods and specifications.
6  *
7  * A compression specification specifies the parameters that should be used
8  * when performing compression with a specific algorithm. The simplest
9  * possible compression specification is an integer, which sets the
10  * compression level.
11  *
12  * Otherwise, a compression specification is a comma-separated list of items,
13  * each having the form keyword or keyword=value.
14  *
15  * Currently, the supported keywords are "level", "long", and "workers".
16  *
17  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
18  *
19  * IDENTIFICATION
20  * src/common/compression.c
21  *-------------------------------------------------------------------------
22  */
23 
#ifndef FRONTEND
#include "postgres.h"
#else
#include "postgres_fe.h"
#endif

#include <errno.h>
#include <limits.h>

#ifdef USE_ZSTD
#include <zstd.h>
#endif
#ifdef HAVE_LIBZ
#include <zlib.h>
#endif

#include "common/compression.h"
38 
39 static int expect_integer_value(char *keyword, char *value,
41 static bool expect_boolean_value(char *keyword, char *value,
43 
44 /*
45  * Look up a compression algorithm by name. Returns true and sets *algorithm
46  * if the name is recognized. Otherwise returns false.
47  */
48 bool
50 {
51  if (strcmp(name, "none") == 0)
52  *algorithm = PG_COMPRESSION_NONE;
53  else if (strcmp(name, "gzip") == 0)
54  *algorithm = PG_COMPRESSION_GZIP;
55  else if (strcmp(name, "lz4") == 0)
56  *algorithm = PG_COMPRESSION_LZ4;
57  else if (strcmp(name, "zstd") == 0)
58  *algorithm = PG_COMPRESSION_ZSTD;
59  else
60  return false;
61  return true;
62 }
63 
64 /*
65  * Get the human-readable name corresponding to a particular compression
66  * algorithm.
67  */
68 const char *
70 {
71  switch (algorithm)
72  {
74  return "none";
76  return "gzip";
77  case PG_COMPRESSION_LZ4:
78  return "lz4";
80  return "zstd";
81  /* no default, to provoke compiler warnings if values are added */
82  }
83  Assert(false);
84  return "???"; /* placate compiler */
85 }
86 
87 /*
88  * Parse a compression specification for a specified algorithm.
89  *
90  * See the file header comments for a brief description of what a compression
91  * specification is expected to look like.
92  *
93  * On return, all fields of the result object will be initialized.
94  * In particular, result->parse_error will be NULL if no errors occurred
95  * during parsing, and will otherwise contain an appropriate error message.
96  * The caller may free this error message string using pfree, if desired.
97  * Note, however, even if there's no parse error, the string might not make
98  * sense: e.g. for gzip, level=12 is not sensible, but it does parse OK.
99  *
100  * The compression level is assigned by default if not directly specified
101  * by the specification.
102  *
103  * Use validate_compress_specification() to find out whether a compression
104  * specification is semantically sensible.
105  */
106 void
109 {
110  int bare_level;
111  char *bare_level_endp;
112 
113  /* Initial setup of result object. */
114  result->algorithm = algorithm;
115  result->options = 0;
116  result->parse_error = NULL;
117 
118  /*
119  * Assign a default level depending on the compression method. This may
120  * be enforced later.
121  */
122  switch (result->algorithm)
123  {
124  case PG_COMPRESSION_NONE:
125  result->level = 0;
126  break;
127  case PG_COMPRESSION_LZ4:
128 #ifdef USE_LZ4
129  result->level = 0; /* fast compression mode */
130 #else
131  result->parse_error =
132  psprintf(_("this build does not support compression with %s"),
133  "LZ4");
134 #endif
135  break;
136  case PG_COMPRESSION_ZSTD:
137 #ifdef USE_ZSTD
138  result->level = ZSTD_CLEVEL_DEFAULT;
139 #else
140  result->parse_error =
141  psprintf(_("this build does not support compression with %s"),
142  "ZSTD");
143 #endif
144  break;
145  case PG_COMPRESSION_GZIP:
146 #ifdef HAVE_LIBZ
147  result->level = Z_DEFAULT_COMPRESSION;
148 #else
149  result->parse_error =
150  psprintf(_("this build does not support compression with %s"),
151  "gzip");
152 #endif
153  break;
154  }
155 
156  /* If there is no specification, we're done already. */
157  if (specification == NULL)
158  return;
159 
160  /* As a special case, the specification can be a bare integer. */
161  bare_level = strtol(specification, &bare_level_endp, 10);
162  if (specification != bare_level_endp && *bare_level_endp == '\0')
163  {
164  result->level = bare_level;
165  return;
166  }
167 
168  /* Look for comma-separated keyword or keyword=value entries. */
169  while (1)
170  {
171  char *kwstart;
172  char *kwend;
173  char *vstart;
174  char *vend;
175  int kwlen;
176  int vlen;
177  bool has_value;
178  char *keyword;
179  char *value;
180 
181  /* Figure start, end, and length of next keyword and any value. */
182  kwstart = kwend = specification;
183  while (*kwend != '\0' && *kwend != ',' && *kwend != '=')
184  ++kwend;
185  kwlen = kwend - kwstart;
186  if (*kwend != '=')
187  {
188  vstart = vend = NULL;
189  vlen = 0;
190  has_value = false;
191  }
192  else
193  {
194  vstart = vend = kwend + 1;
195  while (*vend != '\0' && *vend != ',')
196  ++vend;
197  vlen = vend - vstart;
198  has_value = true;
199  }
200 
201  /* Reject empty keyword. */
202  if (kwlen == 0)
203  {
204  result->parse_error =
205  pstrdup(_("found empty string where a compression option was expected"));
206  break;
207  }
208 
209  /* Extract keyword and value as separate C strings. */
210  keyword = palloc(kwlen + 1);
211  memcpy(keyword, kwstart, kwlen);
212  keyword[kwlen] = '\0';
213  if (!has_value)
214  value = NULL;
215  else
216  {
217  value = palloc(vlen + 1);
218  memcpy(value, vstart, vlen);
219  value[vlen] = '\0';
220  }
221 
222  /* Handle whatever keyword we found. */
223  if (strcmp(keyword, "level") == 0)
224  {
225  result->level = expect_integer_value(keyword, value, result);
226 
227  /*
228  * No need to set a flag in "options", there is a default level
229  * set at least thanks to the logic above.
230  */
231  }
232  else if (strcmp(keyword, "workers") == 0)
233  {
234  result->workers = expect_integer_value(keyword, value, result);
236  }
237  else if (strcmp(keyword, "long") == 0)
238  {
239  result->long_distance = expect_boolean_value(keyword, value, result);
241  }
242  else
243  result->parse_error =
244  psprintf(_("unrecognized compression option: \"%s\""), keyword);
245 
246  /* Release memory, just to be tidy. */
247  pfree(keyword);
248  if (value != NULL)
249  pfree(value);
250 
251  /*
252  * If we got an error or have reached the end of the string, stop.
253  *
254  * If there is no value, then the end of the keyword might have been
255  * the end of the string. If there is a value, then the end of the
256  * keyword cannot have been the end of the string, but the end of the
257  * value might have been.
258  */
259  if (result->parse_error != NULL ||
260  (vend == NULL ? *kwend == '\0' : *vend == '\0'))
261  break;
262 
263  /* Advance to next entry and loop around. */
264  specification = vend == NULL ? kwend + 1 : vend + 1;
265  }
266 }
267 
268 /*
269  * Parse 'value' as an integer and return the result.
270  *
271  * If parsing fails, set result->parse_error to an appropriate message
272  * and return -1.
273  */
274 static int
276 {
277  int ivalue;
278  char *ivalue_endp;
279 
280  if (value == NULL)
281  {
282  result->parse_error =
283  psprintf(_("compression option \"%s\" requires a value"),
284  keyword);
285  return -1;
286  }
287 
288  ivalue = strtol(value, &ivalue_endp, 10);
289  if (ivalue_endp == value || *ivalue_endp != '\0')
290  {
291  result->parse_error =
292  psprintf(_("value for compression option \"%s\" must be an integer"),
293  keyword);
294  return -1;
295  }
296  return ivalue;
297 }
298 
299 /*
300  * Parse 'value' as a boolean and return the result.
301  *
302  * If parsing fails, set result->parse_error to an appropriate message
303  * and return -1. The caller must check result->parse_error to determine if
304  * the call was successful.
305  *
306  * Valid values are: yes, no, on, off, 1, 0.
307  *
308  * Inspired by ParseVariableBool().
309  */
310 static bool
312 {
313  if (value == NULL)
314  return true;
315 
316  if (pg_strcasecmp(value, "yes") == 0)
317  return true;
318  if (pg_strcasecmp(value, "on") == 0)
319  return true;
320  if (pg_strcasecmp(value, "1") == 0)
321  return true;
322 
323  if (pg_strcasecmp(value, "no") == 0)
324  return false;
325  if (pg_strcasecmp(value, "off") == 0)
326  return false;
327  if (pg_strcasecmp(value, "0") == 0)
328  return false;
329 
330  result->parse_error =
331  psprintf(_("value for compression option \"%s\" must be a Boolean value"),
332  keyword);
333  return false;
334 }
335 
336 /*
337  * Returns NULL if the compression specification string was syntactically
338  * valid and semantically sensible. Otherwise, returns an error message.
339  *
340  * Does not test whether this build of PostgreSQL supports the requested
341  * compression method.
342  */
343 char *
345 {
346  int min_level = 1;
347  int max_level = 1;
348  int default_level = 0;
349 
350  /* If it didn't even parse OK, it's definitely no good. */
351  if (spec->parse_error != NULL)
352  return spec->parse_error;
353 
354  /*
355  * Check that the algorithm expects a compression level and it is within
356  * the legal range for the algorithm.
357  */
358  switch (spec->algorithm)
359  {
360  case PG_COMPRESSION_GZIP:
361  max_level = 9;
362 #ifdef HAVE_LIBZ
363  default_level = Z_DEFAULT_COMPRESSION;
364 #endif
365  break;
366  case PG_COMPRESSION_LZ4:
367  max_level = 12;
368  default_level = 0; /* fast mode */
369  break;
370  case PG_COMPRESSION_ZSTD:
371 #ifdef USE_ZSTD
372  max_level = ZSTD_maxCLevel();
373  min_level = ZSTD_minCLevel();
374  default_level = ZSTD_CLEVEL_DEFAULT;
375 #endif
376  break;
377  case PG_COMPRESSION_NONE:
378  if (spec->level != 0)
379  return psprintf(_("compression algorithm \"%s\" does not accept a compression level"),
381  break;
382  }
383 
384  if ((spec->level < min_level || spec->level > max_level) &&
385  spec->level != default_level)
386  return psprintf(_("compression algorithm \"%s\" expects a compression level between %d and %d (default at %d)"),
388  min_level, max_level, default_level);
389 
390  /*
391  * Of the compression algorithms that we currently support, only zstd
392  * allows parallel workers.
393  */
394  if ((spec->options & PG_COMPRESSION_OPTION_WORKERS) != 0 &&
395  (spec->algorithm != PG_COMPRESSION_ZSTD))
396  {
397  return psprintf(_("compression algorithm \"%s\" does not accept a worker count"),
399  }
400 
401  /*
402  * Of the compression algorithms that we currently support, only zstd
403  * supports long-distance mode.
404  */
405  if ((spec->options & PG_COMPRESSION_OPTION_LONG_DISTANCE) != 0 &&
406  (spec->algorithm != PG_COMPRESSION_ZSTD))
407  {
408  return psprintf(_("compression algorithm \"%s\" does not support long-distance mode"),
410  }
411 
412  return NULL;
413 }
414 
415 #ifdef FRONTEND
416 
/*
 * Split a command-line compression option (commonly -Z/--compress) into its
 * METHOD and DETAIL parts.
 *
 * The option has the form METHOD[:DETAIL]; the pieces are meant to be fed
 * later to parse_compress_specification().  Nothing is validated here.
 *
 * For backward compatibility an option that strtol consumes entirely is
 * treated as a bare compression level: a value of zero selects "none" with
 * no detail, anything else selects "gzip" with the original option text as
 * the detail.
 *
 * *algorithm always receives a newly allocated string.  *detail receives
 * either a newly allocated string or NULL when there is no detail.
 */
void
parse_compress_options(const char *option, char **algorithm, char **detail)
{
	char	   *num_end;
	long		level;
	const char *colon;
	size_t		alg_len;
	char	   *alg_copy;

	/* Bare-integer form: the whole option parsed as a number. */
	level = strtol(option, &num_end, 10);
	if (*num_end == '\0')
	{
		const bool	is_zero = (level == 0);

		*algorithm = pstrdup(is_zero ? "none" : "gzip");
		*detail = is_zero ? NULL : pstrdup(option);
		return;
	}

	/* No separator: the entire option is the algorithm name. */
	colon = strchr(option, ':');
	if (colon == NULL)
	{
		*algorithm = pstrdup(option);
		*detail = NULL;
		return;
	}

	/* METHOD:DETAIL form: copy the text on each side of the first colon. */
	alg_len = (size_t) (colon - option);
	alg_copy = palloc(alg_len + 1);
	memcpy(alg_copy, option, alg_len);
	alg_copy[alg_len] = '\0';

	*algorithm = alg_copy;
	*detail = pstrdup(colon + 1);
}
476 #endif /* FRONTEND */
#define Assert(condition)
Definition: c.h:812
static bool expect_boolean_value(char *keyword, char *value, pg_compress_specification *result)
Definition: compression.c:311
bool parse_compress_algorithm(char *name, pg_compress_algorithm *algorithm)
Definition: compression.c:49
const char * get_compress_algorithm_name(pg_compress_algorithm algorithm)
Definition: compression.c:69
static int expect_integer_value(char *keyword, char *value, pg_compress_specification *result)
Definition: compression.c:275
void parse_compress_specification(pg_compress_algorithm algorithm, char *specification, pg_compress_specification *result)
Definition: compression.c:107
char * validate_compress_specification(pg_compress_specification *spec)
Definition: compression.c:344
#define PG_COMPRESSION_OPTION_WORKERS
Definition: compression.h:29
pg_compress_algorithm
Definition: compression.h:22
@ PG_COMPRESSION_GZIP
Definition: compression.h:24
@ PG_COMPRESSION_LZ4
Definition: compression.h:25
@ PG_COMPRESSION_NONE
Definition: compression.h:23
@ PG_COMPRESSION_ZSTD
Definition: compression.h:26
#define PG_COMPRESSION_OPTION_LONG_DISTANCE
Definition: compression.h:30
void parse_compress_options(const char *option, char **algorithm, char **detail)
#define _(x)
Definition: elog.c:90
static struct @160 value
char * pstrdup(const char *in)
Definition: mcxt.c:1696
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
char * psprintf(const char *fmt,...)
Definition: psprintf.c:43
pg_compress_algorithm algorithm
Definition: compression.h:34
const char * name