PostgreSQL Source Code  git master
toast_compression.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * toast_compression.c
4  * Functions for toast compression.
5  *
6  * Copyright (c) 2021-2022, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/access/common/toast_compression.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #ifdef USE_LZ4
17 #include <lz4.h>
18 #endif
19 
20 #include "access/detoast.h"
22 #include "common/pg_lzcompress.h"
23 #include "fmgr.h"
24 #include "utils/builtins.h"
25 
26 /* GUC */
28 
/*
 * Report that lz4 is unavailable in this build.
 *
 * Expands to an ereport() at ERROR level with
 * ERRCODE_FEATURE_NOT_SUPPORTED.  It is used in the !USE_LZ4 branches of
 * the lz4 entry points in this file.
 */
#define NO_LZ4_SUPPORT() \
	ereport(ERROR, \
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
			 errmsg("compression method lz4 not supported"), \
			 errdetail("This functionality requires the server to be built with lz4 support.")))
34 
35 /*
36  * Compress a varlena using PGLZ.
37  *
38  * Returns the compressed varlena, or NULL if compression fails.
39  */
40 struct varlena *
42 {
43  int32 valsize,
44  len;
45  struct varlena *tmp = NULL;
46 
47  valsize = VARSIZE_ANY_EXHDR(value);
48 
49  /*
50  * No point in wasting a palloc cycle if value size is outside the allowed
51  * range for compression.
52  */
53  if (valsize < PGLZ_strategy_default->min_input_size ||
55  return NULL;
56 
57  /*
58  * Figure out the maximum possible size of the pglz output, add the bytes
59  * that will be needed for varlena overhead, and allocate that amount.
60  */
61  tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) +
63 
65  valsize,
66  (char *) tmp + VARHDRSZ_COMPRESSED,
67  NULL);
68  if (len < 0)
69  {
70  pfree(tmp);
71  return NULL;
72  }
73 
75 
76  return tmp;
77 }
78 
79 /*
80  * Decompress a varlena that was compressed using PGLZ.
81  */
82 struct varlena *
84 {
85  struct varlena *result;
86  int32 rawsize;
87 
88  /* allocate memory for the uncompressed data */
90 
91  /* decompress the data */
92  rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESSED,
94  VARDATA(result),
96  if (rawsize < 0)
97  ereport(ERROR,
99  errmsg_internal("compressed pglz data is corrupt")));
100 
101  SET_VARSIZE(result, rawsize + VARHDRSZ);
102 
103  return result;
104 }
105 
106 /*
107  * Decompress part of a varlena that was compressed using PGLZ.
108  */
109 struct varlena *
111  int32 slicelength)
112 {
113  struct varlena *result;
114  int32 rawsize;
115 
116  /* allocate memory for the uncompressed data */
117  result = (struct varlena *) palloc(slicelength + VARHDRSZ);
118 
119  /* decompress the data */
120  rawsize = pglz_decompress((char *) value + VARHDRSZ_COMPRESSED,
122  VARDATA(result),
123  slicelength, false);
124  if (rawsize < 0)
125  ereport(ERROR,
127  errmsg_internal("compressed pglz data is corrupt")));
128 
129  SET_VARSIZE(result, rawsize + VARHDRSZ);
130 
131  return result;
132 }
133 
134 /*
135  * Compress a varlena using LZ4.
136  *
137  * Returns the compressed varlena, or NULL if compression fails.
138  */
139 struct varlena *
141 {
142 #ifndef USE_LZ4
143  NO_LZ4_SUPPORT();
144  return NULL; /* keep compiler quiet */
145 #else
146  int32 valsize;
147  int32 len;
148  int32 max_size;
149  struct varlena *tmp = NULL;
150 
151  valsize = VARSIZE_ANY_EXHDR(value);
152 
153  /*
154  * Figure out the maximum possible size of the LZ4 output, add the bytes
155  * that will be needed for varlena overhead, and allocate that amount.
156  */
157  max_size = LZ4_compressBound(valsize);
158  tmp = (struct varlena *) palloc(max_size + VARHDRSZ_COMPRESSED);
159 
160  len = LZ4_compress_default(VARDATA_ANY(value),
161  (char *) tmp + VARHDRSZ_COMPRESSED,
162  valsize, max_size);
163  if (len <= 0)
164  elog(ERROR, "lz4 compression failed");
165 
166  /* data is incompressible so just free the memory and return NULL */
167  if (len > valsize)
168  {
169  pfree(tmp);
170  return NULL;
171  }
172 
174 
175  return tmp;
176 #endif
177 }
178 
179 /*
180  * Decompress a varlena that was compressed using LZ4.
181  */
182 struct varlena *
184 {
185 #ifndef USE_LZ4
186  NO_LZ4_SUPPORT();
187  return NULL; /* keep compiler quiet */
188 #else
189  int32 rawsize;
190  struct varlena *result;
191 
192  /* allocate memory for the uncompressed data */
194 
195  /* decompress the data */
196  rawsize = LZ4_decompress_safe((char *) value + VARHDRSZ_COMPRESSED,
197  VARDATA(result),
200  if (rawsize < 0)
201  ereport(ERROR,
203  errmsg_internal("compressed lz4 data is corrupt")));
204 
205 
206  SET_VARSIZE(result, rawsize + VARHDRSZ);
207 
208  return result;
209 #endif
210 }
211 
212 /*
213  * Decompress part of a varlena that was compressed using LZ4.
214  */
215 struct varlena *
216 lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength)
217 {
218 #ifndef USE_LZ4
219  NO_LZ4_SUPPORT();
220  return NULL; /* keep compiler quiet */
221 #else
222  int32 rawsize;
223  struct varlena *result;
224 
225  /* slice decompression not supported prior to 1.8.3 */
226  if (LZ4_versionNumber() < 10803)
227  return lz4_decompress_datum(value);
228 
229  /* allocate memory for the uncompressed data */
230  result = (struct varlena *) palloc(slicelength + VARHDRSZ);
231 
232  /* decompress the data */
233  rawsize = LZ4_decompress_safe_partial((char *) value + VARHDRSZ_COMPRESSED,
234  VARDATA(result),
236  slicelength,
237  slicelength);
238  if (rawsize < 0)
239  ereport(ERROR,
241  errmsg_internal("compressed lz4 data is corrupt")));
242 
243  SET_VARSIZE(result, rawsize + VARHDRSZ);
244 
245  return result;
246 #endif
247 }
248 
249 /*
250  * Extract compression ID from a varlena.
251  *
252  * Returns TOAST_INVALID_COMPRESSION_ID if the varlena is not compressed.
253  */
256 {
258 
259  /*
260  * If it is stored externally then fetch the compression method id from
261  * the external toast pointer. If compressed inline, fetch it from the
262  * toast compression header.
263  */
264  if (VARATT_IS_EXTERNAL_ONDISK(attr))
265  {
266  struct varatt_external toast_pointer;
267 
268  VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
269 
270  if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
271  cmid = VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer);
272  }
273  else if (VARATT_IS_COMPRESSED(attr))
275 
276  return cmid;
277 }
278 
279 /*
280  * CompressionNameToMethod - Get compression method from compression name
281  *
282  * Search in the available built-in methods. If the compression not found
283  * in the built-in methods then return InvalidCompressionMethod.
284  */
285 char
286 CompressionNameToMethod(const char *compression)
287 {
288  if (strcmp(compression, "pglz") == 0)
289  return TOAST_PGLZ_COMPRESSION;
290  else if (strcmp(compression, "lz4") == 0)
291  {
292 #ifndef USE_LZ4
293  NO_LZ4_SUPPORT();
294 #endif
295  return TOAST_LZ4_COMPRESSION;
296  }
297 
299 }
300 
301 /*
302  * GetCompressionMethodName - Get compression method name
303  */
304 const char *
306 {
307  switch (method)
308  {
310  return "pglz";
312  return "lz4";
313  default:
314  elog(ERROR, "invalid compression method %c", method);
315  return NULL; /* keep compiler quiet */
316  }
317 }
signed int int32
Definition: c.h:430
#define VARHDRSZ
Definition: c.h:628
#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr)
Definition: detoast.h:22
int errmsg_internal(const char *fmt,...)
Definition: elog.c:993
int errcode(int sqlerrcode)
Definition: elog.c:695
#define ERROR
Definition: elog.h:35
#define ereport(elevel,...)
Definition: elog.h:145
static struct @143 value
void pfree(void *pointer)
Definition: mcxt.c:1306
void * palloc(Size size)
Definition: mcxt.c:1199
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
const void size_t len
const PGLZ_Strategy *const PGLZ_strategy_default
int32 pglz_decompress(const char *source, int32 slen, char *dest, int32 rawsize, bool check_complete)
int32 pglz_compress(const char *source, int32 slen, char *dest, const PGLZ_Strategy *strategy)
#define PGLZ_MAX_OUTPUT(_dlen)
Definition: pg_lzcompress.h:21
#define VARATT_IS_EXTERNAL_ONDISK(PTR)
Definition: postgres.h:328
#define SET_VARSIZE_COMPRESSED(PTR, len)
Definition: postgres.h:345
#define VARDATA(PTR)
Definition: postgres.h:316
#define VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer)
Definition: postgres.h:374
#define VARDATA_COMPRESSED_GET_EXTSIZE(PTR)
Definition: postgres.h:366
#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)
Definition: postgres.h:392
#define VARATT_IS_COMPRESSED(PTR)
Definition: postgres.h:326
#define VARDATA_ANY(PTR)
Definition: postgres.h:362
#define VARHDRSZ_COMPRESSED
Definition: postgres.h:292
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:343
#define VARSIZE(PTR)
Definition: postgres.h:317
#define VARDATA_COMPRESSED_GET_COMPRESS_METHOD(PTR)
Definition: postgres.h:368
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:355
int32 max_input_size
Definition: pg_lzcompress.h:60
Definition: c.h:623
struct varlena * lz4_decompress_datum(const struct varlena *value)
#define NO_LZ4_SUPPORT()
struct varlena * lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength)
int default_toast_compression
struct varlena * pglz_decompress_datum(const struct varlena *value)
struct varlena * pglz_decompress_datum_slice(const struct varlena *value, int32 slicelength)
const char * GetCompressionMethodName(char method)
struct varlena * lz4_compress_datum(const struct varlena *value)
struct varlena * pglz_compress_datum(const struct varlena *value)
char CompressionNameToMethod(const char *compression)
ToastCompressionId toast_get_compression_id(struct varlena *attr)
ToastCompressionId
@ TOAST_INVALID_COMPRESSION_ID
#define TOAST_PGLZ_COMPRESSION
#define InvalidCompressionMethod
#define TOAST_LZ4_COMPRESSION