PostgreSQL Source Code  git master
detoast.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * detoast.c
4  * Retrieve compressed or external variable size attributes.
5  *
6  * Copyright (c) 2000-2020, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/access/common/detoast.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #include "postgres.h"
15 
16 #include "access/detoast.h"
17 #include "access/table.h"
18 #include "access/tableam.h"
19 #include "access/toast_internals.h"
20 #include "common/pg_lzcompress.h"
21 #include "utils/expandeddatum.h"
22 #include "utils/rel.h"
23 
/* Forward declarations of the file-local (static) helpers defined further down in this file. */
24 static struct varlena *toast_fetch_datum(struct varlena *attr);
25 static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
26  int32 sliceoffset,
27  int32 slicelength);
28 static struct varlena *toast_decompress_datum(struct varlena *attr);
29 static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength);
30 
31 /* ----------
32  * detoast_external_attr -
33  *
34  * Public entry point to get back a toasted value from
35  * external source (possibly still in compressed format).
36  *
37  * This will return a datum that contains all the data internally, ie, not
38  * relying on external storage or memory, but it can still be compressed or
39  * have a short header. Note some callers assume that if the input is an
40  * EXTERNAL datum, the result will be a pfree'able chunk.
41  * ----------
42  */
43 struct varlena *
45 {
46  struct varlena *result;
47 
48  if (VARATT_IS_EXTERNAL_ONDISK(attr))
49  {
50  /*
51  * This is an external stored plain value
52  */
53  result = toast_fetch_datum(attr);
54  }
55  else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
56  {
57  /*
58  * This is an indirect pointer --- dereference it
59  */
60  struct varatt_indirect redirect;
61 
62  VARATT_EXTERNAL_GET_POINTER(redirect, attr);
63  attr = (struct varlena *) redirect.pointer;
64 
65  /* nested indirect Datums aren't allowed */
67 
68  /* recurse if value is still external in some other way */
69  if (VARATT_IS_EXTERNAL(attr))
70  return detoast_external_attr(attr);
71 
72  /*
73  * Copy into the caller's memory context, in case caller tries to
74  * pfree the result.
75  */
76  result = (struct varlena *) palloc(VARSIZE_ANY(attr));
77  memcpy(result, attr, VARSIZE_ANY(attr));
78  }
79  else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
80  {
81  /*
82  * This is an expanded-object pointer --- get flat format
83  */
85  Size resultsize;
86 
87  eoh = DatumGetEOHP(PointerGetDatum(attr));
88  resultsize = EOH_get_flat_size(eoh);
89  result = (struct varlena *) palloc(resultsize);
90  EOH_flatten_into(eoh, (void *) result, resultsize);
91  }
92  else
93  {
94  /*
95  * This is a plain value inside of the main tuple - why am I called?
96  */
97  result = attr;
98  }
99 
100  return result;
101 }
102 
103 
104 /* ----------
105  * detoast_attr -
106  *
107  * Public entry point to get back a toasted value from compression
108  * or external storage. The result is always non-extended varlena form.
109  *
110  * Note some callers assume that if the input is an EXTERNAL or COMPRESSED
111  * datum, the result will be a pfree'able chunk.
112  * ----------
113  */
114 struct varlena *
115 detoast_attr(struct varlena *attr)
116 {
117  if (VARATT_IS_EXTERNAL_ONDISK(attr))
118  {
119  /*
120  * This is an externally stored datum --- fetch it back from there
121  */
122  attr = toast_fetch_datum(attr);
123  /* If it's compressed, decompress it */
124  if (VARATT_IS_COMPRESSED(attr))
125  {
126  struct varlena *tmp = attr;
127 
128  attr = toast_decompress_datum(tmp);
129  pfree(tmp);
130  }
131  }
132  else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
133  {
134  /*
135  * This is an indirect pointer --- dereference it
136  */
137  struct varatt_indirect redirect;
138 
139  VARATT_EXTERNAL_GET_POINTER(redirect, attr);
140  attr = (struct varlena *) redirect.pointer;
141 
142  /* nested indirect Datums aren't allowed */
144 
145  /* recurse in case value is still extended in some other way */
146  attr = detoast_attr(attr);
147 
148  /* if it isn't, we'd better copy it */
149  if (attr == (struct varlena *) redirect.pointer)
150  {
151  struct varlena *result;
152 
153  result = (struct varlena *) palloc(VARSIZE_ANY(attr));
154  memcpy(result, attr, VARSIZE_ANY(attr));
155  attr = result;
156  }
157  }
158  else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
159  {
160  /*
161  * This is an expanded-object pointer --- get flat format
162  */
163  attr = detoast_external_attr(attr);
164  /* flatteners are not allowed to produce compressed/short output */
165  Assert(!VARATT_IS_EXTENDED(attr));
166  }
167  else if (VARATT_IS_COMPRESSED(attr))
168  {
169  /*
170  * This is a compressed value inside of the main tuple
171  */
172  attr = toast_decompress_datum(attr);
173  }
174  else if (VARATT_IS_SHORT(attr))
175  {
176  /*
177  * This is a short-header varlena --- convert to 4-byte header format
178  */
179  Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
180  Size new_size = data_size + VARHDRSZ;
181  struct varlena *new_attr;
182 
183  new_attr = (struct varlena *) palloc(new_size);
184  SET_VARSIZE(new_attr, new_size);
185  memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size);
186  attr = new_attr;
187  }
188 
189  return attr;
190 }
191 
192 
193 /* ----------
194  * detoast_attr_slice -
195  *
196  * Public entry point to get back part of a toasted value
197  * from compression or external storage.
198  *
199  * Note: When slicelength is negative, return suffix of the value.
200  * ----------
201  */
202 struct varlena *
204  int32 sliceoffset, int32 slicelength)
205 {
206  struct varlena *preslice;
207  struct varlena *result;
208  char *attrdata;
209  int32 attrsize;
210 
211  if (VARATT_IS_EXTERNAL_ONDISK(attr))
212  {
213  struct varatt_external toast_pointer;
214 
215  VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
216 
217  /* fast path for non-compressed external datums */
218  if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
219  return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
220 
221  /*
222  * For compressed values, we need to fetch enough slices to decompress
223  * at least the requested part (when a prefix is requested).
224  * Otherwise, just fetch all slices.
225  */
226  if (slicelength > 0 && sliceoffset >= 0)
227  {
228  int32 max_size;
229 
230  /*
231  * Determine maximum amount of compressed data needed for a prefix
232  * of a given length (after decompression).
233  */
234  max_size = pglz_maximum_compressed_size(sliceoffset + slicelength,
235  toast_pointer.va_extsize);
236 
237  /*
238  * Fetch enough compressed slices (compressed marker will get set
239  * automatically).
240  */
241  preslice = toast_fetch_datum_slice(attr, 0, max_size);
242  }
243  else
244  preslice = toast_fetch_datum(attr);
245  }
246  else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
247  {
248  struct varatt_indirect redirect;
249 
250  VARATT_EXTERNAL_GET_POINTER(redirect, attr);
251 
252  /* nested indirect Datums aren't allowed */
254 
255  return detoast_attr_slice(redirect.pointer,
256  sliceoffset, slicelength);
257  }
258  else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
259  {
260  /* pass it off to detoast_external_attr to flatten */
261  preslice = detoast_external_attr(attr);
262  }
263  else
264  preslice = attr;
265 
266  Assert(!VARATT_IS_EXTERNAL(preslice));
267 
268  if (VARATT_IS_COMPRESSED(preslice))
269  {
270  struct varlena *tmp = preslice;
271 
272  /* Decompress enough to encompass the slice and the offset */
273  if (slicelength > 0 && sliceoffset >= 0)
274  preslice = toast_decompress_datum_slice(tmp, slicelength + sliceoffset);
275  else
276  preslice = toast_decompress_datum(tmp);
277 
278  if (tmp != attr)
279  pfree(tmp);
280  }
281 
282  if (VARATT_IS_SHORT(preslice))
283  {
284  attrdata = VARDATA_SHORT(preslice);
285  attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT;
286  }
287  else
288  {
289  attrdata = VARDATA(preslice);
290  attrsize = VARSIZE(preslice) - VARHDRSZ;
291  }
292 
293  /* slicing of datum for compressed cases and plain value */
294 
295  if (sliceoffset >= attrsize)
296  {
297  sliceoffset = 0;
298  slicelength = 0;
299  }
300 
301  if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
302  slicelength = attrsize - sliceoffset;
303 
304  result = (struct varlena *) palloc(slicelength + VARHDRSZ);
305  SET_VARSIZE(result, slicelength + VARHDRSZ);
306 
307  memcpy(VARDATA(result), attrdata + sliceoffset, slicelength);
308 
309  if (preslice != attr)
310  pfree(preslice);
311 
312  return result;
313 }
314 
315 /* ----------
316  * toast_fetch_datum -
317  *
318  * Reconstruct an in memory Datum from the chunks saved
319  * in the toast relation
320  * ----------
321  */
322 static struct varlena *
324 {
325  Relation toastrel;
326  struct varlena *result;
327  struct varatt_external toast_pointer;
328  int32 attrsize;
329 
330  if (!VARATT_IS_EXTERNAL_ONDISK(attr))
331  elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
332 
333  /* Must copy to access aligned fields */
334  VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
335 
336  attrsize = toast_pointer.va_extsize;
337 
338  result = (struct varlena *) palloc(attrsize + VARHDRSZ);
339 
340  if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
341  SET_VARSIZE_COMPRESSED(result, attrsize + VARHDRSZ);
342  else
343  SET_VARSIZE(result, attrsize + VARHDRSZ);
344 
345  if (attrsize == 0)
346  return result; /* Probably shouldn't happen, but just in
347  * case. */
348 
349  /*
350  * Open the toast relation and its indexes
351  */
352  toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
353 
354  /* Fetch all chunks */
355  table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid,
356  attrsize, 0, attrsize, result);
357 
358  /* Close toast table */
359  table_close(toastrel, AccessShareLock);
360 
361  return result;
362 }
363 
364 /* ----------
365  * toast_fetch_datum_slice -
366  *
367  * Reconstruct a segment of a Datum from the chunks saved
368  * in the toast relation
369  *
370  * Note that this function supports non-compressed external datums
371  * and compressed external datums (in which case the requested slice
372  * has to be a prefix, i.e. sliceoffset has to be 0).
373  * ----------
374  */
375 static struct varlena *
376 toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
377  int32 slicelength)
378 {
379  Relation toastrel;
380  struct varlena *result;
381  struct varatt_external toast_pointer;
382  int32 attrsize;
383 
384  if (!VARATT_IS_EXTERNAL_ONDISK(attr))
385  elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
386 
387  /* Must copy to access aligned fields */
388  VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
389 
390  /*
391  * It's nonsense to fetch slices of a compressed datum unless when it's a
392  * prefix -- this isn't lo_* we can't return a compressed datum which is
393  * meaningful to toast later.
394  */
395  Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset);
396 
397  attrsize = toast_pointer.va_extsize;
398 
399  if (sliceoffset >= attrsize)
400  {
401  sliceoffset = 0;
402  slicelength = 0;
403  }
404 
405  /*
406  * When fetching a prefix of a compressed external datum, account for the
407  * rawsize tracking amount of raw data, which is stored at the beginning
408  * as an int32 value).
409  */
410  if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && slicelength > 0)
411  slicelength = slicelength + sizeof(int32);
412 
413  if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
414  slicelength = attrsize - sliceoffset;
415 
416  result = (struct varlena *) palloc(slicelength + VARHDRSZ);
417 
418  if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
419  SET_VARSIZE_COMPRESSED(result, slicelength + VARHDRSZ);
420  else
421  SET_VARSIZE(result, slicelength + VARHDRSZ);
422 
423  if (slicelength == 0)
424  return result; /* Can save a lot of work at this point! */
425 
426  /* Open the toast relation */
427  toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
428 
429  /* Fetch all chunks */
430  table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid,
431  attrsize, sliceoffset, slicelength,
432  result);
433 
434  /* Close toast table */
435  table_close(toastrel, AccessShareLock);
436 
437  return result;
438 }
439 
440 /* ----------
441  * toast_decompress_datum -
442  *
443  * Decompress a compressed version of a varlena datum
444  */
445 static struct varlena *
447 {
448  struct varlena *result;
449 
451 
452  result = (struct varlena *)
455 
457  TOAST_COMPRESS_SIZE(attr),
458  VARDATA(result),
459  TOAST_COMPRESS_RAWSIZE(attr), true) < 0)
460  elog(ERROR, "compressed data is corrupted");
461 
462  return result;
463 }
464 
465 
466 /* ----------
467  * toast_decompress_datum_slice -
468  *
469  * Decompress the front of a compressed version of a varlena datum.
470  * offset handling happens in detoast_attr_slice.
471  * Here we just decompress a slice from the front.
472  */
473 static struct varlena *
474 toast_decompress_datum_slice(struct varlena *attr, int32 slicelength)
475 {
476  struct varlena *result;
477  int32 rawsize;
478 
480 
481  result = (struct varlena *) palloc(slicelength + VARHDRSZ);
482 
483  rawsize = pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
485  VARDATA(result),
486  slicelength, false);
487  if (rawsize < 0)
488  elog(ERROR, "compressed data is corrupted");
489 
490  SET_VARSIZE(result, rawsize + VARHDRSZ);
491  return result;
492 }
493 
494 /* ----------
495  * toast_raw_datum_size -
496  *
497  * Return the raw (detoasted) size of a varlena datum
498  * (including the VARHDRSZ header)
499  * ----------
500  */
501 Size
503 {
504  struct varlena *attr = (struct varlena *) DatumGetPointer(value);
505  Size result;
506 
507  if (VARATT_IS_EXTERNAL_ONDISK(attr))
508  {
509  /* va_rawsize is the size of the original datum -- including header */
510  struct varatt_external toast_pointer;
511 
512  VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
513  result = toast_pointer.va_rawsize;
514  }
515  else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
516  {
517  struct varatt_indirect toast_pointer;
518 
519  VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
520 
521  /* nested indirect Datums aren't allowed */
522  Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer));
523 
524  return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
525  }
526  else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
527  {
528  result = EOH_get_flat_size(DatumGetEOHP(value));
529  }
530  else if (VARATT_IS_COMPRESSED(attr))
531  {
532  /* here, va_rawsize is just the payload size */
533  result = VARRAWSIZE_4B_C(attr) + VARHDRSZ;
534  }
535  else if (VARATT_IS_SHORT(attr))
536  {
537  /*
538  * we have to normalize the header length to VARHDRSZ or else the
539  * callers of this function will be confused.
540  */
541  result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ;
542  }
543  else
544  {
545  /* plain untoasted datum */
546  result = VARSIZE(attr);
547  }
548  return result;
549 }
550 
551 /* ----------
552  * toast_datum_size
553  *
554  * Return the physical storage size (possibly compressed) of a varlena datum
555  * ----------
556  */
557 Size
559 {
560  struct varlena *attr = (struct varlena *) DatumGetPointer(value);
561  Size result;
562 
563  if (VARATT_IS_EXTERNAL_ONDISK(attr))
564  {
565  /*
566  * Attribute is stored externally - return the extsize whether
567  * compressed or not. We do not count the size of the toast pointer
568  * ... should we?
569  */
570  struct varatt_external toast_pointer;
571 
572  VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
573  result = toast_pointer.va_extsize;
574  }
575  else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
576  {
577  struct varatt_indirect toast_pointer;
578 
579  VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
580 
581  /* nested indirect Datums aren't allowed */
583 
584  return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
585  }
586  else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
587  {
588  result = EOH_get_flat_size(DatumGetEOHP(value));
589  }
590  else if (VARATT_IS_SHORT(attr))
591  {
592  result = VARSIZE_SHORT(attr);
593  }
594  else
595  {
596  /*
597  * Attribute is stored inline either compressed or not, just calculate
598  * the size of the datum in either case.
599  */
600  result = VARSIZE(attr);
601  }
602  return result;
603 }
#define VARATT_IS_EXTERNAL_ONDISK(PTR)
Definition: postgres.h:314
#define VARATT_IS_COMPRESSED(PTR)
Definition: postgres.h:312
int32 pglz_maximum_compressed_size(int32 rawsize, int32 total_compressed_size)
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:167
#define VARDATA(PTR)
Definition: postgres.h:302
static struct @142 value
#define VARATT_IS_EXTERNAL_EXPANDED(PTR)
Definition: postgres.h:322
#define VARHDRSZ_SHORT
Definition: postgres.h:268
#define VARSIZE(PTR)
Definition: postgres.h:303
#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr)
Definition: detoast.h:32
#define PointerGetDatum(X)
Definition: postgres.h:556
#define VARHDRSZ
Definition: c.h:568
struct varlena * detoast_external_attr(struct varlena *attr)
Definition: detoast.c:44
static void table_relation_fetch_toast_slice(Relation toastrel, Oid valueid, int32 attrsize, int32 sliceoffset, int32 slicelength, struct varlena *result)
Definition: tableam.h:1693
#define AccessShareLock
Definition: lockdefs.h:36
Oid va_toastrelid
Definition: postgres.h:72
int32 va_rawsize
Definition: postgres.h:69
struct varlena * detoast_attr_slice(struct varlena *attr, int32 sliceoffset, int32 slicelength)
Definition: detoast.c:203
int32 pglz_decompress(const char *source, int32 slen, char *dest, int32 rawsize, bool check_complete)
signed int int32
Definition: c.h:362
Size toast_datum_size(Datum value)
Definition: detoast.c:558
#define VARATT_IS_EXTERNAL(PTR)
Definition: postgres.h:313
struct varlena * detoast_attr(struct varlena *attr)
Definition: detoast.c:115
void pfree(void *pointer)
Definition: mcxt.c:1057
#define VARATT_IS_EXTERNAL_INDIRECT(PTR)
Definition: postgres.h:316
#define ERROR
Definition: elog.h:43
#define TOAST_COMPRESS_RAWDATA(ptr)
#define VARATT_IS_SHORT(PTR)
Definition: postgres.h:326
Size EOH_get_flat_size(ExpandedObjectHeader *eohptr)
Definition: expandeddatum.c:75
Size toast_raw_datum_size(Datum value)
Definition: detoast.c:502
#define VARSIZE_SHORT(PTR)
Definition: postgres.h:305
ExpandedObjectHeader * DatumGetEOHP(Datum d)
Definition: expandeddatum.c:29
#define VARRAWSIZE_4B_C(PTR)
Definition: postgres.h:283
#define TOAST_COMPRESS_SIZE(ptr)
uintptr_t Datum
Definition: postgres.h:367
void EOH_flatten_into(ExpandedObjectHeader *eohptr, void *result, Size allocated_size)
Definition: expandeddatum.c:81
#define VARSIZE_ANY(PTR)
Definition: postgres.h:335
static struct varlena * toast_fetch_datum(struct varlena *attr)
Definition: detoast.c:323
#define Assert(condition)
Definition: c.h:745
static struct varlena * toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 slicelength)
Definition: detoast.c:376
int32 va_extsize
Definition: postgres.h:70
size_t Size
Definition: c.h:473
#define TOAST_COMPRESS_HDRSZ
struct varlena * pointer
Definition: postgres.h:86
#define VARATT_IS_EXTENDED(PTR)
Definition: postgres.h:327
#define DatumGetPointer(X)
Definition: postgres.h:549
#define VARDATA_SHORT(PTR)
Definition: postgres.h:306
void * palloc(Size size)
Definition: mcxt.c:950
#define elog(elevel,...)
Definition: elog.h:214
#define SET_VARSIZE_COMPRESSED(PTR, len)
Definition: postgres.h:331
#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)
Definition: detoast.h:22
Definition: c.h:562
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
static struct varlena * toast_decompress_datum(struct varlena *attr)
Definition: detoast.c:446
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
static struct varlena * toast_decompress_datum_slice(struct varlena *attr, int32 slicelength)
Definition: detoast.c:474
#define TOAST_COMPRESS_RAWSIZE(ptr)