PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
inv_api.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * inv_api.c
4  * routines for manipulating inversion fs large objects. This file
5  * contains the user-level large object application interface routines.
6  *
7  *
8  * Note: we access pg_largeobject.data using its C struct declaration.
9  * This is safe because it immediately follows pageno which is an int4 field,
10  * and therefore the data field will always be 4-byte aligned, even if it
11  * is in the short 1-byte-header format. We have to detoast it since it's
12  * quite likely to be in compressed or short format. We also need to check
13  * for NULLs, since initdb will mark loid and pageno but not data as NOT NULL.
14  *
15  * Note: many of these routines leak memory in CurrentMemoryContext, as indeed
16  * does most of the backend code. We expect that CurrentMemoryContext will
17  * be a short-lived context. Data that must persist across function calls
18  * is kept either in CacheMemoryContext (the Relation structs) or in the
19  * memory context given to inv_open (for LargeObjectDesc structs).
20  *
21  *
22  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
23  * Portions Copyright (c) 1994, Regents of the University of California
24  *
25  *
26  * IDENTIFICATION
27  * src/backend/storage/large_object/inv_api.c
28  *
29  *-------------------------------------------------------------------------
30  */
31 #include "postgres.h"
32 
33 #include <limits.h>
34 
35 #include "access/genam.h"
36 #include "access/heapam.h"
37 #include "access/sysattr.h"
38 #include "access/tuptoaster.h"
39 #include "access/xact.h"
40 #include "catalog/dependency.h"
41 #include "catalog/indexing.h"
42 #include "catalog/objectaccess.h"
43 #include "catalog/pg_largeobject.h"
45 #include "libpq/libpq-fs.h"
46 #include "miscadmin.h"
47 #include "storage/large_object.h"
48 #include "utils/fmgroids.h"
49 #include "utils/rel.h"
50 #include "utils/snapmgr.h"
51 #include "utils/tqual.h"
52 
53 
54 /*
55  * All accesses to pg_largeobject and its index make use of a single Relation
56  * reference, so that we only need to open pg_largeobject once per transaction.
57  * To avoid problems when the first such reference occurs inside a
58  * subtransaction, we execute a slightly klugy maneuver to assign ownership of
59  * the Relation reference to TopTransactionResourceOwner.
60  */
63 
64 
65 /*
66  * Open pg_largeobject and its index, if not already done in current xact
67  */
68 static void
70 {
71  ResourceOwner currentOwner;
72 
73  if (lo_heap_r && lo_index_r)
74  return; /* already open in current xact */
75 
76  /* Arrange for the top xact to own these relation references */
77  currentOwner = CurrentResourceOwner;
78  PG_TRY();
79  {
81 
82  /* Use RowExclusiveLock since we might either read or write */
83  if (lo_heap_r == NULL)
85  if (lo_index_r == NULL)
87  }
88  PG_CATCH();
89  {
90  /* Ensure CurrentResourceOwner is restored on error */
91  CurrentResourceOwner = currentOwner;
92  PG_RE_THROW();
93  }
94  PG_END_TRY();
95  CurrentResourceOwner = currentOwner;
96 }
97 
98 /*
99  * Clean up at main transaction end
100  */
101 void
102 close_lo_relation(bool isCommit)
103 {
104  if (lo_heap_r || lo_index_r)
105  {
106  /*
107  * Only bother to close if committing; else abort cleanup will handle
108  * it
109  */
110  if (isCommit)
111  {
112  ResourceOwner currentOwner;
113 
114  currentOwner = CurrentResourceOwner;
115  PG_TRY();
116  {
118 
119  if (lo_index_r)
120  index_close(lo_index_r, NoLock);
121  if (lo_heap_r)
122  heap_close(lo_heap_r, NoLock);
123  }
124  PG_CATCH();
125  {
126  /* Ensure CurrentResourceOwner is restored on error */
127  CurrentResourceOwner = currentOwner;
128  PG_RE_THROW();
129  }
130  PG_END_TRY();
131  CurrentResourceOwner = currentOwner;
132  }
133  lo_heap_r = NULL;
134  lo_index_r = NULL;
135  }
136 }
137 
138 
139 /*
140  * Same as pg_largeobject.c's LargeObjectExists(), except snapshot to
141  * read with can be specified.
142  */
143 static bool
145 {
146  Relation pg_lo_meta;
147  ScanKeyData skey[1];
148  SysScanDesc sd;
149  HeapTuple tuple;
150  bool retval = false;
151 
152  ScanKeyInit(&skey[0],
154  BTEqualStrategyNumber, F_OIDEQ,
155  ObjectIdGetDatum(loid));
156 
159 
160  sd = systable_beginscan(pg_lo_meta,
162  snapshot, 1, skey);
163 
164  tuple = systable_getnext(sd);
165  if (HeapTupleIsValid(tuple))
166  retval = true;
167 
168  systable_endscan(sd);
169 
170  heap_close(pg_lo_meta, AccessShareLock);
171 
172  return retval;
173 }
174 
175 
176 /*
177  * Extract data field from a pg_largeobject tuple, detoasting if needed
178  * and verifying that the length is sane. Returns data pointer (a bytea *),
179  * data length, and an indication of whether to pfree the data pointer.
180  */
181 static void
183  bytea **pdatafield,
184  int *plen,
185  bool *pfreeit)
186 {
187  bytea *datafield;
188  int len;
189  bool freeit;
190 
191  datafield = &(tuple->data); /* see note at top of file */
192  freeit = false;
193  if (VARATT_IS_EXTENDED(datafield))
194  {
195  datafield = (bytea *)
196  heap_tuple_untoast_attr((struct varlena *) datafield);
197  freeit = true;
198  }
199  len = VARSIZE(datafield) - VARHDRSZ;
200  if (len < 0 || len > LOBLKSIZE)
201  ereport(ERROR,
202  (errcode(ERRCODE_DATA_CORRUPTED),
203  errmsg("pg_largeobject entry for OID %u, page %d has invalid data field size %d",
204  tuple->loid, tuple->pageno, len)));
205  *pdatafield = datafield;
206  *plen = len;
207  *pfreeit = freeit;
208 }
209 
210 
211 /*
212  * inv_create -- create a new large object
213  *
214  * Arguments:
215  * lobjId - OID to use for new large object, or InvalidOid to pick one
216  *
217  * Returns:
218  * OID of new object
219  *
220  * If lobjId is not InvalidOid, then an error occurs if the OID is already
221  * in use.
222  */
223 Oid
225 {
226  Oid lobjId_new;
227 
228  /*
229  * Create a new largeobject with empty data pages
230  */
231  lobjId_new = LargeObjectCreate(lobjId);
232 
233  /*
234  * dependency on the owner of largeobject
235  *
236  * The reason why we use LargeObjectRelationId instead of
237  * LargeObjectMetadataRelationId here is to provide backward compatibility
238  * to the applications which utilize a knowledge about internal layout of
239  * system catalogs. OID of pg_largeobject_metadata and loid of
240  * pg_largeobject are same value, so there are no actual differences here.
241  */
243  lobjId_new, GetUserId());
244 
245  /* Post creation hook for new large object */
247 
248  /*
249  * Advance command counter to make new tuple visible to later operations.
250  */
252 
253  return lobjId_new;
254 }
255 
256 /*
257  * inv_open -- access an existing large object.
258  *
259  * Returns:
260  * Large object descriptor, appropriately filled in. The descriptor
261  * and subsidiary data are allocated in the specified memory context,
262  * which must be suitably long-lived for the caller's purposes.
263  */
265 inv_open(Oid lobjId, int flags, MemoryContext mcxt)
266 {
267  LargeObjectDesc *retval;
268  Snapshot snapshot = NULL;
269  int descflags = 0;
270 
271  if (flags & INV_WRITE)
272  {
273  snapshot = NULL; /* instantaneous MVCC snapshot */
274  descflags = IFS_WRLOCK | IFS_RDLOCK;
275  }
276  else if (flags & INV_READ)
277  {
278  snapshot = GetActiveSnapshot();
279  descflags = IFS_RDLOCK;
280  }
281  else
282  ereport(ERROR,
283  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
284  errmsg("invalid flags for opening a large object: %d",
285  flags)));
286 
287  /* Can't use LargeObjectExists here because we need to specify snapshot */
288  if (!myLargeObjectExists(lobjId, snapshot))
289  ereport(ERROR,
290  (errcode(ERRCODE_UNDEFINED_OBJECT),
291  errmsg("large object %u does not exist", lobjId)));
292 
293  /*
294  * We must register the snapshot in TopTransaction's resowner, because it
295  * must stay alive until the LO is closed rather than until the current
296  * portal shuts down. Do this after checking that the LO exists, to avoid
297  * leaking the snapshot if an error is thrown.
298  */
299  if (snapshot)
300  snapshot = RegisterSnapshotOnOwner(snapshot,
302 
303  /* All set, create a descriptor */
304  retval = (LargeObjectDesc *) MemoryContextAlloc(mcxt,
305  sizeof(LargeObjectDesc));
306  retval->id = lobjId;
307  retval->subid = GetCurrentSubTransactionId();
308  retval->offset = 0;
309  retval->snapshot = snapshot;
310  retval->flags = descflags;
311 
312  return retval;
313 }
314 
315 /*
316  * Closes a large object descriptor previously made by inv_open(), and
317  * releases the long-term memory used by it.
318  */
319 void
321 {
322  Assert(PointerIsValid(obj_desc));
323 
326 
327  pfree(obj_desc);
328 }
329 
330 /*
331  * Destroys an existing large object (not to be confused with a descriptor!)
332  *
333  * returns -1 if failed
334  */
335 int
336 inv_drop(Oid lobjId)
337 {
338  ObjectAddress object;
339 
340  /*
341  * Delete any comments and dependencies on the large object
342  */
344  object.objectId = lobjId;
345  object.objectSubId = 0;
346  performDeletion(&object, DROP_CASCADE, 0);
347 
348  /*
349  * Advance command counter so that tuple removal will be seen by later
350  * large-object operations in this transaction.
351  */
353 
354  return 1;
355 }
356 
357 /*
358  * Determine size of a large object
359  *
360  * NOTE: LOs can contain gaps, just like Unix files. We actually return
361  * the offset of the last byte + 1.
362  */
363 static uint64
365 {
366  uint64 lastbyte = 0;
367  ScanKeyData skey[1];
368  SysScanDesc sd;
369  HeapTuple tuple;
370 
371  Assert(PointerIsValid(obj_desc));
372 
374 
375  ScanKeyInit(&skey[0],
377  BTEqualStrategyNumber, F_OIDEQ,
378  ObjectIdGetDatum(obj_desc->id));
379 
380  sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
381  obj_desc->snapshot, 1, skey);
382 
383  /*
384  * Because the pg_largeobject index is on both loid and pageno, but we
385  * constrain only loid, a backwards scan should visit all pages of the
386  * large object in reverse pageno order. So, it's sufficient to examine
387  * the first valid tuple (== last valid page).
388  */
390  if (HeapTupleIsValid(tuple))
391  {
392  Form_pg_largeobject data;
393  bytea *datafield;
394  int len;
395  bool pfreeit;
396 
397  if (HeapTupleHasNulls(tuple)) /* paranoia */
398  elog(ERROR, "null field found in pg_largeobject");
399  data = (Form_pg_largeobject) GETSTRUCT(tuple);
400  getdatafield(data, &datafield, &len, &pfreeit);
401  lastbyte = (uint64) data->pageno * LOBLKSIZE + len;
402  if (pfreeit)
403  pfree(datafield);
404  }
405 
407 
408  return lastbyte;
409 }
410 
411 int64
412 inv_seek(LargeObjectDesc *obj_desc, int64 offset, int whence)
413 {
414  int64 newoffset;
415 
416  Assert(PointerIsValid(obj_desc));
417 
418  /*
419  * Note: overflow in the additions is possible, but since we will reject
420  * negative results, we don't need any extra test for that.
421  */
422  switch (whence)
423  {
424  case SEEK_SET:
425  newoffset = offset;
426  break;
427  case SEEK_CUR:
428  newoffset = obj_desc->offset + offset;
429  break;
430  case SEEK_END:
431  newoffset = inv_getsize(obj_desc) + offset;
432  break;
433  default:
434  ereport(ERROR,
435  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
436  errmsg("invalid whence setting: %d", whence)));
437  newoffset = 0; /* keep compiler quiet */
438  break;
439  }
440 
441  /*
442  * use errmsg_internal here because we don't want to expose INT64_FORMAT
443  * in translatable strings; doing better is not worth the trouble
444  */
445  if (newoffset < 0 || newoffset > MAX_LARGE_OBJECT_SIZE)
446  ereport(ERROR,
447  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
448  errmsg_internal("invalid large object seek target: " INT64_FORMAT,
449  newoffset)));
450 
451  obj_desc->offset = newoffset;
452  return newoffset;
453 }
454 
455 int64
457 {
458  Assert(PointerIsValid(obj_desc));
459 
460  return obj_desc->offset;
461 }
462 
463 int
464 inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes)
465 {
466  int nread = 0;
467  int64 n;
468  int64 off;
469  int len;
470  int32 pageno = (int32) (obj_desc->offset / LOBLKSIZE);
471  uint64 pageoff;
472  ScanKeyData skey[2];
473  SysScanDesc sd;
474  HeapTuple tuple;
475 
476  Assert(PointerIsValid(obj_desc));
477  Assert(buf != NULL);
478 
479  if (nbytes <= 0)
480  return 0;
481 
483 
484  ScanKeyInit(&skey[0],
486  BTEqualStrategyNumber, F_OIDEQ,
487  ObjectIdGetDatum(obj_desc->id));
488 
489  ScanKeyInit(&skey[1],
492  Int32GetDatum(pageno));
493 
494  sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
495  obj_desc->snapshot, 2, skey);
496 
497  while ((tuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
498  {
499  Form_pg_largeobject data;
500  bytea *datafield;
501  bool pfreeit;
502 
503  if (HeapTupleHasNulls(tuple)) /* paranoia */
504  elog(ERROR, "null field found in pg_largeobject");
505  data = (Form_pg_largeobject) GETSTRUCT(tuple);
506 
507  /*
508  * We expect the indexscan will deliver pages in order. However,
509  * there may be missing pages if the LO contains unwritten "holes". We
510  * want missing sections to read out as zeroes.
511  */
512  pageoff = ((uint64) data->pageno) * LOBLKSIZE;
513  if (pageoff > obj_desc->offset)
514  {
515  n = pageoff - obj_desc->offset;
516  n = (n <= (nbytes - nread)) ? n : (nbytes - nread);
517  MemSet(buf + nread, 0, n);
518  nread += n;
519  obj_desc->offset += n;
520  }
521 
522  if (nread < nbytes)
523  {
524  Assert(obj_desc->offset >= pageoff);
525  off = (int) (obj_desc->offset - pageoff);
526  Assert(off >= 0 && off < LOBLKSIZE);
527 
528  getdatafield(data, &datafield, &len, &pfreeit);
529  if (len > off)
530  {
531  n = len - off;
532  n = (n <= (nbytes - nread)) ? n : (nbytes - nread);
533  memcpy(buf + nread, VARDATA(datafield) + off, n);
534  nread += n;
535  obj_desc->offset += n;
536  }
537  if (pfreeit)
538  pfree(datafield);
539  }
540 
541  if (nread >= nbytes)
542  break;
543  }
544 
546 
547  return nread;
548 }
549 
550 int
551 inv_write(LargeObjectDesc *obj_desc, const char *buf, int nbytes)
552 {
553  int nwritten = 0;
554  int n;
555  int off;
556  int len;
557  int32 pageno = (int32) (obj_desc->offset / LOBLKSIZE);
558  ScanKeyData skey[2];
559  SysScanDesc sd;
560  HeapTuple oldtuple;
561  Form_pg_largeobject olddata;
562  bool neednextpage;
563  bytea *datafield;
564  bool pfreeit;
565  union
566  {
567  bytea hdr;
568  /* this is to make the union big enough for a LO data chunk: */
569  char data[LOBLKSIZE + VARHDRSZ];
570  /* ensure union is aligned well enough: */
571  int32 align_it;
572  } workbuf;
573  char *workb = VARDATA(&workbuf.hdr);
574  HeapTuple newtup;
576  bool nulls[Natts_pg_largeobject];
577  bool replace[Natts_pg_largeobject];
578  CatalogIndexState indstate;
579 
580  Assert(PointerIsValid(obj_desc));
581  Assert(buf != NULL);
582 
583  /* enforce writability because snapshot is probably wrong otherwise */
584  Assert(obj_desc->flags & IFS_WRLOCK);
585 
586  if (nbytes <= 0)
587  return 0;
588 
589  /* this addition can't overflow because nbytes is only int32 */
590  if ((nbytes + obj_desc->offset) > MAX_LARGE_OBJECT_SIZE)
591  ereport(ERROR,
592  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
593  errmsg("invalid large object write request size: %d",
594  nbytes)));
595 
597 
598  indstate = CatalogOpenIndexes(lo_heap_r);
599 
600  ScanKeyInit(&skey[0],
602  BTEqualStrategyNumber, F_OIDEQ,
603  ObjectIdGetDatum(obj_desc->id));
604 
605  ScanKeyInit(&skey[1],
608  Int32GetDatum(pageno));
609 
610  sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
611  obj_desc->snapshot, 2, skey);
612 
613  oldtuple = NULL;
614  olddata = NULL;
615  neednextpage = true;
616 
617  while (nwritten < nbytes)
618  {
619  /*
620  * If possible, get next pre-existing page of the LO. We expect the
621  * indexscan will deliver these in order --- but there may be holes.
622  */
623  if (neednextpage)
624  {
625  if ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
626  {
627  if (HeapTupleHasNulls(oldtuple)) /* paranoia */
628  elog(ERROR, "null field found in pg_largeobject");
629  olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple);
630  Assert(olddata->pageno >= pageno);
631  }
632  neednextpage = false;
633  }
634 
635  /*
636  * If we have a pre-existing page, see if it is the page we want to
637  * write, or a later one.
638  */
639  if (olddata != NULL && olddata->pageno == pageno)
640  {
641  /*
642  * Update an existing page with fresh data.
643  *
644  * First, load old data into workbuf
645  */
646  getdatafield(olddata, &datafield, &len, &pfreeit);
647  memcpy(workb, VARDATA(datafield), len);
648  if (pfreeit)
649  pfree(datafield);
650 
651  /*
652  * Fill any hole
653  */
654  off = (int) (obj_desc->offset % LOBLKSIZE);
655  if (off > len)
656  MemSet(workb + len, 0, off - len);
657 
658  /*
659  * Insert appropriate portion of new data
660  */
661  n = LOBLKSIZE - off;
662  n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten);
663  memcpy(workb + off, buf + nwritten, n);
664  nwritten += n;
665  obj_desc->offset += n;
666  off += n;
667  /* compute valid length of new page */
668  len = (len >= off) ? len : off;
669  SET_VARSIZE(&workbuf.hdr, len + VARHDRSZ);
670 
671  /*
672  * Form and insert updated tuple
673  */
674  memset(values, 0, sizeof(values));
675  memset(nulls, false, sizeof(nulls));
676  memset(replace, false, sizeof(replace));
677  values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
678  replace[Anum_pg_largeobject_data - 1] = true;
679  newtup = heap_modify_tuple(oldtuple, RelationGetDescr(lo_heap_r),
680  values, nulls, replace);
681  CatalogTupleUpdateWithInfo(lo_heap_r, &newtup->t_self, newtup,
682  indstate);
683  heap_freetuple(newtup);
684 
685  /*
686  * We're done with this old page.
687  */
688  oldtuple = NULL;
689  olddata = NULL;
690  neednextpage = true;
691  }
692  else
693  {
694  /*
695  * Write a brand new page.
696  *
697  * First, fill any hole
698  */
699  off = (int) (obj_desc->offset % LOBLKSIZE);
700  if (off > 0)
701  MemSet(workb, 0, off);
702 
703  /*
704  * Insert appropriate portion of new data
705  */
706  n = LOBLKSIZE - off;
707  n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten);
708  memcpy(workb + off, buf + nwritten, n);
709  nwritten += n;
710  obj_desc->offset += n;
711  /* compute valid length of new page */
712  len = off + n;
713  SET_VARSIZE(&workbuf.hdr, len + VARHDRSZ);
714 
715  /*
716  * Form and insert updated tuple
717  */
718  memset(values, 0, sizeof(values));
719  memset(nulls, false, sizeof(nulls));
720  values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
721  values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
722  values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
723  newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls);
724  CatalogTupleInsertWithInfo(lo_heap_r, newtup, indstate);
725  heap_freetuple(newtup);
726  }
727  pageno++;
728  }
729 
731 
732  CatalogCloseIndexes(indstate);
733 
734  /*
735  * Advance command counter so that my tuple updates will be seen by later
736  * large-object operations in this transaction.
737  */
739 
740  return nwritten;
741 }
742 
743 void
744 inv_truncate(LargeObjectDesc *obj_desc, int64 len)
745 {
746  int32 pageno = (int32) (len / LOBLKSIZE);
747  int32 off;
748  ScanKeyData skey[2];
749  SysScanDesc sd;
750  HeapTuple oldtuple;
751  Form_pg_largeobject olddata;
752  union
753  {
754  bytea hdr;
755  /* this is to make the union big enough for a LO data chunk: */
756  char data[LOBLKSIZE + VARHDRSZ];
757  /* ensure union is aligned well enough: */
758  int32 align_it;
759  } workbuf;
760  char *workb = VARDATA(&workbuf.hdr);
761  HeapTuple newtup;
763  bool nulls[Natts_pg_largeobject];
764  bool replace[Natts_pg_largeobject];
765  CatalogIndexState indstate;
766 
767  Assert(PointerIsValid(obj_desc));
768 
769  /* enforce writability because snapshot is probably wrong otherwise */
770  Assert(obj_desc->flags & IFS_WRLOCK);
771 
772  /*
773  * use errmsg_internal here because we don't want to expose INT64_FORMAT
774  * in translatable strings; doing better is not worth the trouble
775  */
776  if (len < 0 || len > MAX_LARGE_OBJECT_SIZE)
777  ereport(ERROR,
778  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
779  errmsg_internal("invalid large object truncation target: " INT64_FORMAT,
780  len)));
781 
783 
784  indstate = CatalogOpenIndexes(lo_heap_r);
785 
786  /*
787  * Set up to find all pages with desired loid and pageno >= target
788  */
789  ScanKeyInit(&skey[0],
791  BTEqualStrategyNumber, F_OIDEQ,
792  ObjectIdGetDatum(obj_desc->id));
793 
794  ScanKeyInit(&skey[1],
797  Int32GetDatum(pageno));
798 
799  sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
800  obj_desc->snapshot, 2, skey);
801 
802  /*
803  * If possible, get the page the truncation point is in. The truncation
804  * point may be beyond the end of the LO or in a hole.
805  */
806  olddata = NULL;
807  if ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
808  {
809  if (HeapTupleHasNulls(oldtuple)) /* paranoia */
810  elog(ERROR, "null field found in pg_largeobject");
811  olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple);
812  Assert(olddata->pageno >= pageno);
813  }
814 
815  /*
816  * If we found the page of the truncation point we need to truncate the
817  * data in it. Otherwise if we're in a hole, we need to create a page to
818  * mark the end of data.
819  */
820  if (olddata != NULL && olddata->pageno == pageno)
821  {
822  /* First, load old data into workbuf */
823  bytea *datafield;
824  int pagelen;
825  bool pfreeit;
826 
827  getdatafield(olddata, &datafield, &pagelen, &pfreeit);
828  memcpy(workb, VARDATA(datafield), pagelen);
829  if (pfreeit)
830  pfree(datafield);
831 
832  /*
833  * Fill any hole
834  */
835  off = len % LOBLKSIZE;
836  if (off > pagelen)
837  MemSet(workb + pagelen, 0, off - pagelen);
838 
839  /* compute length of new page */
840  SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ);
841 
842  /*
843  * Form and insert updated tuple
844  */
845  memset(values, 0, sizeof(values));
846  memset(nulls, false, sizeof(nulls));
847  memset(replace, false, sizeof(replace));
848  values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
849  replace[Anum_pg_largeobject_data - 1] = true;
850  newtup = heap_modify_tuple(oldtuple, RelationGetDescr(lo_heap_r),
851  values, nulls, replace);
852  CatalogTupleUpdateWithInfo(lo_heap_r, &newtup->t_self, newtup,
853  indstate);
854  heap_freetuple(newtup);
855  }
856  else
857  {
858  /*
859  * If the first page we found was after the truncation point, we're in
860  * a hole that we'll fill, but we need to delete the later page
861  * because the loop below won't visit it again.
862  */
863  if (olddata != NULL)
864  {
865  Assert(olddata->pageno > pageno);
866  CatalogTupleDelete(lo_heap_r, &oldtuple->t_self);
867  }
868 
869  /*
870  * Write a brand new page.
871  *
872  * Fill the hole up to the truncation point
873  */
874  off = len % LOBLKSIZE;
875  if (off > 0)
876  MemSet(workb, 0, off);
877 
878  /* compute length of new page */
879  SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ);
880 
881  /*
882  * Form and insert new tuple
883  */
884  memset(values, 0, sizeof(values));
885  memset(nulls, false, sizeof(nulls));
886  values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
887  values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
888  values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
889  newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls);
890  CatalogTupleInsertWithInfo(lo_heap_r, newtup, indstate);
891  heap_freetuple(newtup);
892  }
893 
894  /*
895  * Delete any pages after the truncation point. If the initial search
896  * didn't find a page, then of course there's nothing more to do.
897  */
898  if (olddata != NULL)
899  {
900  while ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
901  {
902  CatalogTupleDelete(lo_heap_r, &oldtuple->t_self);
903  }
904  }
905 
907 
908  CatalogCloseIndexes(indstate);
909 
910  /*
911  * Advance command counter so that tuple updates will be seen by later
912  * large-object operations in this transaction.
913  */
915 }
static bool myLargeObjectExists(Oid loid, Snapshot snapshot)
Definition: inv_api.c:144
#define MAX_LARGE_OBJECT_SIZE
Definition: large_object.h:78
static void open_lo_relation(void)
Definition: inv_api.c:69
#define LOBLKSIZE
Definition: large_object.h:72
#define IFS_RDLOCK
Definition: large_object.h:48
int64 inv_seek(LargeObjectDesc *obj_desc, int64 offset, int whence)
Definition: inv_api.c:412
#define VARDATA(PTR)
Definition: postgres.h:303
void inv_truncate(LargeObjectDesc *obj_desc, int64 len)
Definition: inv_api.c:744
void systable_endscan(SysScanDesc sysscan)
Definition: genam.c:499
#define GETSTRUCT(TUP)
Definition: htup_details.h:656
#define InvokeObjectPostCreateHook(classId, objectId, subId)
Definition: objectaccess.h:145
Oid inv_create(Oid lobjId)
Definition: inv_api.c:224
#define RelationGetDescr(relation)
Definition: rel.h:428
Oid GetUserId(void)
Definition: miscinit.c:284
#define ObjectIdAttributeNumber
Definition: sysattr.h:22
ResourceOwner TopTransactionResourceOwner
Definition: resowner.c:140
#define VARSIZE(PTR)
Definition: postgres.h:304
#define PointerGetDatum(X)
Definition: postgres.h:562
#define VARHDRSZ
Definition: c.h:445
ResourceOwner CurrentResourceOwner
Definition: resowner.c:138
void inv_close(LargeObjectDesc *obj_desc)
Definition: inv_api.c:320
Snapshot GetActiveSnapshot(void)
Definition: snapmgr.c:839
#define AccessShareLock
Definition: lockdefs.h:36
int errcode(int sqlerrcode)
Definition: elog.c:575
#define MemSet(start, val, len)
Definition: c.h:858
void CatalogTupleDelete(Relation heapRel, ItemPointer tid)
Definition: indexing.c:255
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: heaptuple.c:695
struct varlena * heap_tuple_untoast_attr(struct varlena *attr)
Definition: tuptoaster.c:172
#define heap_close(r, l)
Definition: heapam.h:97
void recordDependencyOnOwner(Oid classId, Oid objectId, Oid owner)
Definition: pg_shdepend.c:159
Oid LargeObjectCreate(Oid loid)
HeapTuple systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction)
Definition: genam.c:597
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1373
unsigned int Oid
Definition: postgres_ext.h:31
SubTransactionId subid
Definition: large_object.h:43
SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key)
Definition: genam.c:328
static Relation lo_heap_r
Definition: inv_api.c:61
#define IFS_WRLOCK
Definition: large_object.h:49
signed int int32
Definition: c.h:256
int64 inv_tell(LargeObjectDesc *obj_desc)
Definition: inv_api.c:456
static Relation lo_index_r
Definition: inv_api.c:62
#define INV_READ
Definition: libpq-fs.h:22
HeapTuple systable_getnext(SysScanDesc sysscan)
Definition: genam.c:416
void pfree(void *pointer)
Definition: mcxt.c:950
#define ObjectIdGetDatum(X)
Definition: postgres.h:513
#define ERROR
Definition: elog.h:43
Snapshot snapshot
Definition: large_object.h:42
ItemPointerData t_self
Definition: htup.h:65
int inv_drop(Oid lobjId)
Definition: inv_api.c:336
#define HeapTupleHasNulls(tuple)
Definition: htup_details.h:662
#define LargeObjectLOidPNIndexId
Definition: indexing.h:180
#define NoLock
Definition: lockdefs.h:34
static char * buf
Definition: pg_test_fsync.c:66
#define RowExclusiveLock
Definition: lockdefs.h:38
void performDeletion(const ObjectAddress *object, DropBehavior behavior, int flags)
Definition: dependency.c:303
#define ereport(elevel, rest)
Definition: elog.h:122
#define LargeObjectMetadataOidIndexId
Definition: indexing.h:183
void CatalogTupleUpdateWithInfo(Relation heapRel, ItemPointer otid, HeapTuple tup, CatalogIndexState indstate)
Definition: indexing.c:231
static uint64 inv_getsize(LargeObjectDesc *obj_desc)
Definition: inv_api.c:364
#define Natts_pg_largeobject
void close_lo_relation(bool isCommit)
Definition: inv_api.c:102
uintptr_t Datum
Definition: postgres.h:372
void CommandCounterIncrement(void)
Definition: xact.c:922
Relation heap_open(Oid relationId, LOCKMODE lockmode)
Definition: heapam.c:1290
void systable_endscan_ordered(SysScanDesc sysscan)
Definition: genam.c:614
TupleDesc rd_att
Definition: rel.h:115
Snapshot RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
Definition: snapmgr.c:876
int errmsg_internal(const char *fmt,...)
Definition: elog.c:827
#define PG_CATCH()
Definition: elog.h:293
#define HeapTupleIsValid(tuple)
Definition: htup.h:77
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:676
static void getdatafield(Form_pg_largeobject tuple, bytea **pdatafield, int *plen, bool *pfreeit)
Definition: inv_api.c:182
SubTransactionId GetCurrentSubTransactionId(void)
Definition: xact.c:649
#define LargeObjectMetadataRelationId
CatalogIndexState CatalogOpenIndexes(Relation heapRel)
Definition: indexing.c:40
#define Anum_pg_largeobject_data
void UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
Definition: snapmgr.c:918
#define PG_RE_THROW()
Definition: elog.h:314
#define INT64_FORMAT
Definition: c.h:315
#define INV_WRITE
Definition: libpq-fs.h:21
#define Anum_pg_largeobject_pageno
#define VARATT_IS_EXTENDED(PTR)
Definition: postgres.h:326
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:176
static Datum values[MAXATTR]
Definition: bootstrap.c:163
#define Int32GetDatum(X)
Definition: postgres.h:485
LargeObjectDesc * inv_open(Oid lobjId, int flags, MemoryContext mcxt)
Definition: inv_api.c:265
int errmsg(const char *fmt,...)
Definition: elog.c:797
SysScanDesc systable_beginscan_ordered(Relation heapRelation, Relation indexRelation, Snapshot snapshot, int nkeys, ScanKey key)
Definition: genam.c:533
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:707
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition: scankey.c:76
void CatalogCloseIndexes(CatalogIndexState indstate)
Definition: indexing.c:58
Definition: c.h:439
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:328
#define elog
Definition: elog.h:219
Oid CatalogTupleInsertWithInfo(Relation heapRel, HeapTuple tup, CatalogIndexState indstate)
Definition: indexing.c:186
FormData_pg_largeobject * Form_pg_largeobject
#define PG_TRY()
Definition: elog.h:284
HeapTuple heap_modify_tuple(HeapTuple tuple, TupleDesc tupleDesc, Datum *replValues, bool *replIsnull, bool *doReplace)
Definition: heaptuple.c:794
#define Anum_pg_largeobject_loid
int inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes)
Definition: inv_api.c:464
#define LargeObjectRelationId
#define PointerIsValid(pointer)
Definition: c.h:526
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:151
#define PG_END_TRY()
Definition: elog.h:300
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define BTGreaterEqualStrategyNumber
Definition: stratnum.h:32
int inv_write(LargeObjectDesc *obj_desc, const char *buf, int nbytes)
Definition: inv_api.c:551