PostgreSQL Source Code  git master
inv_api.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * inv_api.c
4  * routines for manipulating inversion fs large objects. This file
5  * contains the user-level large object application interface routines.
6  *
7  *
8  * Note: we access pg_largeobject.data using its C struct declaration.
9  * This is safe because it immediately follows pageno which is an int4 field,
10  * and therefore the data field will always be 4-byte aligned, even if it
11  * is in the short 1-byte-header format. We have to detoast it since it's
12  * quite likely to be in compressed or short format. We also need to check
13  * for NULLs, since initdb will mark loid and pageno but not data as NOT NULL.
14  *
15  * Note: many of these routines leak memory in CurrentMemoryContext, as indeed
16  * does most of the backend code. We expect that CurrentMemoryContext will
17  * be a short-lived context. Data that must persist across function calls
18  * is kept either in CacheMemoryContext (the Relation structs) or in the
19  * memory context given to inv_open (for LargeObjectDesc structs).
20  *
21  *
22  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
23  * Portions Copyright (c) 1994, Regents of the University of California
24  *
25  *
26  * IDENTIFICATION
27  * src/backend/storage/large_object/inv_api.c
28  *
29  *-------------------------------------------------------------------------
30  */
#include "postgres.h"

#include <limits.h>

#include "access/genam.h"
#include "access/heapam.h"
#include "access/sysattr.h"
#include "access/tuptoaster.h"
#include "access/xact.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/objectaccess.h"
#include "catalog/pg_largeobject.h"
#include "catalog/pg_largeobject_metadata.h"
#include "libpq/libpq-fs.h"
#include "miscadmin.h"
#include "storage/large_object.h"
#include "utils/fmgroids.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
#include "utils/tqual.h"
52 
53 
54 /*
55  * GUC: backwards-compatibility flag to suppress LO permission checks
56  */
58 
59 /*
60  * All accesses to pg_largeobject and its index make use of a single Relation
61  * reference, so that we only need to open pg_relation once per transaction.
62  * To avoid problems when the first such reference occurs inside a
63  * subtransaction, we execute a slightly klugy maneuver to assign ownership of
64  * the Relation reference to TopTransactionResourceOwner.
65  */
66 static Relation lo_heap_r = NULL;
67 static Relation lo_index_r = NULL;
68 
69 
70 /*
71  * Open pg_largeobject and its index, if not already done in current xact
72  */
73 static void
75 {
76  ResourceOwner currentOwner;
77 
78  if (lo_heap_r && lo_index_r)
79  return; /* already open in current xact */
80 
81  /* Arrange for the top xact to own these relation references */
82  currentOwner = CurrentResourceOwner;
84 
85  /* Use RowExclusiveLock since we might either read or write */
86  if (lo_heap_r == NULL)
88  if (lo_index_r == NULL)
90 
91  CurrentResourceOwner = currentOwner;
92 }
93 
94 /*
95  * Clean up at main transaction end
96  */
97 void
98 close_lo_relation(bool isCommit)
99 {
100  if (lo_heap_r || lo_index_r)
101  {
102  /*
103  * Only bother to close if committing; else abort cleanup will handle
104  * it
105  */
106  if (isCommit)
107  {
108  ResourceOwner currentOwner;
109 
110  currentOwner = CurrentResourceOwner;
112 
113  if (lo_index_r)
114  index_close(lo_index_r, NoLock);
115  if (lo_heap_r)
116  heap_close(lo_heap_r, NoLock);
117 
118  CurrentResourceOwner = currentOwner;
119  }
120  lo_heap_r = NULL;
121  lo_index_r = NULL;
122  }
123 }
124 
125 
126 /*
127  * Same as pg_largeobject.c's LargeObjectExists(), except snapshot to
128  * read with can be specified.
129  */
130 static bool
132 {
133  Relation pg_lo_meta;
134  ScanKeyData skey[1];
135  SysScanDesc sd;
136  HeapTuple tuple;
137  bool retval = false;
138 
139  ScanKeyInit(&skey[0],
141  BTEqualStrategyNumber, F_OIDEQ,
142  ObjectIdGetDatum(loid));
143 
146 
147  sd = systable_beginscan(pg_lo_meta,
149  snapshot, 1, skey);
150 
151  tuple = systable_getnext(sd);
152  if (HeapTupleIsValid(tuple))
153  retval = true;
154 
155  systable_endscan(sd);
156 
157  heap_close(pg_lo_meta, AccessShareLock);
158 
159  return retval;
160 }
161 
162 
163 /*
164  * Extract data field from a pg_largeobject tuple, detoasting if needed
165  * and verifying that the length is sane. Returns data pointer (a bytea *),
166  * data length, and an indication of whether to pfree the data pointer.
167  */
168 static void
170  bytea **pdatafield,
171  int *plen,
172  bool *pfreeit)
173 {
174  bytea *datafield;
175  int len;
176  bool freeit;
177 
178  datafield = &(tuple->data); /* see note at top of file */
179  freeit = false;
180  if (VARATT_IS_EXTENDED(datafield))
181  {
182  datafield = (bytea *)
183  heap_tuple_untoast_attr((struct varlena *) datafield);
184  freeit = true;
185  }
186  len = VARSIZE(datafield) - VARHDRSZ;
187  if (len < 0 || len > LOBLKSIZE)
188  ereport(ERROR,
189  (errcode(ERRCODE_DATA_CORRUPTED),
190  errmsg("pg_largeobject entry for OID %u, page %d has invalid data field size %d",
191  tuple->loid, tuple->pageno, len)));
192  *pdatafield = datafield;
193  *plen = len;
194  *pfreeit = freeit;
195 }
196 
197 
198 /*
199  * inv_create -- create a new large object
200  *
201  * Arguments:
202  * lobjId - OID to use for new large object, or InvalidOid to pick one
203  *
204  * Returns:
205  * OID of new object
206  *
207  * If lobjId is not InvalidOid, then an error occurs if the OID is already
208  * in use.
209  */
210 Oid
212 {
213  Oid lobjId_new;
214 
215  /*
216  * Create a new largeobject with empty data pages
217  */
218  lobjId_new = LargeObjectCreate(lobjId);
219 
220  /*
221  * dependency on the owner of largeobject
222  *
223  * The reason why we use LargeObjectRelationId instead of
224  * LargeObjectMetadataRelationId here is to provide backward compatibility
225  * to the applications which utilize a knowledge about internal layout of
226  * system catalogs. OID of pg_largeobject_metadata and loid of
227  * pg_largeobject are same value, so there are no actual differences here.
228  */
230  lobjId_new, GetUserId());
231 
232  /* Post creation hook for new large object */
234 
235  /*
236  * Advance command counter to make new tuple visible to later operations.
237  */
239 
240  return lobjId_new;
241 }
242 
243 /*
244  * inv_open -- access an existing large object.
245  *
246  * Returns:
247  * Large object descriptor, appropriately filled in. The descriptor
248  * and subsidiary data are allocated in the specified memory context,
249  * which must be suitably long-lived for the caller's purposes.
250  */
252 inv_open(Oid lobjId, int flags, MemoryContext mcxt)
253 {
254  LargeObjectDesc *retval;
255  Snapshot snapshot = NULL;
256  int descflags = 0;
257 
258  /*
259  * Historically, no difference is made between (INV_WRITE) and (INV_WRITE
260  * | INV_READ), the caller being allowed to read the large object
261  * descriptor in either case.
262  */
263  if (flags & INV_WRITE)
264  descflags |= IFS_WRLOCK | IFS_RDLOCK;
265  if (flags & INV_READ)
266  descflags |= IFS_RDLOCK;
267 
268  if (descflags == 0)
269  ereport(ERROR,
270  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
271  errmsg("invalid flags for opening a large object: %d",
272  flags)));
273 
274  /* Get snapshot. If write is requested, use an instantaneous snapshot. */
275  if (descflags & IFS_WRLOCK)
276  snapshot = NULL;
277  else
278  snapshot = GetActiveSnapshot();
279 
280  /* Can't use LargeObjectExists here because we need to specify snapshot */
281  if (!myLargeObjectExists(lobjId, snapshot))
282  ereport(ERROR,
283  (errcode(ERRCODE_UNDEFINED_OBJECT),
284  errmsg("large object %u does not exist", lobjId)));
285 
286  /* Apply permission checks, again specifying snapshot */
287  if ((descflags & IFS_RDLOCK) != 0)
288  {
289  if (!lo_compat_privileges &&
291  GetUserId(),
292  ACL_SELECT,
293  snapshot) != ACLCHECK_OK)
294  ereport(ERROR,
295  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
296  errmsg("permission denied for large object %u",
297  lobjId)));
298  }
299  if ((descflags & IFS_WRLOCK) != 0)
300  {
301  if (!lo_compat_privileges &&
303  GetUserId(),
304  ACL_UPDATE,
305  snapshot) != ACLCHECK_OK)
306  ereport(ERROR,
307  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
308  errmsg("permission denied for large object %u",
309  lobjId)));
310  }
311 
312  /* OK to create a descriptor */
313  retval = (LargeObjectDesc *) MemoryContextAlloc(mcxt,
314  sizeof(LargeObjectDesc));
315  retval->id = lobjId;
316  retval->subid = GetCurrentSubTransactionId();
317  retval->offset = 0;
318  retval->flags = descflags;
319 
320  /*
321  * We must register the snapshot in TopTransaction's resowner, because it
322  * must stay alive until the LO is closed rather than until the current
323  * portal shuts down. Do this last to avoid uselessly leaking the
324  * snapshot if an error is thrown above.
325  */
326  if (snapshot)
327  snapshot = RegisterSnapshotOnOwner(snapshot,
329  retval->snapshot = snapshot;
330 
331  return retval;
332 }
333 
334 /*
335  * Closes a large object descriptor previously made by inv_open(), and
336  * releases the long-term memory used by it.
337  */
338 void
340 {
341  Assert(PointerIsValid(obj_desc));
342 
345 
346  pfree(obj_desc);
347 }
348 
349 /*
350  * Destroys an existing large object (not to be confused with a descriptor!)
351  *
352  * Note we expect caller to have done any required permissions check.
353  */
354 int
355 inv_drop(Oid lobjId)
356 {
357  ObjectAddress object;
358 
359  /*
360  * Delete any comments and dependencies on the large object
361  */
363  object.objectId = lobjId;
364  object.objectSubId = 0;
365  performDeletion(&object, DROP_CASCADE, 0);
366 
367  /*
368  * Advance command counter so that tuple removal will be seen by later
369  * large-object operations in this transaction.
370  */
372 
373  /* For historical reasons, we always return 1 on success. */
374  return 1;
375 }
376 
377 /*
378  * Determine size of a large object
379  *
380  * NOTE: LOs can contain gaps, just like Unix files. We actually return
381  * the offset of the last byte + 1.
382  */
383 static uint64
385 {
386  uint64 lastbyte = 0;
387  ScanKeyData skey[1];
388  SysScanDesc sd;
389  HeapTuple tuple;
390 
391  Assert(PointerIsValid(obj_desc));
392 
394 
395  ScanKeyInit(&skey[0],
397  BTEqualStrategyNumber, F_OIDEQ,
398  ObjectIdGetDatum(obj_desc->id));
399 
400  sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
401  obj_desc->snapshot, 1, skey);
402 
403  /*
404  * Because the pg_largeobject index is on both loid and pageno, but we
405  * constrain only loid, a backwards scan should visit all pages of the
406  * large object in reverse pageno order. So, it's sufficient to examine
407  * the first valid tuple (== last valid page).
408  */
410  if (HeapTupleIsValid(tuple))
411  {
412  Form_pg_largeobject data;
413  bytea *datafield;
414  int len;
415  bool pfreeit;
416 
417  if (HeapTupleHasNulls(tuple)) /* paranoia */
418  elog(ERROR, "null field found in pg_largeobject");
419  data = (Form_pg_largeobject) GETSTRUCT(tuple);
420  getdatafield(data, &datafield, &len, &pfreeit);
421  lastbyte = (uint64) data->pageno * LOBLKSIZE + len;
422  if (pfreeit)
423  pfree(datafield);
424  }
425 
427 
428  return lastbyte;
429 }
430 
431 int64
432 inv_seek(LargeObjectDesc *obj_desc, int64 offset, int whence)
433 {
434  int64 newoffset;
435 
436  Assert(PointerIsValid(obj_desc));
437 
438  /*
439  * We allow seek/tell if you have either read or write permission, so no
440  * need for a permission check here.
441  */
442 
443  /*
444  * Note: overflow in the additions is possible, but since we will reject
445  * negative results, we don't need any extra test for that.
446  */
447  switch (whence)
448  {
449  case SEEK_SET:
450  newoffset = offset;
451  break;
452  case SEEK_CUR:
453  newoffset = obj_desc->offset + offset;
454  break;
455  case SEEK_END:
456  newoffset = inv_getsize(obj_desc) + offset;
457  break;
458  default:
459  ereport(ERROR,
460  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
461  errmsg("invalid whence setting: %d", whence)));
462  newoffset = 0; /* keep compiler quiet */
463  break;
464  }
465 
466  /*
467  * use errmsg_internal here because we don't want to expose INT64_FORMAT
468  * in translatable strings; doing better is not worth the trouble
469  */
470  if (newoffset < 0 || newoffset > MAX_LARGE_OBJECT_SIZE)
471  ereport(ERROR,
472  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
473  errmsg_internal("invalid large object seek target: " INT64_FORMAT,
474  newoffset)));
475 
476  obj_desc->offset = newoffset;
477  return newoffset;
478 }
479 
480 int64
482 {
483  Assert(PointerIsValid(obj_desc));
484 
485  /*
486  * We allow seek/tell if you have either read or write permission, so no
487  * need for a permission check here.
488  */
489 
490  return obj_desc->offset;
491 }
492 
493 int
494 inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes)
495 {
496  int nread = 0;
497  int64 n;
498  int64 off;
499  int len;
500  int32 pageno = (int32) (obj_desc->offset / LOBLKSIZE);
501  uint64 pageoff;
502  ScanKeyData skey[2];
503  SysScanDesc sd;
504  HeapTuple tuple;
505 
506  Assert(PointerIsValid(obj_desc));
507  Assert(buf != NULL);
508 
509  if ((obj_desc->flags & IFS_RDLOCK) == 0)
510  ereport(ERROR,
511  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
512  errmsg("permission denied for large object %u",
513  obj_desc->id)));
514 
515  if (nbytes <= 0)
516  return 0;
517 
519 
520  ScanKeyInit(&skey[0],
522  BTEqualStrategyNumber, F_OIDEQ,
523  ObjectIdGetDatum(obj_desc->id));
524 
525  ScanKeyInit(&skey[1],
528  Int32GetDatum(pageno));
529 
530  sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
531  obj_desc->snapshot, 2, skey);
532 
533  while ((tuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
534  {
535  Form_pg_largeobject data;
536  bytea *datafield;
537  bool pfreeit;
538 
539  if (HeapTupleHasNulls(tuple)) /* paranoia */
540  elog(ERROR, "null field found in pg_largeobject");
541  data = (Form_pg_largeobject) GETSTRUCT(tuple);
542 
543  /*
544  * We expect the indexscan will deliver pages in order. However,
545  * there may be missing pages if the LO contains unwritten "holes". We
546  * want missing sections to read out as zeroes.
547  */
548  pageoff = ((uint64) data->pageno) * LOBLKSIZE;
549  if (pageoff > obj_desc->offset)
550  {
551  n = pageoff - obj_desc->offset;
552  n = (n <= (nbytes - nread)) ? n : (nbytes - nread);
553  MemSet(buf + nread, 0, n);
554  nread += n;
555  obj_desc->offset += n;
556  }
557 
558  if (nread < nbytes)
559  {
560  Assert(obj_desc->offset >= pageoff);
561  off = (int) (obj_desc->offset - pageoff);
562  Assert(off >= 0 && off < LOBLKSIZE);
563 
564  getdatafield(data, &datafield, &len, &pfreeit);
565  if (len > off)
566  {
567  n = len - off;
568  n = (n <= (nbytes - nread)) ? n : (nbytes - nread);
569  memcpy(buf + nread, VARDATA(datafield) + off, n);
570  nread += n;
571  obj_desc->offset += n;
572  }
573  if (pfreeit)
574  pfree(datafield);
575  }
576 
577  if (nread >= nbytes)
578  break;
579  }
580 
582 
583  return nread;
584 }
585 
586 int
587 inv_write(LargeObjectDesc *obj_desc, const char *buf, int nbytes)
588 {
589  int nwritten = 0;
590  int n;
591  int off;
592  int len;
593  int32 pageno = (int32) (obj_desc->offset / LOBLKSIZE);
594  ScanKeyData skey[2];
595  SysScanDesc sd;
596  HeapTuple oldtuple;
597  Form_pg_largeobject olddata;
598  bool neednextpage;
599  bytea *datafield;
600  bool pfreeit;
601  union
602  {
603  bytea hdr;
604  /* this is to make the union big enough for a LO data chunk: */
605  char data[LOBLKSIZE + VARHDRSZ];
606  /* ensure union is aligned well enough: */
607  int32 align_it;
608  } workbuf;
609  char *workb = VARDATA(&workbuf.hdr);
610  HeapTuple newtup;
612  bool nulls[Natts_pg_largeobject];
613  bool replace[Natts_pg_largeobject];
614  CatalogIndexState indstate;
615 
616  Assert(PointerIsValid(obj_desc));
617  Assert(buf != NULL);
618 
619  /* enforce writability because snapshot is probably wrong otherwise */
620  if ((obj_desc->flags & IFS_WRLOCK) == 0)
621  ereport(ERROR,
622  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
623  errmsg("permission denied for large object %u",
624  obj_desc->id)));
625 
626  if (nbytes <= 0)
627  return 0;
628 
629  /* this addition can't overflow because nbytes is only int32 */
630  if ((nbytes + obj_desc->offset) > MAX_LARGE_OBJECT_SIZE)
631  ereport(ERROR,
632  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
633  errmsg("invalid large object write request size: %d",
634  nbytes)));
635 
637 
638  indstate = CatalogOpenIndexes(lo_heap_r);
639 
640  ScanKeyInit(&skey[0],
642  BTEqualStrategyNumber, F_OIDEQ,
643  ObjectIdGetDatum(obj_desc->id));
644 
645  ScanKeyInit(&skey[1],
648  Int32GetDatum(pageno));
649 
650  sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
651  obj_desc->snapshot, 2, skey);
652 
653  oldtuple = NULL;
654  olddata = NULL;
655  neednextpage = true;
656 
657  while (nwritten < nbytes)
658  {
659  /*
660  * If possible, get next pre-existing page of the LO. We expect the
661  * indexscan will deliver these in order --- but there may be holes.
662  */
663  if (neednextpage)
664  {
665  if ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
666  {
667  if (HeapTupleHasNulls(oldtuple)) /* paranoia */
668  elog(ERROR, "null field found in pg_largeobject");
669  olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple);
670  Assert(olddata->pageno >= pageno);
671  }
672  neednextpage = false;
673  }
674 
675  /*
676  * If we have a pre-existing page, see if it is the page we want to
677  * write, or a later one.
678  */
679  if (olddata != NULL && olddata->pageno == pageno)
680  {
681  /*
682  * Update an existing page with fresh data.
683  *
684  * First, load old data into workbuf
685  */
686  getdatafield(olddata, &datafield, &len, &pfreeit);
687  memcpy(workb, VARDATA(datafield), len);
688  if (pfreeit)
689  pfree(datafield);
690 
691  /*
692  * Fill any hole
693  */
694  off = (int) (obj_desc->offset % LOBLKSIZE);
695  if (off > len)
696  MemSet(workb + len, 0, off - len);
697 
698  /*
699  * Insert appropriate portion of new data
700  */
701  n = LOBLKSIZE - off;
702  n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten);
703  memcpy(workb + off, buf + nwritten, n);
704  nwritten += n;
705  obj_desc->offset += n;
706  off += n;
707  /* compute valid length of new page */
708  len = (len >= off) ? len : off;
709  SET_VARSIZE(&workbuf.hdr, len + VARHDRSZ);
710 
711  /*
712  * Form and insert updated tuple
713  */
714  memset(values, 0, sizeof(values));
715  memset(nulls, false, sizeof(nulls));
716  memset(replace, false, sizeof(replace));
717  values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
718  replace[Anum_pg_largeobject_data - 1] = true;
719  newtup = heap_modify_tuple(oldtuple, RelationGetDescr(lo_heap_r),
720  values, nulls, replace);
721  CatalogTupleUpdateWithInfo(lo_heap_r, &newtup->t_self, newtup,
722  indstate);
723  heap_freetuple(newtup);
724 
725  /*
726  * We're done with this old page.
727  */
728  oldtuple = NULL;
729  olddata = NULL;
730  neednextpage = true;
731  }
732  else
733  {
734  /*
735  * Write a brand new page.
736  *
737  * First, fill any hole
738  */
739  off = (int) (obj_desc->offset % LOBLKSIZE);
740  if (off > 0)
741  MemSet(workb, 0, off);
742 
743  /*
744  * Insert appropriate portion of new data
745  */
746  n = LOBLKSIZE - off;
747  n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten);
748  memcpy(workb + off, buf + nwritten, n);
749  nwritten += n;
750  obj_desc->offset += n;
751  /* compute valid length of new page */
752  len = off + n;
753  SET_VARSIZE(&workbuf.hdr, len + VARHDRSZ);
754 
755  /*
756  * Form and insert updated tuple
757  */
758  memset(values, 0, sizeof(values));
759  memset(nulls, false, sizeof(nulls));
760  values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
761  values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
762  values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
763  newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls);
764  CatalogTupleInsertWithInfo(lo_heap_r, newtup, indstate);
765  heap_freetuple(newtup);
766  }
767  pageno++;
768  }
769 
771 
772  CatalogCloseIndexes(indstate);
773 
774  /*
775  * Advance command counter so that my tuple updates will be seen by later
776  * large-object operations in this transaction.
777  */
779 
780  return nwritten;
781 }
782 
783 void
784 inv_truncate(LargeObjectDesc *obj_desc, int64 len)
785 {
786  int32 pageno = (int32) (len / LOBLKSIZE);
787  int32 off;
788  ScanKeyData skey[2];
789  SysScanDesc sd;
790  HeapTuple oldtuple;
791  Form_pg_largeobject olddata;
792  union
793  {
794  bytea hdr;
795  /* this is to make the union big enough for a LO data chunk: */
796  char data[LOBLKSIZE + VARHDRSZ];
797  /* ensure union is aligned well enough: */
798  int32 align_it;
799  } workbuf;
800  char *workb = VARDATA(&workbuf.hdr);
801  HeapTuple newtup;
803  bool nulls[Natts_pg_largeobject];
804  bool replace[Natts_pg_largeobject];
805  CatalogIndexState indstate;
806 
807  Assert(PointerIsValid(obj_desc));
808 
809  /* enforce writability because snapshot is probably wrong otherwise */
810  if ((obj_desc->flags & IFS_WRLOCK) == 0)
811  ereport(ERROR,
812  (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
813  errmsg("permission denied for large object %u",
814  obj_desc->id)));
815 
816  /*
817  * use errmsg_internal here because we don't want to expose INT64_FORMAT
818  * in translatable strings; doing better is not worth the trouble
819  */
820  if (len < 0 || len > MAX_LARGE_OBJECT_SIZE)
821  ereport(ERROR,
822  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
823  errmsg_internal("invalid large object truncation target: " INT64_FORMAT,
824  len)));
825 
827 
828  indstate = CatalogOpenIndexes(lo_heap_r);
829 
830  /*
831  * Set up to find all pages with desired loid and pageno >= target
832  */
833  ScanKeyInit(&skey[0],
835  BTEqualStrategyNumber, F_OIDEQ,
836  ObjectIdGetDatum(obj_desc->id));
837 
838  ScanKeyInit(&skey[1],
841  Int32GetDatum(pageno));
842 
843  sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
844  obj_desc->snapshot, 2, skey);
845 
846  /*
847  * If possible, get the page the truncation point is in. The truncation
848  * point may be beyond the end of the LO or in a hole.
849  */
850  olddata = NULL;
851  if ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
852  {
853  if (HeapTupleHasNulls(oldtuple)) /* paranoia */
854  elog(ERROR, "null field found in pg_largeobject");
855  olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple);
856  Assert(olddata->pageno >= pageno);
857  }
858 
859  /*
860  * If we found the page of the truncation point we need to truncate the
861  * data in it. Otherwise if we're in a hole, we need to create a page to
862  * mark the end of data.
863  */
864  if (olddata != NULL && olddata->pageno == pageno)
865  {
866  /* First, load old data into workbuf */
867  bytea *datafield;
868  int pagelen;
869  bool pfreeit;
870 
871  getdatafield(olddata, &datafield, &pagelen, &pfreeit);
872  memcpy(workb, VARDATA(datafield), pagelen);
873  if (pfreeit)
874  pfree(datafield);
875 
876  /*
877  * Fill any hole
878  */
879  off = len % LOBLKSIZE;
880  if (off > pagelen)
881  MemSet(workb + pagelen, 0, off - pagelen);
882 
883  /* compute length of new page */
884  SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ);
885 
886  /*
887  * Form and insert updated tuple
888  */
889  memset(values, 0, sizeof(values));
890  memset(nulls, false, sizeof(nulls));
891  memset(replace, false, sizeof(replace));
892  values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
893  replace[Anum_pg_largeobject_data - 1] = true;
894  newtup = heap_modify_tuple(oldtuple, RelationGetDescr(lo_heap_r),
895  values, nulls, replace);
896  CatalogTupleUpdateWithInfo(lo_heap_r, &newtup->t_self, newtup,
897  indstate);
898  heap_freetuple(newtup);
899  }
900  else
901  {
902  /*
903  * If the first page we found was after the truncation point, we're in
904  * a hole that we'll fill, but we need to delete the later page
905  * because the loop below won't visit it again.
906  */
907  if (olddata != NULL)
908  {
909  Assert(olddata->pageno > pageno);
910  CatalogTupleDelete(lo_heap_r, &oldtuple->t_self);
911  }
912 
913  /*
914  * Write a brand new page.
915  *
916  * Fill the hole up to the truncation point
917  */
918  off = len % LOBLKSIZE;
919  if (off > 0)
920  MemSet(workb, 0, off);
921 
922  /* compute length of new page */
923  SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ);
924 
925  /*
926  * Form and insert new tuple
927  */
928  memset(values, 0, sizeof(values));
929  memset(nulls, false, sizeof(nulls));
930  values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
931  values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
932  values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
933  newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls);
934  CatalogTupleInsertWithInfo(lo_heap_r, newtup, indstate);
935  heap_freetuple(newtup);
936  }
937 
938  /*
939  * Delete any pages after the truncation point. If the initial search
940  * didn't find a page, then of course there's nothing more to do.
941  */
942  if (olddata != NULL)
943  {
944  while ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
945  {
946  CatalogTupleDelete(lo_heap_r, &oldtuple->t_self);
947  }
948  }
949 
951 
952  CatalogCloseIndexes(indstate);
953 
954  /*
955  * Advance command counter so that tuple updates will be seen by later
956  * large-object operations in this transaction.
957  */
959 }
static bool myLargeObjectExists(Oid loid, Snapshot snapshot)
Definition: inv_api.c:131
#define MAX_LARGE_OBJECT_SIZE
Definition: large_object.h:76
static void open_lo_relation(void)
Definition: inv_api.c:74
#define LOBLKSIZE
Definition: large_object.h:70
#define IFS_RDLOCK
Definition: large_object.h:48
int64 inv_seek(LargeObjectDesc *obj_desc, int64 offset, int whence)
Definition: inv_api.c:432
#define VARDATA(PTR)
Definition: postgres.h:303
void inv_truncate(LargeObjectDesc *obj_desc, int64 len)
Definition: inv_api.c:784
void systable_endscan(SysScanDesc sysscan)
Definition: genam.c:499
#define GETSTRUCT(TUP)
Definition: htup_details.h:661
bool lo_compat_privileges
Definition: inv_api.c:57
#define InvokeObjectPostCreateHook(classId, objectId, subId)
Definition: objectaccess.h:145
Oid inv_create(Oid lobjId)
Definition: inv_api.c:211
#define RelationGetDescr(relation)
Definition: rel.h:437
Oid GetUserId(void)
Definition: miscinit.c:284
#define ObjectIdAttributeNumber
Definition: sysattr.h:22
ResourceOwner TopTransactionResourceOwner
Definition: resowner.c:140
#define VARSIZE(PTR)
Definition: postgres.h:304
#define PointerGetDatum(X)
Definition: postgres.h:562
#define VARHDRSZ
Definition: c.h:493
ResourceOwner CurrentResourceOwner
Definition: resowner.c:138
void inv_close(LargeObjectDesc *obj_desc)
Definition: inv_api.c:339
Snapshot GetActiveSnapshot(void)
Definition: snapmgr.c:839
#define AccessShareLock
Definition: lockdefs.h:36
int errcode(int sqlerrcode)
Definition: elog.c:575
#define MemSet(start, val, len)
Definition: c.h:853
void CatalogTupleDelete(Relation heapRel, ItemPointer tid)
Definition: indexing.c:255
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: heaptuple.c:695
struct varlena * heap_tuple_untoast_attr(struct varlena *attr)
Definition: tuptoaster.c:172
#define heap_close(r, l)
Definition: heapam.h:97
void recordDependencyOnOwner(Oid classId, Oid objectId, Oid owner)
Definition: pg_shdepend.c:159
Oid LargeObjectCreate(Oid loid)
HeapTuple systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction)
Definition: genam.c:597
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1373
unsigned int Oid
Definition: postgres_ext.h:31
SubTransactionId subid
Definition: large_object.h:43
SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key)
Definition: genam.c:328
static Relation lo_heap_r
Definition: inv_api.c:66
#define IFS_WRLOCK
Definition: large_object.h:49
signed int int32
Definition: c.h:284
int64 inv_tell(LargeObjectDesc *obj_desc)
Definition: inv_api.c:481
static Relation lo_index_r
Definition: inv_api.c:67
#define INV_READ
Definition: libpq-fs.h:22
HeapTuple systable_getnext(SysScanDesc sysscan)
Definition: genam.c:416
void pfree(void *pointer)
Definition: mcxt.c:949
#define ObjectIdGetDatum(X)
Definition: postgres.h:513
#define ERROR
Definition: elog.h:43
Snapshot snapshot
Definition: large_object.h:42
ItemPointerData t_self
Definition: htup.h:65
int inv_drop(Oid lobjId)
Definition: inv_api.c:355
#define HeapTupleHasNulls(tuple)
Definition: htup_details.h:667
#define LargeObjectLOidPNIndexId
Definition: indexing.h:180
#define NoLock
Definition: lockdefs.h:34
static char * buf
Definition: pg_test_fsync.c:67
#define RowExclusiveLock
Definition: lockdefs.h:38
void performDeletion(const ObjectAddress *object, DropBehavior behavior, int flags)
Definition: dependency.c:303
#define ereport(elevel, rest)
Definition: elog.h:122
#define LargeObjectMetadataOidIndexId
Definition: indexing.h:183
void CatalogTupleUpdateWithInfo(Relation heapRel, ItemPointer otid, HeapTuple tup, CatalogIndexState indstate)
Definition: indexing.c:231
#define ACL_UPDATE
Definition: parsenodes.h:74
static uint64 inv_getsize(LargeObjectDesc *obj_desc)
Definition: inv_api.c:384
#define Natts_pg_largeobject
void close_lo_relation(bool isCommit)
Definition: inv_api.c:98
uintptr_t Datum
Definition: postgres.h:372
void CommandCounterIncrement(void)
Definition: xact.c:915
#define ACL_SELECT
Definition: parsenodes.h:73
Relation heap_open(Oid relationId, LOCKMODE lockmode)
Definition: heapam.c:1290
void systable_endscan_ordered(SysScanDesc sysscan)
Definition: genam.c:614
TupleDesc rd_att
Definition: rel.h:115
Snapshot RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
Definition: snapmgr.c:876
int errmsg_internal(const char *fmt,...)
Definition: elog.c:827
#define HeapTupleIsValid(tuple)
Definition: htup.h:77
#define Assert(condition)
Definition: c.h:670
static void getdatafield(Form_pg_largeobject tuple, bytea **pdatafield, int *plen, bool *pfreeit)
Definition: inv_api.c:169
SubTransactionId GetCurrentSubTransactionId(void)
Definition: xact.c:642
#define LargeObjectMetadataRelationId
CatalogIndexState CatalogOpenIndexes(Relation heapRel)
Definition: indexing.c:40
#define Anum_pg_largeobject_data
void UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
Definition: snapmgr.c:918
#define INT64_FORMAT
Definition: c.h:338
#define INV_WRITE
Definition: libpq-fs.h:21
#define Anum_pg_largeobject_pageno
#define VARATT_IS_EXTENDED(PTR)
Definition: postgres.h:326
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:176
static Datum values[MAXATTR]
Definition: bootstrap.c:164
#define Int32GetDatum(X)
Definition: postgres.h:485
LargeObjectDesc * inv_open(Oid lobjId, int flags, MemoryContext mcxt)
Definition: inv_api.c:252
int errmsg(const char *fmt,...)
Definition: elog.c:797
SysScanDesc systable_beginscan_ordered(Relation heapRelation, Relation indexRelation, Snapshot snapshot, int nkeys, ScanKey key)
Definition: genam.c:533
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:706
AclResult pg_largeobject_aclcheck_snapshot(Oid lobj_oid, Oid roleid, AclMode mode, Snapshot snapshot)
Definition: aclchk.c:4528
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition: scankey.c:76
void CatalogCloseIndexes(CatalogIndexState indstate)
Definition: indexing.c:58
Definition: c.h:487
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:328
#define elog
Definition: elog.h:219
Oid CatalogTupleInsertWithInfo(Relation heapRel, HeapTuple tup, CatalogIndexState indstate)
Definition: indexing.c:186
FormData_pg_largeobject * Form_pg_largeobject
HeapTuple heap_modify_tuple(HeapTuple tuple, TupleDesc tupleDesc, Datum *replValues, bool *replIsnull, bool *doReplace)
Definition: heaptuple.c:794
#define Anum_pg_largeobject_loid
int inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes)
Definition: inv_api.c:494
#define LargeObjectRelationId
#define PointerIsValid(pointer)
Definition: c.h:564
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:151
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define BTGreaterEqualStrategyNumber
Definition: stratnum.h:32
int inv_write(LargeObjectDesc *obj_desc, const char *buf, int nbytes)
Definition: inv_api.c:587