PostgreSQL Source Code  git master
relcache.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * relcache.c
4  * POSTGRES relation descriptor cache code
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/cache/relcache.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  * RelationCacheInitialize - initialize relcache (to empty)
18  * RelationCacheInitializePhase2 - initialize shared-catalog entries
19  * RelationCacheInitializePhase3 - finish initializing relcache
20  * RelationIdGetRelation - get a reldesc by relation id
21  * RelationClose - close an open relation
22  *
23  * NOTES
24  * The following code contains many undocumented hacks. Please be
25  * careful....
26  */
27 #include "postgres.h"
28 
29 #include <sys/file.h>
30 #include <fcntl.h>
31 #include <unistd.h>
32 
33 #include "access/htup_details.h"
34 #include "access/multixact.h"
35 #include "access/nbtree.h"
36 #include "access/parallel.h"
37 #include "access/reloptions.h"
38 #include "access/sysattr.h"
39 #include "access/table.h"
40 #include "access/tableam.h"
41 #include "access/tupdesc_details.h"
42 #include "access/xact.h"
43 #include "access/xlog.h"
44 #include "catalog/catalog.h"
45 #include "catalog/indexing.h"
46 #include "catalog/namespace.h"
47 #include "catalog/partition.h"
48 #include "catalog/pg_am.h"
49 #include "catalog/pg_amproc.h"
50 #include "catalog/pg_attrdef.h"
52 #include "catalog/pg_authid.h"
53 #include "catalog/pg_constraint.h"
54 #include "catalog/pg_database.h"
55 #include "catalog/pg_namespace.h"
56 #include "catalog/pg_opclass.h"
57 #include "catalog/pg_proc.h"
58 #include "catalog/pg_publication.h"
59 #include "catalog/pg_rewrite.h"
60 #include "catalog/pg_shseclabel.h"
63 #include "catalog/pg_tablespace.h"
64 #include "catalog/pg_trigger.h"
65 #include "catalog/pg_type.h"
66 #include "catalog/schemapg.h"
67 #include "catalog/storage.h"
68 #include "commands/policy.h"
69 #include "commands/trigger.h"
70 #include "miscadmin.h"
71 #include "nodes/makefuncs.h"
72 #include "nodes/nodeFuncs.h"
73 #include "optimizer/optimizer.h"
74 #include "rewrite/rewriteDefine.h"
75 #include "rewrite/rowsecurity.h"
76 #include "storage/lmgr.h"
77 #include "storage/smgr.h"
78 #include "utils/array.h"
79 #include "utils/builtins.h"
80 #include "utils/datum.h"
81 #include "utils/fmgroids.h"
82 #include "utils/inval.h"
83 #include "utils/lsyscache.h"
84 #include "utils/memutils.h"
85 #include "utils/relmapper.h"
86 #include "utils/resowner_private.h"
87 #include "utils/snapmgr.h"
88 #include "utils/syscache.h"
89 
90 #define RELCACHE_INIT_FILEMAGIC 0x573266 /* version ID value */
91 
92 /*
93  * Default policy for whether to apply RECOVER_RELATION_BUILD_MEMORY:
94  * do so in clobber-cache builds but not otherwise. This choice can be
95  * overridden at compile time with -DRECOVER_RELATION_BUILD_MEMORY=1 or =0.
96  */
97 #ifndef RECOVER_RELATION_BUILD_MEMORY
98 #if defined(CLOBBER_CACHE_ALWAYS) || defined(CLOBBER_CACHE_RECURSIVELY)
99 #define RECOVER_RELATION_BUILD_MEMORY 1
100 #else
101 #define RECOVER_RELATION_BUILD_MEMORY 0
102 #endif
103 #endif
104 
105 /*
106  * hardcoded tuple descriptors, contents generated by genbki.pl
107  */
108 static const FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
109 static const FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
110 static const FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
111 static const FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
112 static const FormData_pg_attribute Desc_pg_database[Natts_pg_database] = {Schema_pg_database};
113 static const FormData_pg_attribute Desc_pg_authid[Natts_pg_authid] = {Schema_pg_authid};
114 static const FormData_pg_attribute Desc_pg_auth_members[Natts_pg_auth_members] = {Schema_pg_auth_members};
115 static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
116 static const FormData_pg_attribute Desc_pg_shseclabel[Natts_pg_shseclabel] = {Schema_pg_shseclabel};
117 static const FormData_pg_attribute Desc_pg_subscription[Natts_pg_subscription] = {Schema_pg_subscription};
118 
119 /*
120  * Hash tables that index the relation cache
121  *
122  * We used to index the cache by both name and OID, but now there
123  * is only an index by OID.
124  */
125 typedef struct relidcacheent
126 {
129 } RelIdCacheEnt;
130 
132 
133 /*
134  * This flag is false until we have prepared the critical relcache entries
135  * that are needed to do indexscans on the tables read by relcache building.
136  */
138 
139 /*
140  * This flag is false until we have prepared the critical relcache entries
141  * for shared catalogs (which are the tables needed for login).
142  */
144 
145 /*
146  * This counter counts relcache inval events received since backend startup
147  * (but only for rels that are actually in cache). Presently, we use it only
148  * to detect whether data about to be written by write_relcache_init_file()
149  * might already be obsolete.
150  */
151 static long relcacheInvalsReceived = 0L;
152 
153 /*
154  * eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
155  * cleanup work. This list intentionally has limited size; if it overflows,
156  * we fall back to scanning the whole hashtable. There is no value in a very
157  * large list because (1) at some point, a hash_seq_search scan is faster than
158  * retail lookups, and (2) the value of this is to reduce EOXact work for
159  * short transactions, which can't have dirtied all that many tables anyway.
160  * EOXactListAdd() does not bother to prevent duplicate list entries, so the
161  * cleanup processing must be idempotent.
162  */
163 #define MAX_EOXACT_LIST 32
165 static int eoxact_list_len = 0;
166 static bool eoxact_list_overflowed = false;
167 
/*
 * EOXactListAdd
 *		Remember (rel)->rd_id in eoxact_list[] if there is still room for it;
 *		once the list is full, just raise eoxact_list_overflowed instead.
 *
 * No attempt is made to detect an OID that is already in the list.
 */
#define EOXactListAdd(rel) \
	do { \
		if (eoxact_list_len >= MAX_EOXACT_LIST) \
			eoxact_list_overflowed = true; \
		else \
			eoxact_list[eoxact_list_len++] = (rel)->rd_id; \
	} while (0)
175 
176 /*
177  * EOXactTupleDescArray stores TupleDescs that (might) need AtEOXact
178  * cleanup work. The array expands as needed; there is no hashtable because
179  * we don't need to access individual items except at EOXact.
180  */
182 static int NextEOXactTupleDescNum = 0;
183 static int EOXactTupleDescArrayLen = 0;
184 
185 /*
186  * macros to manipulate the lookup hashtable
187  */
/*
 * RelationCacheInsert
 *		Enter RELATION into RelationIdCache under its rd_id.
 *
 * If no entry with that OID existed, the new hash entry simply points at
 * RELATION.  If one did exist, replace_allowed is asserted, the entry is
 * repointed at RELATION, and the descriptor it previously held is either
 * destroyed (reference count zero) or reported with a WARNING as leaked
 * (still referenced; the warning is suppressed in bootstrap mode).
 *
 * Note that RELATION is evaluated more than once.
 */
#define RelationCacheInsert(RELATION, replace_allowed)	\
do { \
	bool		_found; \
	RelIdCacheEnt *_entry; \
	_entry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
										   (void *) &((RELATION)->rd_id), \
										   HASH_ENTER, &_found); \
	if (!_found) \
		_entry->reldesc = (RELATION); \
	else \
	{ \
		/* see comments in RelationBuildDesc and RelationBuildLocalRelation */ \
		Relation	_old_rel = _entry->reldesc; \
		Assert(replace_allowed); \
		_entry->reldesc = (RELATION); \
		if (!RelationHasReferenceCountZero(_old_rel)) \
		{ \
			if (!IsBootstrapProcessingMode()) \
				elog(WARNING, "leaking still-referenced relcache entry for \"%s\"", \
					 RelationGetRelationName(_old_rel)); \
		} \
		else \
			RelationDestroyRelation(_old_rel, false); \
	} \
} while(0)
209 
/*
 * RelationIdCacheLookup
 *		Probe RelationIdCache for the OID held in ID (which must be an
 *		lvalue, since its address is taken) and assign the cached descriptor
 *		to RELATION, or NULL when there is no entry for that OID.
 */
#define RelationIdCacheLookup(ID, RELATION) \
do { \
	RelIdCacheEnt *_hit = (RelIdCacheEnt *) hash_search(RelationIdCache, \
														(void *) &(ID), \
														HASH_FIND, NULL); \
	RELATION = (_hit != NULL) ? _hit->reldesc : NULL; \
} while(0)
221 
/*
 * RelationCacheDelete
 *		Remove the RelationIdCache entry keyed by (RELATION)->rd_id.
 *
 * A WARNING (not an error) is issued if the hash table had no such entry.
 * Note that RELATION is evaluated more than once on that path.
 */
#define RelationCacheDelete(RELATION) \
do { \
	if (hash_search(RelationIdCache, \
					(void *) &((RELATION)->rd_id), \
					HASH_REMOVE, NULL) == NULL) \
		elog(WARNING, "failed to delete relcache entry for OID %u", \
			 (RELATION)->rd_id); \
} while(0)
232 
233 
234 /*
235  * Special cache for opclass-related information
236  *
237  * Note: only default support procs get cached, ie, those with
238  * lefttype = righttype = opcintype.
239  */
240 typedef struct opclasscacheent
241 {
242  Oid opclassoid; /* lookup key: OID of opclass */
243  bool valid; /* set true after successful fill-in */
244  StrategyNumber numSupport; /* max # of support procs (from pg_am) */
245  Oid opcfamily; /* OID of opclass's family */
246  Oid opcintype; /* OID of opclass's declared input type */
247  RegProcedure *supportProcs; /* OIDs of support procedures */
249 
250 static HTAB *OpClassCache = NULL;
251 
252 
253 /* non-export function prototypes */
254 
255 static void RelationDestroyRelation(Relation relation, bool remember_tupdesc);
256 static void RelationClearRelation(Relation relation, bool rebuild);
257 
258 static void RelationReloadIndexInfo(Relation relation);
259 static void RelationReloadNailed(Relation relation);
260 static void RelationFlushRelation(Relation relation);
262 #ifdef USE_ASSERT_CHECKING
263 static void AssertPendingSyncConsistency(Relation relation);
264 #endif
265 static void AtEOXact_cleanup(Relation relation, bool isCommit);
266 static void AtEOSubXact_cleanup(Relation relation, bool isCommit,
267  SubTransactionId mySubid, SubTransactionId parentSubid);
268 static bool load_relcache_init_file(bool shared);
269 static void write_relcache_init_file(bool shared);
270 static void write_item(const void *data, Size len, FILE *fp);
271 
272 static void formrdesc(const char *relationName, Oid relationReltype,
273  bool isshared, int natts, const FormData_pg_attribute *attrs);
274 
275 static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic);
277 static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
278 static void RelationBuildTupleDesc(Relation relation);
279 static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
280 static void RelationInitPhysicalAddr(Relation relation);
281 static void load_critical_index(Oid indexoid, Oid heapoid);
282 static TupleDesc GetPgClassDescriptor(void);
283 static TupleDesc GetPgIndexDescriptor(void);
284 static void AttrDefaultFetch(Relation relation);
285 static void CheckConstraintFetch(Relation relation);
286 static int CheckConstraintCmp(const void *a, const void *b);
287 static void InitIndexAmRoutine(Relation relation);
288 static void IndexSupportInitialize(oidvector *indclass,
289  RegProcedure *indexSupport,
290  Oid *opFamily,
291  Oid *opcInType,
292  StrategyNumber maxSupportNumber,
293  AttrNumber maxAttributeNumber);
294 static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
295  StrategyNumber numSupport);
296 static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
297 static void unlink_initfile(const char *initfilename, int elevel);
298 
299 
300 /*
301  * ScanPgRelation
302  *
303  * This is used by RelationBuildDesc to find a pg_class
304  * tuple matching targetRelId. The caller must hold at least
305  * AccessShareLock on the target relid to prevent concurrent-update
306  * scenarios; it isn't guaranteed that all scans used to build the
307  * relcache entry will use the same snapshot. If, for example,
308  * an attribute were to be added after scanning pg_class and before
309  * scanning pg_attribute, relnatts wouldn't match.
310  *
311  * NB: the returned tuple has been copied into palloc'd storage
312  * and must eventually be freed with heap_freetuple.
313  */
314 static HeapTuple
315 ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic)
316 {
317  HeapTuple pg_class_tuple;
318  Relation pg_class_desc;
319  SysScanDesc pg_class_scan;
320  ScanKeyData key[1];
321  Snapshot snapshot = NULL;
322 
323  /*
324  * If something goes wrong during backend startup, we might find ourselves
325  * trying to read pg_class before we've selected a database. That ain't
326  * gonna work, so bail out with a useful error message. If this happens,
327  * it probably means a relcache entry that needs to be nailed isn't.
328  */
329  if (!OidIsValid(MyDatabaseId))
330  elog(FATAL, "cannot read pg_class without having selected a database");
331 
332  /*
333  * form a scan key
334  */
335  ScanKeyInit(&key[0],
336  Anum_pg_class_oid,
337  BTEqualStrategyNumber, F_OIDEQ,
338  ObjectIdGetDatum(targetRelId));
339 
340  /*
341  * Open pg_class and fetch a tuple. Force heap scan if we haven't yet
342  * built the critical relcache entries (this includes initdb and startup
343  * without a pg_internal.init file). The caller can also force a heap
344  * scan by setting indexOK == false.
345  */
346  pg_class_desc = table_open(RelationRelationId, AccessShareLock);
347 
348  /*
349  * The caller might need a tuple that's newer than the one the historic
350  * snapshot; currently the only case requiring to do so is looking up the
351  * relfilenode of non mapped system relations during decoding. That
352  * snapshot can't change in the midst of a relcache build, so there's no
353  * need to register the snapshot.
354  */
355  if (force_non_historic)
356  snapshot = GetNonHistoricCatalogSnapshot(RelationRelationId);
357 
358  pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
359  indexOK && criticalRelcachesBuilt,
360  snapshot,
361  1, key);
362 
363  pg_class_tuple = systable_getnext(pg_class_scan);
364 
365  /*
366  * Must copy tuple before releasing buffer.
367  */
368  if (HeapTupleIsValid(pg_class_tuple))
369  pg_class_tuple = heap_copytuple(pg_class_tuple);
370 
371  /* all done */
372  systable_endscan(pg_class_scan);
373  table_close(pg_class_desc, AccessShareLock);
374 
375  return pg_class_tuple;
376 }
377 
378 /*
379  * AllocateRelationDesc
380  *
381  * This is used to allocate memory for a new relation descriptor
382  * and initialize the rd_rel field from the given pg_class tuple.
383  */
384 static Relation
386 {
387  Relation relation;
388  MemoryContext oldcxt;
389  Form_pg_class relationForm;
390 
391  /* Relcache entries must live in CacheMemoryContext */
393 
394  /*
395  * allocate and zero space for new relation descriptor
396  */
397  relation = (Relation) palloc0(sizeof(RelationData));
398 
399  /* make sure relation is marked as having no open file yet */
400  relation->rd_smgr = NULL;
401 
402  /*
403  * Copy the relation tuple form
404  *
405  * We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The
406  * variable-length fields (relacl, reloptions) are NOT stored in the
407  * relcache --- there'd be little point in it, since we don't copy the
408  * tuple's nulls bitmap and hence wouldn't know if the values are valid.
409  * Bottom line is that relacl *cannot* be retrieved from the relcache. Get
410  * it from the syscache if you need it. The same goes for the original
411  * form of reloptions (however, we do store the parsed form of reloptions
412  * in rd_options).
413  */
414  relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
415 
416  memcpy(relationForm, relp, CLASS_TUPLE_SIZE);
417 
418  /* initialize relation tuple form */
419  relation->rd_rel = relationForm;
420 
421  /* and allocate attribute tuple form storage */
422  relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts);
423  /* which we mark as a reference-counted tupdesc */
424  relation->rd_att->tdrefcount = 1;
425 
426  MemoryContextSwitchTo(oldcxt);
427 
428  return relation;
429 }
430 
431 /*
432  * RelationParseRelOptions
433  * Convert pg_class.reloptions into pre-parsed rd_options
434  *
435  * tuple is the real pg_class tuple (not rd_rel!) for relation
436  *
437  * Note: rd_rel and (if an index) rd_indam must be valid already
438  */
439 static void
441 {
442  bytea *options;
443  amoptions_function amoptsfn;
444 
445  relation->rd_options = NULL;
446 
447  /*
448  * Look up any AM-specific parse function; fall out if relkind should not
449  * have options.
450  */
451  switch (relation->rd_rel->relkind)
452  {
453  case RELKIND_RELATION:
454  case RELKIND_TOASTVALUE:
455  case RELKIND_VIEW:
456  case RELKIND_MATVIEW:
457  case RELKIND_PARTITIONED_TABLE:
458  amoptsfn = NULL;
459  break;
460  case RELKIND_INDEX:
461  case RELKIND_PARTITIONED_INDEX:
462  amoptsfn = relation->rd_indam->amoptions;
463  break;
464  default:
465  return;
466  }
467 
468  /*
469  * Fetch reloptions from tuple; have to use a hardwired descriptor because
470  * we might not have any other for pg_class yet (consider executing this
471  * code for pg_class itself)
472  */
473  options = extractRelOptions(tuple, GetPgClassDescriptor(), amoptsfn);
474 
475  /*
476  * Copy parsed data into CacheMemoryContext. To guard against the
477  * possibility of leaks in the reloptions code, we want to do the actual
478  * parsing in the caller's memory context and copy the results into
479  * CacheMemoryContext after the fact.
480  */
481  if (options)
482  {
484  VARSIZE(options));
485  memcpy(relation->rd_options, options, VARSIZE(options));
486  pfree(options);
487  }
488 }
489 
490 /*
491  * RelationBuildTupleDesc
492  *
493  * Form the relation's tuple descriptor from information in
494  * the pg_attribute, pg_attrdef & pg_constraint system catalogs.
495  */
496 static void
498 {
499  HeapTuple pg_attribute_tuple;
500  Relation pg_attribute_desc;
501  SysScanDesc pg_attribute_scan;
502  ScanKeyData skey[2];
503  int need;
504  TupleConstr *constr;
505  AttrDefault *attrdef = NULL;
506  AttrMissing *attrmiss = NULL;
507  int ndef = 0;
508 
509  /* fill rd_att's type ID fields (compare heap.c's AddNewRelationTuple) */
510  relation->rd_att->tdtypeid =
511  relation->rd_rel->reltype ? relation->rd_rel->reltype : RECORDOID;
512  relation->rd_att->tdtypmod = -1; /* just to be sure */
513 
515  sizeof(TupleConstr));
516  constr->has_not_null = false;
517  constr->has_generated_stored = false;
518 
519  /*
520  * Form a scan key that selects only user attributes (attnum > 0).
521  * (Eliminating system attribute rows at the index level is lots faster
522  * than fetching them.)
523  */
524  ScanKeyInit(&skey[0],
525  Anum_pg_attribute_attrelid,
526  BTEqualStrategyNumber, F_OIDEQ,
528  ScanKeyInit(&skey[1],
529  Anum_pg_attribute_attnum,
530  BTGreaterStrategyNumber, F_INT2GT,
531  Int16GetDatum(0));
532 
533  /*
534  * Open pg_attribute and begin a scan. Force heap scan if we haven't yet
535  * built the critical relcache entries (this includes initdb and startup
536  * without a pg_internal.init file).
537  */
538  pg_attribute_desc = table_open(AttributeRelationId, AccessShareLock);
539  pg_attribute_scan = systable_beginscan(pg_attribute_desc,
542  NULL,
543  2, skey);
544 
545  /*
546  * add attribute data to relation->rd_att
547  */
548  need = RelationGetNumberOfAttributes(relation);
549 
550  while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
551  {
552  Form_pg_attribute attp;
553  int attnum;
554 
555  attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
556 
557  attnum = attp->attnum;
558  if (attnum <= 0 || attnum > RelationGetNumberOfAttributes(relation))
559  elog(ERROR, "invalid attribute number %d for %s",
560  attp->attnum, RelationGetRelationName(relation));
561 
562 
563  memcpy(TupleDescAttr(relation->rd_att, attnum - 1),
564  attp,
566 
567  /* Update constraint/default info */
568  if (attp->attnotnull)
569  constr->has_not_null = true;
570  if (attp->attgenerated == ATTRIBUTE_GENERATED_STORED)
571  constr->has_generated_stored = true;
572 
573  /* If the column has a default, fill it into the attrdef array */
574  if (attp->atthasdef)
575  {
576  if (attrdef == NULL)
577  attrdef = (AttrDefault *)
580  sizeof(AttrDefault));
581  attrdef[ndef].adnum = attnum;
582  attrdef[ndef].adbin = NULL;
583 
584  ndef++;
585  }
586 
587  /* Likewise for a missing value */
588  if (attp->atthasmissing)
589  {
590  Datum missingval;
591  bool missingNull;
592 
593  /* Do we have a missing value? */
594  missingval = heap_getattr(pg_attribute_tuple,
595  Anum_pg_attribute_attmissingval,
596  pg_attribute_desc->rd_att,
597  &missingNull);
598  if (!missingNull)
599  {
600  /* Yes, fetch from the array */
601  MemoryContext oldcxt;
602  bool is_null;
603  int one = 1;
604  Datum missval;
605 
606  if (attrmiss == NULL)
607  attrmiss = (AttrMissing *)
609  relation->rd_rel->relnatts *
610  sizeof(AttrMissing));
611 
612  missval = array_get_element(missingval,
613  1,
614  &one,
615  -1,
616  attp->attlen,
617  attp->attbyval,
618  attp->attalign,
619  &is_null);
620  Assert(!is_null);
621  if (attp->attbyval)
622  {
623  /* for copy by val just copy the datum direct */
624  attrmiss[attnum - 1].am_value = missval;
625  }
626  else
627  {
628  /* otherwise copy in the correct context */
630  attrmiss[attnum - 1].am_value = datumCopy(missval,
631  attp->attbyval,
632  attp->attlen);
633  MemoryContextSwitchTo(oldcxt);
634  }
635  attrmiss[attnum - 1].am_present = true;
636  }
637  }
638  need--;
639  if (need == 0)
640  break;
641  }
642 
643  /*
644  * end the scan and close the attribute relation
645  */
646  systable_endscan(pg_attribute_scan);
647  table_close(pg_attribute_desc, AccessShareLock);
648 
649  if (need != 0)
650  elog(ERROR, "catalog is missing %d attribute(s) for relid %u",
651  need, RelationGetRelid(relation));
652 
653  /*
654  * The attcacheoff values we read from pg_attribute should all be -1
655  * ("unknown"). Verify this if assert checking is on. They will be
656  * computed when and if needed during tuple access.
657  */
658 #ifdef USE_ASSERT_CHECKING
659  {
660  int i;
661 
662  for (i = 0; i < RelationGetNumberOfAttributes(relation); i++)
663  Assert(TupleDescAttr(relation->rd_att, i)->attcacheoff == -1);
664  }
665 #endif
666 
667  /*
668  * However, we can easily set the attcacheoff value for the first
669  * attribute: it must be zero. This eliminates the need for special cases
670  * for attnum=1 that used to exist in fastgetattr() and index_getattr().
671  */
672  if (RelationGetNumberOfAttributes(relation) > 0)
673  TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
674 
675  /*
676  * Set up constraint/default info
677  */
678  if (constr->has_not_null ||
679  constr->has_generated_stored ||
680  ndef > 0 ||
681  attrmiss ||
682  relation->rd_rel->relchecks)
683  {
684  relation->rd_att->constr = constr;
685 
686  if (ndef > 0) /* DEFAULTs */
687  {
688  if (ndef < RelationGetNumberOfAttributes(relation))
689  constr->defval = (AttrDefault *)
690  repalloc(attrdef, ndef * sizeof(AttrDefault));
691  else
692  constr->defval = attrdef;
693  constr->num_defval = ndef;
694  AttrDefaultFetch(relation);
695  }
696  else
697  constr->num_defval = 0;
698 
699  constr->missing = attrmiss;
700 
701  if (relation->rd_rel->relchecks > 0) /* CHECKs */
702  {
703  constr->num_check = relation->rd_rel->relchecks;
704  constr->check = (ConstrCheck *)
706  constr->num_check * sizeof(ConstrCheck));
707  CheckConstraintFetch(relation);
708  }
709  else
710  constr->num_check = 0;
711  }
712  else
713  {
714  pfree(constr);
715  relation->rd_att->constr = NULL;
716  }
717 }
718 
719 /*
720  * RelationBuildRuleLock
721  *
722  * Form the relation's rewrite rules from information in
723  * the pg_rewrite system catalog.
724  *
725  * Note: The rule parsetrees are potentially very complex node structures.
726  * To allow these trees to be freed when the relcache entry is flushed,
727  * we make a private memory context to hold the RuleLock information for
728  * each relcache entry that has associated rules. The context is used
729  * just for rule info, not for any other subsidiary data of the relcache
730  * entry, because that keeps the update logic in RelationClearRelation()
731  * manageable. The other subsidiary data structures are simple enough
732  * to be easy to free explicitly, anyway.
733  */
734 static void
736 {
737  MemoryContext rulescxt;
738  MemoryContext oldcxt;
739  HeapTuple rewrite_tuple;
740  Relation rewrite_desc;
741  TupleDesc rewrite_tupdesc;
742  SysScanDesc rewrite_scan;
744  RuleLock *rulelock;
745  int numlocks;
746  RewriteRule **rules;
747  int maxlocks;
748 
749  /*
750  * Make the private context. Assume it'll not contain much data.
751  */
753  "relation rules",
755  relation->rd_rulescxt = rulescxt;
757  RelationGetRelationName(relation));
758 
759  /*
760  * allocate an array to hold the rewrite rules (the array is extended if
761  * necessary)
762  */
763  maxlocks = 4;
764  rules = (RewriteRule **)
765  MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks);
766  numlocks = 0;
767 
768  /*
769  * form a scan key
770  */
771  ScanKeyInit(&key,
772  Anum_pg_rewrite_ev_class,
773  BTEqualStrategyNumber, F_OIDEQ,
775 
776  /*
777  * open pg_rewrite and begin a scan
778  *
779  * Note: since we scan the rules using RewriteRelRulenameIndexId, we will
780  * be reading the rules in name order, except possibly during
781  * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn
782  * ensures that rules will be fired in name order.
783  */
784  rewrite_desc = table_open(RewriteRelationId, AccessShareLock);
785  rewrite_tupdesc = RelationGetDescr(rewrite_desc);
786  rewrite_scan = systable_beginscan(rewrite_desc,
788  true, NULL,
789  1, &key);
790 
791  while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
792  {
793  Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
794  bool isnull;
795  Datum rule_datum;
796  char *rule_str;
797  RewriteRule *rule;
798 
799  rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
800  sizeof(RewriteRule));
801 
802  rule->ruleId = rewrite_form->oid;
803 
804  rule->event = rewrite_form->ev_type - '0';
805  rule->enabled = rewrite_form->ev_enabled;
806  rule->isInstead = rewrite_form->is_instead;
807 
808  /*
809  * Must use heap_getattr to fetch ev_action and ev_qual. Also, the
810  * rule strings are often large enough to be toasted. To avoid
811  * leaking memory in the caller's context, do the detoasting here so
812  * we can free the detoasted version.
813  */
814  rule_datum = heap_getattr(rewrite_tuple,
815  Anum_pg_rewrite_ev_action,
816  rewrite_tupdesc,
817  &isnull);
818  Assert(!isnull);
819  rule_str = TextDatumGetCString(rule_datum);
820  oldcxt = MemoryContextSwitchTo(rulescxt);
821  rule->actions = (List *) stringToNode(rule_str);
822  MemoryContextSwitchTo(oldcxt);
823  pfree(rule_str);
824 
825  rule_datum = heap_getattr(rewrite_tuple,
826  Anum_pg_rewrite_ev_qual,
827  rewrite_tupdesc,
828  &isnull);
829  Assert(!isnull);
830  rule_str = TextDatumGetCString(rule_datum);
831  oldcxt = MemoryContextSwitchTo(rulescxt);
832  rule->qual = (Node *) stringToNode(rule_str);
833  MemoryContextSwitchTo(oldcxt);
834  pfree(rule_str);
835 
836  /*
837  * We want the rule's table references to be checked as though by the
838  * table owner, not the user referencing the rule. Therefore, scan
839  * through the rule's actions and set the checkAsUser field on all
840  * rtable entries. We have to look at the qual as well, in case it
841  * contains sublinks.
842  *
843  * The reason for doing this when the rule is loaded, rather than when
844  * it is stored, is that otherwise ALTER TABLE OWNER would have to
845  * grovel through stored rules to update checkAsUser fields. Scanning
846  * the rule tree during load is relatively cheap (compared to
847  * constructing it in the first place), so we do it here.
848  */
849  setRuleCheckAsUser((Node *) rule->actions, relation->rd_rel->relowner);
850  setRuleCheckAsUser(rule->qual, relation->rd_rel->relowner);
851 
852  if (numlocks >= maxlocks)
853  {
854  maxlocks *= 2;
855  rules = (RewriteRule **)
856  repalloc(rules, sizeof(RewriteRule *) * maxlocks);
857  }
858  rules[numlocks++] = rule;
859  }
860 
861  /*
862  * end the scan and close the pg_rewrite relation
863  */
864  systable_endscan(rewrite_scan);
865  table_close(rewrite_desc, AccessShareLock);
866 
867  /*
868  * there might not be any rules (if relhasrules is out-of-date)
869  */
870  if (numlocks == 0)
871  {
872  relation->rd_rules = NULL;
873  relation->rd_rulescxt = NULL;
874  MemoryContextDelete(rulescxt);
875  return;
876  }
877 
878  /*
879  * form a RuleLock and insert into relation
880  */
881  rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock));
882  rulelock->numLocks = numlocks;
883  rulelock->rules = rules;
884 
885  relation->rd_rules = rulelock;
886 }
887 
888 /*
889  * equalRuleLocks
890  *
891  * Determine whether two RuleLocks are equivalent
892  *
893  * Probably this should be in the rules code someplace...
894  */
895 static bool
897 {
898  int i;
899 
900  /*
901  * As of 7.3 we assume the rule ordering is repeatable, because
902  * RelationBuildRuleLock should read 'em in a consistent order. So just
903  * compare corresponding slots.
904  */
905  if (rlock1 != NULL)
906  {
907  if (rlock2 == NULL)
908  return false;
909  if (rlock1->numLocks != rlock2->numLocks)
910  return false;
911  for (i = 0; i < rlock1->numLocks; i++)
912  {
913  RewriteRule *rule1 = rlock1->rules[i];
914  RewriteRule *rule2 = rlock2->rules[i];
915 
916  if (rule1->ruleId != rule2->ruleId)
917  return false;
918  if (rule1->event != rule2->event)
919  return false;
920  if (rule1->enabled != rule2->enabled)
921  return false;
922  if (rule1->isInstead != rule2->isInstead)
923  return false;
924  if (!equal(rule1->qual, rule2->qual))
925  return false;
926  if (!equal(rule1->actions, rule2->actions))
927  return false;
928  }
929  }
930  else if (rlock2 != NULL)
931  return false;
932  return true;
933 }
934 
935 /*
936  * equalPolicy
937  *
938  * Determine whether two policies are equivalent
939  */
940 static bool
942 {
943  int i;
944  Oid *r1,
945  *r2;
946 
947  if (policy1 != NULL)
948  {
949  if (policy2 == NULL)
950  return false;
951 
952  if (policy1->polcmd != policy2->polcmd)
953  return false;
954  if (policy1->hassublinks != policy2->hassublinks)
955  return false;
956  if (strcmp(policy1->policy_name, policy2->policy_name) != 0)
957  return false;
958  if (ARR_DIMS(policy1->roles)[0] != ARR_DIMS(policy2->roles)[0])
959  return false;
960 
961  r1 = (Oid *) ARR_DATA_PTR(policy1->roles);
962  r2 = (Oid *) ARR_DATA_PTR(policy2->roles);
963 
964  for (i = 0; i < ARR_DIMS(policy1->roles)[0]; i++)
965  {
966  if (r1[i] != r2[i])
967  return false;
968  }
969 
970  if (!equal(policy1->qual, policy2->qual))
971  return false;
972  if (!equal(policy1->with_check_qual, policy2->with_check_qual))
973  return false;
974  }
975  else if (policy2 != NULL)
976  return false;
977 
978  return true;
979 }
980 
981 /*
982  * equalRSDesc
983  *
984  * Determine whether two RowSecurityDesc's are equivalent
985  */
986 static bool
988 {
989  ListCell *lc,
990  *rc;
991 
992  if (rsdesc1 == NULL && rsdesc2 == NULL)
993  return true;
994 
995  if ((rsdesc1 != NULL && rsdesc2 == NULL) ||
996  (rsdesc1 == NULL && rsdesc2 != NULL))
997  return false;
998 
999  if (list_length(rsdesc1->policies) != list_length(rsdesc2->policies))
1000  return false;
1001 
1002  /* RelationBuildRowSecurity should build policies in order */
1003  forboth(lc, rsdesc1->policies, rc, rsdesc2->policies)
1004  {
1007 
1008  if (!equalPolicy(l, r))
1009  return false;
1010  }
1011 
1012  return true;
1013 }
1014 
1015 /*
1016  * RelationBuildDesc
1017  *
1018  * Build a relation descriptor. The caller must hold at least
1019  * AccessShareLock on the target relid.
1020  *
1021  * The new descriptor is inserted into the hash table if insertIt is true.
1022  *
1023  * Returns NULL if no pg_class row could be found for the given relid
1024  * (suggesting we are trying to access a just-deleted relation).
1025  * Any other error is reported via elog.
1026  */
1027 static Relation
1028 RelationBuildDesc(Oid targetRelId, bool insertIt)
1029 {
1030  Relation relation;
1031  Oid relid;
1032  HeapTuple pg_class_tuple;
1033  Form_pg_class relp;
1034 
1035  /*
1036  * This function and its subroutines can allocate a good deal of transient
1037  * data in CurrentMemoryContext. Traditionally we've just leaked that
1038  * data, reasoning that the caller's context is at worst of transaction
1039  * scope, and relcache loads shouldn't happen so often that it's essential
1040  * to recover transient data before end of statement/transaction. However
1041  * that's definitely not true in clobber-cache test builds, and perhaps
1042  * it's not true in other cases. If RECOVER_RELATION_BUILD_MEMORY is not
1043  * zero, arrange to allocate the junk in a temporary context that we'll
1044  * free before returning. Make it a child of caller's context so that it
1045  * will get cleaned up appropriately if we error out partway through.
1046  */
1047 #if RECOVER_RELATION_BUILD_MEMORY
1048  MemoryContext tmpcxt;
1049  MemoryContext oldcxt;
1050 
1052  "RelationBuildDesc workspace",
1054  oldcxt = MemoryContextSwitchTo(tmpcxt);
1055 #endif
1056 
1057  /*
1058  * find the tuple in pg_class corresponding to the given relation id
1059  */
1060  pg_class_tuple = ScanPgRelation(targetRelId, true, false);
1061 
1062  /*
1063  * if no such tuple exists, return NULL
1064  */
1065  if (!HeapTupleIsValid(pg_class_tuple))
1066  {
1067 #if RECOVER_RELATION_BUILD_MEMORY
1068  /* Return to caller's context, and blow away the temporary context */
1069  MemoryContextSwitchTo(oldcxt);
1070  MemoryContextDelete(tmpcxt);
1071 #endif
1072  return NULL;
1073  }
1074 
1075  /*
1076  * get information from the pg_class_tuple
1077  */
1078  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1079  relid = relp->oid;
1080  Assert(relid == targetRelId);
1081 
1082  /*
1083  * allocate storage for the relation descriptor, and copy pg_class_tuple
1084  * to relation->rd_rel.
1085  */
1086  relation = AllocateRelationDesc(relp);
1087 
1088  /*
1089  * initialize the relation's relation id (relation->rd_id)
1090  */
1091  RelationGetRelid(relation) = relid;
1092 
1093  /*
1094  * Normal relations are not nailed into the cache. Since we don't flush
1095  * new relations, it won't be new. It could be temp though.
1096  */
1097  relation->rd_refcnt = 0;
1098  relation->rd_isnailed = false;
1103  switch (relation->rd_rel->relpersistence)
1104  {
1105  case RELPERSISTENCE_UNLOGGED:
1106  case RELPERSISTENCE_PERMANENT:
1107  relation->rd_backend = InvalidBackendId;
1108  relation->rd_islocaltemp = false;
1109  break;
1110  case RELPERSISTENCE_TEMP:
1111  if (isTempOrTempToastNamespace(relation->rd_rel->relnamespace))
1112  {
1113  relation->rd_backend = BackendIdForTempRelations();
1114  relation->rd_islocaltemp = true;
1115  }
1116  else
1117  {
1118  /*
1119  * If it's a temp table, but not one of ours, we have to use
1120  * the slow, grotty method to figure out the owning backend.
1121  *
1122  * Note: it's possible that rd_backend gets set to MyBackendId
1123  * here, in case we are looking at a pg_class entry left over
1124  * from a crashed backend that coincidentally had the same
1125  * BackendId we're using. We should *not* consider such a
1126  * table to be "ours"; this is why we need the separate
1127  * rd_islocaltemp flag. The pg_class entry will get flushed
1128  * if/when we clean out the corresponding temp table namespace
1129  * in preparation for using it.
1130  */
1131  relation->rd_backend =
1132  GetTempNamespaceBackendId(relation->rd_rel->relnamespace);
1133  Assert(relation->rd_backend != InvalidBackendId);
1134  relation->rd_islocaltemp = false;
1135  }
1136  break;
1137  default:
1138  elog(ERROR, "invalid relpersistence: %c",
1139  relation->rd_rel->relpersistence);
1140  break;
1141  }
1142 
1143  /*
1144  * initialize the tuple descriptor (relation->rd_att).
1145  */
1146  RelationBuildTupleDesc(relation);
1147 
1148  /*
1149  * Fetch rules and triggers that affect this relation
1150  */
1151  if (relation->rd_rel->relhasrules)
1152  RelationBuildRuleLock(relation);
1153  else
1154  {
1155  relation->rd_rules = NULL;
1156  relation->rd_rulescxt = NULL;
1157  }
1158 
1159  if (relation->rd_rel->relhastriggers)
1160  RelationBuildTriggers(relation);
1161  else
1162  relation->trigdesc = NULL;
1163 
1164  if (relation->rd_rel->relrowsecurity)
1165  RelationBuildRowSecurity(relation);
1166  else
1167  relation->rd_rsdesc = NULL;
1168 
1169  /* foreign key data is not loaded till asked for */
1170  relation->rd_fkeylist = NIL;
1171  relation->rd_fkeyvalid = false;
1172 
1173  /* partitioning data is not loaded till asked for */
1174  relation->rd_partkey = NULL;
1175  relation->rd_partkeycxt = NULL;
1176  relation->rd_partdesc = NULL;
1177  relation->rd_pdcxt = NULL;
1178  relation->rd_partcheck = NIL;
1179  relation->rd_partcheckvalid = false;
1180  relation->rd_partcheckcxt = NULL;
1181 
1182  /*
1183  * initialize access method information
1184  */
1185  switch (relation->rd_rel->relkind)
1186  {
1187  case RELKIND_INDEX:
1188  case RELKIND_PARTITIONED_INDEX:
1189  Assert(relation->rd_rel->relam != InvalidOid);
1190  RelationInitIndexAccessInfo(relation);
1191  break;
1192  case RELKIND_RELATION:
1193  case RELKIND_TOASTVALUE:
1194  case RELKIND_MATVIEW:
1195  Assert(relation->rd_rel->relam != InvalidOid);
1197  break;
1198  case RELKIND_SEQUENCE:
1199  Assert(relation->rd_rel->relam == InvalidOid);
1201  break;
1202  case RELKIND_VIEW:
1203  case RELKIND_COMPOSITE_TYPE:
1204  case RELKIND_FOREIGN_TABLE:
1205  case RELKIND_PARTITIONED_TABLE:
1206  Assert(relation->rd_rel->relam == InvalidOid);
1207  break;
1208  }
1209 
1210  /* extract reloptions if any */
1211  RelationParseRelOptions(relation, pg_class_tuple);
1212 
1213  /*
1214  * initialize the relation lock manager information
1215  */
1216  RelationInitLockInfo(relation); /* see lmgr.c */
1217 
1218  /*
1219  * initialize physical addressing information for the relation
1220  */
1221  RelationInitPhysicalAddr(relation);
1222 
1223  /* make sure relation is marked as having no open file yet */
1224  relation->rd_smgr = NULL;
1225 
1226  /*
1227  * now we can free the memory allocated for pg_class_tuple
1228  */
1229  heap_freetuple(pg_class_tuple);
1230 
1231  /*
1232  * Insert newly created relation into relcache hash table, if requested.
1233  *
1234  * There is one scenario in which we might find a hashtable entry already
1235  * present, even though our caller failed to find it: if the relation is a
1236  * system catalog or index that's used during relcache load, we might have
1237  * recursively created the same relcache entry during the preceding steps.
1238  * So allow RelationCacheInsert to delete any already-present relcache
1239  * entry for the same OID. The already-present entry should have refcount
1240  * zero (else somebody forgot to close it); in the event that it doesn't,
1241  * we'll elog a WARNING and leak the already-present entry.
1242  */
1243  if (insertIt)
1244  RelationCacheInsert(relation, true);
1245 
1246  /* It's fully valid */
1247  relation->rd_isvalid = true;
1248 
1249 #if RECOVER_RELATION_BUILD_MEMORY
1250  /* Return to caller's context, and blow away the temporary context */
1251  MemoryContextSwitchTo(oldcxt);
1252  MemoryContextDelete(tmpcxt);
1253 #endif
1254 
1255  return relation;
1256 }
1257 
1258 /*
1259  * Initialize the physical addressing info (RelFileNode) for a relcache entry
1260  *
1261  * Note: at the physical level, relations in the pg_global tablespace must
1262  * be treated as shared, even if relisshared isn't set. Hence we do not
1263  * look at relisshared here.
1264  */
1265 static void
1267 {
1268  Oid oldnode = relation->rd_node.relNode;
1269 
1270  /* these relations kinds never have storage */
1271  if (!RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
1272  return;
1273 
1274  if (relation->rd_rel->reltablespace)
1275  relation->rd_node.spcNode = relation->rd_rel->reltablespace;
1276  else
1277  relation->rd_node.spcNode = MyDatabaseTableSpace;
1278  if (relation->rd_node.spcNode == GLOBALTABLESPACE_OID)
1279  relation->rd_node.dbNode = InvalidOid;
1280  else
1281  relation->rd_node.dbNode = MyDatabaseId;
1282 
1283  if (relation->rd_rel->relfilenode)
1284  {
1285  /*
1286  * Even if we are using a decoding snapshot that doesn't represent the
1287  * current state of the catalog we need to make sure the filenode
1288  * points to the current file since the older file will be gone (or
1289  * truncated). The new file will still contain older rows so lookups
1290  * in them will work correctly. This wouldn't work correctly if
1291  * rewrites were allowed to change the schema in an incompatible way,
1292  * but those are prevented both on catalog tables and on user tables
1293  * declared as additional catalog tables.
1294  */
1297  && IsTransactionState())
1298  {
1299  HeapTuple phys_tuple;
1300  Form_pg_class physrel;
1301 
1302  phys_tuple = ScanPgRelation(RelationGetRelid(relation),
1303  RelationGetRelid(relation) != ClassOidIndexId,
1304  true);
1305  if (!HeapTupleIsValid(phys_tuple))
1306  elog(ERROR, "could not find pg_class entry for %u",
1307  RelationGetRelid(relation));
1308  physrel = (Form_pg_class) GETSTRUCT(phys_tuple);
1309 
1310  relation->rd_rel->reltablespace = physrel->reltablespace;
1311  relation->rd_rel->relfilenode = physrel->relfilenode;
1312  heap_freetuple(phys_tuple);
1313  }
1314 
1315  relation->rd_node.relNode = relation->rd_rel->relfilenode;
1316  }
1317  else
1318  {
1319  /* Consult the relation mapper */
1320  relation->rd_node.relNode =
1321  RelationMapOidToFilenode(relation->rd_id,
1322  relation->rd_rel->relisshared);
1323  if (!OidIsValid(relation->rd_node.relNode))
1324  elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1325  RelationGetRelationName(relation), relation->rd_id);
1326  }
1327 
1328  /*
1329  * For RelationNeedsWAL() to answer correctly on parallel workers, restore
1330  * rd_firstRelfilenodeSubid. No subtransactions start or end while in
1331  * parallel mode, so the specific SubTransactionId does not matter.
1332  */
1333  if (IsParallelWorker() && oldnode != relation->rd_node.relNode)
1334  {
1335  if (RelFileNodeSkippingWAL(relation->rd_node))
1337  else
1339  }
1340 }
1341 
1342 /*
1343  * Fill in the IndexAmRoutine for an index relation.
1344  *
1345  * relation's rd_amhandler and rd_indexcxt must be valid already.
1346  */
1347 static void
1349 {
1350  IndexAmRoutine *cached,
1351  *tmp;
1352 
1353  /*
1354  * Call the amhandler in current, short-lived memory context, just in case
1355  * it leaks anything (it probably won't, but let's be paranoid).
1356  */
1357  tmp = GetIndexAmRoutine(relation->rd_amhandler);
1358 
1359  /* OK, now transfer the data into relation's rd_indexcxt. */
1360  cached = (IndexAmRoutine *) MemoryContextAlloc(relation->rd_indexcxt,
1361  sizeof(IndexAmRoutine));
1362  memcpy(cached, tmp, sizeof(IndexAmRoutine));
1363  relation->rd_indam = cached;
1364 
1365  pfree(tmp);
1366 }
1367 
1368 /*
1369  * Initialize index-access-method support data for an index relation
1370  */
1371 void
1373 {
1374  HeapTuple tuple;
1375  Form_pg_am aform;
1376  Datum indcollDatum;
1377  Datum indclassDatum;
1378  Datum indoptionDatum;
1379  bool isnull;
1380  oidvector *indcoll;
1381  oidvector *indclass;
1382  int2vector *indoption;
1383  MemoryContext indexcxt;
1384  MemoryContext oldcontext;
1385  int indnatts;
1386  int indnkeyatts;
1387  uint16 amsupport;
1388 
1389  /*
1390  * Make a copy of the pg_index entry for the index. Since pg_index
1391  * contains variable-length and possibly-null fields, we have to do this
1392  * honestly rather than just treating it as a Form_pg_index struct.
1393  */
1394  tuple = SearchSysCache1(INDEXRELID,
1395  ObjectIdGetDatum(RelationGetRelid(relation)));
1396  if (!HeapTupleIsValid(tuple))
1397  elog(ERROR, "cache lookup failed for index %u",
1398  RelationGetRelid(relation));
1400  relation->rd_indextuple = heap_copytuple(tuple);
1401  relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
1402  MemoryContextSwitchTo(oldcontext);
1403  ReleaseSysCache(tuple);
1404 
1405  /*
1406  * Look up the index's access method, save the OID of its handler function
1407  */
1408  tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(relation->rd_rel->relam));
1409  if (!HeapTupleIsValid(tuple))
1410  elog(ERROR, "cache lookup failed for access method %u",
1411  relation->rd_rel->relam);
1412  aform = (Form_pg_am) GETSTRUCT(tuple);
1413  relation->rd_amhandler = aform->amhandler;
1414  ReleaseSysCache(tuple);
1415 
1416  indnatts = RelationGetNumberOfAttributes(relation);
1417  if (indnatts != IndexRelationGetNumberOfAttributes(relation))
1418  elog(ERROR, "relnatts disagrees with indnatts for index %u",
1419  RelationGetRelid(relation));
1420  indnkeyatts = IndexRelationGetNumberOfKeyAttributes(relation);
1421 
1422  /*
1423  * Make the private context to hold index access info. The reason we need
1424  * a context, and not just a couple of pallocs, is so that we won't leak
1425  * any subsidiary info attached to fmgr lookup records.
1426  */
1428  "index info",
1430  relation->rd_indexcxt = indexcxt;
1432  RelationGetRelationName(relation));
1433 
1434  /*
1435  * Now we can fetch the index AM's API struct
1436  */
1437  InitIndexAmRoutine(relation);
1438 
1439  /*
1440  * Allocate arrays to hold data. Opclasses are not used for included
1441  * columns, so allocate them for indnkeyatts only.
1442  */
1443  relation->rd_opfamily = (Oid *)
1444  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1445  relation->rd_opcintype = (Oid *)
1446  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1447 
1448  amsupport = relation->rd_indam->amsupport;
1449  if (amsupport > 0)
1450  {
1451  int nsupport = indnatts * amsupport;
1452 
1453  relation->rd_support = (RegProcedure *)
1454  MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
1455  relation->rd_supportinfo = (FmgrInfo *)
1456  MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
1457  }
1458  else
1459  {
1460  relation->rd_support = NULL;
1461  relation->rd_supportinfo = NULL;
1462  }
1463 
1464  relation->rd_indcollation = (Oid *)
1465  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1466 
1467  relation->rd_indoption = (int16 *)
1468  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(int16));
1469 
1470  /*
1471  * indcollation cannot be referenced directly through the C struct,
1472  * because it comes after the variable-width indkey field. Must extract
1473  * the datum the hard way...
1474  */
1475  indcollDatum = fastgetattr(relation->rd_indextuple,
1476  Anum_pg_index_indcollation,
1478  &isnull);
1479  Assert(!isnull);
1480  indcoll = (oidvector *) DatumGetPointer(indcollDatum);
1481  memcpy(relation->rd_indcollation, indcoll->values, indnkeyatts * sizeof(Oid));
1482 
1483  /*
1484  * indclass cannot be referenced directly through the C struct, because it
1485  * comes after the variable-width indkey field. Must extract the datum
1486  * the hard way...
1487  */
1488  indclassDatum = fastgetattr(relation->rd_indextuple,
1489  Anum_pg_index_indclass,
1491  &isnull);
1492  Assert(!isnull);
1493  indclass = (oidvector *) DatumGetPointer(indclassDatum);
1494 
1495  /*
1496  * Fill the support procedure OID array, as well as the info about
1497  * opfamilies and opclass input types. (aminfo and supportinfo are left
1498  * as zeroes, and are filled on-the-fly when used)
1499  */
1500  IndexSupportInitialize(indclass, relation->rd_support,
1501  relation->rd_opfamily, relation->rd_opcintype,
1502  amsupport, indnkeyatts);
1503 
1504  /*
1505  * Similarly extract indoption and copy it to the cache entry
1506  */
1507  indoptionDatum = fastgetattr(relation->rd_indextuple,
1508  Anum_pg_index_indoption,
1510  &isnull);
1511  Assert(!isnull);
1512  indoption = (int2vector *) DatumGetPointer(indoptionDatum);
1513  memcpy(relation->rd_indoption, indoption->values, indnkeyatts * sizeof(int16));
1514 
1515  (void) RelationGetIndexAttOptions(relation, false);
1516 
1517  /*
1518  * expressions, predicate, exclusion caches will be filled later
1519  */
1520  relation->rd_indexprs = NIL;
1521  relation->rd_indpred = NIL;
1522  relation->rd_exclops = NULL;
1523  relation->rd_exclprocs = NULL;
1524  relation->rd_exclstrats = NULL;
1525  relation->rd_amcache = NULL;
1526 }
1527 
1528 /*
1529  * IndexSupportInitialize
1530  * Initializes an index's cached opclass information,
1531  * given the index's pg_index.indclass entry.
1532  *
1533  * Data is returned into *indexSupport, *opFamily, and *opcInType,
1534  * which are arrays allocated by the caller.
1535  *
1536  * The caller also passes maxSupportNumber and maxAttributeNumber, since these
1537  * indicate the size of the arrays it has allocated --- but in practice these
1538  * numbers must always match those obtainable from the system catalog entries
1539  * for the index and access method.
1540  */
1541 static void
1543  RegProcedure *indexSupport,
1544  Oid *opFamily,
1545  Oid *opcInType,
1546  StrategyNumber maxSupportNumber,
1547  AttrNumber maxAttributeNumber)
1548 {
1549  int attIndex;
1550 
1551  for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++)
1552  {
1553  OpClassCacheEnt *opcentry;
1554 
1555  if (!OidIsValid(indclass->values[attIndex]))
1556  elog(ERROR, "bogus pg_index tuple");
1557 
1558  /* look up the info for this opclass, using a cache */
1559  opcentry = LookupOpclassInfo(indclass->values[attIndex],
1560  maxSupportNumber);
1561 
1562  /* copy cached data into relcache entry */
1563  opFamily[attIndex] = opcentry->opcfamily;
1564  opcInType[attIndex] = opcentry->opcintype;
1565  if (maxSupportNumber > 0)
1566  memcpy(&indexSupport[attIndex * maxSupportNumber],
1567  opcentry->supportProcs,
1568  maxSupportNumber * sizeof(RegProcedure));
1569  }
1570 }
1571 
1572 /*
1573  * LookupOpclassInfo
1574  *
1575  * This routine maintains a per-opclass cache of the information needed
1576  * by IndexSupportInitialize(). This is more efficient than relying on
1577  * the catalog cache, because we can load all the info about a particular
1578  * opclass in a single indexscan of pg_amproc.
1579  *
1580  * The information from pg_am about expected range of support function
1581  * numbers is passed in, rather than being looked up, mainly because the
1582  * caller will have it already.
1583  *
1584  * Note there is no provision for flushing the cache. This is OK at the
1585  * moment because there is no way to ALTER any interesting properties of an
1586  * existing opclass --- all you can do is drop it, which will result in
1587  * a useless but harmless dead entry in the cache. To support altering
1588  * opclass membership (not the same as opfamily membership!), we'd need to
1589  * be able to flush this cache as well as the contents of relcache entries
1590  * for indexes.
1591  */
1592 static OpClassCacheEnt *
1593 LookupOpclassInfo(Oid operatorClassOid,
1594  StrategyNumber numSupport)
1595 {
1596  OpClassCacheEnt *opcentry;
1597  bool found;
1598  Relation rel;
1599  SysScanDesc scan;
1600  ScanKeyData skey[3];
1601  HeapTuple htup;
1602  bool indexOK;
1603 
1604  if (OpClassCache == NULL)
1605  {
1606  /* First time through: initialize the opclass cache */
1607  HASHCTL ctl;
1608 
1609  MemSet(&ctl, 0, sizeof(ctl));
1610  ctl.keysize = sizeof(Oid);
1611  ctl.entrysize = sizeof(OpClassCacheEnt);
1612  OpClassCache = hash_create("Operator class cache", 64,
1613  &ctl, HASH_ELEM | HASH_BLOBS);
1614 
1615  /* Also make sure CacheMemoryContext exists */
1616  if (!CacheMemoryContext)
1618  }
1619 
1620  opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
1621  (void *) &operatorClassOid,
1622  HASH_ENTER, &found);
1623 
1624  if (!found)
1625  {
1626  /* Need to allocate memory for new entry */
1627  opcentry->valid = false; /* until known OK */
1628  opcentry->numSupport = numSupport;
1629 
1630  if (numSupport > 0)
1631  opcentry->supportProcs = (RegProcedure *)
1633  (numSupport + 1) * sizeof(RegProcedure));
1634  else
1635  opcentry->supportProcs = NULL;
1636  }
1637  else
1638  {
1639  Assert(numSupport == opcentry->numSupport);
1640  }
1641 
1642  /*
1643  * When testing for cache-flush hazards, we intentionally disable the
1644  * operator class cache and force reloading of the info on each call. This
1645  * is helpful because we want to test the case where a cache flush occurs
1646  * while we are loading the info, and it's very hard to provoke that if
1647  * this happens only once per opclass per backend.
1648  */
1649 #if defined(CLOBBER_CACHE_ALWAYS)
1650  opcentry->valid = false;
1651 #endif
1652 
1653  if (opcentry->valid)
1654  return opcentry;
1655 
1656  /*
1657  * Need to fill in new entry.
1658  *
1659  * To avoid infinite recursion during startup, force heap scans if we're
1660  * looking up info for the opclasses used by the indexes we would like to
1661  * reference here.
1662  */
1663  indexOK = criticalRelcachesBuilt ||
1664  (operatorClassOid != OID_BTREE_OPS_OID &&
1665  operatorClassOid != INT2_BTREE_OPS_OID);
1666 
1667  /*
1668  * We have to fetch the pg_opclass row to determine its opfamily and
1669  * opcintype, which are needed to look up related operators and functions.
1670  * It'd be convenient to use the syscache here, but that probably doesn't
1671  * work while bootstrapping.
1672  */
1673  ScanKeyInit(&skey[0],
1674  Anum_pg_opclass_oid,
1675  BTEqualStrategyNumber, F_OIDEQ,
1676  ObjectIdGetDatum(operatorClassOid));
1677  rel = table_open(OperatorClassRelationId, AccessShareLock);
1678  scan = systable_beginscan(rel, OpclassOidIndexId, indexOK,
1679  NULL, 1, skey);
1680 
1681  if (HeapTupleIsValid(htup = systable_getnext(scan)))
1682  {
1683  Form_pg_opclass opclassform = (Form_pg_opclass) GETSTRUCT(htup);
1684 
1685  opcentry->opcfamily = opclassform->opcfamily;
1686  opcentry->opcintype = opclassform->opcintype;
1687  }
1688  else
1689  elog(ERROR, "could not find tuple for opclass %u", operatorClassOid);
1690 
1691  systable_endscan(scan);
1693 
1694  /*
1695  * Scan pg_amproc to obtain support procs for the opclass. We only fetch
1696  * the default ones (those with lefttype = righttype = opcintype).
1697  */
1698  if (numSupport > 0)
1699  {
1700  ScanKeyInit(&skey[0],
1701  Anum_pg_amproc_amprocfamily,
1702  BTEqualStrategyNumber, F_OIDEQ,
1703  ObjectIdGetDatum(opcentry->opcfamily));
1704  ScanKeyInit(&skey[1],
1705  Anum_pg_amproc_amproclefttype,
1706  BTEqualStrategyNumber, F_OIDEQ,
1707  ObjectIdGetDatum(opcentry->opcintype));
1708  ScanKeyInit(&skey[2],
1709  Anum_pg_amproc_amprocrighttype,
1710  BTEqualStrategyNumber, F_OIDEQ,
1711  ObjectIdGetDatum(opcentry->opcintype));
1712  rel = table_open(AccessMethodProcedureRelationId, AccessShareLock);
1713  scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK,
1714  NULL, 3, skey);
1715 
1716  while (HeapTupleIsValid(htup = systable_getnext(scan)))
1717  {
1718  Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);
1719 
1720  if (amprocform->amprocnum <= 0 ||
1721  (StrategyNumber) amprocform->amprocnum > numSupport)
1722  elog(ERROR, "invalid amproc number %d for opclass %u",
1723  amprocform->amprocnum, operatorClassOid);
1724 
1725  opcentry->supportProcs[amprocform->amprocnum - 1] =
1726  amprocform->amproc;
1727  }
1728 
1729  systable_endscan(scan);
1731  }
1732 
1733  opcentry->valid = true;
1734  return opcentry;
1735 }
1736 
1737 /*
1738  * Fill in the TableAmRoutine for a relation
1739  *
1740  * relation's rd_amhandler must be valid already.
1741  */
1742 static void
1744 {
1745  relation->rd_tableam = GetTableAmRoutine(relation->rd_amhandler);
1746 }
1747 
1748 /*
1749  * Initialize table access method support for a table like relation
1750  */
1751 void
1753 {
1754  HeapTuple tuple;
1755  Form_pg_am aform;
1756 
1757  if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
1758  {
1759  /*
1760  * Sequences are currently accessed like heap tables, but it doesn't
1761  * seem prudent to show that in the catalog. So just overwrite it
1762  * here.
1763  */
1764  relation->rd_amhandler = HEAP_TABLE_AM_HANDLER_OID;
1765  }
1766  else if (IsCatalogRelation(relation))
1767  {
1768  /*
1769  * Avoid doing a syscache lookup for catalog tables.
1770  */
1771  Assert(relation->rd_rel->relam == HEAP_TABLE_AM_OID);
1772  relation->rd_amhandler = HEAP_TABLE_AM_HANDLER_OID;
1773  }
1774  else
1775  {
1776  /*
1777  * Look up the table access method, save the OID of its handler
1778  * function.
1779  */
1780  Assert(relation->rd_rel->relam != InvalidOid);
1781  tuple = SearchSysCache1(AMOID,
1782  ObjectIdGetDatum(relation->rd_rel->relam));
1783  if (!HeapTupleIsValid(tuple))
1784  elog(ERROR, "cache lookup failed for access method %u",
1785  relation->rd_rel->relam);
1786  aform = (Form_pg_am) GETSTRUCT(tuple);
1787  relation->rd_amhandler = aform->amhandler;
1788  ReleaseSysCache(tuple);
1789  }
1790 
1791  /*
1792  * Now we can fetch the table AM's API struct
1793  */
1794  InitTableAmRoutine(relation);
1795 }
1796 
1797 /*
1798  * formrdesc
1799  *
1800  * This is a special cut-down version of RelationBuildDesc(),
1801  * used while initializing the relcache.
1802  * The relation descriptor is built just from the supplied parameters,
1803  * without actually looking at any system table entries. We cheat
1804  * quite a lot since we only need to work for a few basic system
1805  * catalogs.
1806  *
1807  * The catalogs this is used for can't have constraints (except attnotnull),
1808  * default values, rules, or triggers, since we don't cope with any of that.
1809  * (Well, actually, this only matters for properties that need to be valid
1810  * during bootstrap or before RelationCacheInitializePhase3 runs, and none of
1811  * these properties matter then...)
1812  *
1813  * NOTE: we assume we are already switched into CacheMemoryContext.
1814  */
1815 static void
1816 formrdesc(const char *relationName, Oid relationReltype,
1817  bool isshared,
1818  int natts, const FormData_pg_attribute *attrs)
1819 {
1820  Relation relation;
1821  int i;
1822  bool has_not_null;
1823 
1824  /*
1825  * allocate new relation desc, clear all fields of reldesc
1826  */
1827  relation = (Relation) palloc0(sizeof(RelationData));
1828 
1829  /* make sure relation is marked as having no open file yet */
1830  relation->rd_smgr = NULL;
1831 
1832  /*
1833  * initialize reference count: 1 because it is nailed in cache
1834  */
1835  relation->rd_refcnt = 1;
1836 
1837  /*
1838  * all entries built with this routine are nailed-in-cache; none are for
1839  * new or temp relations.
1840  */
1841  relation->rd_isnailed = true;
1846  relation->rd_backend = InvalidBackendId;
1847  relation->rd_islocaltemp = false;
1848 
1849  /*
1850  * initialize relation tuple form
1851  *
1852  * The data we insert here is pretty incomplete/bogus, but it'll serve to
1853  * get us launched. RelationCacheInitializePhase3() will read the real
1854  * data from pg_class and replace what we've done here. Note in
1855  * particular that relowner is left as zero; this cues
1856  * RelationCacheInitializePhase3 that the real data isn't there yet.
1857  */
1859 
1860  namestrcpy(&relation->rd_rel->relname, relationName);
1861  relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1862  relation->rd_rel->reltype = relationReltype;
1863 
1864  /*
1865  * It's important to distinguish between shared and non-shared relations,
1866  * even at bootstrap time, to make sure we know where they are stored.
1867  */
1868  relation->rd_rel->relisshared = isshared;
1869  if (isshared)
1870  relation->rd_rel->reltablespace = GLOBALTABLESPACE_OID;
1871 
1872  /* formrdesc is used only for permanent relations */
1873  relation->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
1874 
1875  /* ... and they're always populated, too */
1876  relation->rd_rel->relispopulated = true;
1877 
1878  relation->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
1879  relation->rd_rel->relpages = 0;
1880  relation->rd_rel->reltuples = -1;
1881  relation->rd_rel->relallvisible = 0;
1882  relation->rd_rel->relkind = RELKIND_RELATION;
1883  relation->rd_rel->relnatts = (int16) natts;
1884  relation->rd_rel->relam = HEAP_TABLE_AM_OID;
1885 
1886  /*
1887  * initialize attribute tuple form
1888  *
1889  * Unlike the case with the relation tuple, this data had better be right
1890  * because it will never be replaced. The data comes from
1891  * src/include/catalog/ headers via genbki.pl.
1892  */
1893  relation->rd_att = CreateTemplateTupleDesc(natts);
1894  relation->rd_att->tdrefcount = 1; /* mark as refcounted */
1895 
1896  relation->rd_att->tdtypeid = relationReltype;
1897  relation->rd_att->tdtypmod = -1; /* just to be sure */
1898 
1899  /*
1900  * initialize tuple desc info
1901  */
1902  has_not_null = false;
1903  for (i = 0; i < natts; i++)
1904  {
1905  memcpy(TupleDescAttr(relation->rd_att, i),
1906  &attrs[i],
1908  has_not_null |= attrs[i].attnotnull;
1909  /* make sure attcacheoff is valid */
1910  TupleDescAttr(relation->rd_att, i)->attcacheoff = -1;
1911  }
1912 
1913  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
1914  TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
1915 
1916  /* mark not-null status */
1917  if (has_not_null)
1918  {
1919  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
1920 
1921  constr->has_not_null = true;
1922  relation->rd_att->constr = constr;
1923  }
1924 
1925  /*
1926  * initialize relation id from info in att array (my, this is ugly)
1927  */
1928  RelationGetRelid(relation) = TupleDescAttr(relation->rd_att, 0)->attrelid;
1929 
1930  /*
1931  * All relations made with formrdesc are mapped. This is necessarily so
1932  * because there is no other way to know what filenode they currently
1933  * have. In bootstrap mode, add them to the initial relation mapper data,
1934  * specifying that the initial filenode is the same as the OID.
1935  */
1936  relation->rd_rel->relfilenode = InvalidOid;
1939  RelationGetRelid(relation),
1940  isshared, true);
1941 
1942  /*
1943  * initialize the relation lock manager information
1944  */
1945  RelationInitLockInfo(relation); /* see lmgr.c */
1946 
1947  /*
1948  * initialize physical addressing information for the relation
1949  */
1950  RelationInitPhysicalAddr(relation);
1951 
1952  /*
1953  * initialize the table am handler
1954  */
1955  relation->rd_rel->relam = HEAP_TABLE_AM_OID;
1956  relation->rd_tableam = GetHeapamTableAmRoutine();
1957 
1958  /*
1959  * initialize the rel-has-index flag, using hardwired knowledge
1960  */
1962  {
1963  /* In bootstrap mode, we have no indexes */
1964  relation->rd_rel->relhasindex = false;
1965  }
1966  else
1967  {
1968  /* Otherwise, all the rels formrdesc is used for have indexes */
1969  relation->rd_rel->relhasindex = true;
1970  }
1971 
1972  /*
1973  * add new reldesc to relcache
1974  */
1975  RelationCacheInsert(relation, false);
1976 
1977  /* It's fully valid */
1978  relation->rd_isvalid = true;
1979 }
1980 
1981 
1982 /* ----------------------------------------------------------------
1983  * Relation Descriptor Lookup Interface
1984  * ----------------------------------------------------------------
1985  */
1986 
1987 /*
1988  * RelationIdGetRelation
1989  *
1990  * Lookup a reldesc by OID; make one if not already in cache.
1991  *
1992  * Returns NULL if no pg_class row could be found for the given relid
1993  * (suggesting we are trying to access a just-deleted relation).
1994  * Any other error is reported via elog.
1995  *
1996  * NB: caller should already have at least AccessShareLock on the
1997  * relation ID, else there are nasty race conditions.
1998  *
1999  * NB: relation ref count is incremented, or set to 1 if new entry.
2000  * Caller should eventually decrement count. (Usually,
2001  * that happens by calling RelationClose().)
2002  */
2003 Relation
2005 {
2006  Relation rd;
2007 
2008  /* Make sure we're in an xact, even if this ends up being a cache hit */
2010 
2011  /*
2012  * first try to find reldesc in the cache
2013  */
2014  RelationIdCacheLookup(relationId, rd);
2015 
2016  if (RelationIsValid(rd))
2017  {
2018  /* return NULL for dropped relations */
2020  {
2021  Assert(!rd->rd_isvalid);
2022  return NULL;
2023  }
2024 
2026  /* revalidate cache entry if necessary */
2027  if (!rd->rd_isvalid)
2028  {
2029  /*
2030  * Indexes only have a limited number of possible schema changes,
2031  * and we don't want to use the full-blown procedure because it's
2032  * a headache for indexes that reload itself depends on.
2033  */
2034  if (rd->rd_rel->relkind == RELKIND_INDEX ||
2035  rd->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
2037  else
2038  RelationClearRelation(rd, true);
2039 
2040  /*
2041  * Normally entries need to be valid here, but before the relcache
2042  * has been initialized, not enough infrastructure exists to
2043  * perform pg_class lookups. The structure of such entries doesn't
2044  * change, but we still want to update the rd_rel entry. So
2045  * rd_isvalid = false is left in place for a later lookup.
2046  */
2047  Assert(rd->rd_isvalid ||
2049  }
2050  return rd;
2051  }
2052 
2053  /*
2054  * no reldesc in the cache, so have RelationBuildDesc() build one and add
2055  * it.
2056  */
2057  rd = RelationBuildDesc(relationId, true);
2058  if (RelationIsValid(rd))
2060  return rd;
2061 }
2062 
2063 /* ----------------------------------------------------------------
2064  * cache invalidation support routines
2065  * ----------------------------------------------------------------
2066  */
2067 
2068 /*
2069  * RelationIncrementReferenceCount
2070  * Increments relation reference count.
2071  *
2072  * Note: bootstrap mode has its own weird ideas about relation refcount
2073  * behavior; we ought to fix it someday, but for now, just disable
2074  * reference count ownership tracking in bootstrap mode.
2075  */
2076 void
2078 {
2080  rel->rd_refcnt += 1;
2083 }
2084 
2085 /*
2086  * RelationDecrementReferenceCount
2087  * Decrements relation reference count.
2088  */
2089 void
2091 {
2092  Assert(rel->rd_refcnt > 0);
2093  rel->rd_refcnt -= 1;
2096 }
2097 
2098 /*
2099  * RelationClose - close an open relation
2100  *
2101  * Actually, we just decrement the refcount.
2102  *
2103  * NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
2104  * will be freed as soon as their refcount goes to zero. In combination
2105  * with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
2106  * to catch references to already-released relcache entries. It slows
2107  * things down quite a bit, however.
2108  */
2109 void
2111 {
2112  /* Note: no locking manipulations needed */
2114 
2115  /*
2116  * If the relation is no longer open in this session, we can clean up any
2117  * stale partition descriptors it has. This is unlikely, so check to see
2118  * if there are child contexts before expending a call to mcxt.c.
2119  */
2120  if (RelationHasReferenceCountZero(relation) &&
2121  relation->rd_pdcxt != NULL &&
2122  relation->rd_pdcxt->firstchild != NULL)
2124 
2125 #ifdef RELCACHE_FORCE_RELEASE
2126  if (RelationHasReferenceCountZero(relation) &&
2127  relation->rd_createSubid == InvalidSubTransactionId &&
2129  RelationClearRelation(relation, false);
2130 #endif
2131 }
2132 
2133 /*
2134  * RelationReloadIndexInfo - reload minimal information for an open index
2135  *
2136  * This function is used only for indexes. A relcache inval on an index
2137  * can mean that its pg_class or pg_index row changed. There are only
2138  * very limited changes that are allowed to an existing index's schema,
2139  * so we can update the relcache entry without a complete rebuild; which
2140  * is fortunate because we can't rebuild an index entry that is "nailed"
2141  * and/or in active use. We support full replacement of the pg_class row,
2142  * as well as updates of a few simple fields of the pg_index row.
2143  *
2144  * We can't necessarily reread the catalog rows right away; we might be
2145  * in a failed transaction when we receive the SI notification. If so,
2146  * RelationClearRelation just marks the entry as invalid by setting
2147  * rd_isvalid to false. This routine is called to fix the entry when it
2148  * is next needed.
2149  *
2150  * We assume that at the time we are called, we have at least AccessShareLock
2151  * on the target index. (Note: in the calls from RelationClearRelation,
2152  * this is legitimate because we know the rel has positive refcount.)
2153  *
2154  * If the target index is an index on pg_class or pg_index, we'd better have
2155  * previously gotten at least AccessShareLock on its underlying catalog,
2156  * else we are at risk of deadlock against someone trying to exclusive-lock
2157  * the heap and index in that order. This is ensured in current usage by
2158  * only applying this to indexes being opened or having positive refcount.
2159  */
2160 static void
2162 {
2163  bool indexOK;
2164  HeapTuple pg_class_tuple;
2165  Form_pg_class relp;
2166 
2167  /* Should be called only for invalidated, live indexes */
2168  Assert((relation->rd_rel->relkind == RELKIND_INDEX ||
2169  relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) &&
2170  !relation->rd_isvalid &&
2172 
2173  /* Ensure it's closed at smgr level */
2174  RelationCloseSmgr(relation);
2175 
2176  /* Must free any AM cached data upon relcache flush */
2177  if (relation->rd_amcache)
2178  pfree(relation->rd_amcache);
2179  relation->rd_amcache = NULL;
2180 
2181  /*
2182  * If it's a shared index, we might be called before backend startup has
2183  * finished selecting a database, in which case we have no way to read
2184  * pg_class yet. However, a shared index can never have any significant
2185  * schema updates, so it's okay to ignore the invalidation signal. Just
2186  * mark it valid and return without doing anything more.
2187  */
2188  if (relation->rd_rel->relisshared && !criticalRelcachesBuilt)
2189  {
2190  relation->rd_isvalid = true;
2191  return;
2192  }
2193 
2194  /*
2195  * Read the pg_class row
2196  *
2197  * Don't try to use an indexscan of pg_class_oid_index to reload the info
2198  * for pg_class_oid_index ...
2199  */
2200  indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
2201  pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK, false);
2202  if (!HeapTupleIsValid(pg_class_tuple))
2203  elog(ERROR, "could not find pg_class tuple for index %u",
2204  RelationGetRelid(relation));
2205  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2206  memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2207  /* Reload reloptions in case they changed */
2208  if (relation->rd_options)
2209  pfree(relation->rd_options);
2210  RelationParseRelOptions(relation, pg_class_tuple);
2211  /* done with pg_class tuple */
2212  heap_freetuple(pg_class_tuple);
2213  /* We must recalculate physical address in case it changed */
2214  RelationInitPhysicalAddr(relation);
2215 
2216  /*
2217  * For a non-system index, there are fields of the pg_index row that are
2218  * allowed to change, so re-read that row and update the relcache entry.
2219  * Most of the info derived from pg_index (such as support function lookup
2220  * info) cannot change, and indeed the whole point of this routine is to
2221  * update the relcache entry without clobbering that data; so wholesale
2222  * replacement is not appropriate.
2223  */
2224  if (!IsSystemRelation(relation))
2225  {
2226  HeapTuple tuple;
2228 
2229  tuple = SearchSysCache1(INDEXRELID,
2230  ObjectIdGetDatum(RelationGetRelid(relation)));
2231  if (!HeapTupleIsValid(tuple))
2232  elog(ERROR, "cache lookup failed for index %u",
2233  RelationGetRelid(relation));
2234  index = (Form_pg_index) GETSTRUCT(tuple);
2235 
2236  /*
2237  * Basically, let's just copy all the bool fields. There are one or
2238  * two of these that can't actually change in the current code, but
2239  * it's not worth it to track exactly which ones they are. None of
2240  * the array fields are allowed to change, though.
2241  */
2242  relation->rd_index->indisunique = index->indisunique;
2243  relation->rd_index->indisprimary = index->indisprimary;
2244  relation->rd_index->indisexclusion = index->indisexclusion;
2245  relation->rd_index->indimmediate = index->indimmediate;
2246  relation->rd_index->indisclustered = index->indisclustered;
2247  relation->rd_index->indisvalid = index->indisvalid;
2248  relation->rd_index->indcheckxmin = index->indcheckxmin;
2249  relation->rd_index->indisready = index->indisready;
2250  relation->rd_index->indislive = index->indislive;
2251 
2252  /* Copy xmin too, as that is needed to make sense of indcheckxmin */
2254  HeapTupleHeaderGetXmin(tuple->t_data));
2255 
2256  ReleaseSysCache(tuple);
2257  }
2258 
2259  /* Okay, now it's valid again */
2260  relation->rd_isvalid = true;
2261 }
2262 
2263 /*
2264  * RelationReloadNailed - reload minimal information for nailed relations.
2265  *
2266  * The structure of a nailed relation can never change (which is good, because
2267  * we rely on knowing their structure to be able to read catalog content). But
2268  * some parts, e.g. pg_class.relfrozenxid, are still important to have
2269  * accurate content for. Therefore those need to be reloaded after the arrival
2270  * of invalidations.
2271  */
2272 static void
2274 {
2275  Assert(relation->rd_isnailed);
2276 
2277  /*
2278  * Redo RelationInitPhysicalAddr in case it is a mapped relation whose
2279  * mapping changed.
2280  */
2281  RelationInitPhysicalAddr(relation);
2282 
2283  /* flag as needing to be revalidated */
2284  relation->rd_isvalid = false;
2285 
2286  /*
2287  * Can only reread catalog contents if in a transaction. If the relation
2288  * is currently open (not counting the nailed refcount), do so
2289  * immediately. Otherwise we've already marked the entry as possibly
2290  * invalid, and it'll be fixed when next opened.
2291  */
2292  if (!IsTransactionState() || relation->rd_refcnt <= 1)
2293  return;
2294 
2295  if (relation->rd_rel->relkind == RELKIND_INDEX)
2296  {
2297  /*
2298  * If it's a nailed-but-not-mapped index, then we need to re-read the
2299  * pg_class row to see if its relfilenode changed.
2300  */
2301  RelationReloadIndexInfo(relation);
2302  }
2303  else
2304  {
2305  /*
2306  * Reload a non-index entry. We can't easily do so if relcaches
2307  * aren't yet built, but that's fine because at that stage the
2308  * attributes that need to be current (like relfrozenxid) aren't yet
2309  * accessed. To ensure the entry will later be revalidated, we leave
2310  * it in invalid state, but allow use (cf. RelationIdGetRelation()).
2311  */
2313  {
2314  HeapTuple pg_class_tuple;
2315  Form_pg_class relp;
2316 
2317  /*
2318  * NB: Mark the entry as valid before starting to scan, to avoid
2319  * self-recursion when re-building pg_class.
2320  */
2321  relation->rd_isvalid = true;
2322 
2323  pg_class_tuple = ScanPgRelation(RelationGetRelid(relation),
2324  true, false);
2325  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2326  memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2327  heap_freetuple(pg_class_tuple);
2328 
2329  /*
2330  * Again mark as valid, to protect against concurrently arriving
2331  * invalidations.
2332  */
2333  relation->rd_isvalid = true;
2334  }
2335  }
2336 }
2337 
2338 /*
2339  * RelationDestroyRelation
2340  *
2341  * Physically delete a relation cache entry and all subsidiary data.
2342  * Caller must already have unhooked the entry from the hash table.
2343  */
2344 static void
2345 RelationDestroyRelation(Relation relation, bool remember_tupdesc)
2346 {
2348 
2349  /*
2350  * Make sure smgr and lower levels close the relation's files, if they
2351  * weren't closed already. (This was probably done by caller, but let's
2352  * just be real sure.)
2353  */
2354  RelationCloseSmgr(relation);
2355 
2356  /*
2357  * Free all the subsidiary data structures of the relcache entry, then the
2358  * entry itself.
2359  */
2360  if (relation->rd_rel)
2361  pfree(relation->rd_rel);
2362  /* can't use DecrTupleDescRefCount here */
2363  Assert(relation->rd_att->tdrefcount > 0);
2364  if (--relation->rd_att->tdrefcount == 0)
2365  {
2366  /*
2367  * If we Rebuilt a relcache entry during a transaction then its
2368  * possible we did that because the TupDesc changed as the result of
2369  * an ALTER TABLE that ran at less than AccessExclusiveLock. It's
2370  * possible someone copied that TupDesc, in which case the copy would
2371  * point to free'd memory. So if we rebuild an entry we keep the
2372  * TupDesc around until end of transaction, to be safe.
2373  */
2374  if (remember_tupdesc)
2376  else
2377  FreeTupleDesc(relation->rd_att);
2378  }
2379  FreeTriggerDesc(relation->trigdesc);
2380  list_free_deep(relation->rd_fkeylist);
2381  list_free(relation->rd_indexlist);
2382  bms_free(relation->rd_indexattr);
2383  bms_free(relation->rd_keyattr);
2384  bms_free(relation->rd_pkattr);
2385  bms_free(relation->rd_idattr);
2386  if (relation->rd_pubactions)
2387  pfree(relation->rd_pubactions);
2388  if (relation->rd_options)
2389  pfree(relation->rd_options);
2390  if (relation->rd_indextuple)
2391  pfree(relation->rd_indextuple);
2392  if (relation->rd_amcache)
2393  pfree(relation->rd_amcache);
2394  if (relation->rd_fdwroutine)
2395  pfree(relation->rd_fdwroutine);
2396  if (relation->rd_indexcxt)
2397  MemoryContextDelete(relation->rd_indexcxt);
2398  if (relation->rd_rulescxt)
2399  MemoryContextDelete(relation->rd_rulescxt);
2400  if (relation->rd_rsdesc)
2401  MemoryContextDelete(relation->rd_rsdesc->rscxt);
2402  if (relation->rd_partkeycxt)
2404  if (relation->rd_pdcxt)
2405  MemoryContextDelete(relation->rd_pdcxt);
2406  if (relation->rd_partcheckcxt)
2408  pfree(relation);
2409 }
2410 
2411 /*
2412  * RelationClearRelation
2413  *
2414  * Physically blow away a relation cache entry, or reset it and rebuild
2415  * it from scratch (that is, from catalog entries). The latter path is
2416  * used when we are notified of a change to an open relation (one with
2417  * refcount > 0).
2418  *
2419  * NB: when rebuilding, we'd better hold some lock on the relation,
2420  * else the catalog data we need to read could be changing under us.
2421  * Also, a rel to be rebuilt had better have refcnt > 0. This is because
2422  * a sinval reset could happen while we're accessing the catalogs, and
2423  * the rel would get blown away underneath us by RelationCacheInvalidate
2424  * if it has zero refcnt.
2425  *
2426  * The "rebuild" parameter is redundant in current usage because it has
2427  * to match the relation's refcnt status, but we keep it as a crosscheck
2428  * that we're doing what the caller expects.
2429  */
2430 static void
2431 RelationClearRelation(Relation relation, bool rebuild)
2432 {
2433  /*
2434  * As per notes above, a rel to be rebuilt MUST have refcnt > 0; while of
2435  * course it would be an equally bad idea to blow away one with nonzero
2436  * refcnt, since that would leave someone somewhere with a dangling
2437  * pointer. All callers are expected to have verified that this holds.
2438  */
2439  Assert(rebuild ?
2440  !RelationHasReferenceCountZero(relation) :
2441  RelationHasReferenceCountZero(relation));
2442 
2443  /*
2444  * Make sure smgr and lower levels close the relation's files, if they
2445  * weren't closed already. If the relation is not getting deleted, the
2446  * next smgr access should reopen the files automatically. This ensures
2447  * that the low-level file access state is updated after, say, a vacuum
2448  * truncation.
2449  */
2450  RelationCloseSmgr(relation);
2451 
2452  /* Free AM cached data, if any */
2453  if (relation->rd_amcache)
2454  pfree(relation->rd_amcache);
2455  relation->rd_amcache = NULL;
2456 
2457  /*
2458  * Treat nailed-in system relations separately, they always need to be
2459  * accessible, so we can't blow them away.
2460  */
2461  if (relation->rd_isnailed)
2462  {
2463  RelationReloadNailed(relation);
2464  return;
2465  }
2466 
2467  /* Mark it invalid until we've finished rebuild */
2468  relation->rd_isvalid = false;
2469 
2470  /* See RelationForgetRelation(). */
2471  if (relation->rd_droppedSubid != InvalidSubTransactionId)
2472  return;
2473 
2474  /*
2475  * Even non-system indexes should not be blown away if they are open and
2476  * have valid index support information. This avoids problems with active
2477  * use of the index support information. As with nailed indexes, we
2478  * re-read the pg_class row to handle possible physical relocation of the
2479  * index, and we check for pg_index updates too.
2480  */
2481  if ((relation->rd_rel->relkind == RELKIND_INDEX ||
2482  relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) &&
2483  relation->rd_refcnt > 0 &&
2484  relation->rd_indexcxt != NULL)
2485  {
2486  if (IsTransactionState())
2487  RelationReloadIndexInfo(relation);
2488  return;
2489  }
2490 
2491  /*
2492  * If we're really done with the relcache entry, blow it away. But if
2493  * someone is still using it, reconstruct the whole deal without moving
2494  * the physical RelationData record (so that the someone's pointer is
2495  * still valid).
2496  */
2497  if (!rebuild)
2498  {
2499  /* Remove it from the hash table */
2500  RelationCacheDelete(relation);
2501 
2502  /* And release storage */
2503  RelationDestroyRelation(relation, false);
2504  }
2505  else if (!IsTransactionState())
2506  {
2507  /*
2508  * If we're not inside a valid transaction, we can't do any catalog
2509  * access so it's not possible to rebuild yet. Just exit, leaving
2510  * rd_isvalid = false so that the rebuild will occur when the entry is
2511  * next opened.
2512  *
2513  * Note: it's possible that we come here during subtransaction abort,
2514  * and the reason for wanting to rebuild is that the rel is open in
2515  * the outer transaction. In that case it might seem unsafe to not
2516  * rebuild immediately, since whatever code has the rel already open
2517  * will keep on using the relcache entry as-is. However, in such a
2518  * case the outer transaction should be holding a lock that's
2519  * sufficient to prevent any significant change in the rel's schema,
2520  * so the existing entry contents should be good enough for its
2521  * purposes; at worst we might be behind on statistics updates or the
2522  * like. (See also CheckTableNotInUse() and its callers.) These same
2523  * remarks also apply to the cases above where we exit without having
2524  * done RelationReloadIndexInfo() yet.
2525  */
2526  return;
2527  }
2528  else
2529  {
2530  /*
2531  * Our strategy for rebuilding an open relcache entry is to build a
2532  * new entry from scratch, swap its contents with the old entry, and
2533  * finally delete the new entry (along with any infrastructure swapped
2534  * over from the old entry). This is to avoid trouble in case an
2535  * error causes us to lose control partway through. The old entry
2536  * will still be marked !rd_isvalid, so we'll try to rebuild it again
2537  * on next access. Meanwhile it's not any less valid than it was
2538  * before, so any code that might expect to continue accessing it
2539  * isn't hurt by the rebuild failure. (Consider for example a
2540  * subtransaction that ALTERs a table and then gets canceled partway
2541  * through the cache entry rebuild. The outer transaction should
2542  * still see the not-modified cache entry as valid.) The worst
2543  * consequence of an error is leaking the necessarily-unreferenced new
2544  * entry, and this shouldn't happen often enough for that to be a big
2545  * problem.
2546  *
2547  * When rebuilding an open relcache entry, we must preserve ref count,
2548  * rd_*Subid, and rd_toastoid state. Also attempt to preserve the
2549  * pg_class entry (rd_rel), tupledesc, rewrite-rule, partition key,
2550  * and partition descriptor substructures in place, because various
2551  * places assume that these structures won't move while they are
2552  * working with an open relcache entry. (Note: the refcount
2553  * mechanism for tupledescs might someday allow us to remove this hack
2554  * for the tupledesc.)
2555  *
2556  * Note that this process does not touch CurrentResourceOwner; which
2557  * is good because whatever ref counts the entry may have do not
2558  * necessarily belong to that resource owner.
2559  */
2560  Relation newrel;
2561  Oid save_relid = RelationGetRelid(relation);
2562  bool keep_tupdesc;
2563  bool keep_rules;
2564  bool keep_policies;
2565  bool keep_partkey;
2566 
2567  /* Build temporary entry, but don't link it into hashtable */
2568  newrel = RelationBuildDesc(save_relid, false);
2569  if (newrel == NULL)
2570  {
2571  /*
2572  * We can validly get here, if we're using a historic snapshot in
2573  * which a relation, accessed from outside logical decoding, is
2574  * still invisible. In that case it's fine to just mark the
2575  * relation as invalid and return - it'll fully get reloaded by
2576  * the cache reset at the end of logical decoding (or at the next
2577  * access). During normal processing we don't want to ignore this
2578  * case as it shouldn't happen there, as explained below.
2579  */
2580  if (HistoricSnapshotActive())
2581  return;
2582 
2583  /*
2584  * This shouldn't happen as dropping a relation is intended to be
2585  * impossible if still referenced (cf. CheckTableNotInUse()). But
2586  * if we get here anyway, we can't just delete the relcache entry,
2587  * as it possibly could get accessed later (as e.g. the error
2588  * might get trapped and handled via a subtransaction rollback).
2589  */
2590  elog(ERROR, "relation %u deleted while still in use", save_relid);
2591  }
2592 
2593  keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
2594  keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
2595  keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc);
2596  /* partkey is immutable once set up, so we can always keep it */
2597  keep_partkey = (relation->rd_partkey != NULL);
2598 
2599  /*
2600  * Perform swapping of the relcache entry contents. Within this
2601  * process the old entry is momentarily invalid, so there *must* be no
2602  * possibility of CHECK_FOR_INTERRUPTS within this sequence. Do it in
2603  * all-in-line code for safety.
2604  *
2605  * Since the vast majority of fields should be swapped, our method is
2606  * to swap the whole structures and then re-swap those few fields we
2607  * didn't want swapped.
2608  */
2609 #define SWAPFIELD(fldtype, fldname) \
2610  do { \
2611  fldtype _tmp = newrel->fldname; \
2612  newrel->fldname = relation->fldname; \
2613  relation->fldname = _tmp; \
2614  } while (0)
2615 
2616  /* swap all Relation struct fields */
2617  {
2618  RelationData tmpstruct;
2619 
2620  memcpy(&tmpstruct, newrel, sizeof(RelationData));
2621  memcpy(newrel, relation, sizeof(RelationData));
2622  memcpy(relation, &tmpstruct, sizeof(RelationData));
2623  }
2624 
2625  /* rd_smgr must not be swapped, due to back-links from smgr level */
2626  SWAPFIELD(SMgrRelation, rd_smgr);
2627  /* rd_refcnt must be preserved */
2628  SWAPFIELD(int, rd_refcnt);
2629  /* isnailed shouldn't change */
2630  Assert(newrel->rd_isnailed == relation->rd_isnailed);
2631  /* creation sub-XIDs must be preserved */
2632  SWAPFIELD(SubTransactionId, rd_createSubid);
2633  SWAPFIELD(SubTransactionId, rd_newRelfilenodeSubid);
2634  SWAPFIELD(SubTransactionId, rd_firstRelfilenodeSubid);
2635  SWAPFIELD(SubTransactionId, rd_droppedSubid);
2636  /* un-swap rd_rel pointers, swap contents instead */
2637  SWAPFIELD(Form_pg_class, rd_rel);
2638  /* ... but actually, we don't have to update newrel->rd_rel */
2639  memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
2640  /* preserve old tupledesc, rules, policies if no logical change */
2641  if (keep_tupdesc)
2642  SWAPFIELD(TupleDesc, rd_att);
2643  if (keep_rules)
2644  {
2645  SWAPFIELD(RuleLock *, rd_rules);
2646  SWAPFIELD(MemoryContext, rd_rulescxt);
2647  }
2648  if (keep_policies)
2649  SWAPFIELD(RowSecurityDesc *, rd_rsdesc);
2650  /* toast OID override must be preserved */
2651  SWAPFIELD(Oid, rd_toastoid);
2652  /* pgstat_info must be preserved */
2653  SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
2654  /* preserve old partition key if we have one */
2655  if (keep_partkey)
2656  {
2657  SWAPFIELD(PartitionKey, rd_partkey);
2658  SWAPFIELD(MemoryContext, rd_partkeycxt);
2659  }
2660  if (newrel->rd_pdcxt != NULL)
2661  {
2662  /*
2663  * We are rebuilding a partitioned relation with a non-zero
2664  * reference count, so we must keep the old partition descriptor
2665  * around, in case there's a PartitionDirectory with a pointer to
2666  * it. This means we can't free the old rd_pdcxt yet. (This is
2667  * necessary because RelationGetPartitionDesc hands out direct
2668  * pointers to the relcache's data structure, unlike our usual
2669  * practice which is to hand out copies. We'd have the same
2670  * problem with rd_partkey, except that we always preserve that
2671  * once created.)
2672  *
2673  * To ensure that it's not leaked completely, re-attach it to the
2674  * new reldesc, or make it a child of the new reldesc's rd_pdcxt
2675  * in the unlikely event that there is one already. (Compare hack
2676  * in RelationBuildPartitionDesc.) RelationClose will clean up
2677  * any such contexts once the reference count reaches zero.
2678  *
2679  * In the case where the reference count is zero, this code is not
2680  * reached, which should be OK because in that case there should
2681  * be no PartitionDirectory with a pointer to the old entry.
2682  *
2683  * Note that newrel and relation have already been swapped, so the
2684  * "old" partition descriptor is actually the one hanging off of
2685  * newrel.
2686  */
2687  relation->rd_partdesc = NULL; /* ensure rd_partdesc is invalid */
2688  if (relation->rd_pdcxt != NULL) /* probably never happens */
2689  MemoryContextSetParent(newrel->rd_pdcxt, relation->rd_pdcxt);
2690  else
2691  relation->rd_pdcxt = newrel->rd_pdcxt;
2692  /* drop newrel's pointers so we don't destroy it below */
2693  newrel->rd_partdesc = NULL;
2694  newrel->rd_pdcxt = NULL;
2695  }
2696 
2697 #undef SWAPFIELD
2698 
2699  /* And now we can throw away the temporary entry */
2700  RelationDestroyRelation(newrel, !keep_tupdesc);
2701  }
2702 }
2703 
2704 /*
2705  * RelationFlushRelation
2706  *
2707  * Rebuild the relation if it is open (refcount > 0), else blow it away.
2708  * This is used when we receive a cache invalidation event for the rel.
2709  */
2710 static void
2712 {
2713  if (relation->rd_createSubid != InvalidSubTransactionId ||
2715  {
2716  /*
2717  * New relcache entries are always rebuilt, not flushed; else we'd
2718  * forget the "new" status of the relation. Ditto for the
2719  * new-relfilenode status.
2720  *
2721  * The rel could have zero refcnt here, so temporarily increment the
2722  * refcnt to ensure it's safe to rebuild it. We can assume that the
2723  * current transaction has some lock on the rel already.
2724  */
2726  RelationClearRelation(relation, true);
2728  }
2729  else
2730  {
2731  /*
2732  * Pre-existing rels can be dropped from the relcache if not open.
2733  */
2734  bool rebuild = !RelationHasReferenceCountZero(relation);
2735 
2736  RelationClearRelation(relation, rebuild);
2737  }
2738 }
2739 
2740 /*
2741  * RelationForgetRelation - caller reports that it dropped the relation
2742  */
2743 void
2745 {
2746  Relation relation;
2747 
2748  RelationIdCacheLookup(rid, relation);
2749 
2750  if (!PointerIsValid(relation))
2751  return; /* not in cache, nothing to do */
2752 
2753  if (!RelationHasReferenceCountZero(relation))
2754  elog(ERROR, "relation %u is still open", rid);
2755 
2757  if (relation->rd_createSubid != InvalidSubTransactionId ||
2759  {
2760  /*
2761  * In the event of subtransaction rollback, we must not forget
2762  * rd_*Subid. Mark the entry "dropped" so RelationClearRelation()
2763  * invalidates it in lieu of destroying it. (If we're in a top
2764  * transaction, we could opt to destroy the entry.)
2765  */
2767  }
2768 
2769  RelationClearRelation(relation, false);
2770 }
2771 
2772 /*
2773  * RelationCacheInvalidateEntry
2774  *
2775  * This routine is invoked for SI cache flush messages.
2776  *
2777  * Any relcache entry matching the relid must be flushed. (Note: caller has
2778  * already determined that the relid belongs to our database or is a shared
2779  * relation.)
2780  *
2781  * We used to skip local relations, on the grounds that they could
2782  * not be targets of cross-backend SI update messages; but it seems
2783  * safer to process them, so that our *own* SI update messages will
2784  * have the same effects during CommandCounterIncrement for both
2785  * local and nonlocal relations.
2786  */
2787 void
2789 {
2790  Relation relation;
2791 
2792  RelationIdCacheLookup(relationId, relation);
2793 
2794  if (PointerIsValid(relation))
2795  {
2797  RelationFlushRelation(relation);
2798  }
2799 }
2800 
2801 /*
2802  * RelationCacheInvalidate
2803  * Blow away cached relation descriptors that have zero reference counts,
2804  * and rebuild those with positive reference counts. Also reset the smgr
2805  * relation cache and re-read relation mapping data.
2806  *
2807  * This is currently used only to recover from SI message buffer overflow,
2808  * so we do not touch relations having new-in-transaction relfilenodes; they
2809  * cannot be targets of cross-backend SI updates (and our own updates now go
2810  * through a separate linked list that isn't limited by the SI message
2811  * buffer size).
2812  *
2813  * We do this in two phases: the first pass deletes deletable items, and
2814  * the second one rebuilds the rebuildable items. This is essential for
2815  * safety, because hash_seq_search only copes with concurrent deletion of
2816  * the element it is currently visiting. If a second SI overflow were to
2817  * occur while we are walking the table, resulting in recursive entry to
2818  * this routine, we could crash because the inner invocation blows away
2819  * the entry next to be visited by the outer scan. But this way is OK,
2820  * because (a) during the first pass we won't process any more SI messages,
2821  * so hash_seq_search will complete safely; (b) during the second pass we
2822  * only hold onto pointers to nondeletable entries.
2823  *
2824  * The two-phase approach also makes it easy to update relfilenodes for
2825  * mapped relations before we do anything else, and to ensure that the
2826  * second pass processes nailed-in-cache items before other nondeletable
2827  * items. This should ensure that system catalogs are up to date before
2828  * we attempt to use them to reload information about other open relations.
2829  */
/*
 * Visible behaviour: walk every RelationIdCache entry; entries with a zero
 * reference count are cleared immediately, the rest are queued in one of two
 * lists and rebuilt after smgrcloseall().
 *
 * NOTE(review): this listing skips original lines 2831, 2833, 2843, 2862 and
 * 2865.  Presumably 2831 is the function name/parameter list and 2833 the
 * declaration of "status" (used below but never declared in the visible
 * text) -- confirm against the upstream file.
 */
2830 void
2832 {
2834  RelIdCacheEnt *idhentry;
2835  Relation relation;
2836  List *rebuildFirstList = NIL;
2837  List *rebuildList = NIL;
2838  ListCell *l;
2839 
2840  /*
2841  * Reload relation mapping data before starting to reconstruct cache.
2842  */
      /* NOTE(review): the statement this comment describes (line 2843) is absent here. */
2844 
2845  /* Phase 1 */
2846  hash_seq_init(&status, RelationIdCache);
2847 
2848  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2849  {
2850  relation = idhentry->reldesc;
2851 
2852  /* Must close all smgr references to avoid leaving dangling ptrs */
2853  RelationCloseSmgr(relation);
2854 
2855  /*
2856  * Ignore new relations; no other backend will manipulate them before
2857  * we commit. Likewise, before replacing a relation's relfilenode, we
2858  * shall have acquired AccessExclusiveLock and drained any applicable
2859  * pending invalidations.
2860  */
2861  if (relation->rd_createSubid != InvalidSubTransactionId ||
2863  continue;
      /* NOTE(review): the second operand of the || above (line 2862) is absent here. */
2864 
2866 
2867  if (RelationHasReferenceCountZero(relation))
2868  {
2869  /* Delete this entry immediately */
2870  Assert(!relation->rd_isnailed);
2871  RelationClearRelation(relation, false);
2872  }
2873  else
2874  {
2875  /*
2876  * If it's a mapped relation, immediately update its rd_node in
2877  * case its relfilenode changed. We must do this during phase 1
2878  * in case the relation is consulted during rebuild of other
2879  * relcache entries in phase 2. It's safe since consulting the
2880  * map doesn't involve any access to relcache entries.
2881  */
2882  if (RelationIsMapped(relation))
2883  RelationInitPhysicalAddr(relation);
2884 
2885  /*
2886  * Add this entry to list of stuff to rebuild in second pass.
2887  * pg_class goes to the front of rebuildFirstList while
2888  * pg_class_oid_index goes to the back of rebuildFirstList, so
2889  * they are done first and second respectively. Other nailed
2890  * relations go to the front of rebuildList, so they'll be done
2891  * next in no particular order; and everything else goes to the
2892  * back of rebuildList.
2893  */
2894  if (RelationGetRelid(relation) == RelationRelationId)
2895  rebuildFirstList = lcons(relation, rebuildFirstList);
2896  else if (RelationGetRelid(relation) == ClassOidIndexId)
2897  rebuildFirstList = lappend(rebuildFirstList, relation);
2898  else if (relation->rd_isnailed)
2899  rebuildList = lcons(relation, rebuildList);
2900  else
2901  rebuildList = lappend(rebuildList, relation);
2902  }
2903  }
2904 
2905  /*
2906  * Now zap any remaining smgr cache entries. This must happen before we
2907  * start to rebuild entries, since that may involve catalog fetches which
2908  * will re-open catalog files.
2909  */
2910  smgrcloseall();
2911 
2912  /* Phase 2: rebuild the items found to need rebuild in phase 1 */
      /* rebuildFirstList is processed in full before rebuildList, giving the order described above */
2913  foreach(l, rebuildFirstList)
2914  {
2915  relation = (Relation) lfirst(l);
2916  RelationClearRelation(relation, true);
2917  }
2918  list_free(rebuildFirstList);
2919  foreach(l, rebuildList)
2920  {
2921  relation = (Relation) lfirst(l);
2922  RelationClearRelation(relation, true);
2923  }
2924  list_free(rebuildList);
2925 }
2926 
2927 /*
2928  * RelationCloseSmgrByOid - close a relcache entry's smgr link
2929  *
2930  * Needed in some cases where we are changing a relation's physical mapping.
2931  * The link will be automatically reopened on next use.
2932  */
/*
 * NOTE(review): original line 2934 (presumably the function name and its
 * "relationId" parameter, which is used below) is absent from this listing.
 */
2933 void
2935 {
2936  Relation relation;
2937 
      /* Look the OID up in the relcache; "relation" is left invalid when there is no entry */
2938  RelationIdCacheLookup(relationId, relation);
2939 
2940  if (!PointerIsValid(relation))
2941  return; /* not in cache, nothing to do */
2942 
2943  RelationCloseSmgr(relation);
2944 }
2945 
/*
 * Append the tuple descriptor "td" to EOXactTupleDescArray.
 *
 * The array is allocated with room for 16 entries on first use and is
 * doubled by the second branch below.  AtEOXact_RelationCache later calls
 * FreeTupleDesc() on every stored entry and releases the array.
 *
 * NOTE(review): this listing skips original lines 2947, 2953, 2956-2957,
 * 2960 and 2964.  Presumably these hold the function name/parameter list,
 * the memory-context switch that sets "oldcxt", the initialisation of the
 * length/next-index counters, and the "array is full" test guarding the
 * doubling branch -- confirm against the upstream file.
 */
2946 static void
2948 {
2949  if (EOXactTupleDescArray == NULL)
2950  {
2951  MemoryContext oldcxt;
2952 
2954 
      /* first use: start with room for 16 descriptors */
2955  EOXactTupleDescArray = (TupleDesc *) palloc(16 * sizeof(TupleDesc));
2958  MemoryContextSwitchTo(oldcxt);
2959  }
2961  {
      /* grow geometrically so repeated appends stay cheap */
2962  int32 newlen = EOXactTupleDescArrayLen * 2;
2963 
2965 
2966  EOXactTupleDescArray = (TupleDesc *) repalloc(EOXactTupleDescArray,
2967  newlen * sizeof(TupleDesc));
2968  EOXactTupleDescArrayLen = newlen;
2969  }
2970 
2971  EOXactTupleDescArray[NextEOXactTupleDescNum++] = td;
2972 }
2973 
2974 #ifdef USE_ASSERT_CHECKING
/*
 * AssertPendingSyncConsistency
 *
 * For a single relcache entry, assert that the relcache's own verdict on
 * WAL skipping equals RelFileNodeSkippingWAL(rd_node), and that an entry
 * with a valid rd_droppedSubid is marked invalid.
 *
 * NOTE(review): original lines 2982 and 2989 are absent from this listing,
 * so both the "relcache_verdict" expression and the second Assert are
 * truncated here; the full conditions cannot be read from this text.
 */
2975 static void
2976 AssertPendingSyncConsistency(Relation relation)
2977 {
2978  bool relcache_verdict =
2979  relation->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT &&
2980  ((relation->rd_createSubid != InvalidSubTransactionId &&
2981  RELKIND_HAS_STORAGE(relation->rd_rel->relkind)) ||
2983 
2984  Assert(relcache_verdict == RelFileNodeSkippingWAL(relation->rd_node));
2985 
2986  if (relation->rd_droppedSubid != InvalidSubTransactionId)
2987  Assert(!relation->rd_isvalid &&
2988  (relation->rd_createSubid != InvalidSubTransactionId ||
2990 }
2991 
2992 /*
2993  * AssertPendingSyncs_RelationCache
2994  *
2995  * Assert that relcache.c and storage.c agree on whether to skip WAL.
2996  */
/*
 * NOTE(review): this listing skips original lines 2998, 3000, 3015, 3028 and
 * 3048.  Presumably these include the function name line, the declaration of
 * "status" (used below but not declared in the visible text), and the
 * lock-tag type being compared against -- confirm against the upstream file.
 */
2997 void
2999 {
3001  LOCALLOCK *locallock;
3002  Relation *rels;
3003  int maxrels;
3004  int nrels;
3005  RelIdCacheEnt *idhentry;
3006  int i;
3007 
3008  /*
3009  * Open every relation that this transaction has locked. If, for some
3010  * relation, storage.c is skipping WAL and relcache.c is not skipping WAL,
3011  * a CommandCounterIncrement() typically yields a local invalidation
3012  * message that destroys the relcache entry. By recreating such entries
3013  * here, we detect the problem.
3014  */
      /* rels[] remembers what we open here so it can be closed again at the end */
3016  maxrels = 1;
3017  rels = palloc(maxrels * sizeof(*rels));
3018  nrels = 0;
3019  hash_seq_init(&status, GetLockMethodLocalHash());
3020  while ((locallock = (LOCALLOCK *) hash_seq_search(&status)) != NULL)
3021  {
3022  Oid relid;
3023  Relation r;
3024 
      /* skip local lock entries that hold no locks */
3025  if (locallock->nLocks <= 0)
3026  continue;
3027  if ((LockTagType) locallock->tag.lock.locktag_type !=
3029  continue;
      /*
       * NOTE(review): the result of ObjectIdGetDatum() is stored in an Oid
       * here; a plain assignment or cast looks like what was intended --
       * confirm.
       */
3030  relid = ObjectIdGetDatum(locallock->tag.lock.locktag_field2);
3031  r = RelationIdGetRelation(relid);
3032  if (!RelationIsValid(r))
3033  continue;
      /* double the array when it is full */
3034  if (nrels >= maxrels)
3035  {
3036  maxrels *= 2;
3037  rels = repalloc(rels, maxrels * sizeof(*rels));
3038  }
3039  rels[nrels++] = r;
3040  }
3041 
      /* check every relcache entry, not only the ones opened above */
3042  hash_seq_init(&status, RelationIdCache);
3043  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3044  AssertPendingSyncConsistency(idhentry->reldesc);
3045 
3046  for (i = 0; i < nrels; i++)
3047  RelationClose(rels[i]);
3049 }
3050 #endif
3051 
3052 /*
3053  * AtEOXact_RelationCache
3054  *
3055  * Clean up the relcache at main-transaction commit or abort.
3056  *
3057  * Note: this must be called *before* processing invalidation messages.
3058  * In the case of abort, we don't want to try to rebuild any invalidated
3059  * cache entries (since we can't safely do database accesses). Therefore
3060  * we must reset refcnts before handling pending invalidations.
3061  *
3062  * As of PostgreSQL 8.1, relcache refcnts should get released by the
3063  * ResourceOwner mechanism. This routine just does a debugging
3064  * cross-check that no pins remain. However, we also need to do special
3065  * cleanup when the current transaction created any relations or made use
3066  * of forced index lists.
3067  */
/*
 * NOTE(review): this listing skips original lines 3069, 3071, 3086 and
 * 3119-3120.  Presumably these hold the function name with its "isCommit"
 * parameter, the declaration of "status", the "if" test that selects the
 * full hash scan (the comment below says it is taken when eoxact_list[]
 * overflowed), and further resets at the end -- confirm against the
 * upstream file.
 */
3068 void
3070 {
3072  RelIdCacheEnt *idhentry;
3073  int i;
3074 
3075  /*
3076  * Unless the eoxact_list[] overflowed, we only need to examine the rels
3077  * listed in it. Otherwise fall back on a hash_seq_search scan.
3078  *
3079  * For simplicity, eoxact_list[] entries are not deleted till end of
3080  * top-level transaction, even though we could remove them at
3081  * subtransaction end in some cases, or remove relations from the list if
3082  * they are cleared for other reasons. Therefore we should expect the
3083  * case that list entries are not found in the hashtable; if not, there's
3084  * nothing to do for them.
3085  */
3087  {
3088  hash_seq_init(&status, RelationIdCache);
3089  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3090  {
3091  AtEOXact_cleanup(idhentry->reldesc, isCommit);
3092  }
3093  }
3094  else
3095  {
3096  for (i = 0; i < eoxact_list_len; i++)
3097  {
      /* a listed OID may no longer have a cache entry; such misses are ignored */
3098  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
3099  (void *) &eoxact_list[i],
3100  HASH_FIND,
3101  NULL);
3102  if (idhentry != NULL)
3103  AtEOXact_cleanup(idhentry->reldesc, isCommit);
3104  }
3105  }
3106 
      /* Release every tuple descriptor queued for end-of-transaction freeing, then the array itself */
3107  if (EOXactTupleDescArrayLen > 0)
3108  {
3109  Assert(EOXactTupleDescArray != NULL);
3110  for (i = 0; i < NextEOXactTupleDescNum; i++)
3111  FreeTupleDesc(EOXactTupleDescArray[i]);
3112  pfree(EOXactTupleDescArray);
3113  EOXactTupleDescArray = NULL;
3114  }
3115 
3116  /* Now we're out of the transaction and can clear the lists */
3117  eoxact_list_len = 0;
3118  eoxact_list_overflowed = false;
3121 }
3122 
3123 /*
3124  * AtEOXact_cleanup
3125  *
3126  * Clean up a single rel at main-transaction commit or abort
3127  *
3128  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
3129  * bother to prevent duplicate entries in eoxact_list[].
3130  */
/*
 * NOTE(review): this listing skips original lines 3151, 3171-3172 and
 * 3178-3181.  The guard around the refcount assertion, both arms of the
 * "isCommit ?" expression, and the statements that reset the subids are
 * therefore not visible here; the surrounding comments are the only
 * description of them in this text.
 */
3131 static void
3132 AtEOXact_cleanup(Relation relation, bool isCommit)
3133 {
3134  bool clear_relcache = false;
3135 
3136  /*
3137  * The relcache entry's ref count should be back to its normal
3138  * not-in-a-transaction state: 0 unless it's nailed in cache.
3139  *
3140  * In bootstrap mode, this is NOT true, so don't check it --- the
3141  * bootstrap code expects relations to stay open across start/commit
3142  * transaction calls. (That seems bogus, but it's not worth fixing.)
3143  *
3144  * Note: ideally this check would be applied to every relcache entry, not
3145  * just those that have eoxact work to do. But it's not worth forcing a
3146  * scan of the whole relcache just for this. (Moreover, doing so would
3147  * mean that assert-enabled testing never tests the hash_search code path
3148  * above, which seems a bad idea.)
3149  */
3150 #ifdef USE_ASSERT_CHECKING
3152  {
3153  int expected_refcnt;
3154 
3155  expected_refcnt = relation->rd_isnailed ? 1 : 0;
3156  Assert(relation->rd_refcnt == expected_refcnt);
3157  }
3158 #endif
3159 
3160  /*
3161  * Is the relation live after this transaction ends?
3162  *
3163  * During commit, clear the relcache entry if it is preserved after
3164  * relation drop, in order not to orphan the entry. During rollback,
3165  * clear the relcache entry if the relation is created in the current
3166  * transaction since it isn't interesting any longer once we are out of
3167  * the transaction.
3168  */
3169  clear_relcache =
3170  (isCommit ?
3173 
3174  /*
3175  * Since we are now out of the transaction, reset the subids to zero. That
3176  * also lets RelationClearRelation() drop the relcache entry.
3177  */
3182 
3183  if (clear_relcache)
3184  {
      /* only an unreferenced entry can be removed outright */
3185  if (RelationHasReferenceCountZero(relation))
3186  {
3187  RelationClearRelation(relation, false);
3188  return;
3189  }
3190  else
3191  {
3192  /*
3193  * Hmm, somewhere there's a (leaked?) reference to the relation.
3194  * We daren't remove the entry for fear of dereferencing a
3195  * dangling pointer later. Bleat, and mark it as not belonging to
3196  * the current transaction. Hopefully it'll get cleaned up
3197  * eventually. This must be just a WARNING to avoid
3198  * error-during-error-recovery loops.
3199  */
3200  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3201  RelationGetRelationName(relation));
3202  }
3203  }
3204 }
3205 
3206 /*
3207  * AtEOSubXact_RelationCache
3208  *
3209  * Clean up the relcache at sub-transaction commit or abort.
3210  *
3211  * Note: this must be called *before* processing invalidation messages.
3212  */
/*
 * NOTE(review): this listing skips original lines 3214, 3217 and 3226.
 * Presumably these hold the function name with its leading "isCommit" and
 * "mySubid" parameters (both used below), the declaration of "status", and
 * the "if" test that selects the full hash scan -- confirm against the
 * upstream file.
 */
3213 void
3215  SubTransactionId parentSubid)
3216 {
3218  RelIdCacheEnt *idhentry;
3219  int i;
3220 
3221  /*
3222  * Unless the eoxact_list[] overflowed, we only need to examine the rels
3223  * listed in it. Otherwise fall back on a hash_seq_search scan. Same
3224  * logic as in AtEOXact_RelationCache.
3225  */
3227  {
3228  hash_seq_init(&status, RelationIdCache);
3229  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3230  {
3231  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3232  mySubid, parentSubid);
3233  }
3234  }
3235  else
3236  {
3237  for (i = 0; i < eoxact_list_len; i++)
3238  {
      /* a listed OID may no longer have a cache entry; such misses are ignored */
3239  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
3240  (void *) &eoxact_list[i],
3241  HASH_FIND,
3242  NULL);
3243  if (idhentry != NULL)
3244  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3245  mySubid, parentSubid);
3246  }
3247  }
3248 
3249  /* Don't reset the list; we still need more cleanup later */
3250 }
3251 
3252 /*
3253  * AtEOSubXact_cleanup
3254  *
3255  * Clean up a single rel at subtransaction commit or abort
3256  *
3257  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
3258  * bother to prevent duplicate entries in eoxact_list[].
3259  */
/*
 * NOTE(review): this listing skips original lines 3279, 3285-3288, 3316,
 * 3324 and 3332.  The tail of the Assert, the statements executed before
 * RelationClearRelation(), and the bodies of the three trailing "else"
 * branches are therefore not visible in this text.
 */
3260 static void
3261 AtEOSubXact_cleanup(Relation relation, bool isCommit,
3262  SubTransactionId mySubid, SubTransactionId parentSubid)
3263 {
3264  /*
3265  * Is it a relation created in the current subtransaction?
3266  *
3267  * During subcommit, mark it as belonging to the parent, instead, as long
3268  * as it has not been dropped. Otherwise simply delete the relcache entry
3269  * --- it isn't interesting any longer.
3270  */
3271  if (relation->rd_createSubid == mySubid)
3272  {
3273  /*
3274  * Valid rd_droppedSubid means the corresponding relation is dropped
3275  * but the relcache entry is preserved for at-commit pending sync. We
3276  * need to drop it explicitly here so as not to orphan the entry.
3277  */
3278  Assert(relation->rd_droppedSubid == mySubid ||
3280  if (isCommit && relation->rd_droppedSubid == InvalidSubTransactionId)
3281  relation->rd_createSubid = parentSubid;
3282  else if (RelationHasReferenceCountZero(relation))
3283  {
3284  /* allow the entry to be removed */
3289  RelationClearRelation(relation, false);
3290  return;
3291  }
3292  else
3293  {
3294  /*
3295  * Hmm, somewhere there's a (leaked?) reference to the relation.
3296  * We daren't remove the entry for fear of dereferencing a
3297  * dangling pointer later. Bleat, and transfer it to the parent
3298  * subtransaction so we can try again later. This must be just a
3299  * WARNING to avoid error-during-error-recovery loops.
3300  */
3301  relation->rd_createSubid = parentSubid;
3302  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3303  RelationGetRelationName(relation));
3304  }
3305  }
3306 
3307  /*
3308  * Likewise, update or drop any new-relfilenode-in-subtransaction record
3309  * or drop record.
3310  */
      /* On subcommit each of the three subids below is handed to the parent subtransaction */
3311  if (relation->rd_newRelfilenodeSubid == mySubid)
3312  {
3313  if (isCommit)
3314  relation->rd_newRelfilenodeSubid = parentSubid;
3315  else
3317  }
3318 
3319  if (relation->rd_firstRelfilenodeSubid == mySubid)
3320  {
3321  if (isCommit)
3322  relation->rd_firstRelfilenodeSubid = parentSubid;
3323  else
3325  }
3326 
3327  if (relation->rd_droppedSubid == mySubid)
3328  {
3329  if (isCommit)
3330  relation->rd_droppedSubid = parentSubid;
3331  else
3333  }
3334 }
3335 
3336 
3337 /*
3338  * RelationBuildLocalRelation
3339  * Build a relcache entry for an about-to-be-created relation,
3340  * and enter it into the relcache.
3341  */
/*
 * Returns the newly built relcache entry ("rel"), which has been inserted
 * into the relcache hash table and registered for end-of-transaction
 * cleanup.
 *
 * NOTE(review): this listing skips original lines 3343, 3403, 3405,
 * 3421-3424, 3458, 3475, 3480, 3528, 3536 and 3566.  Among other things the
 * function name with its "relname" parameter, the memory-context switch that
 * sets "oldcxt", the allocation of rel->rd_rel, and the final pin of the
 * returned entry are therefore not visible in this text -- confirm against
 * the upstream file.
 */
3342 Relation
3344  Oid relnamespace,
3345  TupleDesc tupDesc,
3346  Oid relid,
3347  Oid accessmtd,
3348  Oid relfilenode,
3349  Oid reltablespace,
3350  bool shared_relation,
3351  bool mapped_relation,
3352  char relpersistence,
3353  char relkind)
3354 {
3355  Relation rel;
3356  MemoryContext oldcxt;
3357  int natts = tupDesc->natts;
3358  int i;
3359  bool has_not_null;
3360  bool nailit;
3361 
3362  AssertArg(natts >= 0);
3363 
3364  /*
3365  * check for creation of a rel that must be nailed in cache.
3366  *
3367  * XXX this list had better match the relations specially handled in
3368  * RelationCacheInitializePhase2/3.
3369  */
      /*
       * NOTE(review): RelationCacheInitializePhase2 in this file also builds
       * entries for pg_shseclabel and pg_subscription, which have no case
       * below -- confirm whether that difference is intentional.
       */
3370  switch (relid)
3371  {
3372  case DatabaseRelationId:
3373  case AuthIdRelationId:
3374  case AuthMemRelationId:
3375  case RelationRelationId:
3376  case AttributeRelationId:
3377  case ProcedureRelationId:
3378  case TypeRelationId:
3379  nailit = true;
3380  break;
3381  default:
3382  nailit = false;
3383  break;
3384  }
3385 
3386  /*
3387  * check that hardwired list of shared rels matches what's in the
3388  * bootstrap .bki file. If you get a failure here during initdb, you
3389  * probably need to fix IsSharedRelation() to match whatever you've done
3390  * to the set of shared relations.
3391  */
3392  if (shared_relation != IsSharedRelation(relid))
3393  elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)",
3394  relname, relid);
3395 
3396  /* Shared relations had better be mapped, too */
3397  Assert(mapped_relation || !shared_relation);
3398 
3399  /*
3400  * switch to the cache context to create the relcache entry.
3401  */
3402  if (!CacheMemoryContext)
3404 
3406 
3407  /*
3408  * allocate a new relation descriptor and fill in basic state fields.
3409  */
3410  rel = (Relation) palloc0(sizeof(RelationData));
3411 
3412  /* make sure relation is marked as having no open file yet */
3413  rel->rd_smgr = NULL;
3414 
3415  /* mark it nailed if appropriate */
3416  rel->rd_isnailed = nailit;
3417 
      /* a nailed entry starts with a reference count of 1, any other with 0 */
3418  rel->rd_refcnt = nailit ? 1 : 0;
3419 
3420  /* it's being created in this transaction */
3425 
3426  /*
3427  * create a new tuple descriptor from the one passed in. We do this
3428  * partly to copy it into the cache context, and partly because the new
3429  * relation can't have any defaults or constraints yet; they have to be
3430  * added in later steps, because they require additions to multiple system
3431  * catalogs. We can copy attnotnull constraints here, however.
3432  */
3433  rel->rd_att = CreateTupleDescCopy(tupDesc);
3434  rel->rd_att->tdrefcount = 1; /* mark as refcounted */
3435  has_not_null = false;
      /* carry over per-column identity, generated and not-null flags from the caller's descriptor */
3436  for (i = 0; i < natts; i++)
3437  {
3438  Form_pg_attribute satt = TupleDescAttr(tupDesc, i);
3439  Form_pg_attribute datt = TupleDescAttr(rel->rd_att, i);
3440 
3441  datt->attidentity = satt->attidentity;
3442  datt->attgenerated = satt->attgenerated;
3443  datt->attnotnull = satt->attnotnull;
3444  has_not_null |= satt->attnotnull;
3445  }
3446 
      /* a constraint struct is attached only when at least one column is NOT NULL */
3447  if (has_not_null)
3448  {
3449  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
3450 
3451  constr->has_not_null = true;
3452  rel->rd_att->constr = constr;
3453  }
3454 
3455  /*
3456  * initialize relation tuple form (caller may add/override data later)
3457  */
3459 
3460  namestrcpy(&rel->rd_rel->relname, relname);
3461  rel->rd_rel->relnamespace = relnamespace;
3462 
3463  rel->rd_rel->relkind = relkind;
3464  rel->rd_rel->relnatts = natts;
3465  rel->rd_rel->reltype = InvalidOid;
3466  /* needed when bootstrapping: */
3467  rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;
3468 
3469  /* set up persistence and relcache fields dependent on it */
3470  rel->rd_rel->relpersistence = relpersistence;
3471  switch (relpersistence)
3472  {
3473  case RELPERSISTENCE_UNLOGGED:
3474  case RELPERSISTENCE_PERMANENT:
3476  rel->rd_islocaltemp = false;
3477  break;
3478  case RELPERSISTENCE_TEMP:
3479  Assert(isTempOrTempToastNamespace(relnamespace));
3481  rel->rd_islocaltemp = true;
3482  break;
3483  default:
3484  elog(ERROR, "invalid relpersistence: %c", relpersistence);
3485  break;
3486  }
3487 
3488  /* if it's a materialized view, it's not populated initially */
3489  if (relkind == RELKIND_MATVIEW)
3490  rel->rd_rel->relispopulated = false;
3491  else
3492  rel->rd_rel->relispopulated = true;
3493 
3494  /* set replica identity -- system catalogs and non-tables don't have one */
3495  if (!IsCatalogNamespace(relnamespace) &&
3496  (relkind == RELKIND_RELATION ||
3497  relkind == RELKIND_MATVIEW ||
3498  relkind == RELKIND_PARTITIONED_TABLE))
3499  rel->rd_rel->relreplident = REPLICA_IDENTITY_DEFAULT;
3500  else
3501  rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
3502 
3503  /*
3504  * Insert relation physical and logical identifiers (OIDs) into the right
3505  * places. For a mapped relation, we set relfilenode to zero and rely on
3506  * RelationInitPhysicalAddr to consult the map.
3507  */
3508  rel->rd_rel->relisshared = shared_relation;
3509 
3510  RelationGetRelid(rel) = relid;
3511 
3512  for (i = 0; i < natts; i++)
3513  TupleDescAttr(rel->rd_att, i)->attrelid = relid;
3514 
3515  rel->rd_rel->reltablespace = reltablespace;
3516 
3517  if (mapped_relation)
3518  {
3519  rel->rd_rel->relfilenode = InvalidOid;
3520  /* Add it to the active mapping information */
3521  RelationMapUpdateMap(relid, relfilenode, shared_relation, true);
3522  }
3523  else
3524  rel->rd_rel->relfilenode = relfilenode;
3525 
3526  RelationInitLockInfo(rel); /* see lmgr.c */
3527 
3529 
3530  rel->rd_rel->relam = accessmtd;
3531 
      /* NOTE(review): the statement controlled by this "if" (line 3536) is absent from this listing */
3532  if (relkind == RELKIND_RELATION ||
3533  relkind == RELKIND_SEQUENCE ||
3534  relkind == RELKIND_TOASTVALUE ||
3535  relkind == RELKIND_MATVIEW)
3537 
3538  /*
3539  * Okay to insert into the relcache hash table.
3540  *
3541  * Ordinarily, there should certainly not be an existing hash entry for
3542  * the same OID; but during bootstrap, when we create a "real" relcache
3543  * entry for one of the bootstrap relations, we'll be overwriting the
3544  * phony one created with formrdesc. So allow that to happen for nailed
3545  * rels.
3546  */
3547  RelationCacheInsert(rel, nailit);
3548 
3549  /*
3550  * Flag relation as needing eoxact cleanup (to clear rd_createSubid). We
3551  * can't do this before storing relid in it.
3552  */
3553  EOXactListAdd(rel);
3554 
3555  /*
3556  * done building relcache entry.
3557  */
3558  MemoryContextSwitchTo(oldcxt);
3559 
3560  /* It's fully valid */
3561  rel->rd_isvalid = true;
3562 
3563  /*
3564  * Caller expects us to pin the returned entry.
3565  */
3567 
3568  return rel;
3569 }
3570 
3571 
3572 /*
3573  * RelationSetNewRelfilenode
3574  *
3575  * Assign a new relfilenode (physical file name), and possibly a new
3576  * persistence setting, to the relation.
3577  *
3578  * This allows a full rewrite of the relation to be done with transactional
3579  * safety (since the filenode assignment can be rolled back). Note however
3580  * that there is no simple way to access the relation's old data for the
3581  * remainder of the current transaction. This limits the usefulness to cases
3582  * such as TRUNCATE or rebuilding an index from scratch.
3583  *
3584  * Caller must already hold exclusive lock on the relation.
3585  */
/*
 * Parameters: "relation" is the relcache entry to receive new storage;
 * "persistence" is the relpersistence value used for the new storage and
 * written to pg_class in the non-mapped case.
 *
 * NOTE(review): this listing skips original lines 3685 and 3720: the start
 * of the call under "Do the deed", and the statement described by the
 * "Make the pg_class row change ... visible" comment near the end.
 */
3586 void
3587 RelationSetNewRelfilenode(Relation relation, char persistence)
3588 {
3589  Oid newrelfilenode;
3590  Relation pg_class;
3591  HeapTuple tuple;
3592  Form_pg_class classform;
3593  MultiXactId minmulti = InvalidMultiXactId;
3594  TransactionId freezeXid = InvalidTransactionId;
3595  RelFileNode newrnode;
3596 
3597  /* Allocate a new relfilenode */
3598  newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL,
3599  persistence);
3600 
3601  /*
3602  * Get a writable copy of the pg_class tuple for the given relation.
3603  */
3604  pg_class = table_open(RelationRelationId, RowExclusiveLock);
3605 
3606  tuple = SearchSysCacheCopy1(RELOID,
3607  ObjectIdGetDatum(RelationGetRelid(relation)));
3608  if (!HeapTupleIsValid(tuple))
3609  elog(ERROR, "could not find tuple for relation %u",
3610  RelationGetRelid(relation));
3611  classform = (Form_pg_class) GETSTRUCT(tuple);
3612 
3613  /*
3614  * Schedule unlinking of the old storage at transaction commit.
3615  */
3616  RelationDropStorage(relation);
3617 
3618  /*
3619  * Create storage for the main fork of the new relfilenode. If it's a
3620  * table-like object, call into the table AM to do so, which'll also
3621  * create the table's init fork if needed.
3622  *
3623  * NOTE: If relevant for the AM, any conflict in relfilenode value will be
3624  * caught here, if GetNewRelFileNode messes up for any reason.
3625  */
      /* the new node is a copy of the current rd_node with only relNode replaced */
3626  newrnode = relation->rd_node;
3627  newrnode.relNode = newrelfilenode;
3628 
3629  switch (relation->rd_rel->relkind)
3630  {
3631  case RELKIND_INDEX:
3632  case RELKIND_SEQUENCE:
3633  {
3634  /* handle these directly, at least for now */
3635  SMgrRelation srel;
3636 
3637  srel = RelationCreateStorage(newrnode, persistence);
3638  smgrclose(srel);
3639  }
3640  break;
3641 
3642  case RELKIND_RELATION:
3643  case RELKIND_TOASTVALUE:
3644  case RELKIND_MATVIEW:
      /* freezeXid and minmulti are passed by address; they feed the pg_class update below */
3645  table_relation_set_new_filenode(relation, &newrnode,
3646  persistence,
3647  &freezeXid, &minmulti);
3648  break;
3649 
3650  default:
3651  /* we shouldn't be called for anything else */
3652  elog(ERROR, "relation \"%s\" does not have storage",
3653  RelationGetRelationName(relation));
3654  break;
3655  }
3656 
3657  /*
3658  * If we're dealing with a mapped index, pg_class.relfilenode doesn't
3659  * change; instead we have to send the update to the relation mapper.
3660  *
3661  * For mapped indexes, we don't actually change the pg_class entry at all;
3662  * this is essential when reindexing pg_class itself. That leaves us with
3663  * possibly-inaccurate values of relpages etc, but those will be fixed up
3664  * later.
3665  */
3666  if (RelationIsMapped(relation))
3667  {
3668  /* This case is only supported for indexes */
3669  Assert(relation->rd_rel->relkind == RELKIND_INDEX);
3670 
3671  /* Since we're not updating pg_class, these had better not change */
3672  Assert(classform->relfrozenxid == freezeXid);
3673  Assert(classform->relminmxid == minmulti);
3674  Assert(classform->relpersistence == persistence);
3675 
3676  /*
3677  * In some code paths it's possible that the tuple update we'd
3678  * otherwise do here is the only thing that would assign an XID for
3679  * the current transaction. However, we must have an XID to delete
3680  * files, so make sure one is assigned.
3681  */
3682  (void) GetCurrentTransactionId();
3683 
3684  /* Do the deed */
3686  newrelfilenode,
3687  relation->rd_rel->relisshared,
3688  false);
3689 
3690  /* Since we're not updating pg_class, must trigger inval manually */
3691  CacheInvalidateRelcache(relation);
3692  }
3693  else
3694  {
3695  /* Normal case, update the pg_class entry */
3696  classform->relfilenode = newrelfilenode;
3697 
3698  /* relpages etc. never change for sequences */
3699  if (relation->rd_rel->relkind != RELKIND_SEQUENCE)
3700  {
3701  classform->relpages = 0; /* it's empty until further notice */
3702  classform->reltuples = -1;
3703  classform->relallvisible = 0;
3704  }
3705  classform->relfrozenxid = freezeXid;
3706  classform->relminmxid = minmulti;
3707  classform->relpersistence = persistence;
3708 
3709  CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
3710  }
3711 
      /* "tuple" is our private copy from SearchSysCacheCopy1, so free it here */
3712  heap_freetuple(tuple);
3713 
3714  table_close(pg_class, RowExclusiveLock);
3715 
3716  /*
3717  * Make the pg_class row change or relation map change visible. This will
3718  * cause the relcache entry to get updated, too.
3719  */
3721 
3722  RelationAssumeNewRelfilenode(relation);
3723 }
3724 
3725 /*
3726  * RelationAssumeNewRelfilenode
3727  *
3728  * Code that modifies pg_class.reltablespace or pg_class.relfilenode must call
3729  * this. The call shall precede any code that might insert WAL records whose
3730  * replay would modify bytes in the new RelFileNode, and the call shall follow
3731  * any WAL modifying bytes in the prior RelFileNode. See struct RelationData.
3732  * Ideally, call this as near as possible to the CommandCounterIncrement()
3733  * that makes the pg_class change visible (before it or after it); that
3734  * minimizes the chance of future development adding a forbidden WAL insertion
3735  * between RelationAssumeNewRelfilenode() and CommandCounterIncrement().
3736  */
/*
 * NOTE(review): this listing skips original lines 3738 and 3740-3741.
 * Presumably these hold the function name with its "relation" parameter and
 * the statements that set rd_newRelfilenodeSubid before it is copied below
 * -- confirm against the upstream file.
 */
3737 void
3739 {
3742  relation->rd_firstRelfilenodeSubid = relation->rd_newRelfilenodeSubid;
3743 
3744  /* Flag relation as needing eoxact cleanup (to clear these fields) */
3745  EOXactListAdd(relation);
3746 }
3747 
3748 
3749 /*
3750  * RelationCacheInitialize
3751  *
3752  * This initializes the relation descriptor cache. At the time
3753  * that this is invoked, we can't do database access yet (mainly
3754  * because the transaction subsystem is not up); all we are doing
3755  * is making an empty cache hashtable. This must be done before
3756  * starting the initialization transaction, because otherwise
3757  * AtEOXact_RelationCache would crash if that transaction aborts
3758  * before we can get the relcache set up.
3759  */
3760 
3761 #define INITRELCACHESIZE 400
3762 
/*
 * NOTE(review): this listing skips original lines 3764, 3772 and 3786.
 * Presumably these hold the function name line, the statement that creates
 * CacheMemoryContext when it is missing, and the relation-mapper
 * initialisation call described by the last comment -- confirm against the
 * upstream file.
 */
3763 void
3765 {
3766  HASHCTL ctl;
3767 
3768  /*
3769  * make sure cache memory context exists
3770  */
3771  if (!CacheMemoryContext)
3773 
3774  /*
3775  * create hashtable that indexes the relcache
3776  */
      /* keyed by relation OID; INITRELCACHESIZE is the initial size passed to hash_create */
3777  MemSet(&ctl, 0, sizeof(ctl));
3778  ctl.keysize = sizeof(Oid);
3779  ctl.entrysize = sizeof(RelIdCacheEnt);
3780  RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
3781  &ctl, HASH_ELEM | HASH_BLOBS);
3782 
3783  /*
3784  * relation mapper needs to be initialized too
3785  */
3787 }
3788 
3789 /*
3790  * RelationCacheInitializePhase2
3791  *
3792  * This is called to prepare for access to shared catalogs during startup.
3793  * We must at least set up nailed reldescs for pg_database, pg_authid,
3794  * pg_auth_members, pg_shseclabel, and pg_subscription. Ideally we'd like to have reldescs
3795  * for their indexes, too. We attempt to load this information from the
3796  * shared relcache init file. If that's missing or broken, just make
3797  * phony entries for the catalogs themselves.
3798  * RelationCacheInitializePhase3 will clean up as needed.
3799  */
/*
 * NOTE(review): this listing skips original lines 3801, 3808, 3814 and 3820.
 * Presumably these hold the function name line, the relation-mapper call,
 * the bootstrap-mode test guarding the early return, and the memory-context
 * switch that sets "oldcxt" -- confirm against the upstream file.
 */
3800 void
3802 {
3803  MemoryContext oldcxt;
3804 
3805  /*
3806  * relation mapper needs to be initialized too
3807  */
3809 
3810  /*
3811  * In bootstrap mode, the shared catalogs aren't there yet anyway, so do
3812  * nothing.
3813  */
3815  return;
3816 
3817  /*
3818  * switch to cache memory context
3819  */
3821 
3822  /*
3823  * Try to load the shared relcache init file. If unsuccessful, bootstrap
3824  * the cache with pre-made descriptors for the critical shared catalogs.
3825  */
3826  if (!load_relcache_init_file(true))
3827  {
      /* five shared catalogs are built here; NUM_CRITICAL_SHARED_RELS below must match */
3828  formrdesc("pg_database", DatabaseRelation_Rowtype_Id, true,
3829  Natts_pg_database, Desc_pg_database);
3830  formrdesc("pg_authid", AuthIdRelation_Rowtype_Id, true,
3831  Natts_pg_authid, Desc_pg_authid);
3832  formrdesc("pg_auth_members", AuthMemRelation_Rowtype_Id, true,
3833  Natts_pg_auth_members, Desc_pg_auth_members);
3834  formrdesc("pg_shseclabel", SharedSecLabelRelation_Rowtype_Id, true,
3835  Natts_pg_shseclabel, Desc_pg_shseclabel);
3836  formrdesc("pg_subscription", SubscriptionRelation_Rowtype_Id, true,
3837  Natts_pg_subscription, Desc_pg_subscription);
3838 
3839 #define NUM_CRITICAL_SHARED_RELS 5 /* fix if you change list above */
3840  }
3841 
3842  MemoryContextSwitchTo(oldcxt);
3843 }
3844 
3845 /*
3846  * RelationCacheInitializePhase3
3847  *
3848  * This is called as soon as the catcache and transaction system
3849  * are functional and we have determined MyDatabaseId. At this point
3850  * we can actually read data from the database's system catalogs.
3851  * We first try to read pre-computed relcache entries from the local
3852  * relcache init file. If that's missing or broken, make phony entries
3853  * for the minimum set of nailed-in-cache relations. Then (unless
3854  * bootstrapping) make sure we have entries for the critical system
3855  * indexes. Once we've done all this, we have enough infrastructure to
3856  * open any system catalog or use any catcache. The last step is to
3857  * rewrite the cache files if needed.
3858  */
3859 void
3861 {
3863  RelIdCacheEnt *idhentry;
3864  MemoryContext oldcxt;
3865  bool needNewCacheFile = !criticalSharedRelcachesBuilt;
3866 
3867  /*
3868  * relation mapper needs initialized too
3869  */
3871 
3872  /*
3873  * switch to cache memory context
3874  */
3876 
3877  /*
3878  * Try to load the local relcache cache file. If unsuccessful, bootstrap
3879  * the cache with pre-made descriptors for the critical "nailed-in" system
3880  * catalogs.
3881  */
3882  if (IsBootstrapProcessingMode() ||
3883  !load_relcache_init_file(false))
3884  {
3885  needNewCacheFile = true;
3886 
3887  formrdesc("pg_class", RelationRelation_Rowtype_Id, false,
3888  Natts_pg_class, Desc_pg_class);
3889  formrdesc("pg_attribute", AttributeRelation_Rowtype_Id, false,
3890  Natts_pg_attribute, Desc_pg_attribute);
3891  formrdesc("pg_proc", ProcedureRelation_Rowtype_Id, false,
3892  Natts_pg_proc, Desc_pg_proc);
3893  formrdesc("pg_type", TypeRelation_Rowtype_Id, false,
3894  Natts_pg_type, Desc_pg_type);
3895 
3896 #define NUM_CRITICAL_LOCAL_RELS 4 /* fix if you change list above */
3897  }
3898 
3899  MemoryContextSwitchTo(oldcxt);
3900 
3901  /* In bootstrap mode, the faked-up formrdesc info is all we'll have */
3903  return;
3904 
3905  /*
3906  * If we didn't get the critical system indexes loaded into relcache, do
3907  * so now. These are critical because the catcache and/or opclass cache
3908  * depend on them for fetches done during relcache load. Thus, we have an
3909  * infinite-recursion problem. We can break the recursion by doing
3910  * heapscans instead of indexscans at certain key spots. To avoid hobbling
3911  * performance, we only want to do that until we have the critical indexes
3912  * loaded into relcache. Thus, the flag criticalRelcachesBuilt is used to
3913  * decide whether to do heapscan or indexscan at the key spots, and we set
3914  * it true after we've loaded the critical indexes.
3915  *
3916  * The critical indexes are marked as "nailed in cache", partly to make it
3917  * easy for load_relcache_init_file to count them, but mainly because we
3918  * cannot flush and rebuild them once we've set criticalRelcachesBuilt to
3919  * true. (NOTE: perhaps it would be possible to reload them by
3920  * temporarily setting criticalRelcachesBuilt to false again. For now,
3921  * though, we just nail 'em in.)
3922  *
3923  * RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical
3924  * in the same way as the others, because the critical catalogs don't
3925  * (currently) have any rules or triggers, and so these indexes can be
3926  * rebuilt without inducing recursion. However they are used during
3927  * relcache load when a rel does have rules or triggers, so we choose to
3928  * nail them for performance reasons.
3929  */
3931  {
3933  RelationRelationId);
3935  AttributeRelationId);
3937  IndexRelationId);
3939  OperatorClassRelationId);
3941  AccessMethodProcedureRelationId);
3943  RewriteRelationId);
3945  TriggerRelationId);
3946 
3947 #define NUM_CRITICAL_LOCAL_INDEXES 7 /* fix if you change list above */
3948 
3949  criticalRelcachesBuilt = true;
3950  }
3951 
3952  /*
3953  * Process critical shared indexes too.
3954  *
3955  * DatabaseNameIndexId isn't critical for relcache loading, but rather for
3956  * initial lookup of MyDatabaseId, without which we'll never find any
3957  * non-shared catalogs at all. Autovacuum calls InitPostgres with a
3958  * database OID, so it instead depends on DatabaseOidIndexId. We also
3959  * need to nail up some indexes on pg_authid and pg_auth_members for use
3960  * during client authentication. SharedSecLabelObjectIndexId isn't
3961  * critical for the core system, but authentication hooks might be
3962  * interested in it.
3963  */
3965  {
3967  DatabaseRelationId);
3969  DatabaseRelationId);
3971  AuthIdRelationId);
3973  AuthIdRelationId);
3975  AuthMemRelationId);
3977  SharedSecLabelRelationId);
3978 
3979 #define NUM_CRITICAL_SHARED_INDEXES 6 /* fix if you change list above */
3980 
3982  }
3983 
3984  /*
3985  * Now, scan all the relcache entries and update anything that might be
3986  * wrong in the results from formrdesc or the relcache cache file. If we
3987  * faked up relcache entries using formrdesc, then read the real pg_class
3988  * rows and replace the fake entries with them. Also, if any of the
3989  * relcache entries have rules, triggers, or security policies, load that
3990  * info the hard way since it isn't recorded in the cache file.
3991  *
3992  * Whenever we access the catalogs to read data, there is a possibility of
3993  * a shared-inval cache flush causing relcache entries to be removed.
3994  * Since hash_seq_search only guarantees to still work after the *current*
3995  * entry is removed, it's unsafe to continue the hashtable scan afterward.
3996  * We handle this by restarting the scan from scratch after each access.
3997  * This is theoretically O(N^2), but the number of entries that actually
3998  * need to be fixed is small enough that it doesn't matter.
3999  */
4000  hash_seq_init(&status, RelationIdCache);
4001 
4002  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
4003  {
4004  Relation relation = idhentry->reldesc;
4005  bool restart = false;
4006 
4007  /*
4008  * Make sure *this* entry doesn't get flushed while we work with it.
4009  */
4011 
4012  /*
4013  * If it's a faked-up entry, read the real pg_class tuple.
4014  */
4015  if (relation->rd_rel->relowner == InvalidOid)
4016  {
4017  HeapTuple htup;
4018  Form_pg_class relp;
4019 
4020  htup = SearchSysCache1(RELOID,
4021  ObjectIdGetDatum(RelationGetRelid(relation)));
4022  if (!HeapTupleIsValid(htup))
4023  elog(FATAL, "cache lookup failed for relation %u",
4024  RelationGetRelid(relation));
4025  relp = (Form_pg_class) GETSTRUCT(htup);
4026 
4027  /*
4028  * Copy tuple to relation->rd_rel. (See notes in
4029  * AllocateRelationDesc())
4030  */
4031  memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
4032 
4033  /* Update rd_options while we have the tuple */
4034  if (relation->rd_options)
4035  pfree(relation->rd_options);
4036  RelationParseRelOptions(relation, htup);
4037 
4038  /*
4039  * Check the values in rd_att were set up correctly. (We cannot
4040  * just copy them over now: formrdesc must have set up the rd_att
4041  * data correctly to start with, because it may already have been
4042  * copied into one or more catcache entries.)
4043  */
4044  Assert(relation->rd_att->tdtypeid == relp->reltype);
4045  Assert(relation->rd_att->tdtypmod == -1);
4046 
4047  ReleaseSysCache(htup);
4048 
4049  /* relowner had better be OK now, else we'll loop forever */
4050  if (relation->rd_rel->relowner == InvalidOid)
4051  elog(ERROR, "invalid relowner in pg_class entry for \"%s\"",
4052  RelationGetRelationName(relation));
4053 
4054  restart = true;
4055  }
4056 
4057  /*
4058  * Fix data that isn't saved in relcache cache file.
4059  *
4060  * relhasrules or relhastriggers could possibly be wrong or out of
4061  * date. If we don't actually find any rules or triggers, clear the
4062  * local copy of the flag so that we don't get into an infinite loop
4063  * here. We don't make any attempt to fix the pg_class entry, though.
4064  */
4065  if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
4066  {
4067  RelationBuildRuleLock(relation);
4068  if (relation->rd_rules == NULL)
4069  relation->rd_rel->relhasrules = false;
4070  restart = true;
4071  }
4072  if (relation->rd_rel->relhastriggers && relation->trigdesc == NULL)
4073  {
4074  RelationBuildTriggers(relation);
4075  if (relation->trigdesc == NULL)
4076  relation->rd_rel->relhastriggers = false;
4077  restart = true;
4078  }
4079 
4080  /*
4081  * Re-load the row security policies if the relation has them, since
4082  * they are not preserved in the cache. Note that we can never NOT
4083  * have a policy while relrowsecurity is true,
4084  * RelationBuildRowSecurity will create a single default-deny policy
4085  * if there is no policy defined in pg_policy.
4086  */
4087  if (relation->rd_rel->relrowsecurity && relation->rd_rsdesc == NULL)
4088  {
4089  RelationBuildRowSecurity(relation);
4090 
4091  Assert(relation->rd_rsdesc != NULL);
4092  restart = true;
4093  }
4094 
4095  /* Reload tableam data if needed */
4096  if (relation->rd_tableam == NULL &&
4097  (relation->rd_rel->relkind == RELKIND_RELATION ||
4098  relation->rd_rel->relkind == RELKIND_SEQUENCE ||
4099  relation->rd_rel->relkind == RELKIND_TOASTVALUE ||
4100  relation->rd_rel->relkind == RELKIND_MATVIEW))
4101  {
4103  Assert(relation->rd_tableam != NULL);
4104 
4105  restart = true;
4106  }
4107 
4108  /* Release hold on the relation */
4110 
4111  /* Now, restart the hashtable scan if needed */
4112  if (restart)
4113  {
4114  hash_seq_term(&status);
4115  hash_seq_init(&status, RelationIdCache);
4116  }
4117  }
4118 
4119  /*
4120  * Lastly, write out new relcache cache files if needed. We don't bother
4121  * to distinguish cases where only one of the two needs an update.
4122  */
4123  if (needNewCacheFile)
4124  {
4125  /*
4126  * Force all the catcaches to finish initializing and thereby open the
4127  * catalogs and indexes they use. This will preload the relcache with
4128  * entries for all the most important system catalogs and indexes, so
4129  * that the init files will be most useful for future backends.
4130  */
4132 
4133  /* now write the files */
4135  write_relcache_init_file(false);
4136  }
4137 }
4138 
4139 /*
4140  * Load one critical system index into the relcache
4141  *
4142  * indexoid is the OID of the target index, heapoid is the OID of the catalog
4143  * it belongs to.
4144  */
4145 static void
4146 load_critical_index(Oid indexoid, Oid heapoid)
4147 {
4148  Relation ird;
4149 
4150  /*
4151  * We must lock the underlying catalog before locking the index to avoid
4152  * deadlock, since RelationBuildDesc might well need to read the catalog,
4153  * and if anyone else is exclusive-locking this catalog and index they'll
4154  * be doing it in that order.
4155  */
4156  LockRelationOid(heapoid, AccessShareLock);
4157  LockRelationOid(indexoid, AccessShareLock);
4158  ird = RelationBuildDesc(indexoid, true);
4159  if (ird == NULL)
4160  elog(PANIC, "could not open critical system index %u", indexoid);
4161  ird->rd_isnailed = true;
4162  ird->rd_refcnt = 1;
4165 
4166  (void) RelationGetIndexAttOptions(ird, false);
4167 }
4168 
4169 /*
4170  * GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
4171  * GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
4172  *
4173  * We need this kluge because we have to be able to access non-fixed-width
4174  * fields of pg_class and pg_index before we have the standard catalog caches
4175  * available. We use predefined data that's set up in just the same way as
4176  * the bootstrapped reldescs used by formrdesc(). The resulting tupdesc is
4177  * not 100% kosher: it does not have the correct rowtype OID in tdtypeid, nor
4178  * does it have a TupleConstr field. But it's good enough for the purpose of
4179  * extracting fields.
4180  */
4181 static TupleDesc
4183 {
4184  TupleDesc result;
4185  MemoryContext oldcxt;
4186  int i;
4187 
4189 
4190  result = CreateTemplateTupleDesc(natts);
4191  result->tdtypeid = RECORDOID; /* not right, but we don't care */
4192  result->tdtypmod = -1;
4193 
4194  for (i = 0; i < natts; i++)
4195  {
4196  memcpy(TupleDescAttr(result, i), &attrs[i], ATTRIBUTE_FIXED_PART_SIZE);
4197  /* make sure attcacheoff is valid */
4198  TupleDescAttr(result, i)->attcacheoff = -1;
4199  }
4200 
4201  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
4202  TupleDescAttr(result, 0)->attcacheoff = 0;
4203 
4204  /* Note: we don't bother to set up a TupleConstr entry */
4205 
4206  MemoryContextSwitchTo(oldcxt);
4207 
4208  return result;
4209 }
4210 
4211 static TupleDesc
4213 {
4214  static TupleDesc pgclassdesc = NULL;
4215 
4216  /* Already done? */
4217  if (pgclassdesc == NULL)
4218  pgclassdesc = BuildHardcodedDescriptor(Natts_pg_class,
4219  Desc_pg_class);
4220 
4221  return pgclassdesc;
4222 }
4223 
4224 static TupleDesc
4226 {
4227  static TupleDesc pgindexdesc = NULL;
4228 
4229  /* Already done? */
4230  if (pgindexdesc == NULL)
4231  pgindexdesc = BuildHardcodedDescriptor(Natts_pg_index,
4232  Desc_pg_index);
4233 
4234  return pgindexdesc;
4235 }
4236 
4237 /*
4238  * Load any default attribute value definitions for the relation.
4239  */
4240 static void
4242 {
4243  AttrDefault *attrdef = relation->rd_att->constr->defval;
4244  int ndef = relation->rd_att->constr->num_defval;
4245  Relation adrel;
4246  SysScanDesc adscan;
4247  ScanKeyData skey;
4248  HeapTuple htup;
4249  Datum val;
4250  bool isnull;
4251  int found;
4252  int i;
4253 
4254  ScanKeyInit(&skey,
4255  Anum_pg_attrdef_adrelid,
4256  BTEqualStrategyNumber, F_OIDEQ,
4257  ObjectIdGetDatum(RelationGetRelid(relation)));
4258 
4259  adrel = table_open(AttrDefaultRelationId, AccessShareLock);
4260  adscan = systable_beginscan(adrel, AttrDefaultIndexId, true,
4261  NULL, 1, &skey);
4262  found = 0;
4263 
4264  while (HeapTupleIsValid(htup = systable_getnext(adscan)))
4265  {
4266  Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
4267  Form_pg_attribute attr = TupleDescAttr(relation->rd_att, adform->adnum - 1);
4268 
4269  for (i = 0; i < ndef; i++)
4270  {
4271  if (adform->adnum != attrdef[i].adnum)
4272  continue;
4273  if (attrdef[i].adbin != NULL)
4274  elog(WARNING, "multiple attrdef records found for attr %s of rel %s",
4275  NameStr(attr->attname),
4276  RelationGetRelationName(relation));
4277  else
4278  found++;
4279 
4280  val = fastgetattr(htup,
4281  Anum_pg_attrdef_adbin,
4282  adrel->rd_att, &isnull);
4283  if (isnull)
4284  elog(WARNING, "null adbin for attr %s of rel %s",
4285  NameStr(attr->attname),
4286  RelationGetRelationName(relation));
4287  else
4288  {
4289  /* detoast and convert to cstring in caller's context */
4290  char *s = TextDatumGetCString(val);
4291 
4293  pfree(s);
4294  }
4295  break;
4296  }
4297 
4298  if (i >= ndef)
4299  elog(WARNING, "unexpected attrdef record found for attr %d of rel %s",
4300  adform->adnum, RelationGetRelationName(relation));
4301  }
4302 
4303  systable_endscan(adscan);
4304  table_close(adrel, AccessShareLock);
4305 }
4306 
4307 /*
4308  * Load any check constraints for the relation.
4309  */
4310 static void
4312 {
4313  ConstrCheck *check = relation->rd_att->constr->check;
4314  int ncheck = relation->rd_att->constr->num_check;
4315  Relation conrel;
4316  SysScanDesc conscan;
4317  ScanKeyData skey[1];
4318  HeapTuple htup;
4319  int found = 0;
4320 
4321  ScanKeyInit(&skey[0],
4322  Anum_pg_constraint_conrelid,
4323  BTEqualStrategyNumber, F_OIDEQ,
4324  ObjectIdGetDatum(RelationGetRelid(relation)));
4325 
4326  conrel = table_open(ConstraintRelationId, AccessShareLock);
4327  conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
4328  NULL, 1, skey);
4329 
4330  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4331  {
4333  Datum val;
4334  bool isnull;
4335  char *s;
4336 
4337  /* We want check constraints only */
4338  if (conform->contype != CONSTRAINT_CHECK)
4339  continue;
4340 
4341  if (found >= ncheck)
4342  elog(ERROR, "unexpected constraint record found for rel %s",
4343  RelationGetRelationName(relation));
4344 
4345  check[found].ccvalid = conform->convalidated;
4346  check[found].ccnoinherit = conform->connoinherit;
4348  NameStr(conform->conname));
4349 
4350  /* Grab and test conbin is actually set */
4351  val = fastgetattr(htup,
4352  Anum_pg_constraint_conbin,
4353  conrel->rd_att, &isnull);
4354  if (isnull)
4355  elog(ERROR, "null conbin for rel %s",
4356  RelationGetRelationName(relation));
4357 
4358  /* detoast and convert to cstring in caller's context */
4359  s = TextDatumGetCString(val);
4360  check[found].ccbin = MemoryContextStrdup(CacheMemoryContext, s);
4361  pfree(s);
4362 
4363  found++;
4364  }
4365 
4366  systable_endscan(conscan);
4367  table_close(conrel, AccessShareLock);
4368 
4369  if (found != ncheck)
4370  elog(ERROR, "%d constraint record(s) missing for rel %s",
4371  ncheck - found, RelationGetRelationName(relation));
4372 
4373  /* Sort the records so that CHECKs are applied in a deterministic order */
4374  if (ncheck > 1)
4375  qsort(check, ncheck, sizeof(ConstrCheck), CheckConstraintCmp);
4376 }
4377 
4378 /*
4379  * qsort comparator to sort ConstrCheck entries by name
4380  */
4381 static int
4382 CheckConstraintCmp(const void *a, const void *b)
4383 {
4384  const ConstrCheck *ca = (const ConstrCheck *) a;
4385  const ConstrCheck *cb = (const ConstrCheck *) b;
4386 
4387  return strcmp(ca->ccname, cb->ccname);
4388 }
4389 
4390 /*
4391  * RelationGetFKeyList -- get a list of foreign key info for the relation
4392  *
4393  * Returns a list of ForeignKeyCacheInfo structs, one per FK constraining
4394  * the given relation. This data is a direct copy of relevant fields from
4395  * pg_constraint. The list items are in no particular order.
4396  *
4397  * CAUTION: the returned list is part of the relcache's data, and could
4398  * vanish in a relcache entry reset. Callers must inspect or copy it
4399  * before doing anything that might trigger a cache flush, such as
4400  * system catalog accesses. copyObject() can be used if desired.
4401  * (We define it this way because current callers want to filter and
4402  * modify the list entries anyway, so copying would be a waste of time.)
4403  */
4404 List *
4406 {
4407  List *result;
4408  Relation conrel;
4409  SysScanDesc conscan;
4410  ScanKeyData skey;
4411  HeapTuple htup;
4412  List *oldlist;
4413  MemoryContext oldcxt;
4414 
4415  /* Quick exit if we already computed the list. */
4416  if (relation->rd_fkeyvalid)
4417  return relation->rd_fkeylist;
4418 
4419  /* Fast path: non-partitioned tables without triggers can't have FKs */
4420  if (!relation->rd_rel->relhastriggers &&
4421  relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
4422  return NIL;
4423 
4424  /*
4425  * We build the list we intend to return (in the caller's context) while
4426  * doing the scan. After successfully completing the scan, we copy that
4427  * list into the relcache entry. This avoids cache-context memory leakage
4428  * if we get some sort of error partway through.
4429  */
4430  result = NIL;
4431 
4432  /* Prepare to scan pg_constraint for entries having conrelid = this rel. */
4433  ScanKeyInit(&skey,
4434  Anum_pg_constraint_conrelid,
4435  BTEqualStrategyNumber, F_OIDEQ,
4436  ObjectIdGetDatum(RelationGetRelid(relation)));
4437 
4438  conrel = table_open(ConstraintRelationId, AccessShareLock);
4439  conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
4440  NULL, 1, &skey);
4441 
4442  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4443  {
4444  Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup);
4445  ForeignKeyCacheInfo *info;
4446 
4447  /* consider only foreign keys */
4448  if (constraint->contype != CONSTRAINT_FOREIGN)
4449  continue;
4450 
4451  info = makeNode(ForeignKeyCacheInfo);
4452  info->conoid = constraint->oid;
4453  info->conrelid = constraint->conrelid;
4454  info->confrelid = constraint->confrelid;
4455 
4456  DeconstructFkConstraintRow(htup, &info->nkeys,
4457  info->conkey,
4458  info->confkey,
4459  info->conpfeqop,
4460  NULL, NULL);
4461 
4462  /* Add FK's node to the result list */
4463  result = lappend(result, info);
4464  }
4465 
4466  systable_endscan(conscan);
4467  table_close(conrel, AccessShareLock);
4468 
4469  /* Now save a copy of the completed list in the relcache entry. */
4471  oldlist = relation->rd_fkeylist;
4472  relation->rd_fkeylist = copyObject(result);
4473  relation->rd_fkeyvalid = true;
4474  MemoryContextSwitchTo(oldcxt);
4475 
4476  /* Don't leak the old list, if there is one */
4477  list_free_deep(oldlist);
4478 
4479  return result;
4480 }
4481 
4482 /*
4483  * RelationGetIndexList -- get a list of OIDs of indexes on this relation
4484  *
4485  * The index list is created only if someone requests it. We scan pg_index
4486  * to find relevant indexes, and add the list to the relcache entry so that
4487  * we won't have to compute it again. Note that shared cache inval of a
4488  * relcache entry will delete the old list and set rd_indexvalid to false,
4489  * so that we must recompute the index list on next request. This handles
4490  * creation or deletion of an index.
4491  *
4492  * Indexes that are marked not indislive are omitted from the returned list.
4493  * Such indexes are expected to be dropped momentarily, and should not be
4494  * touched at all by any caller of this function.
4495  *
4496  * The returned list is guaranteed to be sorted in order by OID. This is
4497  * needed by the executor, since for index types that we obtain exclusive
4498  * locks on when updating the index, all backends must lock the indexes in
4499  * the same order or we will get deadlocks (see ExecOpenIndices()). Any
4500  * consistent ordering would do, but ordering by OID is easy.
4501  *
4502  * Since shared cache inval causes the relcache's copy of the list to go away,
4503  * we return a copy of the list palloc'd in the caller's context. The caller
4504  * may list_free() the returned list after scanning it. This is necessary
4505  * since the caller will typically be doing syscache lookups on the relevant
4506  * indexes, and syscache lookup could cause SI messages to be processed!
4507  *
4508  * In exactly the same way, we update rd_pkindex, which is the OID of the
4509  * relation's primary key index if any, else InvalidOid; and rd_replidindex,
4510  * which is the pg_class OID of an index to be used as the relation's
4511  * replication identity index, or InvalidOid if there is no such index.
4512  */
4513 List *
4515 {
4516  Relation indrel;
4517  SysScanDesc indscan;
4518  ScanKeyData skey;
4519  HeapTuple htup;
4520  List *result;
4521  List *oldlist;
4522  char replident = relation->rd_rel->relreplident;
4523  Oid pkeyIndex = InvalidOid;
4524  Oid candidateIndex = InvalidOid;
4525  MemoryContext oldcxt;
4526 
4527  /* Quick exit if we already computed the list. */
4528  if (relation->rd_indexvalid)
4529  return list_copy(relation->rd_indexlist);
4530 
4531  /*
4532  * We build the list we intend to return (in the caller's context) while
4533  * doing the scan. After successfully completing the scan, we copy that
4534  * list into the relcache entry. This avoids cache-context memory leakage
4535  * if we get some sort of error partway through.
4536  */
4537  result = NIL;
4538 
4539  /* Prepare to scan pg_index for entries having indrelid = this rel. */
4540  ScanKeyInit(&skey,
4541  Anum_pg_index_indrelid,
4542  BTEqualStrategyNumber, F_OIDEQ,
4543  ObjectIdGetDatum(RelationGetRelid(relation)));
4544 
4545  indrel = table_open(IndexRelationId, AccessShareLock);
4546  indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true,
4547  NULL, 1, &skey);
4548 
4549  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4550  {
4552 
4553  /*
4554  * Ignore any indexes that are currently being dropped. This will
4555  * prevent them from being searched, inserted into, or considered in
4556  * HOT-safety decisions. It's unsafe to touch such an index at all
4557  * since its catalog entries could disappear at any instant.
4558  */
4559  if (!index->indislive)
4560  continue;
4561 
4562  /* add index's OID to result list */
4563  result = lappend_oid(result, index->indexrelid);
4564 
4565  /*
4566  * Invalid, non-unique, non-immediate or predicate indexes aren't
4567  * interesting for either oid indexes or replication identity indexes,
4568  * so don't check them.
4569  */
4570  if (!index->indisvalid || !index->indisunique ||
4571  !index->indimmediate ||
4572  !heap_attisnull(htup, Anum_pg_index_indpred, NULL))
4573  continue;
4574 
4575  /* remember primary key index if any */
4576  if (index->indisprimary)
4577  pkeyIndex = index->indexrelid;
4578 
4579  /* remember explicitly chosen replica index */
4580  if (index->indisreplident)
4581  candidateIndex = index->indexrelid;
4582  }
4583 
4584  systable_endscan(indscan);
4585 
4586  table_close(indrel, AccessShareLock);
4587 
4588  /* Sort the result list into OID order, per API spec. */
4589  list_sort(result, list_oid_cmp);
4590 
4591  /* Now save a copy of the completed list in the relcache entry. */
4593  oldlist = relation->rd_indexlist;
4594  relation->rd_indexlist = list_copy(result);
4595  relation->rd_pkindex = pkeyIndex;
4596  if (replident == REPLICA_IDENTITY_DEFAULT && OidIsValid(pkeyIndex))
4597  relation->rd_replidindex = pkeyIndex;
4598  else if (replident == REPLICA_IDENTITY_INDEX && OidIsValid(candidateIndex))
4599  relation->rd_replidindex = candidateIndex;
4600  else
4601  relation->rd_replidindex = InvalidOid;
4602  relation->rd_indexvalid = true;
4603  MemoryContextSwitchTo(oldcxt);
4604 
4605  /* Don't leak the old list, if there is one */
4606  list_free(oldlist);
4607 
4608  return result;
4609 }
4610 
4611 /*
4612  * RelationGetStatExtList
4613  * get a list of OIDs of statistics objects on this relation
4614  *
4615  * The statistics list is created only if someone requests it, in a way
4616  * similar to RelationGetIndexList(). We scan pg_statistic_ext to find
4617  * relevant statistics, and add the list to the relcache entry so that we
4618  * won't have to compute it again. Note that shared cache inval of a
4619  * relcache entry will delete the old list and set rd_statvalid to 0,
4620  * so that we must recompute the statistics list on next request. This
4621  * handles creation or deletion of a statistics object.
4622  *
4623  * The returned list is guaranteed to be sorted in order by OID, although
4624  * this is not currently needed.
4625  *
4626  * Since shared cache inval causes the relcache's copy of the list to go away,
4627  * we return a copy of the list palloc'd in the caller's context. The caller
4628  * may list_free() the returned list after scanning it. This is necessary
4629  * since the caller will typically be doing syscache lookups on the relevant
4630  * statistics, and syscache lookup could cause SI messages to be processed!
4631  */
4632 List *
4634 {
4635  Relation indrel;
4636  SysScanDesc indscan;
4637  ScanKeyData skey;
4638  HeapTuple htup;
4639  List *result;
4640  List *oldlist;
4641  MemoryContext oldcxt;
4642 
4643  /* Quick exit if we already computed the list. */
4644  if (relation->rd_statvalid != 0)
4645  return list_copy(relation->rd_statlist);
4646 
4647  /*
4648  * We build the list we intend to return (in the caller's context) while
4649  * doing the scan. After successfully completing the scan, we copy that
4650  * list into the relcache entry. This avoids cache-context memory leakage
4651  * if we get some sort of error partway through.
4652  */
4653  result = NIL;
4654 
4655  /*
4656  * Prepare to scan pg_statistic_ext for entries having stxrelid = this
4657  * rel.
4658  */
4659  ScanKeyInit(&skey,
4660  Anum_pg_statistic_ext_stxrelid,
4661  BTEqualStrategyNumber, F_OIDEQ,
4662  ObjectIdGetDatum(RelationGetRelid(relation)));
4663 
4664  indrel = table_open(StatisticExtRelationId, AccessShareLock);
4665  indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true,
4666  NULL, 1, &skey);
4667 
4668  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4669  {
4670  Oid oid = ((Form_pg_statistic_ext) GETSTRUCT(htup))->oid;
4671 
4672  result = lappend_oid(result, oid);
4673  }
4674 
4675  systable_endscan(indscan);
4676 
4677  table_close(indrel, AccessShareLock);
4678 
4679  /* Sort the result list into OID order, per API spec. */
4680  list_sort(result, list_oid_cmp);
4681 
4682  /* Now save a copy of the completed list in the relcache entry. */
4684  oldlist = relation->rd_statlist;
4685  relation->rd_statlist = list_copy(result);
4686 
4687  relation->rd_statvalid = true;
4688  MemoryContextSwitchTo(oldcxt);
4689 
4690  /* Don't leak the old list, if there is one */
4691  list_free(oldlist);
4692 
4693  return result;
4694 }
4695 
4696 /*
4697  * RelationGetPrimaryKeyIndex -- get OID of the relation's primary key index
4698  *
4699  * Returns InvalidOid if there is no such index.
4700  */
4701 Oid
4703 {
4704  List *ilist;
4705 
4706  if (!relation->rd_indexvalid)
4707  {
4708  /* RelationGetIndexList does the heavy lifting. */
4709  ilist = RelationGetIndexList(relation);
4710  list_free(ilist);
4711  Assert(relation->rd_indexvalid);
4712  }
4713 
4714  return relation->rd_pkindex;
4715 }
4716 
4717 /*
4718  * RelationGetReplicaIndex -- get OID of the relation's replica identity index
4719  *
4720  * Returns InvalidOid if there is no such index.
4721  */
4722 Oid
4724 {
4725  List *ilist;
4726 
4727  if (!relation->rd_indexvalid)
4728  {
4729  /* RelationGetIndexList does the heavy lifting. */
4730  ilist = RelationGetIndexList(relation);
4731  list_free(ilist);
4732  Assert(relation->rd_indexvalid);
4733  }
4734 
4735  return relation->rd_replidindex;
4736 }
4737 
4738 /*
4739  * RelationGetIndexExpressions -- get the index expressions for an index
4740  *
4741  * We cache the result of transforming pg_index.indexprs into a node tree.
4742  * If the rel is not an index or has no expressional columns, we return NIL.
4743  * Otherwise, the returned tree is copied into the caller's memory context.
4744  * (We don't want to return a pointer to the relcache copy, since it could
4745  * disappear due to relcache invalidation.)
4746  */
4747 List *
4749 {
4750  List *result;
4751  Datum exprsDatum;
4752  bool isnull;
4753  char *exprsString;
4754  MemoryContext oldcxt;
4755 
4756  /* Quick exit if we already computed the result. */
4757  if (relation->rd_indexprs)
4758  return copyObject(relation->rd_indexprs);
4759 
4760  /* Quick exit if there is nothing to do. */
4761  if (relation->rd_indextuple == NULL ||
4762  heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs, NULL))
4763  return NIL;
4764 
4765  /*
4766  * We build the tree we intend to return in the caller's context. After
4767  * successfully completing the work, we copy it into the relcache entry.
4768  * This avoids problems if we get some sort of error partway through.
4769  */
4770  exprsDatum = heap_getattr(relation->rd_indextuple,
4771  Anum_pg_index_indexprs,
4773  &isnull);
4774  Assert(!isnull);
4775  exprsString = TextDatumGetCString(exprsDatum);
4776  result = (List *) stringToNode(exprsString);
4777  pfree(exprsString);
4778 
4779  /*
4780  * Run the expressions through eval_const_expressions. This is not just an
4781  * optimization, but is necessary, because the planner will be comparing
4782  * them to similarly-processed qual clauses, and may fail to detect valid
4783  * matches without this. We must not use canonicalize_qual, however,
4784  * since these aren't qual expressions.
4785  */
4786  result = (List *) eval_const_expressions(NULL, (Node *) result);
4787 
4788  /* May as well fix opfuncids too */
4789  fix_opfuncids((Node *) result);
4790 
4791  /* Now save a copy of the completed tree in the relcache entry. */
4792  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4793  relation->rd_indexprs = copyObject(result);
4794  MemoryContextSwitchTo(oldcxt);
4795 
4796  return result;
4797 }
4798 
4799 /*
4800  * RelationGetDummyIndexExpressions -- get dummy expressions for an index
4801  *
4802  * Return a list of dummy expressions (just Const nodes) with the same
4803  * types/typmods/collations as the index's real expressions. This is
4804  * useful in situations where we don't want to run any user-defined code.
4805  */
4806 List *
4808 {
4809  List *result;
4810  Datum exprsDatum;
4811  bool isnull;
4812  char *exprsString;
4813  List *rawExprs;
4814  ListCell *lc;
4815 
4816  /* Quick exit if there is nothing to do. */
4817  if (relation->rd_indextuple == NULL ||
4818  heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs, NULL))
4819  return NIL;
4820 
4821  /* Extract raw node tree(s) from index tuple. */
4822  exprsDatum = heap_getattr(relation->rd_indextuple,
4823  Anum_pg_index_indexprs,
4825  &isnull);
4826  Assert(!isnull);
4827  exprsString = TextDatumGetCString(exprsDatum);
4828  rawExprs = (List *) stringToNode(exprsString);
4829  pfree(exprsString);
4830 
4831  /* Construct null Consts; the typlen and typbyval are arbitrary. */
4832  result = NIL;
4833  foreach(lc, rawExprs)
4834  {
4835  Node *rawExpr = (Node *) lfirst(lc);
4836 
4837  result = lappend(result,
4838  makeConst(exprType(rawExpr),
4839  exprTypmod(rawExpr),
4840  exprCollation(rawExpr),
4841  1,
4842  (Datum) 0,
4843  true,
4844  true));
4845  }
4846 
4847  return result;
4848 }
4849 
4850 /*
4851  * RelationGetIndexPredicate -- get the index predicate for an index
4852  *
4853  * We cache the result of transforming pg_index.indpred into an implicit-AND
4854  * node tree (suitable for use in planning).
4855  * If the rel is not an index or has no predicate, we return NIL.
4856  * Otherwise, the returned tree is copied into the caller's memory context.
4857  * (We don't want to return a pointer to the relcache copy, since it could
4858  * disappear due to relcache invalidation.)
 *
 * Two paths produce a non-NIL result. If rd_indpred is already set, a
 * fresh copyObject() of it is returned. Otherwise the indpred text is
 * parsed and preprocessed, a copy is stored in rd_indpred (made while
 * rd_indexcxt is the current memory context), and the working tree itself
 * is handed back to the caller.
 *
 * Because rd_indpred is only assigned after all processing steps have
 * completed, an error partway through leaves the cache field unset.
4859  */
4860 List *
4862 {
4863  List *result;
4864  Datum predDatum;
4865  bool isnull;
4866  char *predString;
4867  MemoryContext oldcxt;
4868 
4869  /* Quick exit if we already computed the result. */
4870  if (relation->rd_indpred)
4871  return copyObject(relation->rd_indpred);
4872 
4873  /* Quick exit if there is nothing to do. */
4874  if (relation->rd_indextuple == NULL ||
4875  heap_attisnull(relation->rd_indextuple, Anum_pg_index_indpred, NULL))
4876  return NIL;
4877 
4878  /*
4879  * We build the tree we intend to return in the caller's context. After
4880  * successfully completing the work, we copy it into the relcache entry.
4881  * This avoids problems if we get some sort of error partway through.
4882  */
4883  predDatum = heap_getattr(relation->rd_indextuple,
4884  Anum_pg_index_indpred,
4886  &isnull);
 /* Cannot be null here: the heap_attisnull() test above already returned. */
4887  Assert(!isnull);
4888  predString = TextDatumGetCString(predDatum);
4889  result = (List *) stringToNode(predString);
4890  pfree(predString);
4891 
4892  /*
4893  * Run the expression through const-simplification and canonicalization.
4894  * This is not just an optimization, but is necessary, because the planner
4895  * will be comparing it to similarly-processed qual clauses, and may fail
4896  * to detect valid matches without this. This must match the processing
4897  * done to qual clauses in preprocess_expression()! (We can skip the
4898  * stuff involving subqueries, however, since we don't allow any in index
4899  * predicates.)
4900  */
 /* NOTE(review): NULL is presumably "no planner context available" -- confirm. */
4901  result = (List *) eval_const_expressions(NULL, (Node *) result);
4902 
 /*
  * NOTE(review): the "false" flag presumably selects ordinary WHERE-clause
  * (as opposed to CHECK-constraint) semantics -- confirm against
  * canonicalize_qual().
  */
4903  result = (List *) canonicalize_qual((Expr *) result, false);
4904 
4905  /* Also convert to implicit-AND format */
4906  result = make_ands_implicit((Expr *) result);
4907 
4908  /* May as well fix opfuncids too */
4909  fix_opfuncids((Node *) result);
4910 
4911  /* Now save a copy of the completed tree in the relcache entry. */
4912  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4913  relation->rd_indpred = copyObject(result);
4914  MemoryContextSwitchTo(oldcxt);
4915 
 /* The caller gets the working tree; the cache keeps its own copy. */
4916  return result;
4917 }
4918 
4919 /*
4920  * RelationGetIndexAttrBitmap -- get a bitmap of index attribute numbers
4921  *
4922  * The result has a bit set for each attribute used anywhere in the index
4923  * definitions of all the indexes on this relation. (This includes not only
4924  * simple index keys, but attributes used in expressions and partial-index
4925  * predicates.)
4926  *
4927  * Depending on attrKind, a bitmap covering the attnums for all index columns,
4928  * for all potential foreign key columns, or for all columns in the configured
4929  * replica identity index is returned.
4930  *
4931  * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
4932  * we can include system attributes (e.g., OID) in the bitmap representation.
4933  *
4934  * Caller had better hold at least RowExclusiveLock on the target relation
4935  * to ensure it is safe (deadlock-free) for us to take locks on the relation's
4936  * indexes. Note that since the introduction of CREATE INDEX CONCURRENTLY,
4937  * that lock level doesn't guarantee a stable set of indexes, so we have to
4938  * be prepared to retry here in case of a change in the set of indexes.
4939  *
4940  * The returned result is palloc'd in the caller's memory context and should
4941  * be bms_free'd when not needed anymore.
 *
 * Four bitmaps are computed together and cached in the relcache entry:
 * rd_indexattr (INDEX_ATTR_BITMAP_ALL), rd_keyattr (INDEX_ATTR_BITMAP_KEY),
 * rd_pkattr and rd_idattr. A non-NULL rd_indexattr is what marks all four
 * as valid, which is why it is tested first and assigned last.
 *
 * NULL is returned when relhasindex is false or the index list is empty.
 * An unrecognized attrKind raises elog(ERROR).
4942  */
4943 Bitmapset *
4945 {
4946  Bitmapset *indexattrs; /* indexed columns */
4947  Bitmapset *uindexattrs; /* columns in unique indexes */
4948  Bitmapset *pkindexattrs; /* columns in the primary index */
4949  Bitmapset *idindexattrs; /* columns in the replica identity */
4950  List *indexoidlist;
4951  List *newindexoidlist;
4952  Oid relpkindex;
4953  Oid relreplindex;
4954  ListCell *l;
4955  MemoryContext oldcxt;
4956 
4957  /* Quick exit if we already computed the result. */
4958  if (relation->rd_indexattr != NULL)
4959  {
 /* Each case hands back a copy, never the cached bitmap itself. */
4960  switch (attrKind)
4961  {
4962  case INDEX_ATTR_BITMAP_ALL:
4963  return bms_copy(relation->rd_indexattr);
4964  case INDEX_ATTR_BITMAP_KEY:
4965  return bms_copy(relation->rd_keyattr);
4967  return bms_copy(relation->rd_pkattr);
4969  return bms_copy(relation->rd_idattr);
4970  default:
4971  elog(ERROR, "unknown attrKind %u", attrKind);
4972  }
4973  }
4974 
4975  /* Fast path if definitely no indexes */
4976  if (!RelationGetForm(relation)->relhasindex)
4977  return NULL;
4978 
4979  /*
4980  * Get cached list of index OIDs. If we have to start over, we do so here.
4981  */
4982 restart:
4983  indexoidlist = RelationGetIndexList(relation);
4984 
4985  /* Fall out if no indexes (but relhasindex was set) */
4986  if (indexoidlist == NIL)
4987  return NULL;
4988 
4989  /*
4990  * Copy the rd_pkindex and rd_replidindex values computed by
4991  * RelationGetIndexList before proceeding. This is needed because a
4992  * relcache flush could occur inside index_open below, resetting the
4993  * fields managed by RelationGetIndexList. We need to do the work with
4994  * stable values of these fields.
4995  */
4996  relpkindex = relation->rd_pkindex;
4997  relreplindex = relation->rd_replidindex;
4998 
4999  /*
5000  * For each index, add referenced attributes to indexattrs.
5001  *
5002  * Note: we consider all indexes returned by RelationGetIndexList, even if
5003  * they are not indisready or indisvalid. This is important because an
5004  * index for which CREATE INDEX CONCURRENTLY has just started must be
5005  * included in HOT-safety decisions (see README.HOT). If a DROP INDEX
5006  * CONCURRENTLY is far enough along that we should ignore the index, it
5007  * won't be returned at all by RelationGetIndexList.
5008  */
 /* These resets sit after the restart label, so a retry starts from empty sets. */
5009  indexattrs = NULL;
5010  uindexattrs = NULL;
5011  pkindexattrs = NULL;
5012  idindexattrs = NULL;
5013  foreach(l, indexoidlist)
5014  {
5015  Oid indexOid = lfirst_oid(l);
5016  Relation indexDesc;
5017  Datum datum;
5018  bool isnull;
5019  Node *indexExpressions;
5020  Node *indexPredicate;
5021  int i;
5022  bool isKey; /* candidate key */
5023  bool isPK; /* primary key */
5024  bool isIDKey; /* replica identity index */
5025 
5026  indexDesc = index_open(indexOid, AccessShareLock);
5027 
5028  /*
5029  * Extract index expressions and index predicate. Note: Don't use
5030  * RelationGetIndexExpressions()/RelationGetIndexPredicate(), because
5031  * those might run constant expressions evaluation, which needs a
5032  * snapshot, which we might not have here. (Also, it's probably more
5033  * sound to collect the bitmaps before any transformations that might
5034  * eliminate columns, but the practical impact of this is limited.)
5035  */
5036 
 /*
  * NOTE(review): neither the C string from TextDatumGetCString() nor the
  * parsed tree is freed in this loop; presumably that is left to cleanup
  * of the caller's memory context -- confirm.
  */
5037  datum = heap_getattr(indexDesc->rd_indextuple, Anum_pg_index_indexprs,
5038  GetPgIndexDescriptor(), &isnull);
5039  if (!isnull)
5040  indexExpressions = stringToNode(TextDatumGetCString(datum));
5041  else
5042  indexExpressions = NULL;
5043 
5044  datum = heap_getattr(indexDesc->rd_indextuple, Anum_pg_index_indpred,
5045  GetPgIndexDescriptor(), &isnull);
5046  if (!isnull)
5047  indexPredicate = stringToNode(TextDatumGetCString(datum));
5048  else
5049  indexPredicate = NULL;
5050 
5051  /* Can this index be referenced by a foreign key? */
 /* That is: unique, with no expression columns and no predicate. */
5052  isKey = indexDesc->rd_index->indisunique &&
5053  indexExpressions == NULL &&
5054  indexPredicate == NULL;
5055 
5056  /* Is this a primary key? */
5057  isPK = (indexOid == relpkindex);
5058 
5059  /* Is this index the configured (or default) replica identity? */
5060  isIDKey = (indexOid == relreplindex);
5061 
5062  /* Collect simple attribute references */
5063  for (i = 0; i < indexDesc->rd_index->indnatts; i++)
5064  {
5065  int attrnum = indexDesc->rd_index->indkey.values[i];
5066 
5067  /*
5068  * Since we have covering indexes with non-key columns, we must
5069  * handle them accurately here. non-key columns must be added into
5070  * indexattrs, since they are in index, and HOT-update shouldn't
5071  * miss them. Obviously, non-key columns couldn't be referenced by
5072  * foreign key or identity key. Hence we do not include them into
5073  * uindexattrs, pkindexattrs and idindexattrs bitmaps.
5074  */
 /*
  * Entries with attrnum == 0 are skipped here (presumably expression
  * columns, which the pull_varattnos() calls below account for --
  * confirm).
  */
5075  if (attrnum != 0)
5076  {
5077  indexattrs = bms_add_member(indexattrs,
5079 
 /* i < indnkeyatts restricts the next three sets to key columns. */
5080  if (isKey && i < indexDesc->rd_index->indnkeyatts)
5081  uindexattrs = bms_add_member(uindexattrs,
5083 
5084  if (isPK && i < indexDesc->rd_index->indnkeyatts)
5085  pkindexattrs = bms_add_member(pkindexattrs,
5087 
5088  if (isIDKey && i < indexDesc->rd_index->indnkeyatts)
5089  idindexattrs = bms_add_member(idindexattrs,
5091  }
5092  }
5093 
 /*
  * NOTE(review): the literal 1 passed to pull_varattnos() is presumably
  * the varno under which these trees reference the indexed table --
  * confirm. Only indexattrs is extended by the two calls below.
  */
5094  /* Collect all attributes used in expressions, too */
5095  pull_varattnos(indexExpressions, 1, &indexattrs);
5096 
5097  /* Collect all attributes in the index predicate, too */
5098  pull_varattnos(indexPredicate, 1, &indexattrs);
5099 
5100  index_close(indexDesc, AccessShareLock);
5101  }
5102 
5103  /*
5104  * During one of the index_opens in the above loop, we might have received
5105  * a relcache flush event on this relcache entry, which might have been
5106  * signaling a change in the rel's index list. If so, we'd better start
5107  * over to ensure we deliver up-to-date attribute bitmaps.
5108  */
5109  newindexoidlist = RelationGetIndexList(relation);
 /* "Unchanged" means same OID list and same primary-key/replica-identity index. */
5110  if (equal(indexoidlist, newindexoidlist) &&
5111  relpkindex == relation->rd_pkindex &&
5112  relreplindex == relation->rd_replidindex)
5113  {
5114  /* Still the same index set, so proceed */
5115  list_free(newindexoidlist);
5116  list_free(indexoidlist);
5117  }
5118  else
5119  {
5120  /* Gotta do it over ... might as well not leak memory */
5121  list_free(newindexoidlist);
5122  list_free(indexoidlist);
5123  bms_free(uindexattrs);
5124  bms_free(pkindexattrs);
5125  bms_free(idindexattrs);
5126  bms_free(indexattrs);
5127 
5128  goto restart;
5129  }
5130 
5131  /* Don't leak the old values of these bitmaps, if any */
5132  bms_free(relation->rd_indexattr);
5133  relation->rd_indexattr = NULL;
5134  bms_free(relation->rd_keyattr);
5135  relation->rd_keyattr = NULL;
5136  bms_free(relation->rd_pkattr);
5137  relation->rd_pkattr = NULL;
5138  bms_free(relation->rd_idattr);
5139  relation->rd_idattr = NULL;
5140 
5141  /*
5142  * Now save copies of the bitmaps in the relcache entry. We intentionally
5143  * set rd_indexattr last, because that's the one that signals validity of
5144  * the values; if we run out of memory before making that copy, we won't
5145  * leave the relcache entry looking like the other ones are valid but
5146  * empty.
5147  */
5149  relation->rd_keyattr = bms_copy(uindexattrs);
5150  relation->rd_pkattr = bms_copy(pkindexattrs);
5151  relation->rd_idattr = bms_copy(idindexattrs);
5152  relation->rd_indexattr = bms_copy(indexattrs);
5153  MemoryContextSwitchTo(oldcxt);
5154 
5155  /* We return our original working copy for caller to play with */
 /* The three working sets not selected by attrKind are not freed here. */
5156  switch (attrKind)
5157  {
5158  case INDEX_ATTR_BITMAP_ALL:
5159  return indexattrs;
5160  case INDEX_ATTR_BITMAP_KEY:
5161  return uindexattrs;
5163  return pkindexattrs;
5165  return idindexattrs;
5166  default:
5167  elog(ERROR, "unknown attrKind %u", attrKind);
5168  return NULL;
5169  }
5170 }
5171 
5172 /*
5173  * RelationGetExclusionInfo -- get info about index's exclusion constraint
5174  *
5175  * This should be called only for an index that is known to have an
5176  * associated exclusion constraint. It returns arrays (palloc'd in caller's
5177  * context) of the exclusion operator OIDs, their underlying functions'
5178  * OIDs, and their strategy numbers in the index's opclasses. We cache
5179  * all this information since it requires a fair amount of work to get.
 *
 * *operators, *procs and *strategies are always set to freshly palloc'd
 * arrays of IndexRelationGetNumberOfKeyAttributes(indexRelation) entries,
 * whether the values come from the cached rd_excl* arrays or from a scan
 * of pg_constraint.
 *
 * elog(ERROR) is raised if no exclusion-constraint row owning this index
 * is found, if more than one is found, if conexclop is null or is not a
 * one-dimensional, null-free Oid array of the expected length, or if an
 * operator has no strategy in the corresponding opfamily.
5180  */
5181 void
5183  Oid **operators,
5184  Oid **procs,
5185  uint16 **strategies)
5186 {
5187  int indnkeyatts;
5188  Oid *ops;
5189  Oid *funcs;
5190  uint16 *strats;
5191  Relation conrel;
5192  SysScanDesc conscan;
5193  ScanKeyData skey[1];
5194  HeapTuple htup;
5195  bool found;
5196  MemoryContext oldcxt;
5197  int i;
5198 
5199  indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation);
5200 
5201  /* Allocate result space in caller context */
 /* Done before the cache test so both paths fill the same output arrays. */
5202  *operators = ops = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5203  *procs = funcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5204  *strategies = strats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
5205 
5206  /* Quick exit if we have the data cached already */
 /* Only rd_exclstrats is tested; it is the last of the three to be allocated below. */
5207  if (indexRelation->rd_exclstrats != NULL)
5208  {
5209  memcpy(ops, indexRelation->rd_exclops, sizeof(Oid) * indnkeyatts);
5210  memcpy(funcs, indexRelation->rd_exclprocs, sizeof(Oid) * indnkeyatts);
5211  memcpy(strats, indexRelation->rd_exclstrats, sizeof(uint16) * indnkeyatts);
5212  return;
5213  }
5214 
5215  /*
5216  * Search pg_constraint for the constraint associated with the index. To
5217  * make this not too painfully slow, we use the index on conrelid; that
5218  * will hold the parent relation's OID not the index's own OID.
5219  *
5220  * Note: if we wanted to rely on the constraint name matching the index's
5221  * name, we could just do a direct lookup using pg_constraint's unique
5222  * index. For the moment it doesn't seem worth requiring that.
5223  */
5224  ScanKeyInit(&skey[0],
5225  Anum_pg_constraint_conrelid,
5226  BTEqualStrategyNumber, F_OIDEQ,
5227  ObjectIdGetDatum(indexRelation->rd_index->indrelid));
5228 
5229  conrel = table_open(ConstraintRelationId, AccessShareLock);
5230  conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
5231  NULL, 1, skey);
5232  found = false;
5233 
 /*
  * The scan returns every constraint of the parent table; rows are
  * filtered below by constraint type and owning index.
  */
5234  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
5235  {
5237  Datum val;
5238  bool isnull;
5239  ArrayType *arr;
5240  int nelem;
5241 
5242  /* We want the exclusion constraint owning the index */
5243  if (conform->contype != CONSTRAINT_EXCLUSION ||
5244  conform->conindid != RelationGetRelid(indexRelation))
5245  continue;
5246 
5247  /* There should be only one */
5248  if (found)
5249  elog(ERROR, "unexpected exclusion constraint record found for rel %s",
5250  RelationGetRelationName(indexRelation));
5251  found = true;
5252 
5253  /* Extract the operator OIDS from conexclop */
5254  val = fastgetattr(htup,
5255  Anum_pg_constraint_conexclop,
5256  conrel->rd_att, &isnull);
5257  if (isnull)
5258  elog(ERROR, "null conexclop for rel %s",
5259  RelationGetRelationName(indexRelation));
5260 
5261  arr = DatumGetArrayTypeP(val); /* ensure not toasted */
 /*
  * NOTE(review): ARR_DIMS(arr)[0] is read before ARR_NDIM(arr) is
  * verified to be 1 -- confirm this read is safe for an array with no
  * dimensions.
  */
5262  nelem = ARR_DIMS(arr)[0];
5263  if (ARR_NDIM(arr) != 1 ||
5264  nelem != indnkeyatts ||
5265  ARR_HASNULL(arr) ||
5266  ARR_ELEMTYPE(arr) != OIDOID)
5267  elog(ERROR, "conexclop is not a 1-D Oid array");
5268 
 /* Copied out before the scan ends; the loop continues to detect duplicates. */
5269  memcpy(ops, ARR_DATA_PTR(arr), sizeof(Oid) * indnkeyatts);
5270  }
5271 
5272  systable_endscan(conscan);
5273  table_close(conrel, AccessShareLock);
5274 
5275  if (!found)
5276  elog(ERROR, "exclusion constraint record missing for rel %s",
5277  RelationGetRelationName(indexRelation));
5278 
5279  /* We need the func OIDs and strategy numbers too */
5280  for (i = 0; i < indnkeyatts; i++)
5281  {
5282  funcs[i] = get_opcode(ops[i]);
5283  strats[i] = get_op_opfamily_strategy(ops[i],
5284  indexRelation->rd_opfamily[i]);
5285  /* shouldn't fail, since it was checked at index creation */
5286  if (strats[i] == InvalidStrategy)
5287  elog(ERROR, "could not find strategy for operator %u in family %u",
5288  ops[i], indexRelation->rd_opfamily[i]);
5289  }
5290 
5291  /* Save a copy of the results in the relcache entry. */
 /* Reached only after every lookup above succeeded. */
5292  oldcxt = MemoryContextSwitchTo(indexRelation->rd_indexcxt);
5293  indexRelation->rd_exclops = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5294  indexRelation->rd_exclprocs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5295  indexRelation->rd_exclstrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
5296  memcpy(indexRelation->rd_exclops, ops, sizeof(Oid) * indnkeyatts);
5297  memcpy(indexRelation->rd_exclprocs, funcs, sizeof(Oid) * indnkeyatts);
5298  memcpy(indexRelation->rd_exclstrats, strats, sizeof(uint16) * indnkeyatts);
5299  MemoryContextSwitchTo(oldcxt);
5300 }
5301 
5302 /*
5303  * Get publication actions for the given relation.
 *
 * The result is always a freshly palloc0'd PublicationActions struct,
 * separate from the copy kept in relation->rd_pubactions.
 *
 * If the relation is not publishable, every flag in the result is false.
 * If rd_pubactions is already set, its contents are copied into the result.
 * Otherwise each of pubinsert/pubupdate/pubdelete/pubtruncate is the OR of
 * that flag over the relation's own publications, those of its partition
 * ancestors (when relispartition is set), and whatever
 * GetAllTablesPublications() returns; the combined flags are then saved in
 * rd_pubactions for later calls.
 *
 * elog(ERROR) is raised if a publication OID cannot be looked up.
5304  */
5305 struct PublicationActions *
5307 {
5308  List *puboids;
5309  ListCell *lc;
5310  MemoryContext oldcxt;
5311  PublicationActions *pubactions = palloc0(sizeof(PublicationActions));
5312 
5313  /*
5314  * If not publishable, it publishes no actions. (pgoutput_change() will
5315  * ignore it.)
5316  */
5317  if (!is_publishable_relation(relation))
5318  return pubactions;
5319 
 /* Cached answer available: memcpy() returns its destination, i.e. pubactions. */
5320  if (relation->rd_pubactions)
5321  return memcpy(pubactions, relation->rd_pubactions,
5322  sizeof(PublicationActions));
5323 
5324  /* Fetch the publication membership info. */
5325  puboids = GetRelationPublications(RelationGetRelid(relation));
5326  if (relation->rd_rel->relispartition)
5327  {
5328  /* Add publications that the ancestors are in too. */
5329  List *ancestors = get_partition_ancestors(RelationGetRelid(relation));
 /* NOTE(review): this lc shadows the function-level lc declared above. */
5330  ListCell *lc;
5331 
5332  foreach(lc, ancestors)
5333  {
5334  Oid ancestor = lfirst_oid(lc);
5335 
5336  puboids = list_concat_unique_oid(puboids,
5337  GetRelationPublications(ancestor));
5338  }
5339  }
5340  puboids = list_concat_unique_oid(puboids, GetAllTablesPublications());
5341 
5342  foreach(lc, puboids)
5343  {
5344  Oid pubid = lfirst_oid(lc);
5345  HeapTuple tup;
5346  Form_pg_publication pubform;
5347 
5349 
5350  if (!HeapTupleIsValid(tup))
5351  elog(ERROR, "cache lookup failed for publication %u", pubid);
5352 
5353  pubform = (Form_pg_publication) GETSTRUCT(tup);
5354 
 /* Flags only accumulate: once set by one publication they stay set. */
5355  pubactions->pubinsert |= pubform->pubinsert;
5356  pubactions->pubupdate |= pubform->pubupdate;
5357  pubactions->pubdelete |= pubform->pubdelete;
5358  pubactions->pubtruncate |= pubform->pubtruncate;
5359 
5360  ReleaseSysCache(tup);
5361 
5362  /*
5363  * If we know everything is replicated, there is no point to check for
5364  * other publications.
5365  */
5366  if (pubactions->pubinsert && pubactions->pubupdate &&
5367  pubactions->pubdelete && pubactions->pubtruncate)
5368  break;
5369  }
5370 
 /*
  * NOTE(review): rd_pubactions was NULL at the early-return test above, so
  * this branch presumably only fires if the field was filled in during
  * the lookups in between -- confirm.
  */
5371  if (relation->rd_pubactions)
5372  {
5373  pfree(relation->rd_pubactions);
5374  relation->rd_pubactions = NULL;
5375  }
5376 
5377  /* Now save copy of the actions in the relcache entry. */
5379  relation->rd_pubactions = palloc(sizeof(PublicationActions));
5380  memcpy(relation->rd_pubactions, pubactions, sizeof(PublicationActions));
5381  MemoryContextSwitchTo(oldcxt);
5382 
5383  return pubactions;
5384 }
5385 
5386 /*
5387  * RelationGetIndexRawAttOptions -- get AM/opclass-specific options for the index
 *
 * Returns an array of one Datum per index attribute (natts entries,
 * zero-initialized), in which entry attnum - 1 holds the result of
 * get_attoptions() for each attribute that has a valid options support
 * procedure. Entries for other attributes stay zero.
 *
 * The array is allocated lazily: if the index AM has amoptsprocnum == 0,
 * or no attribute has a valid options procedure, NULL is returned.
5388  */
5389 Datum *
5391 {
5392  Oid indexrelid = RelationGetRelid(indexrel);
5393  int16 natts = RelationGetNumberOfAttributes(indexrel);
5394  Datum *options = NULL;
5395  int16 attnum;
5396 
 /* attnum is 1-based here; the result array is 0-based. */
5397  for (attnum = 1; attnum <= natts; attnum++)
5398  {
 /* Loop-invariant: an AM without an options proc number skips every attribute. */
5399  if (indexrel->rd_indam->amoptsprocnum == 0)
5400  continue;
5401 
 /* Skip attributes with no options support procedure. */
5402  if (!OidIsValid(index_getprocid(indexrel, attnum,
5403  indexrel->rd_indam->amoptsprocnum)))
5404  continue;
5405 
 /* First attribute with options: allocate the zero-filled result now. */
5406  if (!options)
5407  options = palloc0(sizeof(Datum) * natts);
5408 
5409  options[attnum - 1] = get_attoptions(indexrelid, attnum);
5410  }
5411 
5412  return options;
5413 }
5414 
/*
 * CopyIndexAttOptions -- copy an array of per-attribute option values
 *
 * srcopts must have natts entries. Returns a newly palloc'd array of the
 * same length in which each non-NULL entry is a separate datumCopy() of the
 * source entry and each NULL entry stays NULL. The source array is not
 * modified.
 *
 * No memory context is chosen here; a caller that wants the copy to live in
 * a particular context switches to it first.
 */
5415 static bytea **
5416 CopyIndexAttOptions(bytea **srcopts, int natts)
5417 {
5418  bytea **opts = palloc(sizeof(*opts) * natts);
5419 
5420  for (int i = 0; i < natts; i++)
5421  {
5422  bytea *opt = srcopts[i];
5423 
 /*
  * NOTE(review): (false, -1) are presumably datumCopy()'s typByVal and
  * typLen arguments, i.e. a pass-by-reference varlena value -- confirm.
  */
5424  opts[i] = !opt ? NULL : (bytea *)
5425  DatumGetPointer(datumCopy(PointerGetDatum(opt), false, -1));
5426  }
5427 
5428  return opts;
5429 }
5430 
5431 /*
5432  * RelationGetIndexAttOptions
5433  * get AM/opclass-specific options for an index parsed into a binary form
 *
 * The result is an array with one bytea pointer per attribute of the
 * relation (RelationGetNumberOfAttributes); an entry may be NULL.
 *
 * copy = true: the caller receives its own array and its own copies of the
 * entries, distinct from what the relcache entry holds.
 * copy = false: the caller receives relation->rd_opcoptions itself, i.e. a
 * pointer into the relcache entry.
 *
 * On the first call the options are parsed via index_opclass_options() and
 * a copy is stored in rd_opcoptions (made while rd_indexcxt is the current
 * memory context); later calls are served from that cached array.
5434  */
5435 bytea **
5437 {
5438  MemoryContext oldcxt;
5439  bytea **opts = relation->rd_opcoptions;
5440  Oid relid = RelationGetRelid(relation);
5441  int natts = RelationGetNumberOfAttributes(relation); /* XXX
5442  * IndexRelationGetNumberOfKeyAttributes */
5443  int i;
5444 
5445  /* Try to copy cached options. */
5446  if (opts)
5447  return copy ? CopyIndexAttOptions(opts, natts) : opts;
5448 
5449  /* Get and parse opclass options. */
 /* palloc0 leaves every entry NULL unless it is filled in below. */
5450  opts = palloc0(sizeof(*opts) * natts);
5451 
5452  for (i = 0; i < natts; i++)
5453  {
5455  {
 /* get_attoptions() takes a 1-based attribute number, hence i + 1. */
5456  Datum attoptions = get_attoptions(relid, i + 1);
5457 
5458  opts[i] = index_opclass_options(relation, i + 1, attoptions, false);
5459 
 /* The raw options datum is only needed as input to the parse step. */
5460  if (attoptions != (Datum) 0)
5461  pfree(DatumGetPointer(attoptions));
5462  }
5463  }
5464 
5465  /* Copy parsed options to the cache. */
5466  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
5467  relation->rd_opcoptions = CopyIndexAttOptions(opts, natts);
5468  MemoryContextSwitchTo(oldcxt);
5469 
 /* With copy set, the working array itself serves as the caller's copy. */
5470  if (copy)
5471  return opts;
5472 
 /* Otherwise the working array is no longer needed: free it and its entries. */
5473  for (i = 0; i < natts; i++)
5474  {
5475  if (opts[i])
5476  pfree(opts[i]);
5477  }
5478 
5479  pfree(opts);
5480 
5481  return relation->rd_opcoptions;
5482 }
5483 
5484 /*
5485  * Routines to support ereport() reports of relation-related errors
5486  *
5487  * These could have been put into elog.c, but it seems like a module layering
5488  * violation to have elog.c calling relcache or syscache stuff --- and we
5489  * definitely don't want elog.h including rel.h. So we put them here.
5490  */
5491 
5492 /*
5493  * errtable --- stores schema_name and table_name of a table
5494  * within the current errordata.
 *
 * Always returns 0; the value carries no information.
5495  */
5496 int
5498 {
5502 
5503  return 0; /* return value does not matter */
5504 }
5505 
5506 /*
5507  * errtablecol --- stores schema_name, table_name and column_name
5508  * of a table column within the current errordata.
5509  *
5510  * The column is specified by attribute number --- for most callers, this is
5511  * easier and less error-prone than getting the column name for themselves.
 *
 * The column name is resolved here and the actual work is delegated to
 * errtablecolname(), whose result is returned.
5512  */
5513 int
5515 {
5517  const char *colname;
5518 
5519  /* Use reldesc if it's a user attribute, else consult the catalogs */
 /* attnum is 1-based, so the descriptor entry is at attnum - 1. */
5520  if (attnum > 0 && attnum <= reldesc->natts)
5521  colname = NameStr(TupleDescAttr(reldesc, attnum - 1)->attname);
5522  else
 /* NOTE(review): the "false" is presumably get_attname()'s missing_ok flag -- confirm. */
5523  colname = get_attname(RelationGetRelid(rel), attnum, false);
5524 
5525  return errtablecolname(rel, colname);
5526 }
5527 
5528 /*
5529  * errtablecolname --- stores schema_name, table_name and column_name
5530  * of a table column within the current errordata, where the column name is
5531  * given directly rather than extracted from the relation's catalog data.
5532  *
5533  * Don't use this directly unless errtablecol() is inconvenient for some
5534  * reason. This might possibly be needed during intermediate states in ALTER
5535  * TABLE, for instance.
 *
 * errtable(rel) is called first. Always returns 0.
5536  */
5537 int
5538 errtablecolname(Relation rel, const char *colname)
5539 {
 /* Table-level fields first; its return value is deliberately ignored. */
5540  errtable(rel);
5542 
5543  return 0; /* return value does not matter */
5544 }
5545 
5546 /*
5547  * errtableconstraint --- stores schema_name, table_name and constraint_name
5548  * of a table-related constraint within the current errordata.
 *
 * errtable(rel) is called first. Always returns 0.
5549  */
5550 int
5551 errtableconstraint(Relation rel, const char *conname)
5552 {
 /* Table-level fields first; its return value is deliberately ignored. */
5553  errtable(rel);
5555 
5556  return 0; /* return value does not matter */
5557 }
5558 
5559 
5560 /*
5561  * load_relcache_init_file, write_relcache_init_file
5562  *
5563  * In late 1992, we started regularly having databases with more than
5564  * a thousand classes in them. With this number of classes, it became
5565  * critical to do indexed lookups on the system catalogs.
5566  *
5567  * Bootstrapping these lookups is very hard. We want to be able to
5568  * use an index on pg_attribute, for example, but in order to do so,
5569  * we must have read pg_attribute for the attributes in the index,
5570  * which implies that we need to use the index.
5571  *
5572  * In order to get around the problem, we do the following:
5573  *
5574  * + When the database system is initialized (at initdb time), we
5575  * don't use indexes. We do sequential scans.
5576  *
5577  * + When the backend is started up in normal mode, we load an image
5578  * of the appropriate relation descriptors, in internal format,
5579  * from an initialization file in the data/base/... directory.
5580  *
5581  * + If the initialization file isn't there, then we create the
5582  * relation descriptors using sequential scans and write 'em to
5583  * the initialization file for use by subsequent backends.
5584  *
5585  * As of Postgres 9.0, there is one local initialization file in each
5586  * database, plus one shared initialization file for shared catalogs.
5587  *
5588  * We could dispense with the initialization files and just build the
5589  * critical reldescs the hard way on every backend startup, but that
5590  * slows down backend startup noticeably.
5591  *
5592  * We can in fact go further, and save more relcache entries than
5593  * just the ones that are absolutely critical; this allows us to speed
5594  * up backend startup by not having to build such entries the hard way.
5595  * Presently, all the catalog and index entries that are referred to
5596  * by catcaches are stored in the initialization files.
5597  *
5598  * The same mechanism that detects when catcache and relcache entries
5599  * need to be invalidated (due to catalog updates) also arranges to
5600  * unlink the initialization files when the contents may be out of date.
5601  * The files will then be rebuilt during the next backend startup.
5602  */
5603 
5604 /*
5605  * load_relcache_init_file -- attempt to load cache from the shared
5606  * or local cache init file
5607  *
5608  * If successful, return true and set criticalRelcachesBuilt or
5609  * criticalSharedRelcachesBuilt to true.
5610  * If not successful, return false.
5611  *
5612  * NOTE: we assume we are already switched into CacheMemoryContext.
5613  */
5614 static bool
5616 {
5617  FILE *fp;
5618  char initfilename[MAXPGPATH];
5619  Relation *rels;
5620  int relno,
5621  num_rels,
5622  max_rels,
5623  nailed_rels,
5624  nailed_indexes,
5625  magic;
5626  int i;
5627 
5628  if (shared)
5629  snprintf(initfilename, sizeof(initfilename), "global/%s",
5631  else
5632  snprintf(initfilename, sizeof(initfilename), "%s/%s",
5634 
5635  fp = AllocateFile(initfilename, PG_BINARY_R);
5636  if (fp == NULL)
5637  return false;
5638 
5639  /*
5640  * Read the index relcache entries from the file. Note we will not enter
5641  * any of them into the cache if the read fails partway through; this
5642  * helps to guard against broken init files.
5643  */
5644  max_rels = 100;
5645  rels = (Relation *) palloc(max_rels * sizeof(Relation));
5646  num_rels = 0;
5647  nailed_rels = nailed_indexes = 0;
5648 
5649  /* check for correct magic number (compatible version) */
5650  if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
5651  goto read_failed;
5652  if (magic != RELCACHE_INIT_FILEMAGIC)
5653  goto read_failed;
5654 
5655  for (relno = 0;; relno++)
5656  {
5657  Size len;
5658  size_t nread;
5659  Relation rel;
5660  Form_pg_class relform;
5661  bool has_not_null;
5662 
5663  /* first read the relation descriptor length */
5664  nread = fread(&len, 1, sizeof(len), fp);
5665  if (nread != sizeof(len))
5666  {
5667  if (nread == 0)
5668  break; /* end of file */
5669  goto read_failed;
5670  }
5671 
5672  /* safety check for incompatible relcache layout */
5673  if (len != sizeof(RelationData))
5674  goto read_failed;
5675 
5676  /* allocate another relcache header */
5677  if (num_rels >= max_rels)
5678  {
5679  max_rels *= 2;
5680  rels = (Relation *) repalloc(rels, max_rels * sizeof(Relation));
5681  }
5682 
5683  rel = rels[num_rels++] = (Relation) palloc(len);
5684 
5685  /* then, read the Relation structure */
5686  if (fread(rel, 1, len, fp) != len)
5687  goto read_failed;
5688 
5689  /* next read the relation tuple form */
5690  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5691  goto read_failed;
5692 
5693  relform = (Form_pg_class) palloc(len);
5694  if (fread(relform, 1, len, fp) != len)
5695  goto read_failed;
5696 
5697  rel->rd_rel = relform;
5698 
5699  /* initialize attribute tuple forms */
5700  rel->rd_att = CreateTemplateTupleDesc(relform->relnatts);
5701  rel->rd_att->tdrefcount = 1; /* mark as refcounted */
5702 
5703  rel->rd_att->tdtypeid = relform->reltype ? relform->reltype : RECORDOID;
5704  rel->rd_att->tdtypmod = -1; /* just to be sure */
5705 
5706  /* next read all the attribute tuple form data entries */
5707  has_not_null = false;
5708  for (i = 0; i < relform->relnatts; i++)
5709  {
5710  Form_pg_attribute attr = TupleDescAttr(rel->rd_att, i);
5711 
5712  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5713  goto read_failed;
5714  if (len != ATTRIBUTE_FIXED_PART_SIZE)
5715  goto read_failed;
5716  if (fread(attr, 1, len, fp) != len)
5717  goto read_failed;
5718 
5719  has_not_null |= attr->attnotnull;
5720  }
5721 
5722  /* next read the access method specific field */
5723  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5724  goto read_failed;
5725  if (len > 0)
5726  {
5727  rel->rd_options = palloc(len);
5728  if (fread(rel->rd_options, 1, len, fp) != len)
5729  goto read_failed;
5730  if (len != VARSIZE(rel->rd_options))
5731  goto read_failed; /* sanity check */
5732  }
5733  else
5734  {
5735  rel->rd_options = NULL;
5736  }
5737 
5738  /* mark not-null status */
5739  if (has_not_null)
5740  {
5741  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
5742 
5743  constr->has_not_null = true;
5744  rel->rd_att->constr = constr;
5745  }
5746 
5747  /*
5748  * If it's an index, there's more to do. Note we explicitly ignore
5749  * partitioned indexes here.
5750  */
5751  if (rel->rd_rel->relkind == RELKIND_INDEX)
5752  {
5753  MemoryContext indexcxt;
5754  Oid *opfamily;
5755  Oid *opcintype;
5756  RegProcedure *support;
5757  int nsupport;
5758  int16 *indoption;
5759  Oid *indcollation;
5760 
5761  /* Count nailed indexes to ensure we have 'em all */
5762  if (rel->rd_isnailed)
5763  nailed_indexes++;
5764 
5765  /* next, read the pg_index tuple */
5766  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5767  goto read_failed;
5768 
5769  rel->rd_indextuple = (HeapTuple) palloc(len);
5770  if (fread(rel->rd_indextuple, 1, len, fp) != len)
5771  goto read_failed;
5772 
5773  /* Fix up internal pointers in the tuple -- see heap_copytuple */
5774  rel->rd_indextuple->t_data = (HeapTupleHeader) ((char *) rel->rd_indextuple + HEAPTUPLESIZE);
5776 
5777  /*
5778  * prepare index info context --- parameters should match
5779  * RelationInitIndexAccessInfo
5780  */
5782  "index info",
5784  rel->rd_indexcxt = indexcxt;
5787 
5788  /*
5789  * Now we can fetch the index AM's API struct. (We can't store
5790  * that in the init file, since it contains function pointers that
5791  * might vary across server executions. Fortunately, it should be
5792  * safe to call the amhandler even while bootstrapping indexes.)
5793  */
5794  InitIndexAmRoutine(rel);
5795 
5796  /* next, read the vector of opfamily OIDs */
5797  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5798  goto read_failed;
5799 
5800  opfamily = (Oid *) MemoryContextAlloc(indexcxt, len);
5801  if (fread(opfamily, 1, len, fp) != len)
5802  goto read_failed;
5803 
5804  rel->rd_opfamily = opfamily;
5805 
5806  /* next, read the vector of opcintype OIDs */
5807  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5808  goto read_failed;
5809 
5810  opcintype = (Oid *) MemoryContextAlloc(indexcxt, len);
5811  if (fread(opcintype, 1, len, fp) != len)
5812  goto read_failed;
5813 
5814  rel->rd_opcintype = opcintype;
5815 
5816  /* next, read the vector of support procedure OIDs */
5817  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5818  goto read_failed;
5819  support = (RegProcedure *) MemoryContextAlloc(indexcxt, len);
5820  if (fread(support, 1, len, fp) != len)
5821  goto read_failed;
5822 
5823  rel->rd_support = support;
5824 
5825  /* next, read the vector of collation OIDs */
5826  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5827  goto read_failed;
5828 
5829  indcollation = (Oid *) MemoryContextAlloc(indexcxt, len);
5830  if (fread(indcollation, 1, len, fp) != len)
5831  goto read_failed;
5832 
5833  rel->rd_indcollation = indcollation;
5834 
5835  /* next, read the vector of indoption values */
5836  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5837  goto read_failed;
5838 
5839  indoption = (int16 *) MemoryContextAlloc(indexcxt, len);
5840  if (fread(indoption, 1, len, fp) != len)
5841  goto read_failed;
5842 
5843  rel->rd_indoption = indoption;
5844 
5845  /* finally, read the vector of opcoptions values */
5846  rel->rd_opcoptions = (bytea **)
5847  MemoryContextAllocZero(indexcxt, sizeof(*rel->rd_opcoptions) * relform->relnatts);
5848 
5849  for (i = 0; i < relform->relnatts; i++)
5850  {
5851  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5852  goto read_failed;
5853 
5854  if (len > 0)
5855  {
5856  rel->rd_opcoptions[i] = (bytea *) MemoryContextAlloc(indexcxt, len);
5857  if (fread(rel->rd_opcoptions[i], 1, len, fp) != len)
5858  goto read_failed;
5859  }
5860  }
5861 
5862  /* set up zeroed fmgr-info vector */
5863  nsupport = relform->relnatts * rel->rd_indam->amsupport;
5864  rel->rd_supportinfo = (FmgrInfo *)
5865  MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
5866  }
5867  else
5868  {
5869  /* Count nailed rels to ensure we have 'em all */
5870  if (rel->rd_isnailed)
5871  nailed_rels++;
5872 
5873  /* Load table AM data */
5874  if (rel->rd_rel->relkind == RELKIND_RELATION ||
5875  rel->rd_rel->relkind == RELKIND_SEQUENCE ||
5876  rel->rd_rel->relkind == RELKIND_TOASTVALUE ||
5877  rel->rd_rel->relkind == RELKIND_MATVIEW)
5879 
5880  Assert(rel->rd_index == NULL);
5881  Assert(rel->rd_indextuple == NULL);
5882  Assert(rel->rd_indexcxt == NULL);
5883  Assert(rel->rd_indam == NULL);
5884  Assert(rel->rd_opfamily == NULL);
5885  Assert(rel->rd_opcintype == NULL);
5886  Assert(rel->rd_support == NULL);
5887  Assert(rel->rd_supportinfo == NULL);
5888  Assert(rel->rd_indoption == NULL);
5889  Assert(rel->rd_indcollation == NULL);
5890  Assert(rel->rd_opcoptions == NULL);
5891  }
5892 
5893  /*
5894  * Rules and triggers are not saved (mainly because the internal
5895  * format is complex and subject to change). They must be rebuilt if
5896  * needed by RelationCacheInitializePhase3. This is not expected to
5897  * be a big performance hit since few system catalogs have such. Ditto
5898  * for RLS policy data, partition info, index expressions, predicates,
5899  * exclusion info, and FDW info.
5900  */
5901  rel->rd_rules = NULL;
5902  rel->rd_rulescxt = NULL;
5903  rel->trigdesc = NULL;
5904  rel->rd_rsdesc = NULL;
5905  rel->rd_partkey = NULL;
5906  rel->rd_partkeycxt = NULL;
5907  rel->rd_partdesc = NULL;
5908  rel->rd_pdcxt = NULL;
5909  rel->rd_partcheck = NIL;
5910  rel->rd_partcheckvalid = false;
5911  rel->rd_partcheckcxt = NULL;
5912  rel->rd_indexprs = NIL;
5913  rel->rd_indpred = NIL;
5914  rel->rd_exclops = NULL;
5915  rel->rd_exclprocs = NULL;
5916  rel->rd_exclstrats = NULL;
5917  rel->rd_fdwroutine = NULL;
5918 
5919  /*
5920  * Reset transient-state fields in the relcache entry
5921  */
5922  rel->rd_smgr = NULL;
5923  if (rel->rd_isnailed)
5924  rel->rd_refcnt = 1;
5925  else
5926  rel->rd_refcnt = 0;
5927  rel->rd_indexvalid = false;
5928  rel->rd_indexlist = NIL;
5929  rel->rd_pkindex = InvalidOid;
5930  rel->rd_replidindex = InvalidOid;
5931  rel->rd_indexattr = NULL;
5932  rel->rd_keyattr = NULL;
5933  rel->rd_pkattr = NULL;
5934  rel->rd_idattr = NULL;
5935  rel->rd_pubactions = NULL;
5936  rel->rd_statvalid = false;
59