PostgreSQL Source Code  git master
relcache.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * relcache.c
4  * POSTGRES relation descriptor cache code
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/cache/relcache.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  * RelationCacheInitialize - initialize relcache (to empty)
18  * RelationCacheInitializePhase2 - initialize shared-catalog entries
19  * RelationCacheInitializePhase3 - finish initializing relcache
20  * RelationIdGetRelation - get a reldesc by relation id
21  * RelationClose - close an open relation
22  *
23  * NOTES
24  * The following code contains many undocumented hacks. Please be
25  * careful....
26  */
27 #include "postgres.h"
28 
29 #include <sys/file.h>
30 #include <fcntl.h>
31 #include <unistd.h>
32 
33 #include "access/htup_details.h"
34 #include "access/multixact.h"
35 #include "access/parallel.h"
36 #include "access/reloptions.h"
37 #include "access/sysattr.h"
38 #include "access/table.h"
39 #include "access/tableam.h"
40 #include "access/tupdesc_details.h"
41 #include "access/xact.h"
42 #include "catalog/binary_upgrade.h"
43 #include "catalog/catalog.h"
44 #include "catalog/indexing.h"
45 #include "catalog/namespace.h"
46 #include "catalog/partition.h"
47 #include "catalog/pg_am.h"
48 #include "catalog/pg_amproc.h"
49 #include "catalog/pg_attrdef.h"
51 #include "catalog/pg_authid.h"
52 #include "catalog/pg_constraint.h"
53 #include "catalog/pg_database.h"
54 #include "catalog/pg_namespace.h"
55 #include "catalog/pg_opclass.h"
56 #include "catalog/pg_proc.h"
57 #include "catalog/pg_publication.h"
58 #include "catalog/pg_rewrite.h"
59 #include "catalog/pg_shseclabel.h"
62 #include "catalog/pg_tablespace.h"
63 #include "catalog/pg_trigger.h"
64 #include "catalog/pg_type.h"
65 #include "catalog/schemapg.h"
66 #include "catalog/storage.h"
67 #include "commands/policy.h"
69 #include "commands/trigger.h"
70 #include "common/int.h"
71 #include "miscadmin.h"
72 #include "nodes/makefuncs.h"
73 #include "nodes/nodeFuncs.h"
74 #include "optimizer/optimizer.h"
75 #include "pgstat.h"
76 #include "rewrite/rewriteDefine.h"
77 #include "rewrite/rowsecurity.h"
78 #include "storage/lmgr.h"
79 #include "storage/smgr.h"
80 #include "utils/array.h"
81 #include "utils/builtins.h"
82 #include "utils/catcache.h"
83 #include "utils/datum.h"
84 #include "utils/fmgroids.h"
85 #include "utils/inval.h"
86 #include "utils/lsyscache.h"
87 #include "utils/memutils.h"
88 #include "utils/relmapper.h"
89 #include "utils/resowner.h"
90 #include "utils/snapmgr.h"
91 #include "utils/syscache.h"
92 
93 #define RELCACHE_INIT_FILEMAGIC 0x573266 /* version ID value */
94 
95 /*
96  * Whether to bother checking if relation cache memory needs to be freed
97  * eagerly. See also RelationBuildDesc() and pg_config_manual.h.
98  */
99 #if defined(RECOVER_RELATION_BUILD_MEMORY) && (RECOVER_RELATION_BUILD_MEMORY != 0)
100 #define MAYBE_RECOVER_RELATION_BUILD_MEMORY 1
101 #else
102 #define RECOVER_RELATION_BUILD_MEMORY 0
103 #ifdef DISCARD_CACHES_ENABLED
104 #define MAYBE_RECOVER_RELATION_BUILD_MEMORY 1
105 #endif
106 #endif
107 
108 /*
109  * hardcoded tuple descriptors, contents generated by genbki.pl
110  */
111 static const FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
112 static const FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
113 static const FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
114 static const FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
115 static const FormData_pg_attribute Desc_pg_database[Natts_pg_database] = {Schema_pg_database};
116 static const FormData_pg_attribute Desc_pg_authid[Natts_pg_authid] = {Schema_pg_authid};
117 static const FormData_pg_attribute Desc_pg_auth_members[Natts_pg_auth_members] = {Schema_pg_auth_members};
118 static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
119 static const FormData_pg_attribute Desc_pg_shseclabel[Natts_pg_shseclabel] = {Schema_pg_shseclabel};
120 static const FormData_pg_attribute Desc_pg_subscription[Natts_pg_subscription] = {Schema_pg_subscription};
121 
122 /*
123  * Hash tables that index the relation cache
124  *
125  * We used to index the cache by both name and OID, but now there
126  * is only an index by OID.
127  */
128 typedef struct relidcacheent
129 {
133 
135 
136 /*
137  * This flag is false until we have prepared the critical relcache entries
138  * that are needed to do indexscans on the tables read by relcache building.
139  */
141 
142 /*
143  * This flag is false until we have prepared the critical relcache entries
144  * for shared catalogs (which are the tables needed for login).
145  */
147 
148 /*
149  * This counter counts relcache inval events received since backend startup
150  * (but only for rels that are actually in cache). Presently, we use it only
151  * to detect whether data about to be written by write_relcache_init_file()
152  * might already be obsolete.
153  */
154 static long relcacheInvalsReceived = 0L;
155 
156 /*
157  * in_progress_list is a stack of ongoing RelationBuildDesc() calls. CREATE
158  * INDEX CONCURRENTLY makes catalog changes under ShareUpdateExclusiveLock.
159  * It critically relies on each backend absorbing those changes no later than
160  * next transaction start. Hence, RelationBuildDesc() loops until it finishes
161  * without accepting a relevant invalidation. (Most invalidation consumers
162  * don't do this.)
163  */
164 typedef struct inprogressent
165 {
166  Oid reloid; /* OID of relation being built */
167  bool invalidated; /* whether an invalidation arrived for it */
169 
173 
174 /*
175  * eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
176  * cleanup work. This list intentionally has limited size; if it overflows,
177  * we fall back to scanning the whole hashtable. There is no value in a very
178  * large list because (1) at some point, a hash_seq_search scan is faster than
179  * retail lookups, and (2) the value of this is to reduce EOXact work for
180  * short transactions, which can't have dirtied all that many tables anyway.
181  * EOXactListAdd() does not bother to prevent duplicate list entries, so the
182  * cleanup processing must be idempotent.
183  */
184 #define MAX_EOXACT_LIST 32
186 static int eoxact_list_len = 0;
187 static bool eoxact_list_overflowed = false;
188 
/*
 * EOXactListAdd
 *		Note rel's OID (rd_id) in eoxact_list[].  Once MAX_EOXACT_LIST
 *		entries are present nothing more is stored; eoxact_list_overflowed
 *		is raised instead.  No attempt is made to avoid duplicates.
 */
#define EOXactListAdd(rel) \
	do { \
		if (eoxact_list_len >= MAX_EOXACT_LIST) \
			eoxact_list_overflowed = true; \
		else \
			eoxact_list[eoxact_list_len++] = (rel)->rd_id; \
	} while (0)
196 
197 /*
198  * EOXactTupleDescArray stores TupleDescs that (might) need AtEOXact
199  * cleanup work. The array expands as needed; there is no hashtable because
200  * we don't need to access individual items except at EOXact.
201  */
203 static int NextEOXactTupleDescNum = 0;
204 static int EOXactTupleDescArrayLen = 0;
205 
206 /*
207  * macros to manipulate the lookup hashtable
208  */
/*
 * RelationCacheInsert
 *		Enter RELATION into RelationIdCache, keyed by its rd_id.
 *
 * When an entry for that OID already exists it is overwritten (and
 * replace_allowed is asserted).  The displaced descriptor is destroyed if
 * RelationHasReferenceCountZero() holds for it; otherwise it is left in
 * place and, except in bootstrap mode, a WARNING about the leak is issued.
 *
 * Note that RELATION is evaluated more than once.
 */
#define RelationCacheInsert(RELATION, replace_allowed) \
do { \
	bool		found; \
	RelIdCacheEnt *hentry = (RelIdCacheEnt *) \
		hash_search(RelationIdCache, \
					&((RELATION)->rd_id), \
					HASH_ENTER, &found); \
	if (!found) \
		hentry->reldesc = (RELATION); \
	else \
	{ \
		/* see comments in RelationBuildDesc and RelationBuildLocalRelation */ \
		Relation	_old_rel = hentry->reldesc; \
		Assert(replace_allowed); \
		hentry->reldesc = (RELATION); \
		if (!RelationHasReferenceCountZero(_old_rel)) \
		{ \
			if (!IsBootstrapProcessingMode()) \
				elog(WARNING, "leaking still-referenced relcache entry for \"%s\"", \
					 RelationGetRelationName(_old_rel)); \
		} \
		else \
			RelationDestroyRelation(_old_rel, false); \
	} \
} while(0)
230 
/*
 * RelationIdCacheLookup
 *		Probe RelationIdCache for ID.  RELATION (an lvalue) receives the
 *		cached descriptor, or NULL when the OID has no entry.
 */
#define RelationIdCacheLookup(ID, RELATION) \
do { \
	RelIdCacheEnt *hentry = (RelIdCacheEnt *) \
		hash_search(RelationIdCache, \
					&(ID), \
					HASH_FIND, NULL); \
	RELATION = (hentry != NULL) ? hentry->reldesc : NULL; \
} while(0)
242 
/*
 * RelationCacheDelete
 *		Remove the RelationIdCache entry keyed by RELATION's rd_id.  A
 *		WARNING is issued if no such entry was present.
 */
#define RelationCacheDelete(RELATION) \
do { \
	RelIdCacheEnt *hentry = (RelIdCacheEnt *) \
		hash_search(RelationIdCache, \
					&((RELATION)->rd_id), \
					HASH_REMOVE, NULL); \
	if (!hentry) \
		elog(WARNING, "failed to delete relcache entry for OID %u", \
			 (RELATION)->rd_id); \
} while(0)
253 
254 
255 /*
256  * Special cache for opclass-related information
257  *
258  * Note: only default support procs get cached, ie, those with
259  * lefttype = righttype = opcintype.
260  */
261 typedef struct opclasscacheent
262 {
263  Oid opclassoid; /* lookup key: OID of opclass */
264  bool valid; /* set true after successful fill-in */
265  StrategyNumber numSupport; /* max # of support procs (from pg_am) */
266  Oid opcfamily; /* OID of opclass's family */
267  Oid opcintype; /* OID of opclass's declared input type */
268  RegProcedure *supportProcs; /* OIDs of support procedures */
270 
271 static HTAB *OpClassCache = NULL;
272 
273 
274 /* non-export function prototypes */
275 
276 static void RelationCloseCleanup(Relation relation);
277 static void RelationDestroyRelation(Relation relation, bool remember_tupdesc);
278 static void RelationInvalidateRelation(Relation relation);
279 static void RelationClearRelation(Relation relation, bool rebuild);
280 
281 static void RelationReloadIndexInfo(Relation relation);
282 static void RelationReloadNailed(Relation relation);
283 static void RelationFlushRelation(Relation relation);
285 #ifdef USE_ASSERT_CHECKING
286 static void AssertPendingSyncConsistency(Relation relation);
287 #endif
288 static void AtEOXact_cleanup(Relation relation, bool isCommit);
289 static void AtEOSubXact_cleanup(Relation relation, bool isCommit,
290  SubTransactionId mySubid, SubTransactionId parentSubid);
291 static bool load_relcache_init_file(bool shared);
292 static void write_relcache_init_file(bool shared);
293 static void write_item(const void *data, Size len, FILE *fp);
294 
295 static void formrdesc(const char *relationName, Oid relationReltype,
296  bool isshared, int natts, const FormData_pg_attribute *attrs);
297 
298 static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic);
300 static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
301 static void RelationBuildTupleDesc(Relation relation);
302 static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
303 static void RelationInitPhysicalAddr(Relation relation);
304 static void load_critical_index(Oid indexoid, Oid heapoid);
305 static TupleDesc GetPgClassDescriptor(void);
306 static TupleDesc GetPgIndexDescriptor(void);
307 static void AttrDefaultFetch(Relation relation, int ndef);
308 static int AttrDefaultCmp(const void *a, const void *b);
309 static void CheckConstraintFetch(Relation relation);
310 static int CheckConstraintCmp(const void *a, const void *b);
311 static void InitIndexAmRoutine(Relation relation);
312 static void IndexSupportInitialize(oidvector *indclass,
313  RegProcedure *indexSupport,
314  Oid *opFamily,
315  Oid *opcInType,
316  StrategyNumber maxSupportNumber,
317  AttrNumber maxAttributeNumber);
318 static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
319  StrategyNumber numSupport);
320 static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
321 static void unlink_initfile(const char *initfilename, int elevel);
322 
323 
324 /*
325  * ScanPgRelation
326  *
327  * This is used by RelationBuildDesc to find a pg_class
328  * tuple matching targetRelId. The caller must hold at least
329  * AccessShareLock on the target relid to prevent concurrent-update
330  * scenarios; it isn't guaranteed that all scans used to build the
331  * relcache entry will use the same snapshot. If, for example,
332  * an attribute were to be added after scanning pg_class and before
333  * scanning pg_attribute, relnatts wouldn't match.
334  *
335  * NB: the returned tuple has been copied into palloc'd storage
336  * and must eventually be freed with heap_freetuple.
337  */
338 static HeapTuple
339 ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic)
340 {
341  HeapTuple pg_class_tuple;
342  Relation pg_class_desc;
343  SysScanDesc pg_class_scan;
344  ScanKeyData key[1];
345  Snapshot snapshot = NULL;
346 
347  /*
348  * If something goes wrong during backend startup, we might find ourselves
349  * trying to read pg_class before we've selected a database. That ain't
350  * gonna work, so bail out with a useful error message. If this happens,
351  * it probably means a relcache entry that needs to be nailed isn't.
352  */
353  if (!OidIsValid(MyDatabaseId))
354  elog(FATAL, "cannot read pg_class without having selected a database");
355 
356  /*
357  * form a scan key
358  */
359  ScanKeyInit(&key[0],
360  Anum_pg_class_oid,
361  BTEqualStrategyNumber, F_OIDEQ,
362  ObjectIdGetDatum(targetRelId));
363 
364  /*
365  * Open pg_class and fetch a tuple. Force heap scan if we haven't yet
366  * built the critical relcache entries (this includes initdb and startup
367  * without a pg_internal.init file). The caller can also force a heap
368  * scan by setting indexOK == false.
369  */
370  pg_class_desc = table_open(RelationRelationId, AccessShareLock);
371 
372  /*
373  * The caller might need a tuple that's newer than the one the historic
374  * snapshot; currently the only case requiring to do so is looking up the
375  * relfilenumber of non mapped system relations during decoding. That
376  * snapshot can't change in the midst of a relcache build, so there's no
377  * need to register the snapshot.
378  */
379  if (force_non_historic)
380  snapshot = GetNonHistoricCatalogSnapshot(RelationRelationId);
381 
382  pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
383  indexOK && criticalRelcachesBuilt,
384  snapshot,
385  1, key);
386 
387  pg_class_tuple = systable_getnext(pg_class_scan);
388 
389  /*
390  * Must copy tuple before releasing buffer.
391  */
392  if (HeapTupleIsValid(pg_class_tuple))
393  pg_class_tuple = heap_copytuple(pg_class_tuple);
394 
395  /* all done */
396  systable_endscan(pg_class_scan);
397  table_close(pg_class_desc, AccessShareLock);
398 
399  return pg_class_tuple;
400 }
401 
402 /*
403  * AllocateRelationDesc
404  *
405  * This is used to allocate memory for a new relation descriptor
406  * and initialize the rd_rel field from the given pg_class tuple.
407  */
408 static Relation
410 {
411  Relation relation;
412  MemoryContext oldcxt;
413  Form_pg_class relationForm;
414 
415  /* Relcache entries must live in CacheMemoryContext */
417 
418  /*
419  * allocate and zero space for new relation descriptor
420  */
421  relation = (Relation) palloc0(sizeof(RelationData));
422 
423  /* make sure relation is marked as having no open file yet */
424  relation->rd_smgr = NULL;
425 
426  /*
427  * Copy the relation tuple form
428  *
429  * We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The
430  * variable-length fields (relacl, reloptions) are NOT stored in the
431  * relcache --- there'd be little point in it, since we don't copy the
432  * tuple's nulls bitmap and hence wouldn't know if the values are valid.
433  * Bottom line is that relacl *cannot* be retrieved from the relcache. Get
434  * it from the syscache if you need it. The same goes for the original
435  * form of reloptions (however, we do store the parsed form of reloptions
436  * in rd_options).
437  */
438  relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
439 
440  memcpy(relationForm, relp, CLASS_TUPLE_SIZE);
441 
442  /* initialize relation tuple form */
443  relation->rd_rel = relationForm;
444 
445  /* and allocate attribute tuple form storage */
446  relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts);
447  /* which we mark as a reference-counted tupdesc */
448  relation->rd_att->tdrefcount = 1;
449 
450  MemoryContextSwitchTo(oldcxt);
451 
452  return relation;
453 }
454 
455 /*
456  * RelationParseRelOptions
457  * Convert pg_class.reloptions into pre-parsed rd_options
458  *
459  * tuple is the real pg_class tuple (not rd_rel!) for relation
460  *
461  * Note: rd_rel and (if an index) rd_indam must be valid already
462  */
463 static void
465 {
466  bytea *options;
467  amoptions_function amoptsfn;
468 
469  relation->rd_options = NULL;
470 
471  /*
472  * Look up any AM-specific parse function; fall out if relkind should not
473  * have options.
474  */
475  switch (relation->rd_rel->relkind)
476  {
477  case RELKIND_RELATION:
478  case RELKIND_TOASTVALUE:
479  case RELKIND_VIEW:
480  case RELKIND_MATVIEW:
481  case RELKIND_PARTITIONED_TABLE:
482  amoptsfn = NULL;
483  break;
484  case RELKIND_INDEX:
485  case RELKIND_PARTITIONED_INDEX:
486  amoptsfn = relation->rd_indam->amoptions;
487  break;
488  default:
489  return;
490  }
491 
492  /*
493  * Fetch reloptions from tuple; have to use a hardwired descriptor because
494  * we might not have any other for pg_class yet (consider executing this
495  * code for pg_class itself)
496  */
497  options = extractRelOptions(tuple, GetPgClassDescriptor(), amoptsfn);
498 
499  /*
500  * Copy parsed data into CacheMemoryContext. To guard against the
501  * possibility of leaks in the reloptions code, we want to do the actual
502  * parsing in the caller's memory context and copy the results into
503  * CacheMemoryContext after the fact.
504  */
505  if (options)
506  {
508  VARSIZE(options));
509  memcpy(relation->rd_options, options, VARSIZE(options));
510  pfree(options);
511  }
512 }
513 
514 /*
515  * RelationBuildTupleDesc
516  *
517  * Form the relation's tuple descriptor from information in
518  * the pg_attribute, pg_attrdef & pg_constraint system catalogs.
519  */
520 static void
522 {
523  HeapTuple pg_attribute_tuple;
524  Relation pg_attribute_desc;
525  SysScanDesc pg_attribute_scan;
526  ScanKeyData skey[2];
527  int need;
528  TupleConstr *constr;
529  AttrMissing *attrmiss = NULL;
530  int ndef = 0;
531 
532  /* fill rd_att's type ID fields (compare heap.c's AddNewRelationTuple) */
533  relation->rd_att->tdtypeid =
534  relation->rd_rel->reltype ? relation->rd_rel->reltype : RECORDOID;
535  relation->rd_att->tdtypmod = -1; /* just to be sure */
536 
538  sizeof(TupleConstr));
539 
540  /*
541  * Form a scan key that selects only user attributes (attnum > 0).
542  * (Eliminating system attribute rows at the index level is lots faster
543  * than fetching them.)
544  */
545  ScanKeyInit(&skey[0],
546  Anum_pg_attribute_attrelid,
547  BTEqualStrategyNumber, F_OIDEQ,
549  ScanKeyInit(&skey[1],
550  Anum_pg_attribute_attnum,
551  BTGreaterStrategyNumber, F_INT2GT,
552  Int16GetDatum(0));
553 
554  /*
555  * Open pg_attribute and begin a scan. Force heap scan if we haven't yet
556  * built the critical relcache entries (this includes initdb and startup
557  * without a pg_internal.init file).
558  */
559  pg_attribute_desc = table_open(AttributeRelationId, AccessShareLock);
560  pg_attribute_scan = systable_beginscan(pg_attribute_desc,
561  AttributeRelidNumIndexId,
563  NULL,
564  2, skey);
565 
566  /*
567  * add attribute data to relation->rd_att
568  */
569  need = RelationGetNumberOfAttributes(relation);
570 
571  while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
572  {
573  Form_pg_attribute attp;
574  int attnum;
575 
576  attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
577 
578  attnum = attp->attnum;
579  if (attnum <= 0 || attnum > RelationGetNumberOfAttributes(relation))
580  elog(ERROR, "invalid attribute number %d for relation \"%s\"",
581  attp->attnum, RelationGetRelationName(relation));
582 
583  memcpy(TupleDescAttr(relation->rd_att, attnum - 1),
584  attp,
586 
587  /* Update constraint/default info */
588  if (attp->attnotnull)
589  constr->has_not_null = true;
590  if (attp->attgenerated == ATTRIBUTE_GENERATED_STORED)
591  constr->has_generated_stored = true;
592  if (attp->atthasdef)
593  ndef++;
594 
595  /* If the column has a "missing" value, put it in the attrmiss array */
596  if (attp->atthasmissing)
597  {
598  Datum missingval;
599  bool missingNull;
600 
601  /* Do we have a missing value? */
602  missingval = heap_getattr(pg_attribute_tuple,
603  Anum_pg_attribute_attmissingval,
604  pg_attribute_desc->rd_att,
605  &missingNull);
606  if (!missingNull)
607  {
608  /* Yes, fetch from the array */
609  MemoryContext oldcxt;
610  bool is_null;
611  int one = 1;
612  Datum missval;
613 
614  if (attrmiss == NULL)
615  attrmiss = (AttrMissing *)
617  relation->rd_rel->relnatts *
618  sizeof(AttrMissing));
619 
620  missval = array_get_element(missingval,
621  1,
622  &one,
623  -1,
624  attp->attlen,
625  attp->attbyval,
626  attp->attalign,
627  &is_null);
628  Assert(!is_null);
629  if (attp->attbyval)
630  {
631  /* for copy by val just copy the datum direct */
632  attrmiss[attnum - 1].am_value = missval;
633  }
634  else
635  {
636  /* otherwise copy in the correct context */
638  attrmiss[attnum - 1].am_value = datumCopy(missval,
639  attp->attbyval,
640  attp->attlen);
641  MemoryContextSwitchTo(oldcxt);
642  }
643  attrmiss[attnum - 1].am_present = true;
644  }
645  }
646  need--;
647  if (need == 0)
648  break;
649  }
650 
651  /*
652  * end the scan and close the attribute relation
653  */
654  systable_endscan(pg_attribute_scan);
655  table_close(pg_attribute_desc, AccessShareLock);
656 
657  if (need != 0)
658  elog(ERROR, "pg_attribute catalog is missing %d attribute(s) for relation OID %u",
659  need, RelationGetRelid(relation));
660 
661  /*
662  * The attcacheoff values we read from pg_attribute should all be -1
663  * ("unknown"). Verify this if assert checking is on. They will be
664  * computed when and if needed during tuple access.
665  */
666 #ifdef USE_ASSERT_CHECKING
667  {
668  int i;
669 
670  for (i = 0; i < RelationGetNumberOfAttributes(relation); i++)
671  Assert(TupleDescAttr(relation->rd_att, i)->attcacheoff == -1);
672  }
673 #endif
674 
675  /*
676  * However, we can easily set the attcacheoff value for the first
677  * attribute: it must be zero. This eliminates the need for special cases
678  * for attnum=1 that used to exist in fastgetattr() and index_getattr().
679  */
680  if (RelationGetNumberOfAttributes(relation) > 0)
681  TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
682 
683  /*
684  * Set up constraint/default info
685  */
686  if (constr->has_not_null ||
687  constr->has_generated_stored ||
688  ndef > 0 ||
689  attrmiss ||
690  relation->rd_rel->relchecks > 0)
691  {
692  relation->rd_att->constr = constr;
693 
694  if (ndef > 0) /* DEFAULTs */
695  AttrDefaultFetch(relation, ndef);
696  else
697  constr->num_defval = 0;
698 
699  constr->missing = attrmiss;
700 
701  if (relation->rd_rel->relchecks > 0) /* CHECKs */
702  CheckConstraintFetch(relation);
703  else
704  constr->num_check = 0;
705  }
706  else
707  {
708  pfree(constr);
709  relation->rd_att->constr = NULL;
710  }
711 }
712 
713 /*
714  * RelationBuildRuleLock
715  *
716  * Form the relation's rewrite rules from information in
717  * the pg_rewrite system catalog.
718  *
719  * Note: The rule parsetrees are potentially very complex node structures.
720  * To allow these trees to be freed when the relcache entry is flushed,
721  * we make a private memory context to hold the RuleLock information for
722  * each relcache entry that has associated rules. The context is used
723  * just for rule info, not for any other subsidiary data of the relcache
724  * entry, because that keeps the update logic in RelationClearRelation()
725  * manageable. The other subsidiary data structures are simple enough
726  * to be easy to free explicitly, anyway.
727  *
728  * Note: The relation's reloptions must have been extracted first.
729  */
730 static void
732 {
733  MemoryContext rulescxt;
734  MemoryContext oldcxt;
735  HeapTuple rewrite_tuple;
736  Relation rewrite_desc;
737  TupleDesc rewrite_tupdesc;
738  SysScanDesc rewrite_scan;
740  RuleLock *rulelock;
741  int numlocks;
742  RewriteRule **rules;
743  int maxlocks;
744 
745  /*
746  * Make the private context. Assume it'll not contain much data.
747  */
749  "relation rules",
751  relation->rd_rulescxt = rulescxt;
753  RelationGetRelationName(relation));
754 
755  /*
756  * allocate an array to hold the rewrite rules (the array is extended if
757  * necessary)
758  */
759  maxlocks = 4;
760  rules = (RewriteRule **)
761  MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks);
762  numlocks = 0;
763 
764  /*
765  * form a scan key
766  */
767  ScanKeyInit(&key,
768  Anum_pg_rewrite_ev_class,
769  BTEqualStrategyNumber, F_OIDEQ,
771 
772  /*
773  * open pg_rewrite and begin a scan
774  *
775  * Note: since we scan the rules using RewriteRelRulenameIndexId, we will
776  * be reading the rules in name order, except possibly during
777  * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn
778  * ensures that rules will be fired in name order.
779  */
780  rewrite_desc = table_open(RewriteRelationId, AccessShareLock);
781  rewrite_tupdesc = RelationGetDescr(rewrite_desc);
782  rewrite_scan = systable_beginscan(rewrite_desc,
783  RewriteRelRulenameIndexId,
784  true, NULL,
785  1, &key);
786 
787  while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
788  {
789  Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
790  bool isnull;
791  Datum rule_datum;
792  char *rule_str;
793  RewriteRule *rule;
794  Oid check_as_user;
795 
796  rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
797  sizeof(RewriteRule));
798 
799  rule->ruleId = rewrite_form->oid;
800 
801  rule->event = rewrite_form->ev_type - '0';
802  rule->enabled = rewrite_form->ev_enabled;
803  rule->isInstead = rewrite_form->is_instead;
804 
805  /*
806  * Must use heap_getattr to fetch ev_action and ev_qual. Also, the
807  * rule strings are often large enough to be toasted. To avoid
808  * leaking memory in the caller's context, do the detoasting here so
809  * we can free the detoasted version.
810  */
811  rule_datum = heap_getattr(rewrite_tuple,
812  Anum_pg_rewrite_ev_action,
813  rewrite_tupdesc,
814  &isnull);
815  Assert(!isnull);
816  rule_str = TextDatumGetCString(rule_datum);
817  oldcxt = MemoryContextSwitchTo(rulescxt);
818  rule->actions = (List *) stringToNode(rule_str);
819  MemoryContextSwitchTo(oldcxt);
820  pfree(rule_str);
821 
822  rule_datum = heap_getattr(rewrite_tuple,
823  Anum_pg_rewrite_ev_qual,
824  rewrite_tupdesc,
825  &isnull);
826  Assert(!isnull);
827  rule_str = TextDatumGetCString(rule_datum);
828  oldcxt = MemoryContextSwitchTo(rulescxt);
829  rule->qual = (Node *) stringToNode(rule_str);
830  MemoryContextSwitchTo(oldcxt);
831  pfree(rule_str);
832 
833  /*
834  * If this is a SELECT rule defining a view, and the view has
835  * "security_invoker" set, we must perform all permissions checks on
836  * relations referred to by the rule as the invoking user.
837  *
838  * In all other cases (including non-SELECT rules on security invoker
839  * views), perform the permissions checks as the relation owner.
840  */
841  if (rule->event == CMD_SELECT &&
842  relation->rd_rel->relkind == RELKIND_VIEW &&
843  RelationHasSecurityInvoker(relation))
844  check_as_user = InvalidOid;
845  else
846  check_as_user = relation->rd_rel->relowner;
847 
848  /*
849  * Scan through the rule's actions and set the checkAsUser field on
850  * all RTEPermissionInfos. We have to look at the qual as well, in
851  * case it contains sublinks.
852  *
853  * The reason for doing this when the rule is loaded, rather than when
854  * it is stored, is that otherwise ALTER TABLE OWNER would have to
855  * grovel through stored rules to update checkAsUser fields. Scanning
856  * the rule tree during load is relatively cheap (compared to
857  * constructing it in the first place), so we do it here.
858  */
859  setRuleCheckAsUser((Node *) rule->actions, check_as_user);
860  setRuleCheckAsUser(rule->qual, check_as_user);
861 
862  if (numlocks >= maxlocks)
863  {
864  maxlocks *= 2;
865  rules = (RewriteRule **)
866  repalloc(rules, sizeof(RewriteRule *) * maxlocks);
867  }
868  rules[numlocks++] = rule;
869  }
870 
871  /*
872  * end the scan and close the pg_rewrite relation
873  */
874  systable_endscan(rewrite_scan);
875  table_close(rewrite_desc, AccessShareLock);
876 
877  /*
878  * there might not be any rules (if relhasrules is out-of-date)
879  */
880  if (numlocks == 0)
881  {
882  relation->rd_rules = NULL;
883  relation->rd_rulescxt = NULL;
884  MemoryContextDelete(rulescxt);
885  return;
886  }
887 
888  /*
889  * form a RuleLock and insert into relation
890  */
891  rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock));
892  rulelock->numLocks = numlocks;
893  rulelock->rules = rules;
894 
895  relation->rd_rules = rulelock;
896 }
897 
898 /*
899  * equalRuleLocks
900  *
901  * Determine whether two RuleLocks are equivalent
902  *
903  * Probably this should be in the rules code someplace...
904  */
905 static bool
907 {
908  int i;
909 
910  /*
911  * As of 7.3 we assume the rule ordering is repeatable, because
912  * RelationBuildRuleLock should read 'em in a consistent order. So just
913  * compare corresponding slots.
914  */
915  if (rlock1 != NULL)
916  {
917  if (rlock2 == NULL)
918  return false;
919  if (rlock1->numLocks != rlock2->numLocks)
920  return false;
921  for (i = 0; i < rlock1->numLocks; i++)
922  {
923  RewriteRule *rule1 = rlock1->rules[i];
924  RewriteRule *rule2 = rlock2->rules[i];
925 
926  if (rule1->ruleId != rule2->ruleId)
927  return false;
928  if (rule1->event != rule2->event)
929  return false;
930  if (rule1->enabled != rule2->enabled)
931  return false;
932  if (rule1->isInstead != rule2->isInstead)
933  return false;
934  if (!equal(rule1->qual, rule2->qual))
935  return false;
936  if (!equal(rule1->actions, rule2->actions))
937  return false;
938  }
939  }
940  else if (rlock2 != NULL)
941  return false;
942  return true;
943 }
944 
945 /*
946  * equalPolicy
947  *
948  * Determine whether two policies are equivalent
949  */
950 static bool
952 {
953  int i;
954  Oid *r1,
955  *r2;
956 
957  if (policy1 != NULL)
958  {
959  if (policy2 == NULL)
960  return false;
961 
962  if (policy1->polcmd != policy2->polcmd)
963  return false;
964  if (policy1->hassublinks != policy2->hassublinks)
965  return false;
966  if (strcmp(policy1->policy_name, policy2->policy_name) != 0)
967  return false;
968  if (ARR_DIMS(policy1->roles)[0] != ARR_DIMS(policy2->roles)[0])
969  return false;
970 
971  r1 = (Oid *) ARR_DATA_PTR(policy1->roles);
972  r2 = (Oid *) ARR_DATA_PTR(policy2->roles);
973 
974  for (i = 0; i < ARR_DIMS(policy1->roles)[0]; i++)
975  {
976  if (r1[i] != r2[i])
977  return false;
978  }
979 
980  if (!equal(policy1->qual, policy2->qual))
981  return false;
982  if (!equal(policy1->with_check_qual, policy2->with_check_qual))
983  return false;
984  }
985  else if (policy2 != NULL)
986  return false;
987 
988  return true;
989 }
990 
991 /*
992  * equalRSDesc
993  *
994  * Determine whether two RowSecurityDesc's are equivalent
 *
 * Two NULL descriptors are equal; a NULL and a non-NULL descriptor are not.
 * Otherwise the policy lists must have the same length and each pair of
 * policies at the same list position must satisfy equalPolicy().
 *
 * NOTE(review): the declarator line (listing number 997) and the two lines
 * inside the loop that define l and r (1015-1016) are missing from this
 * listing.  l and r are presumably the policies taken from lc and rc;
 * confirm against the real source.
995  */
996 static bool
998 {
999  ListCell *lc,
1000  *rc;
1001 
1002  if (rsdesc1 == NULL && rsdesc2 == NULL)
1003  return true;
1004 
   /* Exactly one side is NULL: not equivalent */
1005  if ((rsdesc1 != NULL && rsdesc2 == NULL) ||
1006  (rsdesc1 == NULL && rsdesc2 != NULL))
1007  return false;
1008 
1009  if (list_length(rsdesc1->policies) != list_length(rsdesc2->policies))
1010  return false;
1011 
1012  /* RelationBuildRowSecurity should build policies in order */
   /* Hence a position-by-position walk of both lists is sufficient */
1013  forboth(lc, rsdesc1->policies, rc, rsdesc2->policies)
1014  {
1017 
1018  if (!equalPolicy(l, r))
1019  return false;
1020  }
1021 
1022  return true;
1023 }
1024 
1025 /*
1026  * RelationBuildDesc
1027  *
1028  * Build a relation descriptor. The caller must hold at least
1029  * AccessShareLock on the target relid.
1030  *
1031  * The new descriptor is inserted into the hash table if insertIt is true.
1032  *
1033  * Returns NULL if no pg_class row could be found for the given relid
1034  * (suggesting we are trying to access a just-deleted relation).
1035  * Any other error is reported via elog.
 *
 * Outline of the visible steps: register the build in in_progress_list,
 * fetch the pg_class tuple, allocate the descriptor, set persistence-related
 * fields, build the tuple descriptor, initialize access-method info,
 * reloptions, rules, triggers and row security, then lock and physical
 * addressing info.  If the in-progress entry was flagged as invalidated
 * during the build, the half-built descriptor is destroyed and the whole
 * build is retried.
 *
 * NOTE(review): several lines are missing from this listing (for example
 * listing numbers 1065, 1067, 1069, 1075, 1080, 1108, 1136-1139, 1194, 1209
 * and 1278).  The comments below only describe what is visible.
1036  */
1037 static Relation
1038 RelationBuildDesc(Oid targetRelId, bool insertIt)
1039 {
1040  int in_progress_offset;
1041  Relation relation;
1042  Oid relid;
1043  HeapTuple pg_class_tuple;
1044  Form_pg_class relp;
1045 
1046  /*
1047  * This function and its subroutines can allocate a good deal of transient
1048  * data in CurrentMemoryContext. Traditionally we've just leaked that
1049  * data, reasoning that the caller's context is at worst of transaction
1050  * scope, and relcache loads shouldn't happen so often that it's essential
1051  * to recover transient data before end of statement/transaction. However
1052  * that's definitely not true when debug_discard_caches is active, and
1053  * perhaps it's not true in other cases.
1054  *
1055  * When debug_discard_caches is active or when forced to by
1056  * RECOVER_RELATION_BUILD_MEMORY=1, arrange to allocate the junk in a
1057  * temporary context that we'll free before returning. Make it a child of
1058  * caller's context so that it will get cleaned up appropriately if we
1059  * error out partway through.
1060  */
1061 #ifdef MAYBE_RECOVER_RELATION_BUILD_MEMORY
1062  MemoryContext tmpcxt = NULL;
1063  MemoryContext oldcxt = NULL;
1064 
   /*
    * NOTE(review): the condition guarding this block and the call that
    * creates tmpcxt are not visible in this listing.
    */
1066  {
1068  "RelationBuildDesc workspace",
1070  oldcxt = MemoryContextSwitchTo(tmpcxt);
1071  }
1072 #endif
1073 
1074  /* Register to catch invalidation messages */
   /*
    * NOTE(review): the test that triggers the enlargement below, and the
    * reallocation call itself, are not visible in this listing; the visible
    * part doubles in_progress_list_maxlen.
    */
1076  {
1077  int allocsize;
1078 
1079  allocsize = in_progress_list_maxlen * 2;
1081  allocsize * sizeof(*in_progress_list));
1082  in_progress_list_maxlen = allocsize;
1083  }
   /* Claim the next slot and record which relation is being built */
1084  in_progress_offset = in_progress_list_len++;
1085  in_progress_list[in_progress_offset].reloid = targetRelId;
1086 retry:
   /* Every attempt starts with the flag clear; it is re-checked further down */
1087  in_progress_list[in_progress_offset].invalidated = false;
1088 
1089  /*
1090  * find the tuple in pg_class corresponding to the given relation id
1091  */
1092  pg_class_tuple = ScanPgRelation(targetRelId, true, false);
1093 
1094  /*
1095  * if no such tuple exists, return NULL
1096  */
1097  if (!HeapTupleIsValid(pg_class_tuple))
1098  {
1099 #ifdef MAYBE_RECOVER_RELATION_BUILD_MEMORY
1100  if (tmpcxt)
1101  {
1102  /* Return to caller's context, and blow away the temporary context */
1103  MemoryContextSwitchTo(oldcxt);
1104  MemoryContextDelete(tmpcxt);
1105  }
1106 #endif
   /* Our entry is expected to be the last one in in_progress_list */
1107  Assert(in_progress_offset + 1 == in_progress_list_len);
1109  return NULL;
1110  }
1111 
1112  /*
1113  * get information from the pg_class_tuple
1114  */
1115  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1116  relid = relp->oid;
1117  Assert(relid == targetRelId);
1118 
1119  /*
1120  * allocate storage for the relation descriptor, and copy pg_class_tuple
1121  * to relation->rd_rel.
1122  */
1123  relation = AllocateRelationDesc(relp);
1124 
1125  /*
1126  * initialize the relation's relation id (relation->rd_id)
1127  */
1128  RelationGetRelid(relation) = relid;
1129 
1130  /*
1131  * Normal relations are not nailed into the cache. Since we don't flush
1132  * new relations, it won't be new. It could be temp though.
1133  */
1134  relation->rd_refcnt = 0;
1135  relation->rd_isnailed = false;
   /* Derive rd_backend and rd_islocaltemp from the persistence class */
1140  switch (relation->rd_rel->relpersistence)
1141  {
1142  case RELPERSISTENCE_UNLOGGED:
1143  case RELPERSISTENCE_PERMANENT:
1144  relation->rd_backend = INVALID_PROC_NUMBER;
1145  relation->rd_islocaltemp = false;
1146  break;
1147  case RELPERSISTENCE_TEMP:
1148  if (isTempOrTempToastNamespace(relation->rd_rel->relnamespace))
1149  {
1150  relation->rd_backend = ProcNumberForTempRelations();
1151  relation->rd_islocaltemp = true;
1152  }
1153  else
1154  {
1155  /*
1156  * If it's a temp table, but not one of ours, we have to use
1157  * the slow, grotty method to figure out the owning backend.
1158  *
1159  * Note: it's possible that rd_backend gets set to
1160  * MyProcNumber here, in case we are looking at a pg_class
1161  * entry left over from a crashed backend that coincidentally
1162  * had the same ProcNumber we're using. We should *not*
1163  * consider such a table to be "ours"; this is why we need the
1164  * separate rd_islocaltemp flag. The pg_class entry will get
1165  * flushed if/when we clean out the corresponding temp table
1166  * namespace in preparation for using it.
1167  */
1168  relation->rd_backend =
1169  GetTempNamespaceProcNumber(relation->rd_rel->relnamespace);
1170  Assert(relation->rd_backend != INVALID_PROC_NUMBER);
1171  relation->rd_islocaltemp = false;
1172  }
1173  break;
1174  default:
1175  elog(ERROR, "invalid relpersistence: %c",
1176  relation->rd_rel->relpersistence);
1177  break;
1178  }
1179 
1180  /*
1181  * initialize the tuple descriptor (relation->rd_att).
1182  */
1183  RelationBuildTupleDesc(relation);
1184 
1185  /* foreign key data is not loaded till asked for */
1186  relation->rd_fkeylist = NIL;
1187  relation->rd_fkeyvalid = false;
1188 
1189  /* partitioning data is not loaded till asked for */
1190  relation->rd_partkey = NULL;
1191  relation->rd_partkeycxt = NULL;
1192  relation->rd_partdesc = NULL;
1193  relation->rd_partdesc_nodetached = NULL;
1195  relation->rd_pdcxt = NULL;
1196  relation->rd_pddcxt = NULL;
1197  relation->rd_partcheck = NIL;
1198  relation->rd_partcheckvalid = false;
1199  relation->rd_partcheckcxt = NULL;
1200 
1201  /*
1202  * initialize access method information
1203  */
1204  if (relation->rd_rel->relkind == RELKIND_INDEX ||
1205  relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
1206  RelationInitIndexAccessInfo(relation);
1207  else if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind) ||
1208  relation->rd_rel->relkind == RELKIND_SEQUENCE)
   /*
    * NOTE(review): the statement for this branch (listing number 1209) is
    * missing from this listing.
    */
1210  else if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1211  {
1212  /*
1213  * Do nothing: access methods are a setting that partitions can
1214  * inherit.
1215  */
1216  }
1217  else
1218  Assert(relation->rd_rel->relam == InvalidOid);
1219 
1220  /* extract reloptions if any */
1221  RelationParseRelOptions(relation, pg_class_tuple);
1222 
1223  /*
1224  * Fetch rules and triggers that affect this relation.
1225  *
1226  * Note that RelationBuildRuleLock() relies on this being done after
1227  * extracting the relation's reloptions.
1228  */
1229  if (relation->rd_rel->relhasrules)
1230  RelationBuildRuleLock(relation);
1231  else
1232  {
1233  relation->rd_rules = NULL;
1234  relation->rd_rulescxt = NULL;
1235  }
1236 
1237  if (relation->rd_rel->relhastriggers)
1238  RelationBuildTriggers(relation);
1239  else
1240  relation->trigdesc = NULL;
1241 
   /* Row-security policies are loaded only when relrowsecurity is set */
1242  if (relation->rd_rel->relrowsecurity)
1243  RelationBuildRowSecurity(relation);
1244  else
1245  relation->rd_rsdesc = NULL;
1246 
1247  /*
1248  * initialize the relation lock manager information
1249  */
1250  RelationInitLockInfo(relation); /* see lmgr.c */
1251 
1252  /*
1253  * initialize physical addressing information for the relation
1254  */
1255  RelationInitPhysicalAddr(relation);
1256 
1257  /* make sure relation is marked as having no open file yet */
1258  relation->rd_smgr = NULL;
1259 
1260  /*
1261  * now we can free the memory allocated for pg_class_tuple
1262  */
1263  heap_freetuple(pg_class_tuple);
1264 
1265  /*
1266  * If an invalidation arrived mid-build, start over. Between here and the
1267  * end of this function, don't add code that does or reasonably could read
1268  * system catalogs. That range must be free from invalidation processing
1269  * for the !insertIt case. For the insertIt case, RelationCacheInsert()
1270  * will enroll this relation in ordinary relcache invalidation processing.
1271  */
1272  if (in_progress_list[in_progress_offset].invalidated)
1273  {
   /* Discard the possibly stale descriptor and rebuild from the retry label */
1274  RelationDestroyRelation(relation, false);
1275  goto retry;
1276  }
1277  Assert(in_progress_offset + 1 == in_progress_list_len);
1279 
1280  /*
1281  * Insert newly created relation into relcache hash table, if requested.
1282  *
1283  * There is one scenario in which we might find a hashtable entry already
1284  * present, even though our caller failed to find it: if the relation is a
1285  * system catalog or index that's used during relcache load, we might have
1286  * recursively created the same relcache entry during the preceding steps.
1287  * So allow RelationCacheInsert to delete any already-present relcache
1288  * entry for the same OID. The already-present entry should have refcount
1289  * zero (else somebody forgot to close it); in the event that it doesn't,
1290  * we'll elog a WARNING and leak the already-present entry.
1291  */
1292  if (insertIt)
1293  RelationCacheInsert(relation, true);
1294 
1295  /* It's fully valid */
1296  relation->rd_isvalid = true;
1297 
1298 #ifdef MAYBE_RECOVER_RELATION_BUILD_MEMORY
1299  if (tmpcxt)
1300  {
1301  /* Return to caller's context, and blow away the temporary context */
1302  MemoryContextSwitchTo(oldcxt);
1303  MemoryContextDelete(tmpcxt);
1304  }
1305 #endif
1306 
1307  return relation;
1308 }
1309 
1310 /*
1311  * Initialize the physical addressing info (RelFileLocator) for a relcache entry
1312  *
1313  * Note: at the physical level, relations in the pg_global tablespace must
1314  * be treated as shared, even if relisshared isn't set. Hence we do not
1315  * look at relisshared here.
 *
 * Fills relation->rd_locator (spcOid, dbOid, relNumber) from rd_rel.  Does
 * nothing for relation kinds without storage.  Raises an error if a needed
 * pg_class row or relation mapping cannot be found.
 *
 * NOTE(review): the declarator line (listing number 1318) and several
 * statements (1329, 1347-1348, 1373, 1388, 1390) are missing from this
 * listing.  The function is called elsewhere in this file as
 * RelationInitPhysicalAddr(relation).
1316  */
1317 static void
1319 {
   /* Remember the previous relNumber for the parallel-worker check at the end */
1320  RelFileNumber oldnumber = relation->rd_locator.relNumber;
1321 
1322  /* these relation kinds never have storage */
1323  if (!RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
1324  return;
1325 
1326  if (relation->rd_rel->reltablespace)
1327  relation->rd_locator.spcOid = relation->rd_rel->reltablespace;
1328  else
   /*
    * NOTE(review): the else-branch statement (listing number 1329) is
    * missing here; the "if" that follows is presumably a separate
    * statement, not the body of this else.
    */
   /* dbOid is InvalidOid for the global tablespace, else MyDatabaseId */
1330  if (relation->rd_locator.spcOid == GLOBALTABLESPACE_OID)
1331  relation->rd_locator.dbOid = InvalidOid;
1332  else
1333  relation->rd_locator.dbOid = MyDatabaseId;
1334 
1335  if (relation->rd_rel->relfilenode)
1336  {
1337  /*
1338  * Even if we are using a decoding snapshot that doesn't represent the
1339  * current state of the catalog we need to make sure the filenode
1340  * points to the current file since the older file will be gone (or
1341  * truncated). The new file will still contain older rows so lookups
1342  * in them will work correctly. This wouldn't work correctly if
1343  * rewrites were allowed to change the schema in an incompatible way,
1344  * but those are prevented both on catalog tables and on user tables
1345  * declared as additional catalog tables.
1346  */
   /*
    * NOTE(review): the start of this condition (listing numbers 1347-1348)
    * is missing from this listing; only its last conjunct is visible.
    */
1349  && IsTransactionState())
1350  {
1351  HeapTuple phys_tuple;
1352  Form_pg_class physrel;
1353 
1354  phys_tuple = ScanPgRelation(RelationGetRelid(relation),
1355  RelationGetRelid(relation) != ClassOidIndexId,
1356  true);
1357  if (!HeapTupleIsValid(phys_tuple))
1358  elog(ERROR, "could not find pg_class entry for %u",
1359  RelationGetRelid(relation));
1360  physrel = (Form_pg_class) GETSTRUCT(phys_tuple);
1361 
   /* Overwrite the cached values with those from the freshly read tuple */
1362  relation->rd_rel->reltablespace = physrel->reltablespace;
1363  relation->rd_rel->relfilenode = physrel->relfilenode;
1364  heap_freetuple(phys_tuple);
1365  }
1366 
1367  relation->rd_locator.relNumber = relation->rd_rel->relfilenode;
1368  }
1369  else
1370  {
1371  /* Consult the relation mapper */
   /*
    * NOTE(review): the name of the mapper function called here (listing
    * number 1373) is missing from this listing.
    */
1372  relation->rd_locator.relNumber =
1374  relation->rd_rel->relisshared);
1375  if (!RelFileNumberIsValid(relation->rd_locator.relNumber))
1376  elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1377  RelationGetRelationName(relation), relation->rd_id);
1378  }
1379 
1380  /*
1381  * For RelationNeedsWAL() to answer correctly on parallel workers, restore
1382  * rd_firstRelfilelocatorSubid. No subtransactions start or end while in
1383  * parallel mode, so the specific SubTransactionId does not matter.
1384  */
1385  if (IsParallelWorker() && oldnumber != relation->rd_locator.relNumber)
1386  {
   /*
    * NOTE(review): the statements of both branches (listing numbers 1388
    * and 1390) are missing from this listing.
    */
1387  if (RelFileLocatorSkippingWAL(relation->rd_locator))
1389  else
1391  }
1392 }
1393 
1394 /*
1395  * Fill in the IndexAmRoutine for an index relation.
1396  *
1397  * relation's rd_amhandler and rd_indexcxt must be valid already.
 *
 * On return relation->rd_indam points to a copy of the routine struct
 * allocated in relation->rd_indexcxt.
 *
 * NOTE(review): the declarator line (listing number 1400) is missing from
 * this listing.  The function is called elsewhere in this file as
 * InitIndexAmRoutine(relation).
1398  */
1399 static void
1401 {
1402  IndexAmRoutine *cached,
1403  *tmp;
1404 
1405  /*
1406  * Call the amhandler in current, short-lived memory context, just in case
1407  * it leaks anything (it probably won't, but let's be paranoid).
1408  */
1409  tmp = GetIndexAmRoutine(relation->rd_amhandler);
1410 
1411  /* OK, now transfer the data into relation's rd_indexcxt. */
1412  cached = (IndexAmRoutine *) MemoryContextAlloc(relation->rd_indexcxt,
1413  sizeof(IndexAmRoutine));
1414  memcpy(cached, tmp, sizeof(IndexAmRoutine));
1415  relation->rd_indam = cached;
1416 
   /* The relation keeps only the copy; release the temporary struct */
1417  pfree(tmp);
1418 }
1419 
1420 /*
1421  * Initialize index-access-method support data for an index relation
 *
 * Visible steps: copy the index's pg_index tuple into the relcache entry,
 * look up the access method's handler OID, create the per-index context,
 * fetch the AM routine, allocate the per-column arrays, and fill them from
 * the indcollation, indclass and indoption columns of the copied tuple.
 * Raises an error if a syscache lookup fails or if relnatts and indnatts
 * disagree.
 *
 * NOTE(review): the declarator line (listing number 1424) and a few
 * statements (1451, 1480, 1482, 1484, 1530, 1543, 1562) are missing from
 * this listing.  The function is called elsewhere in this file as
 * RelationInitIndexAccessInfo(relation).
1422  */
1423 void
1425 {
1426  HeapTuple tuple;
1427  Form_pg_am aform;
1428  Datum indcollDatum;
1429  Datum indclassDatum;
1430  Datum indoptionDatum;
1431  bool isnull;
1432  oidvector *indcoll;
1433  oidvector *indclass;
1434  int2vector *indoption;
1435  MemoryContext indexcxt;
1436  MemoryContext oldcontext;
1437  int indnatts;
1438  int indnkeyatts;
1439  uint16 amsupport;
1440 
1441  /*
1442  * Make a copy of the pg_index entry for the index. Since pg_index
1443  * contains variable-length and possibly-null fields, we have to do this
1444  * honestly rather than just treating it as a Form_pg_index struct.
1445  */
1446  tuple = SearchSysCache1(INDEXRELID,
1447  ObjectIdGetDatum(RelationGetRelid(relation)));
1448  if (!HeapTupleIsValid(tuple))
1449  elog(ERROR, "cache lookup failed for index %u",
1450  RelationGetRelid(relation));
   /*
    * NOTE(review): the statement that assigns oldcontext (listing number
    * 1451) is missing from this listing; presumably it switches memory
    * contexts before the copy below.
    */
1452  relation->rd_indextuple = heap_copytuple(tuple);
1453  relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
1454  MemoryContextSwitchTo(oldcontext);
1455  ReleaseSysCache(tuple);
1456 
1457  /*
1458  * Look up the index's access method, save the OID of its handler function
1459  */
1460  Assert(relation->rd_rel->relam != InvalidOid);
1461  tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(relation->rd_rel->relam));
1462  if (!HeapTupleIsValid(tuple))
1463  elog(ERROR, "cache lookup failed for access method %u",
1464  relation->rd_rel->relam);
1465  aform = (Form_pg_am) GETSTRUCT(tuple);
1466  relation->rd_amhandler = aform->amhandler;
1467  ReleaseSysCache(tuple);
1468 
   /* Cross-check the attribute count from pg_class against pg_index */
1469  indnatts = RelationGetNumberOfAttributes(relation);
1470  if (indnatts != IndexRelationGetNumberOfAttributes(relation))
1471  elog(ERROR, "relnatts disagrees with indnatts for index %u",
1472  RelationGetRelid(relation));
1473  indnkeyatts = IndexRelationGetNumberOfKeyAttributes(relation);
1474 
1475  /*
1476  * Make the private context to hold index access info. The reason we need
1477  * a context, and not just a couple of pallocs, is so that we won't leak
1478  * any subsidiary info attached to fmgr lookup records.
1479  */
   /*
    * NOTE(review): the call that creates indexcxt and the call that uses
    * the relation name as an argument are only partly visible here.
    */
1481  "index info",
1483  relation->rd_indexcxt = indexcxt;
1485  RelationGetRelationName(relation));
1486 
1487  /*
1488  * Now we can fetch the index AM's API struct
1489  */
1490  InitIndexAmRoutine(relation);
1491 
1492  /*
1493  * Allocate arrays to hold data. Opclasses are not used for included
1494  * columns, so allocate them for indnkeyatts only.
1495  */
1496  relation->rd_opfamily = (Oid *)
1497  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1498  relation->rd_opcintype = (Oid *)
1499  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1500 
   /* Support-procedure arrays hold amsupport slots for each of indnatts columns */
1501  amsupport = relation->rd_indam->amsupport;
1502  if (amsupport > 0)
1503  {
1504  int nsupport = indnatts * amsupport;
1505 
1506  relation->rd_support = (RegProcedure *)
1507  MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
1508  relation->rd_supportinfo = (FmgrInfo *)
1509  MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
1510  }
1511  else
1512  {
1513  relation->rd_support = NULL;
1514  relation->rd_supportinfo = NULL;
1515  }
1516 
1517  relation->rd_indcollation = (Oid *)
1518  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1519 
1520  relation->rd_indoption = (int16 *)
1521  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(int16));
1522 
1523  /*
1524  * indcollation cannot be referenced directly through the C struct,
1525  * because it comes after the variable-width indkey field. Must extract
1526  * the datum the hard way...
1527  */
   /*
    * NOTE(review): in this and the two later fastgetattr() calls, the
    * argument between the attribute number and &isnull is missing from this
    * listing.
    */
1528  indcollDatum = fastgetattr(relation->rd_indextuple,
1529  Anum_pg_index_indcollation,
1531  &isnull);
1532  Assert(!isnull);
1533  indcoll = (oidvector *) DatumGetPointer(indcollDatum);
1534  memcpy(relation->rd_indcollation, indcoll->values, indnkeyatts * sizeof(Oid));
1535 
1536  /*
1537  * indclass cannot be referenced directly through the C struct, because it
1538  * comes after the variable-width indkey field. Must extract the datum
1539  * the hard way...
1540  */
1541  indclassDatum = fastgetattr(relation->rd_indextuple,
1542  Anum_pg_index_indclass,
1544  &isnull);
1545  Assert(!isnull);
1546  indclass = (oidvector *) DatumGetPointer(indclassDatum);
1547 
1548  /*
1549  * Fill the support procedure OID array, as well as the info about
1550  * opfamilies and opclass input types. (aminfo and supportinfo are left
1551  * as zeroes, and are filled on-the-fly when used)
1552  */
1553  IndexSupportInitialize(indclass, relation->rd_support,
1554  relation->rd_opfamily, relation->rd_opcintype,
1555  amsupport, indnkeyatts);
1556 
1557  /*
1558  * Similarly extract indoption and copy it to the cache entry
1559  */
1560  indoptionDatum = fastgetattr(relation->rd_indextuple,
1561  Anum_pg_index_indoption,
1563  &isnull);
1564  Assert(!isnull);
1565  indoption = (int2vector *) DatumGetPointer(indoptionDatum);
1566  memcpy(relation->rd_indoption, indoption->values, indnkeyatts * sizeof(int16));
1567 
   /* Called for its effect on the relation only; the return value is discarded */
1568  (void) RelationGetIndexAttOptions(relation, false);
1569 
1570  /*
1571  * expressions, predicate, exclusion caches will be filled later
1572  */
1573  relation->rd_indexprs = NIL;
1574  relation->rd_indpred = NIL;
1575  relation->rd_exclops = NULL;
1576  relation->rd_exclprocs = NULL;
1577  relation->rd_exclstrats = NULL;
1578  relation->rd_amcache = NULL;
1579 }
1580 
1581 /*
1582  * IndexSupportInitialize
1583  * Initializes an index's cached opclass information,
1584  * given the index's pg_index.indclass entry.
1585  *
1586  * Data is returned into *indexSupport, *opFamily, and *opcInType,
1587  * which are arrays allocated by the caller.
1588  *
1589  * The caller also passes maxSupportNumber and maxAttributeNumber, since these
1590  * indicate the size of the arrays it has allocated --- but in practice these
1591  * numbers must always match those obtainable from the system catalog entries
1592  * for the index and access method.
 *
 * Layout: opFamily and opcInType get one entry per attribute, while
 * indexSupport gets maxSupportNumber consecutive entries per attribute,
 * starting at index attIndex * maxSupportNumber.  indexSupport is not
 * touched when maxSupportNumber is zero.  An invalid opclass OID in
 * indclass raises an error.
 *
 * NOTE(review): the first declarator line (listing number 1595), carrying
 * the function name and the indclass parameter, is missing from this
 * listing.
1593  */
1594 static void
1596  RegProcedure *indexSupport,
1597  Oid *opFamily,
1598  Oid *opcInType,
1599  StrategyNumber maxSupportNumber,
1600  AttrNumber maxAttributeNumber)
1601 {
1602  int attIndex;
1603 
1604  for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++)
1605  {
1606  OpClassCacheEnt *opcentry;
1607 
1608  if (!OidIsValid(indclass->values[attIndex]))
1609  elog(ERROR, "bogus pg_index tuple");
1610 
1611  /* look up the info for this opclass, using a cache */
1612  opcentry = LookupOpclassInfo(indclass->values[attIndex],
1613  maxSupportNumber);
1614 
1615  /* copy cached data into relcache entry */
1616  opFamily[attIndex] = opcentry->opcfamily;
1617  opcInType[attIndex] = opcentry->opcintype;
   /* This attribute's slice of the flat support-procedure array */
1618  if (maxSupportNumber > 0)
1619  memcpy(&indexSupport[attIndex * maxSupportNumber],
1620  opcentry->supportProcs,
1621  maxSupportNumber * sizeof(RegProcedure));
1622  }
1623 }
1624 
1625 /*
1626  * LookupOpclassInfo
1627  *
1628  * This routine maintains a per-opclass cache of the information needed
1629  * by IndexSupportInitialize(). This is more efficient than relying on
1630  * the catalog cache, because we can load all the info about a particular
1631  * opclass in a single indexscan of pg_amproc.
1632  *
1633  * The information from pg_am about expected range of support function
1634  * numbers is passed in, rather than being looked up, mainly because the
1635  * caller will have it already.
1636  *
1637  * Note there is no provision for flushing the cache. This is OK at the
1638  * moment because there is no way to ALTER any interesting properties of an
1639  * existing opclass --- all you can do is drop it, which will result in
1640  * a useless but harmless dead entry in the cache. To support altering
1641  * opclass membership (not the same as opfamily membership!), we'd need to
1642  * be able to flush this cache as well as the contents of relcache entries
1643  * for indexes.
 *
 * Returns the cache entry for operatorClassOid, loading it from pg_opclass
 * and (when numSupport > 0) pg_amproc if it is not yet marked valid.
 * Raises an error if the opclass row is not found or if an amproc number
 * lies outside 1..numSupport.
 *
 * NOTE(review): a few lines are missing from this listing (listing numbers
 * 1664, 1711, 1748, 1786): the statement under the CacheMemoryContext
 * check, the allocation call for supportProcs, and whatever follows each
 * systable_endscan().
1644  */
1645 static OpClassCacheEnt *
1646 LookupOpclassInfo(Oid operatorClassOid,
1647  StrategyNumber numSupport)
1648 {
1649  OpClassCacheEnt *opcentry;
1650  bool found;
1651  Relation rel;
1652  SysScanDesc scan;
1653  ScanKeyData skey[3];
1654  HeapTuple htup;
1655  bool indexOK;
1656 
1657  if (OpClassCache == NULL)
1658  {
1659  /* First time through: initialize the opclass cache */
1660  HASHCTL ctl;
1661 
1662  /* Also make sure CacheMemoryContext exists */
1663  if (!CacheMemoryContext)
1665 
   /* Hash table keyed by opclass OID */
1666  ctl.keysize = sizeof(Oid);
1667  ctl.entrysize = sizeof(OpClassCacheEnt);
1668  OpClassCache = hash_create("Operator class cache", 64,
1669  &ctl, HASH_ELEM | HASH_BLOBS);
1670  }
1671 
   /* Find the entry, or create it; "found" says which case applied */
1672  opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
1673  &operatorClassOid,
1674  HASH_ENTER, &found);
1675 
1676  if (!found)
1677  {
1678  /* Initialize new entry */
1679  opcentry->valid = false; /* until known OK */
1680  opcentry->numSupport = numSupport;
1681  opcentry->supportProcs = NULL; /* filled below */
1682  }
1683  else
1684  {
   /* An existing entry must have been built for the same numSupport */
1685  Assert(numSupport == opcentry->numSupport);
1686  }
1687 
1688  /*
1689  * When aggressively testing cache-flush hazards, we disable the operator
1690  * class cache and force reloading of the info on each call. This models
1691  * no real-world behavior, since the cache entries are never invalidated
1692  * otherwise. However it can be helpful for detecting bugs in the cache
1693  * loading logic itself, such as reliance on a non-nailed index. Given
1694  * the limited use-case and the fact that this adds a great deal of
1695  * expense, we enable it only for high values of debug_discard_caches.
1696  */
1697 #ifdef DISCARD_CACHES_ENABLED
1698  if (debug_discard_caches > 2)
1699  opcentry->valid = false;
1700 #endif
1701 
1702  if (opcentry->valid)
1703  return opcentry;
1704 
1705  /*
1706  * Need to fill in new entry. First allocate space, unless we already did
1707  * so in some previous attempt.
1708  */
1709  if (opcentry->supportProcs == NULL && numSupport > 0)
1710  opcentry->supportProcs = (RegProcedure *)
1712  numSupport * sizeof(RegProcedure));
1713 
1714  /*
1715  * To avoid infinite recursion during startup, force heap scans if we're
1716  * looking up info for the opclasses used by the indexes we would like to
1717  * reference here.
1718  */
1719  indexOK = criticalRelcachesBuilt ||
1720  (operatorClassOid != OID_BTREE_OPS_OID &&
1721  operatorClassOid != INT2_BTREE_OPS_OID);
1722 
1723  /*
1724  * We have to fetch the pg_opclass row to determine its opfamily and
1725  * opcintype, which are needed to look up related operators and functions.
1726  * It'd be convenient to use the syscache here, but that probably doesn't
1727  * work while bootstrapping.
1728  */
1729  ScanKeyInit(&skey[0],
1730  Anum_pg_opclass_oid,
1731  BTEqualStrategyNumber, F_OIDEQ,
1732  ObjectIdGetDatum(operatorClassOid));
1733  rel = table_open(OperatorClassRelationId, AccessShareLock);
1734  scan = systable_beginscan(rel, OpclassOidIndexId, indexOK,
1735  NULL, 1, skey);
1736 
1737  if (HeapTupleIsValid(htup = systable_getnext(scan)))
1738  {
1739  Form_pg_opclass opclassform = (Form_pg_opclass) GETSTRUCT(htup);
1740 
1741  opcentry->opcfamily = opclassform->opcfamily;
1742  opcentry->opcintype = opclassform->opcintype;
1743  }
1744  else
1745  elog(ERROR, "could not find tuple for opclass %u", operatorClassOid);
1746 
1747  systable_endscan(scan);
1749 
1750  /*
1751  * Scan pg_amproc to obtain support procs for the opclass. We only fetch
1752  * the default ones (those with lefttype = righttype = opcintype).
1753  */
1754  if (numSupport > 0)
1755  {
1756  ScanKeyInit(&skey[0],
1757  Anum_pg_amproc_amprocfamily,
1758  BTEqualStrategyNumber, F_OIDEQ,
1759  ObjectIdGetDatum(opcentry->opcfamily));
1760  ScanKeyInit(&skey[1],
1761  Anum_pg_amproc_amproclefttype,
1762  BTEqualStrategyNumber, F_OIDEQ,
1763  ObjectIdGetDatum(opcentry->opcintype));
1764  ScanKeyInit(&skey[2],
1765  Anum_pg_amproc_amprocrighttype,
1766  BTEqualStrategyNumber, F_OIDEQ,
1767  ObjectIdGetDatum(opcentry->opcintype));
1768  rel = table_open(AccessMethodProcedureRelationId, AccessShareLock);
1769  scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK,
1770  NULL, 3, skey);
1771 
1772  while (HeapTupleIsValid(htup = systable_getnext(scan)))
1773  {
1774  Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);
1775 
   /* amprocnum must lie in 1..numSupport; it is stored zero-based below */
1776  if (amprocform->amprocnum <= 0 ||
1777  (StrategyNumber) amprocform->amprocnum > numSupport)
1778  elog(ERROR, "invalid amproc number %d for opclass %u",
1779  amprocform->amprocnum, operatorClassOid);
1780 
1781  opcentry->supportProcs[amprocform->amprocnum - 1] =
1782  amprocform->amproc;
1783  }
1784 
1785  systable_endscan(scan);
1787  }
1788 
   /* Mark valid only after both scans completed without error */
1789  opcentry->valid = true;
1790  return opcentry;
1791 }
1792 
1793 /*
1794  * Fill in the TableAmRoutine for a relation
1795  *
1796  * relation's rd_amhandler must be valid already.
 *
 * Stores the result of GetTableAmRoutine(rd_amhandler) in
 * relation->rd_tableam.
 *
 * NOTE(review): the declarator line (listing number 1799) is missing from
 * this listing.  The function is called elsewhere in this file as
 * InitTableAmRoutine(relation).
1797  */
1798 static void
1800 {
1801  relation->rd_tableam = GetTableAmRoutine(relation->rd_amhandler);
1802 }
1803 
1804 /*
1805  * Initialize table access method support for a table like relation
 *
 * Chooses relation->rd_amhandler in one of three ways, then loads the AM
 * routine via InitTableAmRoutine():
 *   - sequences: the heap handler is hardwired (relam is asserted to be
 *     InvalidOid);
 *   - catalog relations: the heap handler is hardwired without a syscache
 *     lookup (relam is asserted to be HEAP_TABLE_AM_OID);
 *   - anything else: the handler OID is read from the access method's
 *     syscache entry, raising an error if the lookup fails.
 *
 * NOTE(review): the declarator line (listing number 1808) is missing from
 * this listing, so the function name is not visible here.
1806  */
1807 void
1809 {
1810  HeapTuple tuple;
1811  Form_pg_am aform;
1812 
1813  if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
1814  {
1815  /*
1816  * Sequences are currently accessed like heap tables, but it doesn't
1817  * seem prudent to show that in the catalog. So just overwrite it
1818  * here.
1819  */
1820  Assert(relation->rd_rel->relam == InvalidOid);
1821  relation->rd_amhandler = F_HEAP_TABLEAM_HANDLER;
1822  }
1823  else if (IsCatalogRelation(relation))
1824  {
1825  /*
1826  * Avoid doing a syscache lookup for catalog tables.
1827  */
1828  Assert(relation->rd_rel->relam == HEAP_TABLE_AM_OID);
1829  relation->rd_amhandler = F_HEAP_TABLEAM_HANDLER;
1830  }
1831  else
1832  {
1833  /*
1834  * Look up the table access method, save the OID of its handler
1835  * function.
1836  */
1837  Assert(relation->rd_rel->relam != InvalidOid);
1838  tuple = SearchSysCache1(AMOID,
1839  ObjectIdGetDatum(relation->rd_rel->relam));
1840  if (!HeapTupleIsValid(tuple))
1841  elog(ERROR, "cache lookup failed for access method %u",
1842  relation->rd_rel->relam);
1843  aform = (Form_pg_am) GETSTRUCT(tuple);
1844  relation->rd_amhandler = aform->amhandler;
1845  ReleaseSysCache(tuple);
1846  }
1847 
1848  /*
1849  * Now we can fetch the table AM's API struct
1850  */
1851  InitTableAmRoutine(relation);
1852 }
1853 
1854 /*
1855  * formrdesc
1856  *
1857  * This is a special cut-down version of RelationBuildDesc(),
1858  * used while initializing the relcache.
1859  * The relation descriptor is built just from the supplied parameters,
1860  * without actually looking at any system table entries. We cheat
1861  * quite a lot since we only need to work for a few basic system
1862  * catalogs.
1863  *
1864  * The catalogs this is used for can't have constraints (except attnotnull),
1865  * default values, rules, or triggers, since we don't cope with any of that.
1866  * (Well, actually, this only matters for properties that need to be valid
1867  * during bootstrap or before RelationCacheInitializePhase3 runs, and none of
1868  * these properties matter then...)
1869  *
1870  * NOTE: we assume we are already switched into CacheMemoryContext.
 *
 * Parameters as used by the visible code:
 *   relationName    - copied into rd_rel->relname
 *   relationReltype - stored as rd_rel->reltype and as the tuple
 *                     descriptor's tdtypeid
 *   isshared        - stored as relisshared; when true, reltablespace is set
 *                     to GLOBALTABLESPACE_OID
 *   natts, attrs    - number of attributes and their pg_attribute data,
 *                     copied into the tuple descriptor; the relation OID is
 *                     taken from the first attribute's attrelid
 *
 * NOTE(review): several lines are missing from this listing (listing
 * numbers 1899-1902, 1915, 1964, 1994-1995, 2018), including the statement
 * that must set rd_rel before it is dereferenced and the conditions of the
 * bootstrap-related branches.
1871  */
1872 static void
1873 formrdesc(const char *relationName, Oid relationReltype,
1874  bool isshared,
1875  int natts, const FormData_pg_attribute *attrs)
1876 {
1877  Relation relation;
1878  int i;
1879  bool has_not_null;
1880 
1881  /*
1882  * allocate new relation desc, clear all fields of reldesc
1883  */
1884  relation = (Relation) palloc0(sizeof(RelationData));
1885 
1886  /* make sure relation is marked as having no open file yet */
1887  relation->rd_smgr = NULL;
1888 
1889  /*
1890  * initialize reference count: 1 because it is nailed in cache
1891  */
1892  relation->rd_refcnt = 1;
1893 
1894  /*
1895  * all entries built with this routine are nailed-in-cache; none are for
1896  * new or temp relations.
1897  */
1898  relation->rd_isnailed = true;
1903  relation->rd_backend = INVALID_PROC_NUMBER;
1904  relation->rd_islocaltemp = false;
1905 
1906  /*
1907  * initialize relation tuple form
1908  *
1909  * The data we insert here is pretty incomplete/bogus, but it'll serve to
1910  * get us launched. RelationCacheInitializePhase3() will read the real
1911  * data from pg_class and replace what we've done here. Note in
1912  * particular that relowner is left as zero; this cues
1913  * RelationCacheInitializePhase3 that the real data isn't there yet.
1914  */
   /*
    * NOTE(review): the statement that sets relation->rd_rel (listing number
    * 1915) is missing from this listing.
    */
1916 
1917  namestrcpy(&relation->rd_rel->relname, relationName);
1918  relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1919  relation->rd_rel->reltype = relationReltype;
1920 
1921  /*
1922  * It's important to distinguish between shared and non-shared relations,
1923  * even at bootstrap time, to make sure we know where they are stored.
1924  */
1925  relation->rd_rel->relisshared = isshared;
1926  if (isshared)
1927  relation->rd_rel->reltablespace = GLOBALTABLESPACE_OID;
1928 
1929  /* formrdesc is used only for permanent relations */
1930  relation->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
1931 
1932  /* ... and they're always populated, too */
1933  relation->rd_rel->relispopulated = true;
1934 
1935  relation->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
1936  relation->rd_rel->relpages = 0;
1937  relation->rd_rel->reltuples = -1;
1938  relation->rd_rel->relallvisible = 0;
1939  relation->rd_rel->relkind = RELKIND_RELATION;
1940  relation->rd_rel->relnatts = (int16) natts;
1941  relation->rd_rel->relam = HEAP_TABLE_AM_OID;
1942 
1943  /*
1944  * initialize attribute tuple form
1945  *
1946  * Unlike the case with the relation tuple, this data had better be right
1947  * because it will never be replaced. The data comes from
1948  * src/include/catalog/ headers via genbki.pl.
1949  */
1950  relation->rd_att = CreateTemplateTupleDesc(natts);
1951  relation->rd_att->tdrefcount = 1; /* mark as refcounted */
1952 
1953  relation->rd_att->tdtypeid = relationReltype;
1954  relation->rd_att->tdtypmod = -1; /* just to be sure */
1955 
1956  /*
1957  * initialize tuple desc info
1958  */
1959  has_not_null = false;
1960  for (i = 0; i < natts; i++)
1961  {
   /*
    * NOTE(review): the size argument of this memcpy (listing number 1964)
    * is missing from this listing.
    */
1962  memcpy(TupleDescAttr(relation->rd_att, i),
1963  &attrs[i],
1965  has_not_null |= attrs[i].attnotnull;
1966  /* make sure attcacheoff is valid */
1967  TupleDescAttr(relation->rd_att, i)->attcacheoff = -1;
1968  }
1969 
1970  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
1971  TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
1972 
1973  /* mark not-null status */
1974  if (has_not_null)
1975  {
1976  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
1977 
1978  constr->has_not_null = true;
1979  relation->rd_att->constr = constr;
1980  }
1981 
1982  /*
1983  * initialize relation id from info in att array (my, this is ugly)
1984  */
1985  RelationGetRelid(relation) = TupleDescAttr(relation->rd_att, 0)->attrelid;
1986 
1987  /*
1988  * All relations made with formrdesc are mapped. This is necessarily so
1989  * because there is no other way to know what filenumber they currently
1990  * have. In bootstrap mode, add them to the initial relation mapper data,
1991  * specifying that the initial filenumber is the same as the OID.
1992  */
1993  relation->rd_rel->relfilenode = InvalidRelFileNumber;
   /*
    * NOTE(review): the condition and the start of the call that the next
    * two lines belong to (listing numbers 1994-1995) are missing from this
    * listing.
    */
1996  RelationGetRelid(relation),
1997  isshared, true);
1998 
1999  /*
2000  * initialize the relation lock manager information
2001  */
2002  RelationInitLockInfo(relation); /* see lmgr.c */
2003 
2004  /*
2005  * initialize physical addressing information for the relation
2006  */
2007  RelationInitPhysicalAddr(relation);
2008 
2009  /*
2010  * initialize the table am handler
2011  */
   /* relam was already set to HEAP_TABLE_AM_OID above; this repeats it */
2012  relation->rd_rel->relam = HEAP_TABLE_AM_OID;
2013  relation->rd_tableam = GetHeapamTableAmRoutine();
2014 
2015  /*
2016  * initialize the rel-has-index flag, using hardwired knowledge
2017  */
   /*
    * NOTE(review): the condition selecting between the two branches below
    * (listing number 2018) is missing from this listing; judging by the
    * branch comments it tests for bootstrap mode.
    */
2019  {
2020  /* In bootstrap mode, we have no indexes */
2021  relation->rd_rel->relhasindex = false;
2022  }
2023  else
2024  {
2025  /* Otherwise, all the rels formrdesc is used for have indexes */
2026  relation->rd_rel->relhasindex = true;
2027  }
2028 
2029  /*
2030  * add new reldesc to relcache
2031  */
2032  RelationCacheInsert(relation, false);
2033 
2034  /* It's fully valid */
2035  relation->rd_isvalid = true;
2036 }
2037 
2038 
2039 /* ----------------------------------------------------------------
2040  * Relation Descriptor Lookup Interface
2041  * ----------------------------------------------------------------
2042  */
2043 
2044 /*
2045  * RelationIdGetRelation
2046  *
2047  * Look up a reldesc by OID; build one if it is not already in the cache.
2048  *
2049  * Returns NULL if no pg_class row could be found for the given relid
2050  * (suggesting we are trying to access a just-deleted relation), or if the
2051  * cached entry is for a dropped relation. Any other error is reported via elog.
2052  *
2053  * NB: caller should already have at least AccessShareLock on the
2054  * relation ID, else there are nasty race conditions.
2055  *
2056  * NB: relation ref count is incremented, or set to 1 if new entry.
2057  * Caller should eventually decrement count. (Usually,
2058  * that happens by calling RelationClose().)
2059  */
2060 Relation
2062 {
2063  Relation rd;
2064 
2065  /* Make sure we're in an xact, even if this ends up being a cache hit */
2067 
2068  /*
2069  * First, try to find the reldesc in the cache.
2070  */
2071  RelationIdCacheLookup(relationId, rd);
2072 
2073  if (RelationIsValid(rd))
2074  {
2075  /* return NULL for dropped relations; such entries must be invalid */
2077  {
2078  Assert(!rd->rd_isvalid);
2079  return NULL;
2080  }
2081 
2083  /* revalidate cache entry if necessary */
2084  if (!rd->rd_isvalid)
2085  {
2086  /*
2087  * Indexes only have a limited number of possible schema changes,
2088  * and we don't want to use the full-blown procedure because it's
2089  * a headache for indexes that the reload itself depends on.
2090  */
2091  if (rd->rd_rel->relkind == RELKIND_INDEX ||
2092  rd->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
2094  else
2095  RelationClearRelation(rd, true);
2096 
2097  /*
2098  * Normally entries need to be valid here, but before the relcache
2099  * has been initialized, not enough infrastructure exists to
2100  * perform pg_class lookups. The structure of such entries doesn't
2101  * change, but we still want to update the rd_rel entry. So
2102  * rd_isvalid = false is left in place for a later lookup.
2103  */
2104  Assert(rd->rd_isvalid ||
2106  }
2107  return rd;
2108  }
2109 
2110  /*
2111  * No reldesc in the cache, so have RelationBuildDesc() build one and add
2112  * it to the cache.
2113  */
2114  rd = RelationBuildDesc(relationId, true);
2115  if (RelationIsValid(rd))
2117  return rd;
2118 }
2119 
2120 /* ----------------------------------------------------------------
2121  * cache invalidation support routines
2122  * ----------------------------------------------------------------
2123  */
2124 
2125 /* ResourceOwner callbacks used to track relcache references (defined elsewhere in this file) */
2126 static void ResOwnerReleaseRelation(Datum res);
2127 static char *ResOwnerPrintRelCache(Datum res);
2128 
/* Descriptor wiring the two callbacks above into the ResourceOwner machinery */
2130 {
2131  .name = "relcache reference",
2132  .release_phase = RESOURCE_RELEASE_BEFORE_LOCKS,
2133  .release_priority = RELEASE_PRIO_RELCACHE_REFS,
2134  .ReleaseResource = ResOwnerReleaseRelation,
2135  .DebugPrint = ResOwnerPrintRelCache
2136 };
2137 
2138 /* Convenience wrappers over ResourceOwnerRemember/Forget for relcache references */
2139 static inline void
2141 {
2143 }
2144 static inline void
2146 {
2148 }
2149 
2150 /*
2151  * RelationIncrementReferenceCount
2152  * Increments the relation's reference count (rel->rd_refcnt).
2153  *
2154  * Note: bootstrap mode has its own weird ideas about relation refcount
2155  * behavior; we ought to fix it someday, but for now, just disable
2156  * reference count ownership tracking in bootstrap mode.
2157  */
2158 void
2160 {
2162  rel->rd_refcnt += 1;
2165 }
2166 
2167 /*
2168  * RelationDecrementReferenceCount
2169  * Decrements the relation's reference count, which must be > 0 on entry.
2170  */
2171 void
2173 {
2174  Assert(rel->rd_refcnt > 0);
2175  rel->rd_refcnt -= 1;
2178 }
2179 
2180 /*
2181  * RelationClose - close an open relation
2182  *
2183  * Actually, we just decrement the refcount, then call RelationCloseCleanup().
2184  *
2185  * NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
2186  * will be freed as soon as their refcount goes to zero. In combination
2187  * with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
2188  * to catch references to already-released relcache entries. It slows
2189  * things down quite a bit, however.
2190  */
2191 void
2193 {
2194  /* Note: no locking manipulations needed */
2196 
2197  RelationCloseCleanup(relation);
2198 }
2199 
/*
 * RelationCloseCleanup
 *
 * Cleanup that becomes possible once a relation's refcount has reached
 * zero; called from RelationClose().
 */
2200 static void
2202 {
2203  /*
2204  * If the relation is no longer open in this session, we can clean up any
2205  * stale partition descriptors it has. This is unlikely, so check to see
2206  * if there are child contexts (of rd_pdcxt and of rd_pddcxt, each checked
 * on its own) before expending a call to mcxt.c.
2207  */
2208  if (RelationHasReferenceCountZero(relation))
2209  {
2210  if (relation->rd_pdcxt != NULL &&
2211  relation->rd_pdcxt->firstchild != NULL)
2213 
2214  if (relation->rd_pddcxt != NULL &&
2215  relation->rd_pddcxt->firstchild != NULL)
2217  }
2218 
/* Debug build option: discard unreferenced entries right away */
2219 #ifdef RELCACHE_FORCE_RELEASE
2220  if (RelationHasReferenceCountZero(relation) &&
2221  relation->rd_createSubid == InvalidSubTransactionId &&
2223  RelationClearRelation(relation, false);
2224 #endif
2225 }
2226 
2227 /*
2228  * RelationReloadIndexInfo - reload minimal information for an open index
2229  *
2230  * This function is used only for indexes. A relcache inval on an index
2231  * can mean that its pg_class or pg_index row changed. There are only
2232  * very limited changes that are allowed to an existing index's schema,
2233  * so we can update the relcache entry without a complete rebuild; which
2234  * is fortunate because we can't rebuild an index entry that is "nailed"
2235  * and/or in active use. We support full replacement of the pg_class row,
2236  * as well as updates of a few simple fields of the pg_index row.
2237  *
2238  * We can't necessarily reread the catalog rows right away; we might be
2239  * in a failed transaction when we receive the SI notification. If so,
2240  * RelationClearRelation just marks the entry as invalid by setting
2241  * rd_isvalid to false. This routine is called to fix the entry when it
2242  * is next needed.
2243  *
2244  * We assume that at the time we are called, we have at least AccessShareLock
2245  * on the target index. (Note: in the calls from RelationClearRelation,
2246  * this is legitimate because we know the rel has positive refcount.)
2247  *
2248  * If the target index is an index on pg_class or pg_index, we'd better have
2249  * previously gotten at least AccessShareLock on its underlying catalog,
2250  * else we are at risk of deadlock against someone trying to exclusive-lock
2251  * the heap and index in that order. This is ensured in current usage by
2252  * only applying this to indexes being opened or having positive refcount.
 *
 * On success the entry is marked valid again (rd_isvalid = true). A missing
 * pg_class row, or a missing pg_index row for a non-system index, is
 * reported with elog(ERROR).
2253  */
2254 static void
2256 {
2257  bool indexOK;
2258  HeapTuple pg_class_tuple;
2259  Form_pg_class relp;
2260 
2261  /* Should be called only for invalidated, live indexes */
2262  Assert((relation->rd_rel->relkind == RELKIND_INDEX ||
2263  relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) &&
2264  !relation->rd_isvalid &&
2266 
2267  /* Ensure it's closed at smgr level */
2268  RelationCloseSmgr(relation);
2269 
2270  /* Must free any AM cached data upon relcache flush */
2271  if (relation->rd_amcache)
2272  pfree(relation->rd_amcache);
2273  relation->rd_amcache = NULL;
2274 
2275  /*
2276  * If it's a shared index, we might be called before backend startup has
2277  * finished selecting a database, in which case we have no way to read
2278  * pg_class yet. However, a shared index can never have any significant
2279  * schema updates, so it's okay to ignore the invalidation signal. Just
2280  * mark it valid and return without doing anything more.
2281  */
2282  if (relation->rd_rel->relisshared && !criticalRelcachesBuilt)
2283  {
2284  relation->rd_isvalid = true;
2285  return;
2286  }
2287 
2288  /*
2289  * Read the pg_class row
2290  *
2291  * Don't try to use an indexscan of pg_class_oid_index to reload the info
2292  * for pg_class_oid_index ...
2293  */
2294  indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
2295  pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK, false);
2296  if (!HeapTupleIsValid(pg_class_tuple))
2297  elog(ERROR, "could not find pg_class tuple for index %u",
2298  RelationGetRelid(relation));
2299  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2300  memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE); /* overwrite cached pg_class row in place */
2301  /* Reload reloptions in case they changed, freeing the old copy first */
2302  if (relation->rd_options)
2303  pfree(relation->rd_options);
2304  RelationParseRelOptions(relation, pg_class_tuple);
2305  /* done with pg_class tuple */
2306  heap_freetuple(pg_class_tuple);
2307  /* We must recalculate physical address in case it changed */
2308  RelationInitPhysicalAddr(relation);
2309 
2310  /*
2311  * For a non-system index, there are fields of the pg_index row that are
2312  * allowed to change, so re-read that row and update the relcache entry.
2313  * Most of the info derived from pg_index (such as support function lookup
2314  * info) cannot change, and indeed the whole point of this routine is to
2315  * update the relcache entry without clobbering that data; so wholesale
2316  * replacement is not appropriate.
2317  */
2318  if (!IsSystemRelation(relation))
2319  {
2320  HeapTuple tuple;
2322 
2323  tuple = SearchSysCache1(INDEXRELID,
2324  ObjectIdGetDatum(RelationGetRelid(relation)));
2325  if (!HeapTupleIsValid(tuple))
2326  elog(ERROR, "cache lookup failed for index %u",
2327  RelationGetRelid(relation));
2328  index = (Form_pg_index) GETSTRUCT(tuple);
2329 
2330  /*
2331  * Basically, let's just copy all the bool fields. There are one or
2332  * two of these that can't actually change in the current code, but
2333  * it's not worth it to track exactly which ones they are. None of
2334  * the array fields are allowed to change, though.
2335  */
2336  relation->rd_index->indisunique = index->indisunique;
2337  relation->rd_index->indnullsnotdistinct = index->indnullsnotdistinct;
2338  relation->rd_index->indisprimary = index->indisprimary;
2339  relation->rd_index->indisexclusion = index->indisexclusion;
2340  relation->rd_index->indimmediate = index->indimmediate;
2341  relation->rd_index->indisclustered = index->indisclustered;
2342  relation->rd_index->indisvalid = index->indisvalid;
2343  relation->rd_index->indcheckxmin = index->indcheckxmin;
2344  relation->rd_index->indisready = index->indisready;
2345  relation->rd_index->indislive = index->indislive;
2346  relation->rd_index->indisreplident = index->indisreplident;
2347 
2348  /* Copy xmin too, as that is needed to make sense of indcheckxmin */
2350  HeapTupleHeaderGetXmin(tuple->t_data));
2351 
2352  ReleaseSysCache(tuple);
2353  }
2354 
2355  /* Okay, now it's valid again */
2356  relation->rd_isvalid = true;
2357 }
2358 
2359 /*
2360  * RelationReloadNailed - reload minimal information for nailed relations.
2361  *
2362  * The structure of a nailed relation can never change (which is good, because
2363  * we rely on knowing their structure to be able to read catalog content). But
2364  * some parts, e.g. pg_class.relfrozenxid, are still important to have
2365  * accurate content for. Therefore those need to be reloaded after the arrival
2366  * of invalidations.
 *
 * The entry is always flagged invalid first; it is only reloaded right away
 * when we are in a transaction and the relation is open beyond its nailed
 * reference.
2367  */
2368 static void
2370 {
2371  Assert(relation->rd_isnailed);
2372 
2373  /*
2374  * Redo RelationInitPhysicalAddr in case it is a mapped relation whose
2375  * mapping changed.
2376  */
2377  RelationInitPhysicalAddr(relation);
2378 
2379  /* flag as needing to be revalidated */
2380  relation->rd_isvalid = false;
2381 
2382  /*
2383  * Can only reread catalog contents if in a transaction. If the relation
2384  * is currently open (not counting the nailed refcount), do so
2385  * immediately. Otherwise we've already marked the entry as possibly
2386  * invalid, and it'll be fixed when next opened.
2387  */
2388  if (!IsTransactionState() || relation->rd_refcnt <= 1)
2389  return;
2390 
2391  if (relation->rd_rel->relkind == RELKIND_INDEX)
2392  {
2393  /*
2394  * If it's a nailed-but-not-mapped index, then we need to re-read the
2395  * pg_class row to see if its relfilenumber changed.
2396  */
2397  RelationReloadIndexInfo(relation);
2398  }
2399  else
2400  {
2401  /*
2402  * Reload a non-index entry. We can't easily do so if relcaches
2403  * aren't yet built, but that's fine because at that stage the
2404  * attributes that need to be current (like relfrozenxid) aren't yet
2405  * accessed. To ensure the entry will later be revalidated, we leave
2406  * it in invalid state, but allow use (cf. RelationIdGetRelation()).
2407  */
2409  {
2410  HeapTuple pg_class_tuple;
2411  Form_pg_class relp;
2412 
2413  /*
2414  * NB: Mark the entry as valid before starting to scan, to avoid
2415  * self-recursion when re-building pg_class.
2416  */
2417  relation->rd_isvalid = true;
2418 
2419  pg_class_tuple = ScanPgRelation(RelationGetRelid(relation),
2420  true, false);
2421  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple); /* NOTE(review): no HeapTupleIsValid() check before use, unlike RelationReloadIndexInfo */
2422  memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2423  heap_freetuple(pg_class_tuple);
2424 
2425  /*
2426  * Again mark as valid, to protect against concurrently arriving
2427  * invalidations.
2428  */
2429  relation->rd_isvalid = true;
2430  }
2431  }
2432 }
2433 
2434 /*
2435  * RelationDestroyRelation
2436  *
2437  * Physically delete a relation cache entry and all subsidiary data.
2438  * Caller must already have unhooked the entry from the hash table.
 *
 * If remember_tupdesc is true and this drops the last reference to the
 * entry's tuple descriptor, the descriptor is not freed immediately; see
 * the comment on that branch below.
2439  */
2440 static void
2441 RelationDestroyRelation(Relation relation, bool remember_tupdesc)
2442 {
2444 
2445  /*
2446  * Make sure smgr and lower levels close the relation's files, if they
2447  * weren't closed already. (This was probably done by caller, but let's
2448  * just be real sure.)
2449  */
2450  RelationCloseSmgr(relation);
2451 
2452  /* break mutual link with stats entry */
2453  pgstat_unlink_relation(relation);
2454 
2455  /*
2456  * Free all the subsidiary data structures of the relcache entry, then the
2457  * entry itself.
2458  */
2459  if (relation->rd_rel)
2460  pfree(relation->rd_rel);
2461  /* can't use DecrTupleDescRefCount here */
2462  Assert(relation->rd_att->tdrefcount > 0);
2463  if (--relation->rd_att->tdrefcount == 0)
2464  {
2465  /*
2466  * If we rebuilt a relcache entry during a transaction then it's
2467  * possible we did that because the TupDesc changed as the result of
2468  * an ALTER TABLE that ran at less than AccessExclusiveLock. It's
2469  * possible someone copied that TupDesc, in which case the copy would
2470  * point to freed memory. So if we rebuild an entry we keep the
2471  * TupDesc around until end of transaction, to be safe.
2472  */
2473  if (remember_tupdesc)
2475  else
2476  FreeTupleDesc(relation->rd_att);
2477  }
2478  FreeTriggerDesc(relation->trigdesc);
2479  list_free_deep(relation->rd_fkeylist);
2480  list_free(relation->rd_indexlist);
2481  list_free(relation->rd_statlist);
2482  bms_free(relation->rd_keyattr);
2483  bms_free(relation->rd_pkattr);
2484  bms_free(relation->rd_idattr);
2485  bms_free(relation->rd_hotblockingattr);
2486  bms_free(relation->rd_summarizedattr);
2487  if (relation->rd_pubdesc)
2488  pfree(relation->rd_pubdesc);
2489  if (relation->rd_options)
2490  pfree(relation->rd_options);
2491  if (relation->rd_indextuple)
2492  pfree(relation->rd_indextuple);
2493  if (relation->rd_amcache)
2494  pfree(relation->rd_amcache);
2495  if (relation->rd_fdwroutine)
2496  pfree(relation->rd_fdwroutine);
/* The remaining substructures live in their own memory contexts */
2497  if (relation->rd_indexcxt)
2498  MemoryContextDelete(relation->rd_indexcxt);
2499  if (relation->rd_rulescxt)
2500  MemoryContextDelete(relation->rd_rulescxt);
2501  if (relation->rd_rsdesc)
2502  MemoryContextDelete(relation->rd_rsdesc->rscxt);
2503  if (relation->rd_partkeycxt)
2505  if (relation->rd_pdcxt)
2506  MemoryContextDelete(relation->rd_pdcxt);
2507  if (relation->rd_pddcxt)
2508  MemoryContextDelete(relation->rd_pddcxt);
2509  if (relation->rd_partcheckcxt)
2511  pfree(relation);
2512 }
2513 
2514 /*
2515  * RelationInvalidateRelation - mark a relation cache entry as invalid
2516  *
2517  * An entry that's marked as invalid will be reloaded on next access. The
 * smgr-level files are closed and any AM cache (rd_amcache) is freed too.
2518  */
2519 static void
2521 {
2522  /*
2523  * Make sure smgr and lower levels close the relation's files, if they
2524  * weren't closed already. If the relation is not getting deleted, the
2525  * next smgr access should reopen the files automatically. This ensures
2526  * that the low-level file access state is updated after, say, a vacuum
2527  * truncation.
2528  */
2529  RelationCloseSmgr(relation);
2530 
2531  /* Free AM cached data, if any */
2532  if (relation->rd_amcache)
2533  pfree(relation->rd_amcache);
2534  relation->rd_amcache = NULL;
2535 
2536  relation->rd_isvalid = false;
2537 }
2538 
2539 /*
2540  * RelationClearRelation
2541  *
2542  * Physically blow away a relation cache entry, or reset it and rebuild
2543  * it from scratch (that is, from catalog entries). The latter path is
2544  * used when we are notified of a change to an open relation (one with
2545  * refcount > 0).
2546  *
2547  * NB: when rebuilding, we'd better hold some lock on the relation,
2548  * else the catalog data we need to read could be changing under us.
2549  * Also, a rel to be rebuilt had better have refcnt > 0. This is because
2550  * a sinval reset could happen while we're accessing the catalogs, and
2551  * the rel would get blown away underneath us by RelationCacheInvalidate
2552  * if it has zero refcnt.
2553  *
2554  * The "rebuild" parameter is redundant in current usage because it has
2555  * to match the relation's refcnt status, but we keep it as a crosscheck
2556  * that we're doing what the caller expects.
 *
 * Nailed entries, entries marked dropped, and open indexes with valid
 * support info are handled by early-exit special cases before the general
 * destroy-or-rebuild logic is reached.
2557  */
2558 static void
2559 RelationClearRelation(Relation relation, bool rebuild)
2560 {
2561  /*
2562  * As per notes above, a rel to be rebuilt MUST have refcnt > 0; while of
2563  * course it would be an equally bad idea to blow away one with nonzero
2564  * refcnt, since that would leave someone somewhere with a dangling
2565  * pointer. All callers are expected to have verified that this holds.
2566  */
2567  Assert(rebuild ?
2568  !RelationHasReferenceCountZero(relation) :
2569  RelationHasReferenceCountZero(relation));
2570 
2571  /*
2572  * Make sure smgr and lower levels close the relation's files, if they
2573  * weren't closed already. If the relation is not getting deleted, the
2574  * next smgr access should reopen the files automatically. This ensures
2575  * that the low-level file access state is updated after, say, a vacuum
2576  * truncation.
2577  */
2578  RelationCloseSmgr(relation);
2579 
2580  /* Free AM cached data, if any */
2581  if (relation->rd_amcache)
2582  pfree(relation->rd_amcache);
2583  relation->rd_amcache = NULL;
2584 
2585  /*
2586  * Treat nailed-in system relations separately, they always need to be
2587  * accessible, so we can't blow them away.
2588  */
2589  if (relation->rd_isnailed)
2590  {
2591  RelationReloadNailed(relation);
2592  return;
2593  }
2594 
2595  /* Mark it invalid until we've finished rebuild */
2596  relation->rd_isvalid = false;
2597 
2598  /* Entry is marked dropped: leave it invalid. See RelationForgetRelation(). */
2599  if (relation->rd_droppedSubid != InvalidSubTransactionId)
2600  return;
2601 
2602  /*
2603  * Even non-system indexes should not be blown away if they are open and
2604  * have valid index support information. This avoids problems with active
2605  * use of the index support information. As with nailed indexes, we
2606  * re-read the pg_class row to handle possible physical relocation of the
2607  * index, and we check for pg_index updates too.
2608  */
2609  if ((relation->rd_rel->relkind == RELKIND_INDEX ||
2610  relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) &&
2611  relation->rd_refcnt > 0 &&
2612  relation->rd_indexcxt != NULL)
2613  {
2614  if (IsTransactionState())
2615  RelationReloadIndexInfo(relation);
2616  return;
2617  }
2618 
2619  /*
2620  * If we're really done with the relcache entry, blow it away. But if
2621  * someone is still using it, reconstruct the whole deal without moving
2622  * the physical RelationData record (so that the someone's pointer is
2623  * still valid).
2624  */
2625  if (!rebuild)
2626  {
2627  /* Remove it from the hash table */
2628  RelationCacheDelete(relation);
2629 
2630  /* And release storage */
2631  RelationDestroyRelation(relation, false);
2632  }
2633  else if (!IsTransactionState())
2634  {
2635  /*
2636  * If we're not inside a valid transaction, we can't do any catalog
2637  * access so it's not possible to rebuild yet. Just exit, leaving
2638  * rd_isvalid = false so that the rebuild will occur when the entry is
2639  * next opened.
2640  *
2641  * Note: it's possible that we come here during subtransaction abort,
2642  * and the reason for wanting to rebuild is that the rel is open in
2643  * the outer transaction. In that case it might seem unsafe to not
2644  * rebuild immediately, since whatever code has the rel already open
2645  * will keep on using the relcache entry as-is. However, in such a
2646  * case the outer transaction should be holding a lock that's
2647  * sufficient to prevent any significant change in the rel's schema,
2648  * so the existing entry contents should be good enough for its
2649  * purposes; at worst we might be behind on statistics updates or the
2650  * like. (See also CheckTableNotInUse() and its callers.) These same
2651  * remarks also apply to the cases above where we exit without having
2652  * done RelationReloadIndexInfo() yet.
2653  */
2654  return;
2655  }
2656  else
2657  {
2658  /*
2659  * Our strategy for rebuilding an open relcache entry is to build a
2660  * new entry from scratch, swap its contents with the old entry, and
2661  * finally delete the new entry (along with any infrastructure swapped
2662  * over from the old entry). This is to avoid trouble in case an
2663  * error causes us to lose control partway through. The old entry
2664  * will still be marked !rd_isvalid, so we'll try to rebuild it again
2665  * on next access. Meanwhile it's not any less valid than it was
2666  * before, so any code that might expect to continue accessing it
2667  * isn't hurt by the rebuild failure. (Consider for example a
2668  * subtransaction that ALTERs a table and then gets canceled partway
2669  * through the cache entry rebuild. The outer transaction should
2670  * still see the not-modified cache entry as valid.) The worst
2671  * consequence of an error is leaking the necessarily-unreferenced new
2672  * entry, and this shouldn't happen often enough for that to be a big
2673  * problem.
2674  *
2675  * When rebuilding an open relcache entry, we must preserve ref count,
2676  * rd_*Subid, and rd_toastoid state. Also attempt to preserve the
2677  * pg_class entry (rd_rel), tupledesc, rewrite-rule, partition key,
2678  * and partition descriptor substructures in place, because various
2679  * places assume that these structures won't move while they are
2680  * working with an open relcache entry. (Note: the refcount
2681  * mechanism for tupledescs might someday allow us to remove this hack
2682  * for the tupledesc.)
2683  *
2684  * Note that this process does not touch CurrentResourceOwner; which
2685  * is good because whatever ref counts the entry may have do not
2686  * necessarily belong to that resource owner.
2687  */
2688  Relation newrel;
2689  Oid save_relid = RelationGetRelid(relation);
2690  bool keep_tupdesc;
2691  bool keep_rules;
2692  bool keep_policies;
2693  bool keep_partkey;
2694 
2695  /* Build temporary entry, but don't link it into hashtable */
2696  newrel = RelationBuildDesc(save_relid, false);
2697 
2698  /*
2699  * Between here and the end of the swap, don't add code that does or
2700  * reasonably could read system catalogs. That range must be free
2701  * from invalidation processing. See RelationBuildDesc() manipulation
2702  * of in_progress_list.
2703  */
2704 
2705  if (newrel == NULL)
2706  {
2707  /*
2708  * We can validly get here, if we're using a historic snapshot in
2709  * which a relation, accessed from outside logical decoding, is
2710  * still invisible. In that case it's fine to just mark the
2711  * relation as invalid and return - it'll fully get reloaded by
2712  * the cache reset at the end of logical decoding (or at the next
2713  * access). During normal processing we don't want to ignore this
2714  * case as it shouldn't happen there, as explained below.
2715  */
2716  if (HistoricSnapshotActive())
2717  return;
2718 
2719  /*
2720  * This shouldn't happen as dropping a relation is intended to be
2721  * impossible if still referenced (cf. CheckTableNotInUse()). But
2722  * if we get here anyway, we can't just delete the relcache entry,
2723  * as it possibly could get accessed later (as e.g. the error
2724  * might get trapped and handled via a subtransaction rollback).
2725  */
2726  elog(ERROR, "relation %u deleted while still in use", save_relid);
2727  }
2728 
2729  /*
2730  * If we were to, again, have cases of the relkind of a relcache entry
2731  * changing, we would need to ensure that pgstats does not get
2732  * confused.
2733  */
2734  Assert(relation->rd_rel->relkind == newrel->rd_rel->relkind);
2735 
/* Decide which old substructures can be kept in place */
2736  keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
2737  keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
2738  keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc);
2739  /* partkey is immutable once set up, so we can always keep it */
2740  keep_partkey = (relation->rd_partkey != NULL);
2741 
2742  /*
2743  * Perform swapping of the relcache entry contents. Within this
2744  * process the old entry is momentarily invalid, so there *must* be no
2745  * possibility of CHECK_FOR_INTERRUPTS within this sequence. Do it in
2746  * all-in-line code for safety.
2747  *
2748  * Since the vast majority of fields should be swapped, our method is
2749  * to swap the whole structures and then re-swap those few fields we
2750  * didn't want swapped.
2751  */
2752 #define SWAPFIELD(fldtype, fldname) \
2753  do { \
2754  fldtype _tmp = newrel->fldname; \
2755  newrel->fldname = relation->fldname; \
2756  relation->fldname = _tmp; \
2757  } while (0)
2758 
2759  /* swap all Relation struct fields */
2760  {
2761  RelationData tmpstruct;
2762 
2763  memcpy(&tmpstruct, newrel, sizeof(RelationData));
2764  memcpy(newrel, relation, sizeof(RelationData));
2765  memcpy(relation, &tmpstruct, sizeof(RelationData));
2766  }
2767 
2768  /* rd_smgr must not be swapped, due to back-links from smgr level */
2769  SWAPFIELD(SMgrRelation, rd_smgr);
2770  /* rd_refcnt must be preserved */
2771  SWAPFIELD(int, rd_refcnt);
2772  /* isnailed shouldn't change */
2773  Assert(newrel->rd_isnailed == relation->rd_isnailed);
2774  /* creation sub-XIDs must be preserved */
2775  SWAPFIELD(SubTransactionId, rd_createSubid);
2776  SWAPFIELD(SubTransactionId, rd_newRelfilelocatorSubid);
2777  SWAPFIELD(SubTransactionId, rd_firstRelfilelocatorSubid);
2778  SWAPFIELD(SubTransactionId, rd_droppedSubid);
2779  /* un-swap rd_rel pointers, swap contents instead */
2780  SWAPFIELD(Form_pg_class, rd_rel);
2781  /* ... but actually, we don't have to update newrel->rd_rel */
2782  memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
2783  /* preserve old tupledesc, rules, policies if no logical change */
2784  if (keep_tupdesc)
2785  SWAPFIELD(TupleDesc, rd_att);
2786  if (keep_rules)
2787  {
2788  SWAPFIELD(RuleLock *, rd_rules);
2789  SWAPFIELD(MemoryContext, rd_rulescxt);
2790  }
2791  if (keep_policies)
2792  SWAPFIELD(RowSecurityDesc *, rd_rsdesc);
2793  /* toast OID override must be preserved */
2794  SWAPFIELD(Oid, rd_toastoid);
2795  /* pgstat_info / enabled must be preserved */
2796  SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
2797  SWAPFIELD(bool, pgstat_enabled);
2798  /* preserve old partition key if we have one */
2799  if (keep_partkey)
2800  {
2801  SWAPFIELD(PartitionKey, rd_partkey);
2802  SWAPFIELD(MemoryContext, rd_partkeycxt);
2803  }
2804  if (newrel->rd_pdcxt != NULL || newrel->rd_pddcxt != NULL)
2805  {
2806  /*
2807  * We are rebuilding a partitioned relation with a non-zero
2808  * reference count, so we must keep the old partition descriptor
2809  * around, in case there's a PartitionDirectory with a pointer to
2810  * it. This means we can't free the old rd_pdcxt yet. (This is
2811  * necessary because RelationGetPartitionDesc hands out direct
2812  * pointers to the relcache's data structure, unlike our usual
2813  * practice which is to hand out copies. We'd have the same
2814  * problem with rd_partkey, except that we always preserve that
2815  * once created.)
2816  *
2817  * To ensure that it's not leaked completely, re-attach it to the
2818  * new reldesc, or make it a child of the new reldesc's rd_pdcxt
2819  * in the unlikely event that there is one already. (Compare hack
2820  * in RelationBuildPartitionDesc.) RelationClose will clean up
2821  * any such contexts once the reference count reaches zero.
2822  *
2823  * In the case where the reference count is zero, this code is not
2824  * reached, which should be OK because in that case there should
2825  * be no PartitionDirectory with a pointer to the old entry.
2826  *
2827  * Note that newrel and relation have already been swapped, so the
2828  * "old" partition descriptor is actually the one hanging off of
2829  * newrel.
2830  */
2831  relation->rd_partdesc = NULL; /* ensure rd_partdesc is invalid */
2832  relation->rd_partdesc_nodetached = NULL;
2834  if (relation->rd_pdcxt != NULL) /* probably never happens */
2835  MemoryContextSetParent(newrel->rd_pdcxt, relation->rd_pdcxt);
2836  else
2837  relation->rd_pdcxt = newrel->rd_pdcxt;
2838  if (relation->rd_pddcxt != NULL)
2839  MemoryContextSetParent(newrel->rd_pddcxt, relation->rd_pddcxt);
2840  else
2841  relation->rd_pddcxt = newrel->rd_pddcxt;
2842  /* drop newrel's pointers so we don't destroy it below */
2843  newrel->rd_partdesc = NULL;
2844  newrel->rd_partdesc_nodetached = NULL;
2846  newrel->rd_pdcxt = NULL;
2847  newrel->rd_pddcxt = NULL;
2848  }
2849 
2850 #undef SWAPFIELD
2851 
2852  /* And now we can throw away the temporary entry */
2853  RelationDestroyRelation(newrel, !keep_tupdesc); /* if the tupdesc changed, newrel now carries the old one; ask for it to be remembered */
2854  }
2855 }
2856 
2857 /*
2858  * RelationFlushRelation
2859  *
2860  * Rebuild the relation if it is open (refcount > 0), else blow it away.
2861  * This is used when we receive a cache invalidation event for the rel.
2862  */
2863 static void
2865 {
2866  if (relation->rd_createSubid != InvalidSubTransactionId ||
2868  {
2869  /*
2870  * New relcache entries are always rebuilt, not flushed; else we'd
2871  * forget the "new" status of the relation. Ditto for the
2872  * new-relfilenumber status.
2873  */
2875  {
2876  /*
2877  * The rel could have zero refcnt here, so temporarily increment
2878  * the refcnt to ensure it's safe to rebuild it. We can assume
2879  * that the current transaction has some lock on the rel already.
2880  */
2882  RelationClearRelation(relation, true);
2884  }
2885  else
2886  {
2887  /*
2888  * During abort processing, the current resource owner is not
2889  * valid and we cannot hold a refcnt. Without a valid
2890  * transaction, RelationClearRelation() would just mark the rel as
2891  * invalid anyway, so we can do the same directly.
2892  */
2893  RelationInvalidateRelation(relation);
2894  }
2895  }
2896  else
2897  {
2898  /*
2899  * Pre-existing rels can be dropped from the relcache if not open.
2900  */
2901  bool rebuild = !RelationHasReferenceCountZero(relation);
2902 
2903  RelationClearRelation(relation, rebuild);
2904  }
2905 }
2906 
2907 /*
2908  * RelationForgetRelation - caller reports that it dropped the relation
2909  */
2910 void
2912 {
2913  Relation relation;
2914 
2915  RelationIdCacheLookup(rid, relation);
2916 
2917  if (!PointerIsValid(relation))
2918  return; /* not in cache, nothing to do */
2919 
2920  if (!RelationHasReferenceCountZero(relation))
2921  elog(ERROR, "relation %u is still open", rid);
2922 
2924  if (relation->rd_createSubid != InvalidSubTransactionId ||
2926  {
2927  /*
2928  * In the event of subtransaction rollback, we must not forget
2929  * rd_*Subid. Mark the entry "dropped" so RelationClearRelation()
2930  * invalidates it in lieu of destroying it. (If we're in a top
2931  * transaction, we could opt to destroy the entry.)
2932  */
2934  }
2935 
2936  RelationClearRelation(relation, false);
2937 }
2938 
2939 /*
2940  * RelationCacheInvalidateEntry
2941  *
2942  * This routine is invoked for SI cache flush messages.
2943  *
2944  * Any relcache entry matching the relid must be flushed. (Note: caller has
2945  * already determined that the relid belongs to our database or is a shared
2946  * relation.)
2947  *
2948  * We used to skip local relations, on the grounds that they could
2949  * not be targets of cross-backend SI update messages; but it seems
2950  * safer to process them, so that our *own* SI update messages will
2951  * have the same effects during CommandCounterIncrement for both
2952  * local and nonlocal relations.
2953  */
2954 void
2956 {
2957  Relation relation;
2958 
2959  RelationIdCacheLookup(relationId, relation);
2960 
2961  if (PointerIsValid(relation))
2962  {
2964  RelationFlushRelation(relation);
2965  }
2966  else
2967  {
2968  int i;
2969 
2970  for (i = 0; i < in_progress_list_len; i++)
2971  if (in_progress_list[i].reloid == relationId)
2972  in_progress_list[i].invalidated = true;
2973  }
2974 }
2975 
2976 /*
2977  * RelationCacheInvalidate
2978  * Blow away cached relation descriptors that have zero reference counts,
2979  * and rebuild those with positive reference counts. Also reset the smgr
2980  * relation cache and re-read relation mapping data.
2981  *
2982  * Apart from debug_discard_caches, this is currently used only to recover
2983  * from SI message buffer overflow, so we do not touch relations having
2984  * new-in-transaction relfilenumbers; they cannot be targets of cross-backend
2985  * SI updates (and our own updates now go through a separate linked list
2986  * that isn't limited by the SI message buffer size).
2987  *
2988  * We do this in two phases: the first pass deletes deletable items, and
2989  * the second one rebuilds the rebuildable items. This is essential for
2990  * safety, because hash_seq_search only copes with concurrent deletion of
2991  * the element it is currently visiting. If a second SI overflow were to
2992  * occur while we are walking the table, resulting in recursive entry to
2993  * this routine, we could crash because the inner invocation blows away
2994  * the entry next to be visited by the outer scan. But this way is OK,
2995  * because (a) during the first pass we won't process any more SI messages,
2996  * so hash_seq_search will complete safely; (b) during the second pass we
2997  * only hold onto pointers to nondeletable entries.
2998  *
2999  * The two-phase approach also makes it easy to update relfilenumbers for
3000  * mapped relations before we do anything else, and to ensure that the
3001  * second pass processes nailed-in-cache items before other nondeletable
3002  * items. This should ensure that system catalogs are up to date before
3003  * we attempt to use them to reload information about other open relations.
3004  *
3005  * After those two phases of work having immediate effects, we normally
3006  * signal any RelationBuildDesc() on the stack to start over. However, we
3007  * don't do this if called as part of debug_discard_caches. Otherwise,
3008  * RelationBuildDesc() would become an infinite loop.
3009  */
3010 void
3011 RelationCacheInvalidate(bool debug_discard)
3012 {
3013  HASH_SEQ_STATUS status;
3014  RelIdCacheEnt *idhentry;
3015  Relation relation;
3016  List *rebuildFirstList = NIL;
3017  List *rebuildList = NIL;
3018  ListCell *l;
3019  int i;
3020 
3021  /*
3022  * Reload relation mapping data before starting to reconstruct cache.
3023  */
3025 
3026  /* Phase 1 */
3027  hash_seq_init(&status, RelationIdCache);
3028 
3029  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3030  {
3031  relation = idhentry->reldesc;
3032 
3033  /*
3034  * Ignore new relations; no other backend will manipulate them before
3035  * we commit. Likewise, before replacing a relation's relfilelocator,
3036  * we shall have acquired AccessExclusiveLock and drained any
3037  * applicable pending invalidations.
3038  */
3039  if (relation->rd_createSubid != InvalidSubTransactionId ||
3041  continue;
3042 
3044 
3045  if (RelationHasReferenceCountZero(relation))
3046  {
3047  /* Delete this entry immediately */
3048  Assert(!relation->rd_isnailed);
3049  RelationClearRelation(relation, false);
3050  }
3051  else
3052  {
3053  /*
3054  * If it's a mapped relation, immediately update its rd_locator in
3055  * case its relfilenumber changed. We must do this during phase 1
3056  * in case the relation is consulted during rebuild of other
3057  * relcache entries in phase 2. It's safe since consulting the
3058  * map doesn't involve any access to relcache entries.
3059  */
3060  if (RelationIsMapped(relation))
3061  {
3062  RelationCloseSmgr(relation);
3063  RelationInitPhysicalAddr(relation);
3064  }
3065 
3066  /*
3067  * Add this entry to list of stuff to rebuild in second pass.
3068  * pg_class goes to the front of rebuildFirstList while
3069  * pg_class_oid_index goes to the back of rebuildFirstList, so
3070  * they are done first and second respectively. Other nailed
3071  * relations go to the front of rebuildList, so they'll be done
3072  * next in no particular order; and everything else goes to the
3073  * back of rebuildList.
3074  */
3075  if (RelationGetRelid(relation) == RelationRelationId)
3076  rebuildFirstList = lcons(relation, rebuildFirstList);
3077  else if (RelationGetRelid(relation) == ClassOidIndexId)
3078  rebuildFirstList = lappend(rebuildFirstList, relation);
3079  else if (relation->rd_isnailed)
3080  rebuildList = lcons(relation, rebuildList);
3081  else
3082  rebuildList = lappend(rebuildList, relation);
3083  }
3084  }
3085 
3086  /*
3087  * We cannot destroy the SMgrRelations as there might still be references
3088  * to them, but close the underlying file descriptors.
3089  */
3090  smgrreleaseall();
3091 
3092  /* Phase 2: rebuild the items found to need rebuild in phase 1 */
3093  foreach(l, rebuildFirstList)
3094  {
3095  relation = (Relation) lfirst(l);
3096  RelationClearRelation(relation, true);
3097  }
3098  list_free(rebuildFirstList);
3099  foreach(l, rebuildList)
3100  {
3101  relation = (Relation) lfirst(l);
3102  RelationClearRelation(relation, true);
3103  }
3104  list_free(rebuildList);
3105 
3106  if (!debug_discard)
3107  /* Any RelationBuildDesc() on the stack must start over. */
3108  for (i = 0; i < in_progress_list_len; i++)
3109  in_progress_list[i].invalidated = true;
3110 }
3111 
3112 static void
3114 {
3115  if (EOXactTupleDescArray == NULL)
3116  {
3117  MemoryContext oldcxt;
3118 
3120 
3121  EOXactTupleDescArray = (TupleDesc *) palloc(16 * sizeof(TupleDesc));
3124  MemoryContextSwitchTo(oldcxt);
3125  }
3127  {
3128  int32 newlen = EOXactTupleDescArrayLen * 2;
3129 
3131 
3133  newlen * sizeof(TupleDesc));
3134  EOXactTupleDescArrayLen = newlen;
3135  }
3136 
3138 }
3139 
3140 #ifdef USE_ASSERT_CHECKING
3141 static void
3142 AssertPendingSyncConsistency(Relation relation)
3143 {
3144  bool relcache_verdict =
3145  RelationIsPermanent(relation) &&
3146  ((relation->rd_createSubid != InvalidSubTransactionId &&
3147  RELKIND_HAS_STORAGE(relation->rd_rel->relkind)) ||
3149 
3150  Assert(relcache_verdict == RelFileLocatorSkippingWAL(relation->rd_locator));
3151 
3152  if (relation->rd_droppedSubid != InvalidSubTransactionId)
3153  Assert(!relation->rd_isvalid &&
3154  (relation->rd_createSubid != InvalidSubTransactionId ||
3156 }
3157 
3158 /*
3159  * AssertPendingSyncs_RelationCache
3160  *
3161  * Assert that relcache.c and storage.c agree on whether to skip WAL.
3162  */
3163 void
3165 {
3166  HASH_SEQ_STATUS status;
3167  LOCALLOCK *locallock;
3168  Relation *rels;
3169  int maxrels;
3170  int nrels;
3171  RelIdCacheEnt *idhentry;
3172  int i;
3173 
3174  /*
3175  * Open every relation that this transaction has locked. If, for some
3176  * relation, storage.c is skipping WAL and relcache.c is not skipping WAL,
3177  * a CommandCounterIncrement() typically yields a local invalidation
3178  * message that destroys the relcache entry. By recreating such entries
3179  * here, we detect the problem.
3180  */
3182  maxrels = 1;
3183  rels = palloc(maxrels * sizeof(*rels));
3184  nrels = 0;
3185  hash_seq_init(&status, GetLockMethodLocalHash());
3186  while ((locallock = (LOCALLOCK *) hash_seq_search(&status)) != NULL)
3187  {
3188  Oid relid;
3189  Relation r;
3190 
3191  if (locallock->nLocks <= 0)
3192  continue;
3193  if ((LockTagType) locallock->tag.lock.locktag_type !=
3195  continue;
3196  relid = ObjectIdGetDatum(locallock->tag.lock.locktag_field2);
3197  r = RelationIdGetRelation(relid);
3198  if (!RelationIsValid(r))
3199  continue;
3200  if (nrels >= maxrels)
3201  {
3202  maxrels *= 2;
3203  rels = repalloc(rels, maxrels * sizeof(*rels));
3204  }
3205  rels[nrels++] = r;
3206  }
3207 
3208  hash_seq_init(&status, RelationIdCache);
3209  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3210  AssertPendingSyncConsistency(idhentry->reldesc);
3211 
3212  for (i = 0; i < nrels; i++)
3213  RelationClose(rels[i]);
3215 }
3216 #endif
3217 
3218 /*
3219  * AtEOXact_RelationCache
3220  *
3221  * Clean up the relcache at main-transaction commit or abort.
3222  *
3223  * Note: this must be called *before* processing invalidation messages.
3224  * In the case of abort, we don't want to try to rebuild any invalidated
3225  * cache entries (since we can't safely do database accesses). Therefore
3226  * we must reset refcnts before handling pending invalidations.
3227  *
3228  * As of PostgreSQL 8.1, relcache refcnts should get released by the
3229  * ResourceOwner mechanism. This routine just does a debugging
3230  * cross-check that no pins remain. However, we also need to do special
3231  * cleanup when the current transaction created any relations or made use
3232  * of forced index lists.
3233  */
3234 void
3236 {
3237  HASH_SEQ_STATUS status;
3238  RelIdCacheEnt *idhentry;
3239  int i;
3240 
3241  /*
3242  * Forget in_progress_list. This is relevant when we're aborting due to
3243  * an error during RelationBuildDesc().
3244  */
3245  Assert(in_progress_list_len == 0 || !isCommit);
3247 
3248  /*
3249  * Unless the eoxact_list[] overflowed, we only need to examine the rels
3250  * listed in it. Otherwise fall back on a hash_seq_search scan.
3251  *
3252  * For simplicity, eoxact_list[] entries are not deleted till end of
3253  * top-level transaction, even though we could remove them at
3254  * subtransaction end in some cases, or remove relations from the list if
3255  * they are cleared for other reasons. Therefore we should expect the
3256  * case that list entries are not found in the hashtable; if not, there's
3257  * nothing to do for them.
3258  */
3260  {
3261  hash_seq_init(&status, RelationIdCache);
3262  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3263  {
3264  AtEOXact_cleanup(idhentry->reldesc, isCommit);
3265  }
3266  }
3267  else
3268  {
3269  for (i = 0; i < eoxact_list_len; i++)
3270  {
3271  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
3272  &eoxact_list[i],
3273  HASH_FIND,
3274  NULL);
3275  if (idhentry != NULL)
3276  AtEOXact_cleanup(idhentry->reldesc, isCommit);
3277  }
3278  }
3279 
3280  if (EOXactTupleDescArrayLen > 0)
3281  {
3282  Assert(EOXactTupleDescArray != NULL);
3283  for (i = 0; i < NextEOXactTupleDescNum; i++)
3286  EOXactTupleDescArray = NULL;
3287  }
3288 
3289  /* Now we're out of the transaction and can clear the lists */
3290  eoxact_list_len = 0;
3291  eoxact_list_overflowed = false;
3294 }
3295 
3296 /*
3297  * AtEOXact_cleanup
3298  *
3299  * Clean up a single rel at main-transaction commit or abort
3300  *
3301  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
3302  * bother to prevent duplicate entries in eoxact_list[].
3303  */
3304 static void
3305 AtEOXact_cleanup(Relation relation, bool isCommit)
3306 {
3307  bool clear_relcache = false;
3308 
3309  /*
3310  * The relcache entry's ref count should be back to its normal
3311  * not-in-a-transaction state: 0 unless it's nailed in cache.
3312  *
3313  * In bootstrap mode, this is NOT true, so don't check it --- the
3314  * bootstrap code expects relations to stay open across start/commit
3315  * transaction calls. (That seems bogus, but it's not worth fixing.)
3316  *
3317  * Note: ideally this check would be applied to every relcache entry, not
3318  * just those that have eoxact work to do. But it's not worth forcing a
3319  * scan of the whole relcache just for this. (Moreover, doing so would
3320  * mean that assert-enabled testing never tests the hash_search code path
3321  * above, which seems a bad idea.)
3322  */
3323 #ifdef USE_ASSERT_CHECKING
3325  {
3326  int expected_refcnt;
3327 
3328  expected_refcnt = relation->rd_isnailed ? 1 : 0;
3329  Assert(relation->rd_refcnt == expected_refcnt);
3330  }
3331 #endif
3332 
3333  /*
3334  * Is the relation live after this transaction ends?
3335  *
3336  * During commit, clear the relcache entry if it is preserved after
3337  * relation drop, in order not to orphan the entry. During rollback,
3338  * clear the relcache entry if the relation is created in the current
3339  * transaction since it isn't interesting any longer once we are out of
3340  * the transaction.
3341  */
3342  clear_relcache =
3343  (isCommit ?
3346 
3347  /*
3348  * Since we are now out of the transaction, reset the subids to zero. That
3349  * also lets RelationClearRelation() drop the relcache entry.
3350  */
3355 
3356  if (clear_relcache)
3357  {
3358  if (RelationHasReferenceCountZero(relation))
3359  {
3360  RelationClearRelation(relation, false);
3361  return;
3362  }
3363  else
3364  {
3365  /*
3366  * Hmm, somewhere there's a (leaked?) reference to the relation.
3367  * We daren't remove the entry for fear of dereferencing a
3368  * dangling pointer later. Bleat, and mark it as not belonging to
3369  * the current transaction. Hopefully it'll get cleaned up
3370  * eventually. This must be just a WARNING to avoid
3371  * error-during-error-recovery loops.
3372  */
3373  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3374  RelationGetRelationName(relation));
3375  }
3376  }
3377 }
3378 
3379 /*
3380  * AtEOSubXact_RelationCache
3381  *
3382  * Clean up the relcache at sub-transaction commit or abort.
3383  *
3384  * Note: this must be called *before* processing invalidation messages.
3385  */
3386 void
3388  SubTransactionId parentSubid)
3389 {
3390  HASH_SEQ_STATUS status;
3391  RelIdCacheEnt *idhentry;
3392  int i;
3393 
3394  /*
3395  * Forget in_progress_list. This is relevant when we're aborting due to
3396  * an error during RelationBuildDesc(). We don't commit subtransactions
3397  * during RelationBuildDesc().
3398  */
3399  Assert(in_progress_list_len == 0 || !isCommit);
3401 
3402  /*
3403  * Unless the eoxact_list[] overflowed, we only need to examine the rels
3404  * listed in it. Otherwise fall back on a hash_seq_search scan. Same
3405  * logic as in AtEOXact_RelationCache.
3406  */
3408  {
3409  hash_seq_init(&status, RelationIdCache);
3410  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3411  {
3412  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3413  mySubid, parentSubid);
3414  }
3415  }
3416  else
3417  {
3418  for (i = 0; i < eoxact_list_len; i++)
3419  {
3420  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
3421  &eoxact_list[i],
3422  HASH_FIND,
3423  NULL);
3424  if (idhentry != NULL)
3425  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3426  mySubid, parentSubid);
3427  }
3428  }
3429 
3430  /* Don't reset the list; we still need more cleanup later */
3431 }
3432 
3433 /*
3434  * AtEOSubXact_cleanup
3435  *
3436  * Clean up a single rel at subtransaction commit or abort
3437  *
3438  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
3439  * bother to prevent duplicate entries in eoxact_list[].
3440  */
3441 static void
3442 AtEOSubXact_cleanup(Relation relation, bool isCommit,
3443  SubTransactionId mySubid, SubTransactionId parentSubid)
3444 {
3445  /*
3446  * Is it a relation created in the current subtransaction?
3447  *
3448  * During subcommit, mark it as belonging to the parent, instead, as long
3449  * as it has not been dropped. Otherwise simply delete the relcache entry.
3450  * --- it isn't interesting any longer.
3451  */
3452  if (relation->rd_createSubid == mySubid)
3453  {
3454  /*
3455  * Valid rd_droppedSubid means the corresponding relation is dropped
3456  * but the relcache entry is preserved for at-commit pending sync. We
3457  * need to drop it explicitly here not to make the entry orphan.
3458  */
3459  Assert(relation->rd_droppedSubid == mySubid ||
3461  if (isCommit && relation->rd_droppedSubid == InvalidSubTransactionId)
3462  relation->rd_createSubid = parentSubid;
3463  else if (RelationHasReferenceCountZero(relation))
3464  {
3465  /* allow the entry to be removed */
3470  RelationClearRelation(relation, false);
3471  return;
3472  }
3473  else
3474  {
3475  /*
3476  * Hmm, somewhere there's a (leaked?) reference to the relation.
3477  * We daren't remove the entry for fear of dereferencing a
3478  * dangling pointer later. Bleat, and transfer it to the parent
3479  * subtransaction so we can try again later. This must be just a
3480  * WARNING to avoid error-during-error-recovery loops.
3481  */
3482  relation->rd_createSubid = parentSubid;
3483  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3484  RelationGetRelationName(relation));
3485  }
3486  }
3487 
3488  /*
3489  * Likewise, update or drop any new-relfilenumber-in-subtransaction record
3490  * or drop record.
3491  */
3492  if (relation->rd_newRelfilelocatorSubid == mySubid)
3493  {
3494  if (isCommit)
3495  relation->rd_newRelfilelocatorSubid = parentSubid;
3496  else
3498  }
3499 
3500  if (relation->rd_firstRelfilelocatorSubid == mySubid)
3501  {
3502  if (isCommit)
3503  relation->rd_firstRelfilelocatorSubid = parentSubid;
3504  else
3506  }
3507 
3508  if (relation->rd_droppedSubid == mySubid)
3509  {
3510  if (isCommit)
3511  relation->rd_droppedSubid = parentSubid;
3512  else
3514  }
3515 }
3516 
3517 
3518 /*
3519  * RelationBuildLocalRelation
3520  * Build a relcache entry for an about-to-be-created relation,
3521  * and enter it into the relcache.
3522  */
3523 Relation
3525  Oid relnamespace,
3526  TupleDesc tupDesc,
3527  Oid relid,
3528  Oid accessmtd,
3529  RelFileNumber relfilenumber,
3530  Oid reltablespace,
3531  bool shared_relation,
3532  bool mapped_relation,
3533  char relpersistence,
3534  char relkind)
3535 {
3536  Relation rel;
3537  MemoryContext oldcxt;
3538  int natts = tupDesc->natts;
3539  int i;
3540  bool has_not_null;
3541  bool nailit;
3542 
3543  Assert(natts >= 0);
3544 
3545  /*
3546  * check for creation of a rel that must be nailed in cache.
3547  *
3548  * XXX this list had better match the relations specially handled in
3549  * RelationCacheInitializePhase2/3.
3550  */
3551  switch (relid)
3552  {
3553  case DatabaseRelationId:
3554  case AuthIdRelationId:
3555  case AuthMemRelationId:
3556  case RelationRelationId:
3557  case AttributeRelationId:
3558  case ProcedureRelationId:
3559  case TypeRelationId:
3560  nailit = true;
3561  break;
3562  default:
3563  nailit = false;
3564  break;
3565  }
3566 
3567  /*
3568  * check that hardwired list of shared rels matches what's in the
3569  * bootstrap .bki file. If you get a failure here during initdb, you
3570  * probably need to fix IsSharedRelation() to match whatever you've done
3571  * to the set of shared relations.
3572  */
3573  if (shared_relation != IsSharedRelation(relid))
3574  elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)",
3575  relname, relid);
3576 
3577  /* Shared relations had better be mapped, too */
3578  Assert(mapped_relation || !shared_relation);
3579 
3580  /*
3581  * switch to the cache context to create the relcache entry.
3582  */
3583  if (!CacheMemoryContext)
3585 
3587 
3588  /*
3589  * allocate a new relation descriptor and fill in basic state fields.
3590  */
3591  rel = (Relation) palloc0(sizeof(RelationData));
3592 
3593  /* make sure relation is marked as having no open file yet */
3594  rel->rd_smgr = NULL;
3595 
3596  /* mark it nailed if appropriate */
3597  rel->rd_isnailed = nailit;
3598 
3599  rel->rd_refcnt = nailit ? 1 : 0;
3600 
3601  /* it's being created in this transaction */
3606 
3607  /*
3608  * create a new tuple descriptor from the one passed in. We do this
3609  * partly to copy it into the cache context, and partly because the new
3610  * relation can't have any defaults or constraints yet; they have to be
3611  * added in later steps, because they require additions to multiple system
3612  * catalogs. We can copy attnotnull constraints here, however.
3613  */
3614  rel->rd_att = CreateTupleDescCopy(tupDesc);
3615  rel->rd_att->tdrefcount = 1; /* mark as refcounted */
3616  has_not_null = false;
3617  for (i = 0; i < natts; i++)
3618  {
3619  Form_pg_attribute satt = TupleDescAttr(tupDesc, i);
3620  Form_pg_attribute datt = TupleDescAttr(rel->rd_att, i);
3621 
3622  datt->attidentity = satt->attidentity;
3623  datt->attgenerated = satt->attgenerated;
3624  datt->attnotnull = satt->attnotnull;
3625  has_not_null |= satt->attnotnull;
3626  }
3627 
3628  if (has_not_null)
3629  {
3630  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
3631 
3632  constr->has_not_null = true;
3633  rel->rd_att->constr = constr;
3634  }
3635 
3636  /*
3637  * initialize relation tuple form (caller may add/override data later)
3638  */
3640 
3641  namestrcpy(&rel->rd_rel->relname, relname);
3642  rel->rd_rel->relnamespace = relnamespace;
3643 
3644  rel->rd_rel->relkind = relkind;
3645  rel->rd_rel->relnatts = natts;
3646  rel->rd_rel->reltype = InvalidOid;
3647  /* needed when bootstrapping: */
3648  rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;
3649 
3650  /* set up persistence and relcache fields dependent on it */
3651  rel->rd_rel->relpersistence = relpersistence;
3652  switch (relpersistence)
3653  {
3654  case RELPERSISTENCE_UNLOGGED:
3655  case RELPERSISTENCE_PERMANENT:
3657  rel->rd_islocaltemp = false;
3658  break;
3659  case RELPERSISTENCE_TEMP:
3660  Assert(isTempOrTempToastNamespace(relnamespace));
3662  rel->rd_islocaltemp = true;
3663  break;
3664  default:
3665  elog(ERROR, "invalid relpersistence: %c", relpersistence);
3666  break;
3667  }
3668 
3669  /* if it's a materialized view, it's not populated initially */
3670  if (relkind == RELKIND_MATVIEW)
3671  rel->rd_rel->relispopulated = false;
3672  else
3673  rel->rd_rel->relispopulated = true;
3674 
3675  /* set replica identity -- system catalogs and non-tables don't have one */
3676  if (!IsCatalogNamespace(relnamespace) &&
3677  (relkind == RELKIND_RELATION ||
3678  relkind == RELKIND_MATVIEW ||
3679  relkind == RELKIND_PARTITIONED_TABLE))
3680  rel->rd_rel->relreplident = REPLICA_IDENTITY_DEFAULT;
3681  else
3682  rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
3683 
3684  /*
3685  * Insert relation physical and logical identifiers (OIDs) into the right
3686  * places. For a mapped relation, we set relfilenumber to zero and rely
3687  * on RelationInitPhysicalAddr to consult the map.
3688  */
3689  rel->rd_rel->relisshared = shared_relation;
3690 
3691  RelationGetRelid(rel) = relid;
3692 
3693  for (i = 0; i < natts; i++)
3694  TupleDescAttr(rel->rd_att, i)->attrelid = relid;
3695 
3696  rel->rd_rel->reltablespace = reltablespace;
3697 
3698  if (mapped_relation)
3699  {
3700  rel->rd_rel->relfilenode = InvalidRelFileNumber;
3701  /* Add it to the active mapping information */
3702  RelationMapUpdateMap(relid, relfilenumber, shared_relation, true);
3703  }
3704  else
3705  rel->rd_rel->relfilenode = relfilenumber;
3706 
3707  RelationInitLockInfo(rel); /* see lmgr.c */
3708 
3710 
3711  rel->rd_rel->relam = accessmtd;
3712 
3713  /*
3714  * RelationInitTableAccessMethod will do syscache lookups, so we mustn't
3715  * run it in CacheMemoryContext. Fortunately, the remaining steps don't
3716  * require a long-lived current context.
3717  */
3718  MemoryContextSwitchTo(oldcxt);
3719 
3720  if (RELKIND_HAS_TABLE_AM(relkind) || relkind == RELKIND_SEQUENCE)
3722 
3723  /*
3724  * Okay to insert into the relcache hash table.
3725  *
3726  * Ordinarily, there should certainly not be an existing hash entry for
3727  * the same OID; but during bootstrap, when we create a "real" relcache
3728  * entry for one of the bootstrap relations, we'll be overwriting the
3729  * phony one created with formrdesc. So allow that to happen for nailed
3730  * rels.
3731  */
3732  RelationCacheInsert(rel, nailit);
3733 
3734  /*
3735  * Flag relation as needing eoxact cleanup (to clear rd_createSubid). We
3736  * can't do this before storing relid in it.
3737  */
3738  EOXactListAdd(rel);
3739 
3740  /* It's fully valid */
3741  rel->rd_isvalid = true;
3742 
3743  /*
3744  * Caller expects us to pin the returned entry.
3745  */
3747 
3748  return rel;
3749 }
3750 
3751 
3752 /*
3753  * RelationSetNewRelfilenumber
3754  *
3755  * Assign a new relfilenumber (physical file name), and possibly a new
3756  * persistence setting, to the relation.
3757  *
3758  * This allows a full rewrite of the relation to be done with transactional
3759  * safety (since the filenumber assignment can be rolled back). Note however
3760  * that there is no simple way to access the relation's old data for the
3761  * remainder of the current transaction. This limits the usefulness to cases
3762  * such as TRUNCATE or rebuilding an index from scratch.
3763  *
3764  * Caller must already hold exclusive lock on the relation.
3765  */
3766 void
3767 RelationSetNewRelfilenumber(Relation relation, char persistence)
3768 {
3769  RelFileNumber newrelfilenumber;
3770  Relation pg_class;
3771  ItemPointerData otid;
3772  HeapTuple tuple;
3773  Form_pg_class classform;
3774  MultiXactId minmulti = InvalidMultiXactId;
3775  TransactionId freezeXid = InvalidTransactionId;
3776  RelFileLocator newrlocator;
3777 
3778  if (!IsBinaryUpgrade)
3779  {
3780  /* Allocate a new relfilenumber */
3781  newrelfilenumber = GetNewRelFileNumber(relation->rd_rel->reltablespace,
3782  NULL, persistence);
3783  }
3784  else if (relation->rd_rel->relkind == RELKIND_INDEX)
3785  {
3787  ereport(ERROR,
3788  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3789  errmsg("index relfilenumber value not set when in binary upgrade mode")));
3790 
3793  }
3794  else if (relation->rd_rel->relkind == RELKIND_RELATION)
3795  {
3797  ereport(ERROR,
3798  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3799  errmsg("heap relfilenumber value not set when in binary upgrade mode")));
3800 
3803  }
3804  else
3805  ereport(ERROR,
3806  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3807  errmsg("unexpected request for new relfilenumber in binary upgrade mode")));
3808 
3809  /*
3810  * Get a writable copy of the pg_class tuple for the given relation.
3811  */
3812  pg_class = table_open(RelationRelationId, RowExclusiveLock);
3813 
3814  tuple = SearchSysCacheLockedCopy1(RELOID,
3815  ObjectIdGetDatum(RelationGetRelid(relation)));
3816  if (!HeapTupleIsValid(tuple))
3817  elog(ERROR, "could not find tuple for relation %u",
3818  RelationGetRelid(relation));
3819  otid = tuple->t_self;
3820  classform = (Form_pg_class) GETSTRUCT(tuple);
3821 
3822  /*
3823  * Schedule unlinking of the old storage at transaction commit, except
3824  * when performing a binary upgrade, when we must do it immediately.
3825  */
3826  if (IsBinaryUpgrade)
3827  {
3828  SMgrRelation srel;
3829 
3830  /*
3831  * During a binary upgrade, we use this code path to ensure that
3832  * pg_largeobject and its index have the same relfilenumbers as in the
3833  * old cluster. This is necessary because pg_upgrade treats
3834  * pg_largeobject like a user table, not a system table. It is however
3835  * possible that a table or index may need to end up with the same
3836  * relfilenumber in the new cluster as what it had in the old cluster.
3837  * Hence, we can't wait until commit time to remove the old storage.
3838  *
3839  * In general, this function needs to have transactional semantics,
3840  * and removing the old storage before commit time surely isn't.
3841  * However, it doesn't really matter, because if a binary upgrade
3842  * fails at this stage, the new cluster will need to be recreated
3843  * anyway.
3844  */
3845  srel = smgropen(relation->rd_locator, relation->rd_backend);
3846  smgrdounlinkall(&srel, 1, false);
3847  smgrclose(srel);
3848  }
3849  else
3850  {
3851  /* Not a binary upgrade, so just schedule it to happen later. */
3852  RelationDropStorage(relation);
3853  }
3854 
3855  /*
3856  * Create storage for the main fork of the new relfilenumber. If it's a
3857  * table-like object, call into the table AM to do so, which'll also
3858  * create the table's init fork if needed.
3859  *
3860  * NOTE: If relevant for the AM, any conflict in relfilenumber value will
3861  * be caught here, if GetNewRelFileNumber messes up for any reason.
3862  */
3863  newrlocator = relation->rd_locator;
3864  newrlocator.relNumber = newrelfilenumber;
3865 
3866  if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind))
3867  {
3868  table_relation_set_new_filelocator(relation, &newrlocator,
3869  persistence,
3870  &freezeXid, &minmulti);
3871  }
3872  else if (RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
3873  {
3874  /* handle these directly, at least for now */
3875  SMgrRelation srel;
3876 
3877  srel = RelationCreateStorage(newrlocator, persistence, true);
3878  smgrclose(srel);
3879  }
3880  else
3881  {
3882  /* we shouldn't be called for anything else */
3883  elog(ERROR, "relation \"%s\" does not have storage",
3884  RelationGetRelationName(relation));
3885  }
3886 
3887  /*
3888  * If we're dealing with a mapped index, pg_class.relfilenode doesn't
3889  * change; instead we have to send the update to the relation mapper.
3890  *
3891  * For mapped indexes, we don't actually change the pg_class entry at all;
3892  * this is essential when reindexing pg_class itself. That leaves us with
3893  * possibly-inaccurate values of relpages etc, but those will be fixed up
3894  * later.
3895  */
3896  if (RelationIsMapped(relation))
3897  {
3898  /* This case is only supported for indexes */
3899  Assert(relation->rd_rel->relkind == RELKIND_INDEX);
3900 
3901  /* Since we're not updating pg_class, these had better not change */
3902  Assert(classform->relfrozenxid == freezeXid);
3903  Assert(classform->relminmxid == minmulti);
3904  Assert(classform->relpersistence == persistence);
3905 
3906  /*
3907  * In some code paths it's possible that the tuple update we'd
3908  * otherwise do here is the only thing that would assign an XID for
3909  * the current transaction. However, we must have an XID to delete
3910  * files, so make sure one is assigned.
3911  */
3912  (void) GetCurrentTransactionId();
3913 
3914  /* Do the deed */
3916  newrelfilenumber,
3917  relation->rd_rel->relisshared,
3918  false);
3919 
3920  /* Since we're not updating pg_class, must trigger inval manually */
3921  CacheInvalidateRelcache(relation);
3922  }
3923  else
3924  {
3925  /* Normal case, update the pg_class entry */
3926  classform->relfilenode = newrelfilenumber;
3927 
3928  /* relpages etc. never change for sequences */
3929  if (relation->rd_rel->relkind != RELKIND_SEQUENCE)
3930  {
3931  classform->relpages = 0; /* it's empty until further notice */
3932  classform->reltuples = -1;
3933  classform->relallvisible = 0;
3934  }
3935  classform->relfrozenxid = freezeXid;
3936  classform->relminmxid = minmulti;
3937  classform->relpersistence = persistence;
3938 
3939  CatalogTupleUpdate(pg_class, &otid, tuple);
3940  }
3941 
3942  UnlockTuple(pg_class, &otid, InplaceUpdateTupleLock);
3943  heap_freetuple(tuple);
3944 
3945  table_close(pg_class, RowExclusiveLock);
3946 
3947  /*
3948  * Make the pg_class row change or relation map change visible. This will
3949  * cause the relcache entry to get updated, too.
3950  */
3952 
3954 }
3955 
3956 /*
3957  * RelationAssumeNewRelfilelocator
3958  *
3959  * Code that modifies pg_class.reltablespace or pg_class.relfilenode must call
3960  * this. The call shall precede any code that might insert WAL records whose
3961  * replay would modify bytes in the new RelFileLocator, and the call shall follow
3962  * any WAL modifying bytes in the prior RelFileLocator. See struct RelationData.
3963  * Ideally, call this as near as possible to the CommandCounterIncrement()
3964  * that makes the pg_class change visible (before it or after it); that
3965  * minimizes the chance of future development adding a forbidden WAL insertion
3966  * between RelationAssumeNewRelfilelocator() and CommandCounterIncrement().
3967  */
3968 void
3970 {
3974 
3975  /* Flag relation as needing eoxact cleanup (to clear these fields) */
3976  EOXactListAdd(relation);
3977 }
3978 
3979 
3980 /*
3981  * RelationCacheInitialize
3982  *
3983  * This initializes the relation descriptor cache. At the time
3984  * that this is invoked, we can't do database access yet (mainly
3985  * because the transaction subsystem is not up); all we are doing
3986  * is making an empty cache hashtable. This must be done before
3987  * starting the initialization transaction, because otherwise
3988  * AtEOXact_RelationCache would crash if that transaction aborts
3989  * before we can get the relcache set up.
3990  */
3991 
3992 #define INITRELCACHESIZE 400
3993 
3994 void
3996 {
3997  HASHCTL ctl;
3998  int allocsize;
3999 
4000  /*
4001  * make sure cache memory context exists
4002  */
4003  if (!CacheMemoryContext)
4005 
4006  /*
4007  * create hashtable that indexes the relcache
4008  */
4009  ctl.keysize = sizeof(Oid);
4010  ctl.entrysize = sizeof(RelIdCacheEnt);
4011  RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
4012  &ctl, HASH_ELEM | HASH_BLOBS);
4013 
4014  /*
4015  * reserve enough in_progress_list slots for many cases
4016  */
4017  allocsize = 4;
4020  allocsize * sizeof(*in_progress_list));
4021  in_progress_list_maxlen = allocsize;
4022 
4023  /*
4024  * relation mapper needs to be initialized too
4025  */
4027 }
4028 
4029 /*
4030  * RelationCacheInitializePhase2
4031  *
4032  * This is called to prepare for access to shared catalogs during startup.
4033  * We must at least set up nailed reldescs for pg_database, pg_authid,
4034  * pg_auth_members, and pg_shseclabel. Ideally we'd like to have reldescs
4035  * for their indexes, too. We attempt to load this information from the
4036  * shared relcache init file. If that's missing or broken, just make
4037  * phony entries for the catalogs themselves.
4038  * RelationCacheInitializePhase3 will clean up as needed.
4039  */
4040 void
4042 {
4043  MemoryContext oldcxt;
4044 
4045  /*
4046  * relation mapper needs initialized too
4047  */
4049 
4050  /*
4051  * In bootstrap mode, the shared catalogs aren't there yet anyway, so do
4052  * nothing.
4053  */
4055  return;
4056 
4057  /*
4058  * switch to cache memory context
4059  */
4061 
4062  /*
4063  * Try to load the shared relcache cache file. If unsuccessful, bootstrap
4064  * the cache with pre-made descriptors for the critical shared catalogs.
4065  */
4066  if (!load_relcache_init_file(true))
4067  {
4068  formrdesc("pg_database", DatabaseRelation_Rowtype_Id, true,
4069  Natts_pg_database, Desc_pg_database);
4070  formrdesc("pg_authid", AuthIdRelation_Rowtype_Id, true,
4071  Natts_pg_authid, Desc_pg_authid);
4072  formrdesc("pg_auth_members", AuthMemRelation_Rowtype_Id, true,
4073  Natts_pg_auth_members, Desc_pg_auth_members);
4074  formrdesc("pg_shseclabel", SharedSecLabelRelation_Rowtype_Id, true,
4075  Natts_pg_shseclabel, Desc_pg_shseclabel);
4076  formrdesc("pg_subscription", SubscriptionRelation_Rowtype_Id, true,
4077  Natts_pg_subscription, Desc_pg_subscription);
4078 
4079 #define NUM_CRITICAL_SHARED_RELS 5 /* fix if you change list above */
4080  }
4081 
4082  MemoryContextSwitchTo(oldcxt);
4083 }
4084 
4085 /*
4086  * RelationCacheInitializePhase3
4087  *
4088  * This is called as soon as the catcache and transaction system
4089  * are functional and we have determined MyDatabaseId. At this point
4090  * we can actually read data from the database's system catalogs.
4091  * We first try to read pre-computed relcache entries from the local
4092  * relcache init file. If that's missing or broken, make phony entries
4093  * for the minimum set of nailed-in-cache relations. Then (unless
4094  * bootstrapping) make sure we have entries for the critical system
4095  * indexes. Once we've done all this, we have enough infrastructure to
4096  * open any system catalog or use any catcache. The last step is to
4097  * rewrite the cache files if needed.
4098  */
4099 void
4101 {
4102  HASH_SEQ_STATUS status;
4103  RelIdCacheEnt *idhentry;
4104  MemoryContext oldcxt;
4105  bool needNewCacheFile = !criticalSharedRelcachesBuilt;
4106 
4107  /*
4108  * relation mapper needs initialized too
4109  */
4111 
4112  /*
4113  * switch to cache memory context
4114  */
4116 
4117  /*
4118  * Try to load the local relcache cache file. If unsuccessful, bootstrap
4119  * the cache with pre-made descriptors for the critical "nailed-in" system
4120  * catalogs.
4121  */
4122  if (IsBootstrapProcessingMode() ||
4123  !load_relcache_init_file(false))
4124  {
4125  needNewCacheFile = true;
4126 
4127  formrdesc("pg_class", RelationRelation_Rowtype_Id, false,
4128  Natts_pg_class, Desc_pg_class);
4129  formrdesc("pg_attribute", AttributeRelation_Rowtype_Id, false,
4130  Natts_pg_attribute, Desc_pg_attribute);
4131  formrdesc("pg_proc", ProcedureRelation_Rowtype_Id, false,
4132  Natts_pg_proc, Desc_pg_proc);
4133  formrdesc("pg_type", TypeRelation_Rowtype_Id, false,
4134  Natts_pg_type, Desc_pg_type);
4135 
4136 #define NUM_CRITICAL_LOCAL_RELS 4 /* fix if you change list above */
4137  }
4138 
4139  MemoryContextSwitchTo(oldcxt);
4140 
4141  /* In bootstrap mode, the faked-up formrdesc info is all we'll have */
4143  return;
4144 
4145  /*
4146  * If we didn't get the critical system indexes loaded into relcache, do
4147  * so now. These are critical because the catcache and/or opclass cache
4148  * depend on them for fetches done during relcache load. Thus, we have an
4149  * infinite-recursion problem. We can break the recursion by doing
4150  * heapscans instead of indexscans at certain key spots. To avoid hobbling
4151  * performance, we only want to do that until we have the critical indexes
4152  * loaded into relcache. Thus, the flag criticalRelcachesBuilt is used to
4153  * decide whether to do heapscan or indexscan at the key spots, and we set
4154  * it true after we've loaded the critical indexes.
4155  *
4156  * The critical indexes are marked as "nailed in cache", partly to make it
4157  * easy for load_relcache_init_file to count them, but mainly because we
4158  * cannot flush and rebuild them once we've set criticalRelcachesBuilt to
4159  * true. (NOTE: perhaps it would be possible to reload them by
4160  * temporarily setting criticalRelcachesBuilt to false again. For now,
4161  * though, we just nail 'em in.)
4162  *
4163  * RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical
4164  * in the same way as the others, because the critical catalogs don't
4165  * (currently) have any rules or triggers, and so these indexes can be
4166  * rebuilt without inducing recursion. However they are used during
4167  * relcache load when a rel does have rules or triggers, so we choose to
4168  * nail them for performance reasons.
4169  */
4171  {
4172  load_critical_index(ClassOidIndexId,
4173  RelationRelationId);
4174  load_critical_index(AttributeRelidNumIndexId,
4175  AttributeRelationId);
4176  load_critical_index(IndexRelidIndexId,
4177  IndexRelationId);
4178  load_critical_index(OpclassOidIndexId,
4179  OperatorClassRelationId);
4180  load_critical_index(AccessMethodProcedureIndexId,
4181  AccessMethodProcedureRelationId);
4182  load_critical_index(RewriteRelRulenameIndexId,
4183  RewriteRelationId);
4184  load_critical_index(TriggerRelidNameIndexId,
4185  TriggerRelationId);
4186 
4187 #define NUM_CRITICAL_LOCAL_INDEXES 7 /* fix if you change list above */
4188 
4189  criticalRelcachesBuilt = true;
4190  }
4191 
4192  /*
4193  * Process critical shared indexes too.
4194  *
4195  * DatabaseNameIndexId isn't critical for relcache loading, but rather for
4196  * initial lookup of MyDatabaseId, without which we'll never find any
4197  * non-shared catalogs at all. Autovacuum calls InitPostgres with a
4198  * database OID, so it instead depends on DatabaseOidIndexId. We also
4199  * need to nail up some indexes on pg_authid and pg_auth_members for use
4200  * during client authentication. SharedSecLabelObjectIndexId isn't
4201  * critical for the core system, but authentication hooks might be
4202  * interested in it.
4203  */
4205  {
4206  load_critical_index(DatabaseNameIndexId,
4207  DatabaseRelationId);
4208  load_critical_index(DatabaseOidIndexId,
4209  DatabaseRelationId);
4210  load_critical_index(AuthIdRolnameIndexId,
4211  AuthIdRelationId);
4212  load_critical_index(AuthIdOidIndexId,
4213  AuthIdRelationId);
4214  load_critical_index(AuthMemMemRoleIndexId,
4215  AuthMemRelationId);
4216  load_critical_index(SharedSecLabelObjectIndexId,
4217  SharedSecLabelRelationId);
4218 
4219 #define NUM_CRITICAL_SHARED_INDEXES 6 /* fix if you change list above */
4220 
4222  }
4223 
4224  /*
4225  * Now, scan all the relcache entries and update anything that might be
4226  * wrong in the results from formrdesc or the relcache cache file. If we
4227  * faked up relcache entries using formrdesc, then read the real pg_class
4228  * rows and replace the fake entries with them. Also, if any of the
4229  * relcache entries have rules, triggers, or security policies, load that
4230  * info the hard way since it isn't recorded in the cache file.
4231  *
4232  * Whenever we access the catalogs to read data, there is a possibility of
4233  * a shared-inval cache flush causing relcache entries to be removed.
4234  * Since hash_seq_search only guarantees to still work after the *current*
4235  * entry is removed, it's unsafe to continue the hashtable scan afterward.
4236  * We handle this by restarting the scan from scratch after each access.
4237  * This is theoretically O(N^2), but the number of entries that actually
4238  * need to be fixed is small enough that it doesn't matter.
4239  */
4240  hash_seq_init(&status, RelationIdCache);
4241 
4242  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
4243  {
4244  Relation relation = idhentry->reldesc;
4245  bool restart = false;
4246 
4247  /*
4248  * Make sure *this* entry doesn't get flushed while we work with it.
4249  */
4251 
4252  /*
4253  * If it's a faked-up entry, read the real pg_class tuple.
4254  */
4255  if (relation->rd_rel->relowner == InvalidOid)
4256  {
4257  HeapTuple htup;
4258  Form_pg_class relp;
4259 
4260  htup = SearchSysCache1(RELOID,
4261  ObjectIdGetDatum(RelationGetRelid(relation)));
4262  if (!HeapTupleIsValid(htup))
4263  ereport(FATAL,
4264  errcode(ERRCODE_UNDEFINED_OBJECT),
4265  errmsg_internal("cache lookup failed for relation %u",
4266  RelationGetRelid(relation)));
4267  relp = (Form_pg_class) GETSTRUCT(htup);
4268 
4269  /*
4270  * Copy tuple to relation->rd_rel. (See notes in
4271  * AllocateRelationDesc())
4272  */
4273  memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
4274 
4275  /* Update rd_options while we have the tuple */
4276  if (relation->rd_options)
4277  pfree(relation->rd_options);
4278  RelationParseRelOptions(relation, htup);
4279 
4280  /*
4281  * Check the values in rd_att were set up correctly. (We cannot
4282  * just copy them over now: formrdesc must have set up the rd_att
4283  * data correctly to start with, because it may already have been
4284  * copied into one or more catcache entries.)
4285  */
4286  Assert(relation->rd_att->tdtypeid == relp->reltype);
4287  Assert(relation->rd_att->tdtypmod == -1);
4288 
4289  ReleaseSysCache(htup);
4290 
4291  /* relowner had better be OK now, else we'll loop forever */
4292  if (relation->rd_rel->relowner == InvalidOid)
4293  elog(ERROR, "invalid relowner in pg_class entry for \"%s\"",
4294  RelationGetRelationName(relation));
4295 
4296  restart = true;
4297  }
4298 
4299  /*
4300  * Fix data that isn't saved in relcache cache file.
4301  *
4302  * relhasrules or relhastriggers could possibly be wrong or out of
4303  * date. If we don't actually find any rules or triggers, clear the
4304  * local copy of the flag so that we don't get into an infinite loop
4305  * here. We don't make any attempt to fix the pg_class entry, though.
4306  */
4307  if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
4308  {
4309  RelationBuildRuleLock(relation);
4310  if (relation->rd_rules == NULL)
4311  relation->rd_rel->relhasrules = false;
4312  restart = true;
4313  }
4314  if (relation->rd_rel->relhastriggers && relation->trigdesc == NULL)
4315  {
4316  RelationBuildTriggers(relation);
4317  if (relation->trigdesc == NULL)
4318  relation->rd_rel->relhastriggers = false;
4319  restart = true;
4320  }
4321 
4322  /*
4323  * Re-load the row security policies if the relation has them, since
4324  * they are not preserved in the cache. Note that we can never NOT
4325  * have a policy while relrowsecurity is true,
4326  * RelationBuildRowSecurity will create a single default-deny policy
4327  * if there is no policy defined in pg_policy.
4328  */
4329  if (relation->rd_rel->relrowsecurity && relation->rd_rsdesc == NULL)
4330  {
4331  RelationBuildRowSecurity(relation);
4332 
4333  Assert(relation->rd_rsdesc != NULL);
4334  restart = true;
4335  }
4336 
4337  /* Reload tableam data if needed */
4338  if (relation->rd_tableam == NULL &&
4339  (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind) || relation->rd_rel->relkind == RELKIND_SEQUENCE))
4340  {
4342  Assert(relation->rd_tableam != NULL);
4343 
4344  restart = true;
4345  }
4346 
4347  /* Release hold on the relation */
4349 
4350  /* Now, restart the hashtable scan if needed */
4351  if (restart)
4352  {
4353  hash_seq_term(&status);
4354  hash_seq_init(&status, RelationIdCache);
4355  }
4356  }
4357 
4358  /*
4359  * Lastly, write out new relcache cache files if needed. We don't bother
4360  * to distinguish cases where only one of the two needs an update.
4361  */
4362  if (needNewCacheFile)
4363  {
4364  /*
4365  * Force all the catcaches to finish initializing and thereby open the
4366  * catalogs and indexes they use. This will preload the relcache with
4367  * entries for all the most important system catalogs and indexes, so
4368  * that the init files will be most useful for future backends.
4369  */
4371 
4372  /* now write the files */
4374  write_relcache_init_file(false);
4375  }
4376 }
4377 
4378 /*
4379  * Load one critical system index into the relcache
4380  *
4381  * indexoid is the OID of the target index, heapoid is the OID of the catalog
4382  * it belongs to.
4383  */
4384 static void
4385 load_critical_index(Oid indexoid, Oid heapoid)
4386 {
4387  Relation ird;
4388 
4389  /*
4390  * We must lock the underlying catalog before locking the index to avoid
4391  * deadlock, since RelationBuildDesc might well need to read the catalog,
4392  * and if anyone else is exclusive-locking this catalog and index they'll
4393  * be doing it in that order.
4394  */
4395  LockRelationOid(heapoid, AccessShareLock);
4396  LockRelationOid(indexoid, AccessShareLock);
4397  ird = RelationBuildDesc(indexoid, true);
4398  if (ird == NULL)
4399  ereport(PANIC,
4401  errmsg_internal("could not open critical system index %u", indexoid));
4402  ird->rd_isnailed = true;
4403  ird->rd_refcnt = 1;
4406 
4407  (void) RelationGetIndexAttOptions(ird, false);
4408 }
4409 
4410 /*
4411  * GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
4412  * GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
4413  *
4414  * We need this kluge because we have to be able to access non-fixed-width
4415  * fields of pg_class and pg_index before we have the standard catalog caches
4416  * available. We use predefined data that's set up in just the same way as
4417  * the bootstrapped reldescs used by formrdesc(). The resulting tupdesc is
4418  * not 100% kosher: it does not have the correct rowtype OID in tdtypeid, nor
4419  * does it have a TupleConstr field. But it's good enough for the purpose of
4420  * extracting fields.
4421  */
4422 static TupleDesc
4424 {
4425  TupleDesc result;
4426  MemoryContext oldcxt;
4427  int i;
4428 
4430 
4431  result = CreateTemplateTupleDesc(natts);
4432  result->tdtypeid = RECORDOID; /* not right, but we don't care */
4433  result->tdtypmod = -1;
4434 
4435  for (i = 0; i < natts; i++)
4436  {
4437  memcpy(TupleDescAttr(result, i), &attrs[i], ATTRIBUTE_FIXED_PART_SIZE);
4438  /* make sure attcacheoff is valid */
4439  TupleDescAttr(result, i)->attcacheoff = -1;
4440  }
4441 
4442  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
4443  TupleDescAttr(result, 0)->attcacheoff = 0;
4444 
4445  /* Note: we don't bother to set up a TupleConstr entry */
4446 
4447  MemoryContextSwitchTo(oldcxt);
4448 
4449  return result;
4450 }
4451 
4452 static TupleDesc
4454 {
4455  static TupleDesc pgclassdesc = NULL;
4456 
4457  /* Already done? */
4458  if (pgclassdesc == NULL)
4459  pgclassdesc = BuildHardcodedDescriptor(Natts_pg_class,
4460  Desc_pg_class);
4461 
4462  return pgclassdesc;
4463 }
4464 
4465 static TupleDesc
4467 {
4468  static TupleDesc pgindexdesc = NULL;
4469 
4470  /* Already done? */
4471  if (pgindexdesc == NULL)
4472  pgindexdesc = BuildHardcodedDescriptor(Natts_pg_index,
4473  Desc_pg_index);
4474 
4475  return pgindexdesc;
4476 }
4477 
4478 /*
4479  * Load any default attribute value definitions for the relation.
4480  *
4481  * ndef is the number of attributes that were marked atthasdef.
4482  *
4483  * Note: we don't make it a hard error to be missing some pg_attrdef records.
4484  * We can limp along as long as nothing needs to use the default value. Code
4485  * that fails to find an expected AttrDefault record should throw an error.
4486  */
4487 static void
4488 AttrDefaultFetch(Relation relation, int ndef)
4489 {
4490  AttrDefault *attrdef;
4491  Relation adrel;
4492  SysScanDesc adscan;
4493  ScanKeyData skey;
4494  HeapTuple htup;
4495  int found = 0;
4496 
4497  /* Allocate array with room for as many entries as expected */
4498  attrdef = (AttrDefault *)
4500  ndef * sizeof(AttrDefault));
4501 
4502  /* Search pg_attrdef for relevant entries */
4503  ScanKeyInit(&skey,
4504  Anum_pg_attrdef_adrelid,
4505  BTEqualStrategyNumber, F_OIDEQ,
4506  ObjectIdGetDatum(RelationGetRelid(relation)));
4507 
4508  adrel = table_open(AttrDefaultRelationId, AccessShareLock);
4509  adscan = systable_beginscan(adrel, AttrDefaultIndexId, true,
4510  NULL, 1, &skey);
4511 
4512  while (HeapTupleIsValid(htup = systable_getnext(adscan)))
4513  {
4514  Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
4515  Datum val;
4516  bool isnull;
4517 
4518  /* protect limited size of array */
4519  if (found >= ndef)
4520  {
4521  elog(WARNING, "unexpected pg_attrdef record found for attribute %d of relation \"%s\"",
4522  adform->adnum, RelationGetRelationName(relation));
4523  break;
4524  }
4525 
4526  val = fastgetattr(htup,
4527  Anum_pg_attrdef_adbin,
4528  adrel->rd_att, &isnull);
4529  if (isnull)
4530  elog(WARNING, "null adbin for attribute %d of relation \"%s\"",
4531  adform->adnum, RelationGetRelationName(relation));
4532  else
4533  {
4534  /* detoast and convert to cstring in caller's context */
4535  char *s = TextDatumGetCString(val);
4536 
4537  attrdef[found].adnum = adform->adnum;
4538  attrdef[found].adbin = MemoryContextStrdup(CacheMemoryContext, s);
4539  pfree(s);
4540  found++;
4541  }
4542  }
4543 
4544  systable_endscan(adscan);
4545  table_close(adrel, AccessShareLock);
4546 
4547  if (found != ndef)
4548  elog(WARNING, "%d pg_attrdef record(s) missing for relation \"%s\"",
4549  ndef - found, RelationGetRelationName(relation));
4550 
4551  /*
4552  * Sort the AttrDefault entries by adnum, for the convenience of
4553  * equalTupleDescs(). (Usually, they already will be in order, but this
4554  * might not be so if systable_getnext isn't using an index.)
4555  */
4556  if (found > 1)
4557  qsort(attrdef, found, sizeof(AttrDefault), AttrDefaultCmp);
4558 
4559  /* Install array only after it's fully valid */
4560  relation->rd_att->constr->defval = attrdef;
4561  relation->rd_att->constr->num_defval = found;
4562 }
4563 
4564 /*
4565  * qsort comparator to sort AttrDefault entries by adnum
4566  */
4567 static int
4568 AttrDefaultCmp(const void *a, const void *b)
4569 {
4570  const AttrDefault *ada = (const AttrDefault *) a;
4571  const AttrDefault *adb = (const AttrDefault *) b;
4572 
4573  return pg_cmp_s16(ada->adnum, adb->adnum);
4574 }
4575 
4576 /*
4577  * Load any check constraints for the relation.
4578  *
4579  * As with defaults, if we don't find the expected number of them, just warn
4580  * here. The executor should throw an error if an INSERT/UPDATE is attempted.
4581  */
4582 static void
4584 {
4585  ConstrCheck *check;
4586  int ncheck = relation->rd_rel->relchecks;
4587  Relation conrel;
4588  SysScanDesc conscan;
4589  ScanKeyData skey[1];
4590  HeapTuple htup;
4591  int found = 0;
4592 
4593  /* Allocate array with room for as many entries as expected */
4594  check = (ConstrCheck *)
4596  ncheck * sizeof(ConstrCheck));
4597 
4598  /* Search pg_constraint for relevant entries */
4599  ScanKeyInit(&skey[0],
4600  Anum_pg_constraint_conrelid,
4601  BTEqualStrategyNumber, F_OIDEQ,
4602  ObjectIdGetDatum(RelationGetRelid(relation)));
4603 
4604  conrel = table_open(ConstraintRelationId, AccessShareLock);
4605  conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
4606  NULL, 1, skey);
4607 
4608  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4609  {
4611  Datum val;
4612  bool isnull;
4613 
4614  /* We want check constraints only */
4615  if (conform->contype != CONSTRAINT_CHECK)
4616  continue;
4617 
4618  /* protect limited size of array */
4619  if (found >= ncheck)
4620  {
4621  elog(WARNING, "unexpected pg_constraint record found for relation \"%s\"",
4622  RelationGetRelationName(relation));
4623  break;
4624  }
4625 
4626  check[found].ccvalid = conform->convalidated;
4627  check[found].ccnoinherit = conform->connoinherit;
4629  NameStr(conform->conname));
4630 
4631  /* Grab and test conbin is actually set */
4632  val = fastgetattr(htup,
4633  Anum_pg_constraint_conbin,
4634  conrel->rd_att, &isnull);
4635  if (isnull)
4636  elog(WARNING, "null conbin for relation \"%s\"",
4637  RelationGetRelationName(relation));
4638  else
4639  {
4640  /* detoast and convert to cstring in caller's context */
4641  char *s = TextDatumGetCString(val);
4642 
4643  check[found].ccbin = MemoryContextStrdup(CacheMemoryContext, s);
4644  pfree(s);
4645  found++;
4646  }
4647  }
4648 
4649  systable_endscan(conscan);
4650  table_close(conrel, AccessShareLock);
4651 
4652  if (found != ncheck)
4653  elog(WARNING, "%d pg_constraint record(s) missing for relation \"%s\"",
4654  ncheck - found, RelationGetRelationName(relation));
4655 
4656  /*
4657  * Sort the records by name. This ensures that CHECKs are applied in a
4658  * deterministic order, and it also makes equalTupleDescs() faster.
4659  */
4660  if (found > 1)
4661  qsort(check, found, sizeof(ConstrCheck), CheckConstraintCmp);
4662 
4663  /* Install array only after it's fully valid */
4664  relation->rd_att->constr->check = check;
4665  relation->rd_att->constr->num_check = found;
4666 }
4667 
4668 /*
4669  * qsort comparator to sort ConstrCheck entries by name
4670  */
4671 static int
4672 CheckConstraintCmp(const void *a, const void *b)
4673 {
4674  const ConstrCheck *ca = (const ConstrCheck *) a;
4675  const ConstrCheck *cb = (const ConstrCheck *) b;
4676 
4677  return strcmp(ca->ccname, cb->ccname);
4678 }
4679 
4680 /*
4681  * RelationGetFKeyList -- get a list of foreign key info for the relation
4682  *
4683  * Returns a list of ForeignKeyCacheInfo structs, one per FK constraining
4684  * the given relation. This data is a direct copy of relevant fields from
4685  * pg_constraint. The list items are in no particular order.
4686  *
4687  * CAUTION: the returned list is part of the relcache's data, and could
4688  * vanish in a relcache entry reset. Callers must inspect or copy it
4689  * before doing anything that might trigger a cache flush, such as
4690  * system catalog accesses. copyObject() can be used if desired.
4691  * (We define it this way because current callers want to filter and
4692  * modify the list entries anyway, so copying would be a waste of time.)
4693  */
4694 List *
4696 {
4697  List *result;
4698  Relation conrel;
4699  SysScanDesc conscan;
4700  ScanKeyData skey;
4701  HeapTuple htup;
4702  List *oldlist;
4703  MemoryContext oldcxt;
4704 
4705  /* Quick exit if we already computed the list. */
4706  if (relation->rd_fkeyvalid)
4707  return relation->rd_fkeylist;
4708 
4709  /* Fast path: non-partitioned tables without triggers can't have FKs */
4710  if (!relation->rd_rel->relhastriggers &&
4711  relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
4712  return NIL;
4713 
4714  /*
4715  * We build the list we intend to return (in the caller's context) while
4716  * doing the scan. After successfully completing the scan, we copy that
4717  * list into the relcache entry. This avoids cache-context memory leakage
4718  * if we get some sort of error partway through.
4719  */
4720  result = NIL;
4721 
4722  /* Prepare to scan pg_constraint for entries having conrelid = this rel. */
4723  ScanKeyInit(&skey,
4724  Anum_pg_constraint_conrelid,
4725  BTEqualStrategyNumber, F_OIDEQ,
4726  ObjectIdGetDatum(RelationGetRelid(relation)));
4727 
4728  conrel = table_open(ConstraintRelationId, AccessShareLock);
4729  conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
4730  NULL, 1, &skey);
4731 
4732  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4733  {
4734  Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup);
4735  ForeignKeyCacheInfo *info;
4736 
4737  /* consider only foreign keys */
4738  if (constraint->contype != CONSTRAINT_FOREIGN)
4739  continue;
4740 
4741  info = makeNode(ForeignKeyCacheInfo);
4742  info->conoid = constraint->oid;
4743  info->conrelid = constraint->conrelid;
4744  info->confrelid = constraint->confrelid;
4745 
4746  DeconstructFkConstraintRow(htup, &info->nkeys,
4747  info->conkey,
4748  info->confkey,
4749  info->conpfeqop,
4750  NULL, NULL, NULL, NULL);
4751 
4752  /* Add FK's node to the result list */
4753  result = lappend(result, info);
4754  }
4755 
4756  systable_endscan(conscan);
4757  table_close(conrel, AccessShareLock);
4758 
4759  /* Now save a copy of the completed list in the relcache entry. */
4761  oldlist = relation->rd_fkeylist;
4762  relation->rd_fkeylist = copyObject(result);
4763  relation->rd_fkeyvalid = true;
4764  MemoryContextSwitchTo(oldcxt);
4765 
4766  /* Don't leak the old list, if there is one */
4767  list_free_deep(oldlist);
4768 
4769  return result;
4770 }
4771 
4772 /*
4773  * RelationGetIndexList -- get a list of OIDs of indexes on this relation
4774  *
4775  * The index list is created only if someone requests it. We scan pg_index
4776  * to find relevant indexes, and add the list to the relcache entry so that
4777  * we won't have to compute it again. Note that shared cache inval of a
4778  * relcache entry will delete the old list and set rd_indexvalid to false,
4779  * so that we must recompute the index list on next request. This handles
4780  * creation or deletion of an index.
4781  *
4782  * Indexes that are marked not indislive are omitted from the returned list.
4783  * Such indexes are expected to be dropped momentarily, and should not be
4784  * touched at all by any caller of this function.
4785  *
4786  * The returned list is guaranteed to be sorted in order by OID. This is
4787  * needed by the executor, since for index types that we obtain exclusive
4788  * locks on when updating the index, all backends must lock the indexes in
4789  * the same order or we will get deadlocks (see ExecOpenIndices()). Any
4790  * consistent ordering would do, but ordering by OID is easy.
4791  *
4792  * Since shared cache inval causes the relcache's copy of the list to go away,
4793  * we return a copy of the list palloc'd in the caller's context. The caller
4794  * may list_free() the returned list after scanning it. This is necessary
4795  * since the caller will typically be doing syscache lookups on the relevant
4796  * indexes, and syscache lookup could cause SI messages to be processed!
4797  *
4798  * In exactly the same way, we update rd_pkindex, which is the OID of the
4799  * relation's primary key index if any, else InvalidOid; and rd_replidindex,
4800  * which is the pg_class OID of an index to be used as the relation's
4801  * replication identity index, or InvalidOid if there is no such index.
4802  */
4803 List *
4805 {
4806  Relation indrel;
4807  SysScanDesc indscan;
4808  ScanKeyData skey;
4809  HeapTuple htup;
4810  List *result;
4811  List *oldlist;
4812  char replident = relation->rd_rel->relreplident;
4813  Oid pkeyIndex = InvalidOid;
4814  Oid candidateIndex = InvalidOid;
4815  bool pkdeferrable = false;
4816  MemoryContext oldcxt;
4817 
4818  /* Quick exit if we already computed the list. */
4819  if (relation->rd_indexvalid)
4820  return list_copy(relation->rd_indexlist);
4821 
4822  /*
4823  * We build the list we intend to return (in the caller's context) while
4824  * doing the scan. After successfully completing the scan, we copy that
4825  * list into the relcache entry. This avoids cache-context memory leakage
4826  * if we get some sort of error partway through.
4827  */
4828  result = NIL;
4829 
4830  /* Prepare to scan pg_index for entries having indrelid = this rel. */
4831  ScanKeyInit(&skey,
4832  Anum_pg_index_indrelid,
4833  BTEqualStrategyNumber, F_OIDEQ,
4834  ObjectIdGetDatum(RelationGetRelid(relation)));
4835 
4836  indrel = table_open(IndexRelationId, AccessShareLock);
4837  indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true,
4838  NULL, 1, &skey);
4839 
4840  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4841  {
4843 
4844  /*
4845  * Ignore any indexes that are currently being dropped. This will
4846  * prevent them from being searched, inserted into, or considered in
4847  * HOT-safety decisions. It's unsafe to touch such an index at all
4848  * since its catalog entries could disappear at any instant.
4849  */
4850  if (!index->indislive)
4851  continue;
4852 
4853  /* add index's OID to result list */
4854  result = lappend_oid(result, index->indexrelid);
4855 
4856  /*
4857  * Invalid, non-unique, non-immediate or predicate indexes aren't
4858  * interesting for either oid indexes or replication identity indexes,
4859  * so don't check them.
4860  */
4861  if (!index->indisvalid || !index->indisunique ||
4862  !index->indimmediate ||
4863  !heap_attisnull(htup, Anum_pg_index_indpred, NULL))
4864  continue;
4865 
4866  /* remember primary key index if any */
4867  if (index->indisprimary)
4868  pkeyIndex = index->indexrelid;
4869 
4870  /* remember explicitly chosen replica index */
4871  if (index->indisreplident)
4872  candidateIndex = index->indexrelid;
4873  }
4874 
4875  systable_endscan(indscan);
4876 
4877  table_close(indrel, AccessShareLock);
4878 
4879  /* Sort the result list into OID order, per API spec. */
4880  list_sort(result, list_oid_cmp);
4881 
4882  /* Now save a copy of the completed list in the relcache entry. */
4884  oldlist = relation->rd_indexlist;
4885  relation->rd_indexlist = list_copy(result);
4886  relation->rd_pkindex = pkeyIndex;
4887  relation->rd_ispkdeferrable = pkdeferrable;
4888  if (replident == REPLICA_IDENTITY_DEFAULT && OidIsValid(pkeyIndex) && !pkdeferrable)
4889  relation->rd_replidindex = pkeyIndex;
4890  else if (replident == REPLICA_IDENTITY_INDEX && OidIsValid(candidateIndex))
4891  relation->rd_replidindex = candidateIndex;
4892  else
4893  relation->rd_replidindex = InvalidOid;
4894  relation->rd_indexvalid = true;
4895  MemoryContextSwitchTo(oldcxt);
4896 
4897  /* Don't leak the old list, if there is one */
4898  list_free(oldlist);
4899 
4900  return result;
4901 }
4902 
4903 /*
4904  * RelationGetStatExtList
4905  * get a list of OIDs of statistics objects on this relation
4906  *
4907  * The statistics list is created only if someone requests it, in a way
4908  * similar to RelationGetIndexList(). We scan pg_statistic_ext to find
4909  * relevant statistics, and add the list to the relcache entry so that we
4910  * won't have to compute it again. Note that shared cache inval of a
4911  * relcache entry will delete the old list and set rd_statvalid to 0,
4912  * so that we must recompute the statistics list on next request. This
4913  * handles creation or deletion of a statistics object.
4914  *
4915  * The returned list is guaranteed to be sorted in order by OID, although
4916  * this is not currently needed.
4917  *
4918  * Since shared cache inval causes the relcache's copy of the list to go away,
4919  * we return a copy of the list palloc'd in the caller's context. The caller
4920  * may list_free() the returned list after scanning it. This is necessary
4921  * since the caller will typically be doing syscache lookups on the relevant
4922  * statistics, and syscache lookup could cause SI messages to be processed!
4923  */
4924 List *
4926 {
4927  Relation indrel;
4928  SysScanDesc indscan;
4929  ScanKeyData skey;
4930  HeapTuple htup;
4931  List *result;
4932  List *oldlist;
4933  MemoryContext oldcxt;
4934 
4935  /* Quick exit if we already computed the list. */
4936  if (relation->rd_statvalid != 0)
4937  return list_copy(relation->rd_statlist);
4938 
4939  /*
4940  * We build the list we intend to return (in the caller's context) while
4941  * doing the scan. After successfully completing the scan, we copy that
4942  * list into the relcache entry. This avoids cache-context memory leakage
4943  * if we get some sort of error partway through.
4944  */
4945  result = NIL;
4946 
4947  /*
4948  * Prepare to scan pg_statistic_ext for entries having stxrelid = this
4949  * rel.
4950  */
4951  ScanKeyInit(&skey,
4952  Anum_pg_statistic_ext_stxrelid,
4953  BTEqualStrategyNumber, F_OIDEQ,
4954  ObjectIdGetDatum(RelationGetRelid(relation)));
4955 
4956  indrel = table_open(StatisticExtRelationId, AccessShareLock);
4957  indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true,
4958  NULL, 1, &skey);
4959 
4960  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4961  {
4962  Oid oid = ((Form_pg_statistic_ext) GETSTRUCT(htup))->oid;
4963 
4964  result = lappend_oid(result, oid);
4965  }
4966 
4967  systable_endscan(indscan);
4968 
4969  table_close(indrel, AccessShareLock);
4970 
4971  /* Sort the result list into OID order, per API spec. */
4972  list_sort(result, list_oid_cmp);
4973 
4974  /* Now save a copy of the completed list in the relcache entry. */
4976  oldlist = relation->rd_statlist;
4977  relation->rd_statlist = list_copy(result);
4978 
4979  relation->rd_statvalid = true;
4980  MemoryContextSwitchTo(oldcxt);
4981 
4982  /* Don't leak the old list, if there is one */
4983  list_free(oldlist);
4984 
4985  return result;
4986 }
4987 
4988 /*
4989  * RelationGetPrimaryKeyIndex -- get OID of the relation's primary key index
4990  *
4991  * Returns InvalidOid if there is no such index, or if the primary key is
4992  * DEFERRABLE.
4993  */
4994 Oid
4996 {
4997  List *ilist;
4998 
4999  if (!relation->rd_indexvalid)
5000  {
5001  /* RelationGetIndexList does the heavy lifting. */
5002  ilist = RelationGetIndexList(relation);
5003  list_free(ilist);
5004  Assert(relation->rd_indexvalid);
5005  }
5006 
5007  return relation->rd_ispkdeferrable ? InvalidOid : relation->rd_pkindex;
5008 }
5009 
5010 /*
5011  * RelationGetReplicaIndex -- get OID of the relation's replica identity index
5012  *
5013  * Returns InvalidOid if there is no such index.
5014  */
5015 Oid
5017 {
5018  List *ilist;
5019 
5020  if (!relation->rd_indexvalid)
5021  {
5022  /* RelationGetIndexList does the heavy lifting. */
5023  ilist = RelationGetIndexList(relation);
5024  list_free(ilist);
5025  Assert(relation->rd_indexvalid);
5026  }
5027 
5028  return relation->rd_replidindex;
5029 }
5030 
5031 /*
5032  * RelationGetIndexExpressions -- get the index expressions for an index
5033  *
5034  * We cache the result of transforming pg_index.indexprs into a node tree.
5035  * If the rel is not an index or has no expressional columns, we return NIL.
5036  * Otherwise, the returned tree is copied into the caller's memory context.
5037  * (We don't want to return a pointer to the relcache copy, since it could
5038  * disappear due to relcache invalidation.)
5039  */
5040 List *
5042 {
5043  List *result;
5044  Datum exprsDatum;
5045  bool isnull;
5046  char *exprsString;
5047  MemoryContext oldcxt;
5048 
5049  /* Quick exit if we already computed the result. */
5050  if (relation->rd_indexprs)
5051  return copyObject(relation->rd_indexprs);
5052 
5053  /* Quick exit if there is nothing to do. */
5054  if (relation->rd_indextuple == NULL ||
5055  heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs, NULL))
5056  return NIL;
5057 
5058  /*
5059  * We build the tree we intend to return in the caller's context. After
5060  * successfully completing the work, we copy it into the relcache entry.
5061  * This avoids problems if we get some sort of error partway through.
5062  */
5063  exprsDatum = heap_getattr(relation->rd_indextuple,
5064  Anum_pg_index_indexprs,
5066  &isnull);
5067  Assert(!isnull);
5068  exprsString = TextDatumGetCString(exprsDatum);
5069  result = (List *) stringToNode(exprsString);
5070  pfree(exprsString);
5071 
5072  /*
5073  * Run the expressions through eval_const_expressions. This is not just an
5074  * optimization, but is necessary, because the planner will be comparing
5075  * them to similarly-processed qual clauses, and may fail to detect valid
5076  * matches without this. We must not use canonicalize_qual, however,
5077  * since these aren't qual expressions.
5078  */
5079  result = (List *) eval_const_expressions(NULL, (Node *) result);
5080 
5081  /* May as well fix opfuncids too */
5082  fix_opfuncids((Node *) result);
5083 
5084  /* Now save a copy of the completed tree in the relcache entry. */
5085  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
5086  relation->rd_indexprs = copyObject(result);
5087  MemoryContextSwitchTo(oldcxt);
5088 
5089  return result;
5090 }
5091 
5092 /*
5093  * RelationGetDummyIndexExpressions -- get dummy expressions for an index
5094  *
5095  * Return a list of dummy expressions (just Const nodes) with the same
5096  * types/typmods/collations as the index's real expressions. This is
5097  * useful in situations where we don't want to run any user-defined code.
5098  */
5099 List *
5101 {
5102  List *result;
5103  Datum exprsDatum;
5104  bool isnull;
5105  char *exprsString;
5106  List *rawExprs;
5107  ListCell *lc;
5108 
5109  /* Quick exit if there is nothing to do. */
5110  if (relation->rd_indextuple == NULL ||
5111  heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs, NULL))
5112  return NIL;
5113 
5114  /* Extract raw node tree(s) from index tuple. */
5115  exprsDatum = heap_getattr(relation->rd_indextuple,
5116  Anum_pg_index_indexprs,
5118  &isnull);
5119  Assert(!isnull);
5120  exprsString = TextDatumGetCString(exprsDatum);
5121  rawExprs = (List *) stringToNode(exprsString);
5122  pfree(exprsString);
5123 
5124  /* Construct null Consts; the typlen and typbyval are arbitrary. */
5125  result = NIL;
5126  foreach(lc, rawExprs)
5127  {
5128  Node *rawExpr = (Node *) lfirst(lc);
5129 
5130  result = lappend(result,
5131  makeConst(exprType(rawExpr),
5132  exprTypmod(rawExpr),
5133  exprCollation(rawExpr),
5134  1,
5135  (Datum) 0,
5136  true,
5137  true));
5138  }
5139 
5140  return result;
5141 }
5142 
5143 /*
5144  * RelationGetIndexPredicate -- get the index predicate for an index
5145  *
5146  * We cache the result of transforming pg_index.indpred into an implicit-AND
5147  * node tree (suitable for use in planning).
5148  * If the rel is not an index or has no predicate, we return NIL.
5149  * Otherwise, the returned tree is copied into the caller's memory context.
5150  * (We don't want to return a pointer to the relcache copy, since it could
5151  * disappear due to relcache invalidation.)
5152  */
5153 List *
5155 {
5156  List *result;
5157  Datum predDatum;
5158  bool isnull;
5159  char *predString;
5160  MemoryContext oldcxt;
5161 
5162  /* Quick exit if we already computed the result. */
5163  if (relation->rd_indpred)
5164  return copyObject(relation->rd_indpred);
5165 
5166  /* Quick exit if there is nothing to do. */
5167  if (relation->rd_indextuple == NULL ||
5168  heap_attisnull(relation->rd_indextuple, Anum_pg_index_indpred, NULL))
5169  return NIL;
5170 
5171  /*
5172  * We build the tree we intend to return in the caller's context. After
5173  * successfully completing the work, we copy it into the relcache entry.
5174  * This avoids problems if we get some sort of error partway through.
5175  */
5176  predDatum = heap_getattr(relation->rd_indextuple,
5177  Anum_pg_index_indpred,
5179  &isnull);
5180  Assert(!isnull);
5181  predString = TextDatumGetCString(predDatum);
5182  result = (List *) stringToNode(predString);
5183  pfree(predString);
5184 
5185  /*
5186  * Run the expression through const-simplification and canonicalization.
5187  * This is not just an optimization, but is necessary, because the planner
5188  * will be comparing it to similarly-processed qual clauses, and may fail
5189  * to detect valid matches without this. This must match the processing
5190  * done to qual clauses in preprocess_expression()! (We can skip the
5191  * stuff involving subqueries, however, since we don't allow any in index
5192  * predicates.)
5193  */
5194  result = (List *) eval_const_expressions(NULL, (Node *) result);
5195 
5196  result = (List *) canonicalize_qual((Expr *) result, false);
5197 
5198  /* Also convert to implicit-AND format */
5199  result = make_ands_implicit((Expr *) result);
5200 
5201  /* May as well fix opfuncids too */
5202  fix_opfuncids((Node *) result);
5203 
5204  /* Now save a copy of the completed tree in the relcache entry. */
5205  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
5206  relation->rd_indpred = copyObject(result);
5207  MemoryContextSwitchTo(oldcxt);
5208 
5209  return result;
5210 }
5211 
5212 /*
5213  * RelationGetIndexAttrBitmap -- get a bitmap of index attribute numbers
5214  *
5215  * The result has a bit set for each attribute used anywhere in the index
5216  * definitions of all the indexes on this relation. (This includes not only
5217  * simple index keys, but attributes used in expressions and partial-index
5218  * predicates.)
5219  *
5220  * Depending on attrKind, a bitmap covering attnums for certain columns is
5221  * returned:
5222  * INDEX_ATTR_BITMAP_KEY Columns in non-partial unique indexes not
5223  * in expressions (i.e., usable for FKs)
5224  * INDEX_ATTR_BITMAP_PRIMARY_KEY Columns in the table's primary key
5225  * (beware: even if PK is deferrable!)
5226  * INDEX_ATTR_BITMAP_IDENTITY_KEY Columns in the table's replica identity
5227  * index (empty if FULL)
5228  * INDEX_ATTR_BITMAP_HOT_BLOCKING Columns that block updates from being HOT
5229  * INDEX_ATTR_BITMAP_SUMMARIZED Columns included in summarizing indexes
5230  *
5231  * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
5232  * we can include system attributes (e.g., OID) in the bitmap representation.
5233  *
5234  * Deferred indexes are considered for the primary key, but not for replica
5235  * identity.
5236  *
5237  * Caller had better hold at least RowExclusiveLock on the target relation
5238  * to ensure it is safe (deadlock-free) for us to take locks on the relation's
5239  * indexes. Note that since the introduction of CREATE INDEX CONCURRENTLY,
5240  * that lock level doesn't guarantee a stable set of indexes, so we have to
5241  * be prepared to retry here in case of a change in the set of indexes.
5242  *
5243  * The returned result is palloc'd in the caller's memory context and should
5244  * be bms_free'd when not needed anymore.
5245  */
5246 Bitmapset *
5248 {
5249  Bitmapset *uindexattrs; /* columns in unique indexes */
5250  Bitmapset *pkindexattrs; /* columns in the primary index */
5251  Bitmapset *idindexattrs; /* columns in the replica identity */
5252  Bitmapset *hotblockingattrs; /* columns with HOT blocking indexes */
5253  Bitmapset *summarizedattrs; /* columns with summarizing indexes */
5254  List *indexoidlist;
5255  List *newindexoidlist;
5256  Oid relpkindex;
5257  Oid relreplindex;
5258  ListCell *l;
5259  MemoryContext oldcxt;
5260 
5261  /* Quick exit if we already computed the result. */
5262  if (relation->rd_attrsvalid)
5263  {
5264  switch (attrKind)
5265  {
5266  case INDEX_ATTR_BITMAP_KEY:
5267  return bms_copy(relation->rd_keyattr);
5269  return bms_copy(relation->rd_pkattr);
5271  return bms_copy(relation->rd_idattr);
5273  return bms_copy(relation->rd_hotblockingattr);
5275  return bms_copy(relation->rd_summarizedattr);
5276  default:
5277  elog(ERROR, "unknown attrKind %u", attrKind);
5278  }
5279  }
5280 
5281  /* Fast path if definitely no indexes */
5282  if (!RelationGetForm(relation)->relhasindex)
5283  return NULL;
5284 
5285  /*
5286  * Get cached list of index OIDs. If we have to start over, we do so here.
5287  */
5288 restart:
5289  indexoidlist = RelationGetIndexList(relation);
5290 
5291  /* Fall out if no indexes (but relhasindex was set) */
5292  if (indexoidlist == NIL)
5293  return NULL;
5294 
5295  /*
5296  * Copy the rd_pkindex and rd_replidindex values computed by
5297  * RelationGetIndexList before proceeding. This is needed because a
5298  * relcache flush could occur inside index_open below, resetting the
5299  * fields managed by RelationGetIndexList. We need to do the work with
5300  * stable values of these fields.
5301  */
5302  relpkindex = relation->rd_pkindex;
5303  relreplindex = relation->rd_replidindex;
5304 
5305  /*
5306  * For each index, add referenced attributes to indexattrs.
5307  *
5308  * Note: we consider all indexes returned by RelationGetIndexList, even if
5309  * they are not indisready or indisvalid. This is important because an
5310  * index for which CREATE INDEX CONCURRENTLY has just started must be
5311  * included in HOT-safety decisions (see README.HOT). If a DROP INDEX
5312  * CONCURRENTLY is far enough along that we should ignore the index, it
5313  * won't be returned at all by RelationGetIndexList.
5314  */
5315  uindexattrs = NULL;
5316  pkindexattrs = NULL;
5317  idindexattrs = NULL;
5318  hotblockingattrs = NULL;
5319  summarizedattrs = NULL;
5320  foreach(l, indexoidlist)
5321  {
5322  Oid indexOid = lfirst_oid(l);
5323  Relation indexDesc;
5324  Datum datum;
5325  bool isnull;
5326  Node *indexExpressions;
5327  Node *indexPredicate;
5328  int i;
5329  bool isKey; /* candidate key */
5330  bool isPK; /* primary key */
5331  bool isIDKey; /* replica identity index */
5332  Bitmapset **attrs;
5333 
5334  indexDesc = index_open(indexOid, AccessShareLock);
5335 
5336  /*
5337  * Extract index expressions and index predicate. Note: Don't use
5338  * RelationGetIndexExpressions()/RelationGetIndexPredicate(), because
5339  * those might run constant expressions evaluation, which needs a
5340  * snapshot, which we might not have here. (Also, it's probably more
5341  * sound to collect the bitmaps before any transformations that might
5342  * eliminate columns, but the practical impact of this is limited.)
5343  */
5344 
5345  datum = heap_getattr(indexDesc->rd_indextuple, Anum_pg_index_indexprs,
5346  GetPgIndexDescriptor(), &isnull);
5347  if (!isnull)
5348  indexExpressions = stringToNode(TextDatumGetCString(datum));
5349  else
5350  indexExpressions = NULL;
5351 
5352  datum = heap_getattr(indexDesc->rd_indextuple, Anum_pg_index_indpred,
5353  GetPgIndexDescriptor(), &isnull);
5354  if (!isnull)
5355  indexPredicate = stringToNode(TextDatumGetCString(datum));
5356  else
5357  indexPredicate = NULL;
5358 
5359  /* Can this index be referenced by a foreign key? */
5360  isKey = indexDesc->rd_index->indisunique &&
5361  indexExpressions == NULL &&
5362  indexPredicate == NULL;
5363 
5364  /* Is this a primary key? */
5365  isPK = (indexOid == relpkindex);
5366 
5367  /* Is this index the configured (or default) replica identity? */
5368  isIDKey = (indexOid == relreplindex);
5369 
5370  /*
5371  * If the index is summarizing, it doesn't block HOT updates, but we
5372  * may still need to update it (if the attributes were modified). So
5373  * decide which bitmap we'll update in the following loop.
5374  */
5375  if (indexDesc->rd_indam->amsummarizing)
5376  attrs = &summarizedattrs;
5377  else
5378  attrs = &hotblockingattrs;
5379 
5380  /* Collect simple attribute references */
5381  for (i = 0; i < indexDesc->rd_index->indnatts; i++)
5382  {
5383  int attrnum = indexDesc->rd_index->indkey.values[i];
5384 
5385  /*
5386  * Since we have covering indexes with non-key columns, we must
5387  * handle them accurately here. non-key columns must be added into
5388  * hotblockingattrs or summarizedattrs, since they are in index,
5389  * and update shouldn't miss them.
5390  *
5391  * Summarizing indexes do not block HOT, but do need to be updated
5392  * when the column value changes, thus require a separate
5393  * attribute bitmapset.
5394  *
5395  * Obviously, non-key columns couldn't be referenced by foreign
5396  * key or identity key. Hence we do not include them into
5397  * uindexattrs, pkindexattrs and idindexattrs bitmaps.
5398  */
5399  if (attrnum != 0)
5400  {
5401  *attrs = bms_add_member(*attrs,
5403 
5404  if (isKey && i < indexDesc->rd_index->indnkeyatts)
5405  uindexattrs = bms_add_member(uindexattrs,
5407 
5408  if (isPK && i < indexDesc->rd_index->indnkeyatts)
5409  pkindexattrs = bms_add_member(pkindexattrs,
5411 
5412  if (isIDKey && i < indexDesc->rd_index->indnkeyatts)
5413  idindexattrs = bms_add_member(idindexattrs,
5415  }
5416  }
5417 
5418  /* Collect all attributes used in expressions, too */
5419  pull_varattnos(indexExpressions, 1, attrs);
5420 
5421  /* Collect all attributes in the index predicate, too */
5422  pull_varattnos(indexPredicate, 1, attrs);
5423 
5424  index_close(indexDesc, AccessShareLock);
5425  }
5426 
5427  /*
5428  * During one of the index_opens in the above loop, we might have received
5429  * a relcache flush event on this relcache entry, which might have been
5430  * signaling a change in the rel's index list. If so, we'd better start
5431  * over to ensure we deliver up-to-date attribute bitmaps.
5432  */
5433  newindexoidlist = RelationGetIndexList(relation);
5434  if (equal(indexoidlist, newindexoidlist) &&
5435  relpkindex == relation->rd_pkindex &&
5436  relreplindex == relation->rd_replidindex)
5437  {
5438  /* Still the same index set, so proceed */
5439  list_free(newindexoidlist);
5440  list_free(indexoidlist);
5441  }
5442  else
5443  {
5444  /* Gotta do it over ... might as well not leak memory */
5445  list_free(newindexoidlist);
5446  list_free(indexoidlist);
5447  bms_free(uindexattrs);
5448  bms_free(pkindexattrs);
5449  bms_free(idindexattrs);
5450  bms_free(hotblockingattrs);
5451  bms_free(summarizedattrs);
5452 
5453  goto restart;
5454  }
5455 
5456  /* Don't leak the old values of these bitmaps, if any */
5457  relation->rd_attrsvalid = false;
5458  bms_free(relation->rd_keyattr);
5459  relation->rd_keyattr = NULL;
5460  bms_free(relation->rd_pkattr);
5461  relation->rd_pkattr = NULL;
5462  bms_free(relation->rd_idattr);
5463  relation->rd_idattr = NULL;
5464  bms_free(relation->rd_hotblockingattr);
5465  relation->rd_hotblockingattr = NULL;
5466  bms_free(relation->rd_summarizedattr);
5467  relation->rd_summarizedattr = NULL;
5468 
5469  /*
5470  * Now save copies of the bitmaps in the relcache entry. We intentionally
5471  * set rd_attrsvalid last, because that's the one that signals validity of
5472  * the values; if we run out of memory before making that copy, we won't
5473  * leave the relcache entry looking like the other ones are valid but
5474  * empty.
5475  */
5477  relation->rd_keyattr = bms_copy(uindexattrs);
5478  relation->rd_pkattr = bms_copy(pkindexattrs);
5479  relation->rd_idattr = bms_copy(idindexattrs);
5480  relation->rd_hotblockingattr = bms_copy(hotblockingattrs);
5481  relation->rd_summarizedattr = bms_copy(summarizedattrs);
5482  relation->rd_attrsvalid = true;
5483  MemoryContextSwitchTo(oldcxt);
5484 
5485  /* We return our original working copy for caller to play with */
5486  switch (attrKind)
5487  {
5488  case INDEX_ATTR_BITMAP_KEY:
5489  return uindexattrs;
5491  return pkindexattrs;
5493  return idindexattrs;
5495  return hotblockingattrs;
5497  return summarizedattrs;
5498  default:
5499  elog(ERROR, "unknown attrKind %u", attrKind);
5500  return NULL;
5501  }
5502 }
5503 
5504 /*
5505  * RelationGetIdentityKeyBitmap -- get a bitmap of replica identity attribute
5506  * numbers
5507  *
5508  * A bitmap of index attribute numbers for the configured replica identity
5509  * index is returned.
5510  *
5511  * See also comments of RelationGetIndexAttrBitmap().
5512  *
5513  * This is a special purpose function used during logical replication. Here,
5514  * unlike RelationGetIndexAttrBitmap(), we don't acquire a lock on the required
5515  * index as we build the cache entry using a historic snapshot and all the
5516  * later changes are absorbed while decoding WAL. Due to this reason, we don't
5517  * need to retry here in case of a change in the set of indexes.
5518  */
5519 Bitmapset *
5521 {
5522  Bitmapset *idindexattrs = NULL; /* columns in the replica identity */
5523  Relation indexDesc;
5524  int i;
5525  Oid replidindex;
5526  MemoryContext oldcxt;
5527 
5528  /* Quick exit if we already computed the result */
5529  if (relation->rd_idattr != NULL)
5530  return bms_copy(relation->rd_idattr);
5531 
5532  /* Fast path if definitely no indexes */
5533  if (!RelationGetForm(relation)->relhasindex)
5534  return NULL;
5535 
5536  /* Historic snapshot must be set. */
5538 
5539  replidindex = RelationGetReplicaIndex(relation);
5540 
5541  /* Fall out if there is no replica identity index */
5542  if (!OidIsValid(replidindex))
5543  return NULL;
5544 
5545  /* Look up the description for the replica identity index */
5546  indexDesc = RelationIdGetRelation(replidindex);
5547 
5548  if (!RelationIsValid(indexDesc))
5549  elog(ERROR, "could not open relation with OID %u",
5550  relation->rd_replidindex);
5551 
5552  /* Add referenced attributes to idindexattrs */
5553  for (i = 0; i < indexDesc->rd_index->indnatts; i++)
5554  {
5555  int attrnum = indexDesc->rd_index->indkey.values[i];
5556 
5557  /*
5558  * We don't include non-key columns into idindexattrs bitmaps. See
5559  * RelationGetIndexAttrBitmap.
5560  */
5561  if (attrnum != 0)
5562  {
5563  if (i < indexDesc->rd_index->indnkeyatts)
5564  idindexattrs = bms_add_member(idindexattrs,
5566  }
5567  }
5568 
5569  RelationClose(indexDesc);
5570 
5571  /* Don't leak the old values of these bitmaps, if any */
5572  bms_free(relation->rd_idattr);
5573  relation->rd_idattr = NULL;
5574 
5575  /* Now save copy of the bitmap in the relcache entry */
5577  relation->rd_idattr = bms_copy(idindexattrs);
5578  MemoryContextSwitchTo(oldcxt);
5579 
5580  /* We return our original working copy for caller to play with */
5581  return idindexattrs;
5582 }
5583 
5584 /*
5585  * RelationGetExclusionInfo -- get info about index's exclusion constraint
5586  *
5587  * This should be called only for an index that is known to have an associated
5588  * exclusion constraint or primary key/unique constraint using WITHOUT
5589  * OVERLAPS.
5590 
5591  * It returns arrays (palloc'd in caller's context) of the exclusion operator
5592  * OIDs, their underlying functions' OIDs, and their strategy numbers in the
5593  * index's opclasses. We cache all this information since it requires a fair
5594  * amount of work to get.
5595  */
5596 void
5598  Oid **operators,
5599  Oid **procs,
5600  uint16 **strategies)
5601 {
5602  int indnkeyatts;
5603  Oid *ops;
5604  Oid *funcs;
5605  uint16 *strats;
5606  Relation conrel;
5607  SysScanDesc conscan;
5608  ScanKeyData skey[1];
5609  HeapTuple htup;
5610  bool found;
5611  MemoryContext oldcxt;
5612  int i;
5613 
5614  indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation);
5615 
5616  /* Allocate result space in caller context */
5617  *operators = ops = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5618  *procs = funcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5619  *strategies = strats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
5620 
5621  /* Quick exit if we have the data cached already */
5622  if (indexRelation->rd_exclstrats != NULL)
5623  {
5624  memcpy(ops, indexRelation->rd_exclops, sizeof(Oid) * indnkeyatts);
5625  memcpy(funcs, indexRelation->rd_exclprocs, sizeof(Oid) * indnkeyatts);
5626  memcpy(strats, indexRelation->rd_exclstrats, sizeof(uint16) * indnkeyatts);
5627  return;
5628  }
5629 
5630  /*
5631  * Search pg_constraint for the constraint associated with the index. To
5632  * make this not too painfully slow, we use the index on conrelid; that
5633  * will hold the parent relation's OID not the index's own OID.
5634  *
5635  * Note: if we wanted to rely on the constraint name matching the index's
5636  * name, we could just do a direct lookup using pg_constraint's unique
5637  * index. For the moment it doesn't seem worth requiring that.
5638  */
5639  ScanKeyInit(&skey[0],
5640  Anum_pg_constraint_conrelid,
5641  BTEqualStrategyNumber, F_OIDEQ,
5642  ObjectIdGetDatum(indexRelation->rd_index->indrelid));
5643 
5644  conrel = table_open(ConstraintRelationId, AccessShareLock);
5645  conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
5646  NULL, 1, skey);
5647  found = false;
5648 
5649  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
5650  {
5652  Datum val;
5653  bool isnull;
5654  ArrayType *arr;
5655  int nelem;
5656 
5657  /* We want the exclusion constraint owning the index */
5658  if ((conform->contype != CONSTRAINT_EXCLUSION &&
5659  !(conform->conperiod && (
5660  conform->contype == CONSTRAINT_PRIMARY
5661  || conform->contype == CONSTRAINT_UNIQUE))) ||
5662  conform->conindid != RelationGetRelid(indexRelation))
5663  continue;
5664 
5665  /* There should be only one */
5666  if (found)
5667  elog(ERROR, "unexpected exclusion constraint record found for rel %s",
5668  RelationGetRelationName(indexRelation));
5669  found = true;
5670 
5671  /* Extract the operator OIDS from conexclop */
5672  val = fastgetattr(htup,
5673  Anum_pg_constraint_conexclop,
5674  conrel->rd_att, &isnull);
5675  if (isnull)
5676  elog(ERROR, "null conexclop for rel %s",
5677  RelationGetRelationName(indexRelation));
5678 
5679  arr = DatumGetArrayTypeP(val); /* ensure not toasted */
5680  nelem = ARR_DIMS(arr)[0];
5681  if (ARR_NDIM(arr) != 1 ||
5682  nelem != indnkeyatts ||
5683  ARR_HASNULL(arr) ||
5684  ARR_ELEMTYPE(arr) != OIDOID)
5685  elog(ERROR, "conexclop is not a 1-D Oid array");
5686 
5687  memcpy(ops, ARR_DATA_PTR(arr), sizeof(Oid) * indnkeyatts);
5688  }
5689 
5690  systable_endscan(conscan);
5691  table_close(conrel, AccessShareLock);
5692 
5693  if (!found)
5694  elog(ERROR, "exclusion constraint record missing for rel %s",
5695  RelationGetRelationName(indexRelation));
5696 
5697  /* We need the func OIDs and strategy numbers too */
5698  for (i = 0; i < indnkeyatts; i++)
5699  {
5700  funcs[i] = get_opcode(ops[i]);
5701  strats[i] = get_op_opfamily_strategy(ops[i],
5702  indexRelation->rd_opfamily[i]);
5703  /* shouldn't fail, since it was checked at index creation */
5704  if (strats[i] == InvalidStrategy)
5705  elog(ERROR, "could not find strategy for operator %u in family %u",
5706  ops[i], indexRelation->rd_opfamily[i]);
5707  }
5708 
5709  /* Save a copy of the results in the relcache entry. */
5710  oldcxt = MemoryContextSwitchTo(indexRelation->rd_indexcxt);
5711  indexRelation->rd_exclops = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5712  indexRelation->rd_exclprocs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5713  indexRelation->rd_exclstrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
5714  memcpy(indexRelation->rd_exclops, ops, sizeof(Oid) * indnkeyatts);
5715  memcpy(indexRelation->rd_exclprocs, funcs, sizeof(Oid) * indnkeyatts);
5716  memcpy(indexRelation->rd_exclstrats, strats, sizeof(uint16) * indnkeyatts);
5717  MemoryContextSwitchTo(oldcxt);
5718 }
5719 
5720 /*
5721  * Get the publication information for the given relation.
5722  *
5723  * Traverse all the publications which the relation is in to get the
5724  * publication actions and to validate any row filter expressions and column
5725  * lists of those publications. We consider a row filter expression invalid
5726  * if it references any column which is not part of REPLICA IDENTITY.
5727  *
5728  * To avoid fetching the publication information repeatedly, we cache the
5729  * publication actions and the row filter and column list validation results.
5730  */
5731 void
5733 {
5734  List *puboids;
5735  ListCell *lc;
5736  MemoryContext oldcxt;
5737  Oid schemaid;
5738  List *ancestors = NIL;
5739  Oid relid = RelationGetRelid(relation);
5740 
5741  /*
5742  * If not publishable, it publishes no actions. (pgoutput_change() will
5743  * ignore it.)
5744  */
5745  if (!is_publishable_relation(relation))
5746  {
5747  memset(pubdesc, 0, sizeof(PublicationDesc));
5748  pubdesc->rf_valid_for_update = true;
5749  pubdesc->rf_valid_for_delete = true;
5750  pubdesc->cols_valid_for_update = true;
5751  pubdesc->cols_valid_for_delete = true;
5752  return;
5753  }
5754 
5755  if (relation->rd_pubdesc)
5756  {
5757  memcpy(pubdesc, relation->rd_pubdesc, sizeof(PublicationDesc));
5758  return;
5759  }
5760 
5761  memset(pubdesc, 0, sizeof(PublicationDesc));
5762  pubdesc->rf_valid_for_update = true;
5763  pubdesc->rf_valid_for_delete = true;
5764  pubdesc->cols_valid_for_update = true;
5765  pubdesc->cols_valid_for_delete = true;
5766 
5767  /* Fetch the publication membership info. */
5768  puboids = GetRelationPublications(relid);
5769  schemaid = RelationGetNamespace(relation);
5770  puboids = list_concat_unique_oid(puboids, GetSchemaPublications(schemaid));
5771 
5772  if (relation->rd_rel->relispartition)
5773  {
5774  /* Add publications that the ancestors are in too. */
5775  ancestors = get_partition_ancestors(relid);
5776 
5777  foreach(lc, ancestors)
5778  {
5779  Oid ancestor = lfirst_oid(lc);
5780 
5781  puboids = list_concat_unique_oid(puboids,
5782  GetRelationPublications(ancestor));
5783  schemaid = get_rel_namespace(ancestor);
5784  puboids = list_concat_unique_oid(puboids,
5785  GetSchemaPublications(schemaid));
5786  }
5787  }
5788  puboids = list_concat_unique_oid(puboids, GetAllTablesPublications());
5789 
5790  foreach(lc, puboids)
5791  {
5792  Oid pubid = lfirst_oid(lc);
5793  HeapTuple tup;
5794  Form_pg_publication pubform;
5795 
5796  tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
5797 
5798  if (!HeapTupleIsValid(tup))
5799  elog(ERROR, "cache lookup failed for publication %u", pubid);
5800 
5801  pubform = (Form_pg_publication) GETSTRUCT(tup);
5802 
5803  pubdesc->pubactions.pubinsert |= pubform->pubinsert;
5804  pubdesc->pubactions.pubupdate |= pubform->pubupdate;
5805  pubdesc->pubactions.pubdelete |= pubform->pubdelete;
5806  pubdesc->pubactions.pubtruncate |= pubform->pubtruncate;
5807 
5808  /*
5809  * Check if all columns referenced in the filter expression are part
5810  * of the REPLICA IDENTITY index or not.
5811  *
5812  * If the publication is FOR ALL TABLES then it means the table has no
5813  * row filters and we can skip the validation.
5814  */
5815  if (!pubform->puballtables &&
5816  (pubform->pubupdate || pubform->pubdelete) &&
5817  pub_rf_contains_invalid_column(pubid, relation, ancestors,
5818  pubform->pubviaroot))
5819  {
5820  if (pubform->pubupdate)
5821  pubdesc->rf_valid_for_update = false;
5822  if (pubform->pubdelete)
5823  pubdesc->rf_valid_for_delete = false;
5824  }
5825 
5826  /*
5827  * Check if all columns are part of the REPLICA IDENTITY index or not.
5828  *
5829  * If the publication is FOR ALL TABLES then it means the table has no
5830  * column list and we can skip the validation.
5831  */
5832  if (!pubform->puballtables &&
5833  (pubform->pubupda