PostgreSQL Source Code  git master
relcache.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * relcache.c
4  * POSTGRES relation descriptor cache code
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/cache/relcache.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  * RelationCacheInitialize - initialize relcache (to empty)
18  * RelationCacheInitializePhase2 - initialize shared-catalog entries
19  * RelationCacheInitializePhase3 - finish initializing relcache
20  * RelationIdGetRelation - get a reldesc by relation id
21  * RelationClose - close an open relation
22  *
23  * NOTES
24  * The following code contains many undocumented hacks. Please be
25  * careful....
26  */
27 #include "postgres.h"
28 
29 #include <sys/file.h>
30 #include <fcntl.h>
31 #include <unistd.h>
32 
33 #include "access/htup_details.h"
34 #include "access/multixact.h"
35 #include "access/nbtree.h"
36 #include "access/parallel.h"
37 #include "access/reloptions.h"
38 #include "access/sysattr.h"
39 #include "access/table.h"
40 #include "access/tableam.h"
41 #include "access/tupdesc_details.h"
42 #include "access/xact.h"
43 #include "access/xlog.h"
44 #include "catalog/catalog.h"
45 #include "catalog/indexing.h"
46 #include "catalog/namespace.h"
47 #include "catalog/partition.h"
48 #include "catalog/pg_am.h"
49 #include "catalog/pg_amproc.h"
50 #include "catalog/pg_attrdef.h"
52 #include "catalog/pg_authid.h"
53 #include "catalog/pg_constraint.h"
54 #include "catalog/pg_database.h"
55 #include "catalog/pg_namespace.h"
56 #include "catalog/pg_opclass.h"
57 #include "catalog/pg_proc.h"
58 #include "catalog/pg_publication.h"
59 #include "catalog/pg_rewrite.h"
60 #include "catalog/pg_shseclabel.h"
63 #include "catalog/pg_tablespace.h"
64 #include "catalog/pg_trigger.h"
65 #include "catalog/pg_type.h"
66 #include "catalog/schemapg.h"
67 #include "catalog/storage.h"
68 #include "commands/policy.h"
69 #include "commands/trigger.h"
70 #include "miscadmin.h"
71 #include "nodes/makefuncs.h"
72 #include "nodes/nodeFuncs.h"
73 #include "optimizer/optimizer.h"
74 #include "rewrite/rewriteDefine.h"
75 #include "rewrite/rowsecurity.h"
76 #include "storage/lmgr.h"
77 #include "storage/smgr.h"
78 #include "utils/array.h"
79 #include "utils/builtins.h"
80 #include "utils/datum.h"
81 #include "utils/fmgroids.h"
82 #include "utils/inval.h"
83 #include "utils/lsyscache.h"
84 #include "utils/memutils.h"
85 #include "utils/relmapper.h"
86 #include "utils/resowner_private.h"
87 #include "utils/snapmgr.h"
88 #include "utils/syscache.h"
89 
90 #define RELCACHE_INIT_FILEMAGIC 0x573266 /* version ID value */
91 
92 /*
93  * Whether to bother checking if relation cache memory needs to be freed
94  * eagerly. See also RelationBuildDesc() and pg_config_manual.h.
95  */
96 #if defined(RECOVER_RELATION_BUILD_MEMORY) && (RECOVER_RELATION_BUILD_MEMORY != 0)
97 #define MAYBE_RECOVER_RELATION_BUILD_MEMORY 1
98 #else
99 #define RECOVER_RELATION_BUILD_MEMORY 0
100 #ifdef DISCARD_CACHES_ENABLED
101 #define MAYBE_RECOVER_RELATION_BUILD_MEMORY 1
102 #endif
103 #endif
104 
105 /*
106  * hardcoded tuple descriptors, contents generated by genbki.pl
107  */
108 static const FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
109 static const FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
110 static const FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
111 static const FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
112 static const FormData_pg_attribute Desc_pg_database[Natts_pg_database] = {Schema_pg_database};
113 static const FormData_pg_attribute Desc_pg_authid[Natts_pg_authid] = {Schema_pg_authid};
114 static const FormData_pg_attribute Desc_pg_auth_members[Natts_pg_auth_members] = {Schema_pg_auth_members};
115 static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
116 static const FormData_pg_attribute Desc_pg_shseclabel[Natts_pg_shseclabel] = {Schema_pg_shseclabel};
117 static const FormData_pg_attribute Desc_pg_subscription[Natts_pg_subscription] = {Schema_pg_subscription};
118 
119 /*
120  * Hash tables that index the relation cache
121  *
122  * We used to index the cache by both name and OID, but now there
123  * is only an index by OID.
124  */
125 typedef struct relidcacheent
126 {
129 } RelIdCacheEnt;
130 
132 
133 /*
134  * This flag is false until we have prepared the critical relcache entries
135  * that are needed to do indexscans on the tables read by relcache building.
136  */
138 
139 /*
140  * This flag is false until we have prepared the critical relcache entries
141  * for shared catalogs (which are the tables needed for login).
142  */
144 
145 /*
146  * This counter counts relcache inval events received since backend startup
147  * (but only for rels that are actually in cache). Presently, we use it only
148  * to detect whether data about to be written by write_relcache_init_file()
149  * might already be obsolete.
150  */
151 static long relcacheInvalsReceived = 0L;
152 
153 /*
154  * eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
155  * cleanup work. This list intentionally has limited size; if it overflows,
156  * we fall back to scanning the whole hashtable. There is no value in a very
157  * large list because (1) at some point, a hash_seq_search scan is faster than
158  * retail lookups, and (2) the value of this is to reduce EOXact work for
159  * short transactions, which can't have dirtied all that many tables anyway.
160  * EOXactListAdd() does not bother to prevent duplicate list entries, so the
161  * cleanup processing must be idempotent.
162  */
163 #define MAX_EOXACT_LIST 32
165 static int eoxact_list_len = 0;
166 static bool eoxact_list_overflowed = false;
167 
/*
 * EOXactListAdd
 *		Remember rel's OID in eoxact_list[] for end-of-transaction cleanup.
 *
 * When the fixed-size list is already full, nothing is stored and
 * eoxact_list_overflowed is set instead.  No attempt is made to avoid
 * duplicate entries.
 */
#define EOXactListAdd(rel) \
	do { \
		if (eoxact_list_len >= MAX_EOXACT_LIST) \
			eoxact_list_overflowed = true; \
		else \
		{ \
			eoxact_list[eoxact_list_len] = (rel)->rd_id; \
			eoxact_list_len++; \
		} \
	} while (0)
175 
176 /*
177  * EOXactTupleDescArray stores TupleDescs that (might) need AtEOXact
178  * cleanup work. The array expands as needed; there is no hashtable because
179  * we don't need to access individual items except at EOXact.
180  */
182 static int NextEOXactTupleDescNum = 0;
183 static int EOXactTupleDescArrayLen = 0;
184 
185 /*
186  * macros to manipulate the lookup hashtable
187  */
/*
 * RelationCacheInsert
 *		Enter RELATION into RelationIdCache, keyed by its rd_id.
 *
 * If an entry for that OID already exists, it is overwritten; this is only
 * expected when replace_allowed is true (checked by Assert).  The displaced
 * descriptor is destroyed when its reference count is zero; otherwise it is
 * left alone and, outside bootstrap mode, a WARNING about the leak is issued.
 */
#define RelationCacheInsert(RELATION, replace_allowed)	\
do { \
	bool		ins_found; \
	RelIdCacheEnt *ins_entry = (RelIdCacheEnt *) \
		hash_search(RelationIdCache, \
					(void *) &((RELATION)->rd_id), \
					HASH_ENTER, &ins_found); \
	if (!ins_found) \
		ins_entry->reldesc = (RELATION); \
	else \
	{ \
		/* see comments in RelationBuildDesc and RelationBuildLocalRelation */ \
		Relation	_old_rel = ins_entry->reldesc; \
		Assert(replace_allowed); \
		ins_entry->reldesc = (RELATION); \
		if (!RelationHasReferenceCountZero(_old_rel)) \
		{ \
			if (!IsBootstrapProcessingMode()) \
				elog(WARNING, "leaking still-referenced relcache entry for \"%s\"", \
					 RelationGetRelationName(_old_rel)); \
		} \
		else \
			RelationDestroyRelation(_old_rel, false); \
	} \
} while(0)
209 
/*
 * RelationIdCacheLookup
 *		Probe RelationIdCache for the OID held in ID.
 *
 * RELATION is assigned the cached descriptor, or NULL when the hash table
 * has no entry for that OID.  ID must be an lvalue, since its address is
 * passed to hash_search.
 */
#define RelationIdCacheLookup(ID, RELATION) \
do { \
	RelIdCacheEnt *lookup_entry = (RelIdCacheEnt *) \
		hash_search(RelationIdCache, \
					(void *) &(ID), \
					HASH_FIND, NULL); \
	if (lookup_entry == NULL) \
		RELATION = NULL; \
	else \
		RELATION = lookup_entry->reldesc; \
} while(0)
221 
/*
 * RelationCacheDelete
 *		Remove RELATION's entry (keyed by rd_id) from RelationIdCache.
 *
 * A missing entry is not treated as an error; it is merely reported with
 * a WARNING.
 */
#define RelationCacheDelete(RELATION) \
do { \
	RelIdCacheEnt *removed_entry = (RelIdCacheEnt *) \
		hash_search(RelationIdCache, \
					(void *) &((RELATION)->rd_id), \
					HASH_REMOVE, NULL); \
	if (!removed_entry) \
		elog(WARNING, "failed to delete relcache entry for OID %u", \
			 (RELATION)->rd_id); \
} while(0)
232 
233 
234 /*
235  * Special cache for opclass-related information
236  *
237  * Note: only default support procs get cached, ie, those with
238  * lefttype = righttype = opcintype.
239  */
240 typedef struct opclasscacheent
241 {
242  Oid opclassoid; /* lookup key: OID of opclass */
243  bool valid; /* set true after successful fill-in */
244  StrategyNumber numSupport; /* max # of support procs (from pg_am) */
245  Oid opcfamily; /* OID of opclass's family */
246  Oid opcintype; /* OID of opclass's declared input type */
247  RegProcedure *supportProcs; /* OIDs of support procedures */
249 
250 static HTAB *OpClassCache = NULL;
251 
252 
253 /* non-export function prototypes */
254 
255 static void RelationDestroyRelation(Relation relation, bool remember_tupdesc);
256 static void RelationClearRelation(Relation relation, bool rebuild);
257 
258 static void RelationReloadIndexInfo(Relation relation);
259 static void RelationReloadNailed(Relation relation);
260 static void RelationFlushRelation(Relation relation);
262 #ifdef USE_ASSERT_CHECKING
263 static void AssertPendingSyncConsistency(Relation relation);
264 #endif
265 static void AtEOXact_cleanup(Relation relation, bool isCommit);
266 static void AtEOSubXact_cleanup(Relation relation, bool isCommit,
267  SubTransactionId mySubid, SubTransactionId parentSubid);
268 static bool load_relcache_init_file(bool shared);
269 static void write_relcache_init_file(bool shared);
270 static void write_item(const void *data, Size len, FILE *fp);
271 
272 static void formrdesc(const char *relationName, Oid relationReltype,
273  bool isshared, int natts, const FormData_pg_attribute *attrs);
274 
275 static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic);
277 static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
278 static void RelationBuildTupleDesc(Relation relation);
279 static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
280 static void RelationInitPhysicalAddr(Relation relation);
281 static void load_critical_index(Oid indexoid, Oid heapoid);
282 static TupleDesc GetPgClassDescriptor(void);
283 static TupleDesc GetPgIndexDescriptor(void);
284 static void AttrDefaultFetch(Relation relation, int ndef);
285 static int AttrDefaultCmp(const void *a, const void *b);
286 static void CheckConstraintFetch(Relation relation);
287 static int CheckConstraintCmp(const void *a, const void *b);
288 static void InitIndexAmRoutine(Relation relation);
289 static void IndexSupportInitialize(oidvector *indclass,
290  RegProcedure *indexSupport,
291  Oid *opFamily,
292  Oid *opcInType,
293  StrategyNumber maxSupportNumber,
294  AttrNumber maxAttributeNumber);
295 static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
296  StrategyNumber numSupport);
297 static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
298 static void unlink_initfile(const char *initfilename, int elevel);
299 
300 
301 /*
302  * ScanPgRelation
303  *
304  * This is used by RelationBuildDesc to find a pg_class
305  * tuple matching targetRelId. The caller must hold at least
306  * AccessShareLock on the target relid to prevent concurrent-update
307  * scenarios; it isn't guaranteed that all scans used to build the
308  * relcache entry will use the same snapshot. If, for example,
309  * an attribute were to be added after scanning pg_class and before
310  * scanning pg_attribute, relnatts wouldn't match.
311  *
312  * NB: the returned tuple has been copied into palloc'd storage
313  * and must eventually be freed with heap_freetuple.
314  */
315 static HeapTuple
316 ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic)
317 {
318  HeapTuple pg_class_tuple;
319  Relation pg_class_desc;
320  SysScanDesc pg_class_scan;
321  ScanKeyData key[1];
322  Snapshot snapshot = NULL;
323 
324  /*
325  * If something goes wrong during backend startup, we might find ourselves
326  * trying to read pg_class before we've selected a database. That ain't
327  * gonna work, so bail out with a useful error message. If this happens,
328  * it probably means a relcache entry that needs to be nailed isn't.
329  */
330  if (!OidIsValid(MyDatabaseId))
331  elog(FATAL, "cannot read pg_class without having selected a database");
332 
333  /*
334  * form a scan key
335  */
336  ScanKeyInit(&key[0],
337  Anum_pg_class_oid,
338  BTEqualStrategyNumber, F_OIDEQ,
339  ObjectIdGetDatum(targetRelId));
340 
341  /*
342  * Open pg_class and fetch a tuple. Force heap scan if we haven't yet
343  * built the critical relcache entries (this includes initdb and startup
344  * without a pg_internal.init file). The caller can also force a heap
345  * scan by setting indexOK == false.
346  */
347  pg_class_desc = table_open(RelationRelationId, AccessShareLock);
348 
349  /*
350  * The caller might need a tuple that's newer than the one the historic
351  * snapshot; currently the only case requiring to do so is looking up the
352  * relfilenode of non mapped system relations during decoding. That
353  * snapshot can't change in the midst of a relcache build, so there's no
354  * need to register the snapshot.
355  */
356  if (force_non_historic)
357  snapshot = GetNonHistoricCatalogSnapshot(RelationRelationId);
358 
359  pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
360  indexOK && criticalRelcachesBuilt,
361  snapshot,
362  1, key);
363 
364  pg_class_tuple = systable_getnext(pg_class_scan);
365 
366  /*
367  * Must copy tuple before releasing buffer.
368  */
369  if (HeapTupleIsValid(pg_class_tuple))
370  pg_class_tuple = heap_copytuple(pg_class_tuple);
371 
372  /* all done */
373  systable_endscan(pg_class_scan);
374  table_close(pg_class_desc, AccessShareLock);
375 
376  return pg_class_tuple;
377 }
378 
379 /*
380  * AllocateRelationDesc
381  *
382  * This is used to allocate memory for a new relation descriptor
383  * and initialize the rd_rel field from the given pg_class tuple.
384  */
385 static Relation
387 {
388  Relation relation;
389  MemoryContext oldcxt;
390  Form_pg_class relationForm;
391 
392  /* Relcache entries must live in CacheMemoryContext */
394 
395  /*
396  * allocate and zero space for new relation descriptor
397  */
398  relation = (Relation) palloc0(sizeof(RelationData));
399 
400  /* make sure relation is marked as having no open file yet */
401  relation->rd_smgr = NULL;
402 
403  /*
404  * Copy the relation tuple form
405  *
406  * We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The
407  * variable-length fields (relacl, reloptions) are NOT stored in the
408  * relcache --- there'd be little point in it, since we don't copy the
409  * tuple's nulls bitmap and hence wouldn't know if the values are valid.
410  * Bottom line is that relacl *cannot* be retrieved from the relcache. Get
411  * it from the syscache if you need it. The same goes for the original
412  * form of reloptions (however, we do store the parsed form of reloptions
413  * in rd_options).
414  */
415  relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
416 
417  memcpy(relationForm, relp, CLASS_TUPLE_SIZE);
418 
419  /* initialize relation tuple form */
420  relation->rd_rel = relationForm;
421 
422  /* and allocate attribute tuple form storage */
423  relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts);
424  /* which we mark as a reference-counted tupdesc */
425  relation->rd_att->tdrefcount = 1;
426 
427  MemoryContextSwitchTo(oldcxt);
428 
429  return relation;
430 }
431 
432 /*
433  * RelationParseRelOptions
434  * Convert pg_class.reloptions into pre-parsed rd_options
435  *
436  * tuple is the real pg_class tuple (not rd_rel!) for relation
437  *
438  * Note: rd_rel and (if an index) rd_indam must be valid already
439  */
440 static void
442 {
443  bytea *options;
444  amoptions_function amoptsfn;
445 
446  relation->rd_options = NULL;
447 
448  /*
449  * Look up any AM-specific parse function; fall out if relkind should not
450  * have options.
451  */
452  switch (relation->rd_rel->relkind)
453  {
454  case RELKIND_RELATION:
455  case RELKIND_TOASTVALUE:
456  case RELKIND_VIEW:
457  case RELKIND_MATVIEW:
458  case RELKIND_PARTITIONED_TABLE:
459  amoptsfn = NULL;
460  break;
461  case RELKIND_INDEX:
462  case RELKIND_PARTITIONED_INDEX:
463  amoptsfn = relation->rd_indam->amoptions;
464  break;
465  default:
466  return;
467  }
468 
469  /*
470  * Fetch reloptions from tuple; have to use a hardwired descriptor because
471  * we might not have any other for pg_class yet (consider executing this
472  * code for pg_class itself)
473  */
474  options = extractRelOptions(tuple, GetPgClassDescriptor(), amoptsfn);
475 
476  /*
477  * Copy parsed data into CacheMemoryContext. To guard against the
478  * possibility of leaks in the reloptions code, we want to do the actual
479  * parsing in the caller's memory context and copy the results into
480  * CacheMemoryContext after the fact.
481  */
482  if (options)
483  {
485  VARSIZE(options));
486  memcpy(relation->rd_options, options, VARSIZE(options));
487  pfree(options);
488  }
489 }
490 
491 /*
492  * RelationBuildTupleDesc
493  *
494  * Form the relation's tuple descriptor from information in
495  * the pg_attribute, pg_attrdef & pg_constraint system catalogs.
496  */
497 static void
499 {
500  HeapTuple pg_attribute_tuple;
501  Relation pg_attribute_desc;
502  SysScanDesc pg_attribute_scan;
503  ScanKeyData skey[2];
504  int need;
505  TupleConstr *constr;
506  AttrMissing *attrmiss = NULL;
507  int ndef = 0;
508 
509  /* fill rd_att's type ID fields (compare heap.c's AddNewRelationTuple) */
510  relation->rd_att->tdtypeid =
511  relation->rd_rel->reltype ? relation->rd_rel->reltype : RECORDOID;
512  relation->rd_att->tdtypmod = -1; /* just to be sure */
513 
515  sizeof(TupleConstr));
516  constr->has_not_null = false;
517  constr->has_generated_stored = false;
518 
519  /*
520  * Form a scan key that selects only user attributes (attnum > 0).
521  * (Eliminating system attribute rows at the index level is lots faster
522  * than fetching them.)
523  */
524  ScanKeyInit(&skey[0],
525  Anum_pg_attribute_attrelid,
526  BTEqualStrategyNumber, F_OIDEQ,
528  ScanKeyInit(&skey[1],
529  Anum_pg_attribute_attnum,
530  BTGreaterStrategyNumber, F_INT2GT,
531  Int16GetDatum(0));
532 
533  /*
534  * Open pg_attribute and begin a scan. Force heap scan if we haven't yet
535  * built the critical relcache entries (this includes initdb and startup
536  * without a pg_internal.init file).
537  */
538  pg_attribute_desc = table_open(AttributeRelationId, AccessShareLock);
539  pg_attribute_scan = systable_beginscan(pg_attribute_desc,
540  AttributeRelidNumIndexId,
542  NULL,
543  2, skey);
544 
545  /*
546  * add attribute data to relation->rd_att
547  */
548  need = RelationGetNumberOfAttributes(relation);
549 
550  while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
551  {
552  Form_pg_attribute attp;
553  int attnum;
554 
555  attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
556 
557  attnum = attp->attnum;
558  if (attnum <= 0 || attnum > RelationGetNumberOfAttributes(relation))
559  elog(ERROR, "invalid attribute number %d for relation \"%s\"",
560  attp->attnum, RelationGetRelationName(relation));
561 
562  memcpy(TupleDescAttr(relation->rd_att, attnum - 1),
563  attp,
565 
566  /* Update constraint/default info */
567  if (attp->attnotnull)
568  constr->has_not_null = true;
569  if (attp->attgenerated == ATTRIBUTE_GENERATED_STORED)
570  constr->has_generated_stored = true;
571  if (attp->atthasdef)
572  ndef++;
573 
574  /* If the column has a "missing" value, put it in the attrmiss array */
575  if (attp->atthasmissing)
576  {
577  Datum missingval;
578  bool missingNull;
579 
580  /* Do we have a missing value? */
581  missingval = heap_getattr(pg_attribute_tuple,
582  Anum_pg_attribute_attmissingval,
583  pg_attribute_desc->rd_att,
584  &missingNull);
585  if (!missingNull)
586  {
587  /* Yes, fetch from the array */
588  MemoryContext oldcxt;
589  bool is_null;
590  int one = 1;
591  Datum missval;
592 
593  if (attrmiss == NULL)
594  attrmiss = (AttrMissing *)
596  relation->rd_rel->relnatts *
597  sizeof(AttrMissing));
598 
599  missval = array_get_element(missingval,
600  1,
601  &one,
602  -1,
603  attp->attlen,
604  attp->attbyval,
605  attp->attalign,
606  &is_null);
607  Assert(!is_null);
608  if (attp->attbyval)
609  {
610  /* for copy by val just copy the datum direct */
611  attrmiss[attnum - 1].am_value = missval;
612  }
613  else
614  {
615  /* otherwise copy in the correct context */
617  attrmiss[attnum - 1].am_value = datumCopy(missval,
618  attp->attbyval,
619  attp->attlen);
620  MemoryContextSwitchTo(oldcxt);
621  }
622  attrmiss[attnum - 1].am_present = true;
623  }
624  }
625  need--;
626  if (need == 0)
627  break;
628  }
629 
630  /*
631  * end the scan and close the attribute relation
632  */
633  systable_endscan(pg_attribute_scan);
634  table_close(pg_attribute_desc, AccessShareLock);
635 
636  if (need != 0)
637  elog(ERROR, "pg_attribute catalog is missing %d attribute(s) for relation OID %u",
638  need, RelationGetRelid(relation));
639 
640  /*
641  * The attcacheoff values we read from pg_attribute should all be -1
642  * ("unknown"). Verify this if assert checking is on. They will be
643  * computed when and if needed during tuple access.
644  */
645 #ifdef USE_ASSERT_CHECKING
646  {
647  int i;
648 
649  for (i = 0; i < RelationGetNumberOfAttributes(relation); i++)
650  Assert(TupleDescAttr(relation->rd_att, i)->attcacheoff == -1);
651  }
652 #endif
653 
654  /*
655  * However, we can easily set the attcacheoff value for the first
656  * attribute: it must be zero. This eliminates the need for special cases
657  * for attnum=1 that used to exist in fastgetattr() and index_getattr().
658  */
659  if (RelationGetNumberOfAttributes(relation) > 0)
660  TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
661 
662  /*
663  * Set up constraint/default info
664  */
665  if (constr->has_not_null ||
666  constr->has_generated_stored ||
667  ndef > 0 ||
668  attrmiss ||
669  relation->rd_rel->relchecks > 0)
670  {
671  relation->rd_att->constr = constr;
672 
673  if (ndef > 0) /* DEFAULTs */
674  AttrDefaultFetch(relation, ndef);
675  else
676  constr->num_defval = 0;
677 
678  constr->missing = attrmiss;
679 
680  if (relation->rd_rel->relchecks > 0) /* CHECKs */
681  CheckConstraintFetch(relation);
682  else
683  constr->num_check = 0;
684  }
685  else
686  {
687  pfree(constr);
688  relation->rd_att->constr = NULL;
689  }
690 }
691 
692 /*
693  * RelationBuildRuleLock
694  *
695  * Form the relation's rewrite rules from information in
696  * the pg_rewrite system catalog.
697  *
698  * Note: The rule parsetrees are potentially very complex node structures.
699  * To allow these trees to be freed when the relcache entry is flushed,
700  * we make a private memory context to hold the RuleLock information for
701  * each relcache entry that has associated rules. The context is used
702  * just for rule info, not for any other subsidiary data of the relcache
703  * entry, because that keeps the update logic in RelationClearRelation()
704  * manageable. The other subsidiary data structures are simple enough
705  * to be easy to free explicitly, anyway.
706  */
707 static void
709 {
710  MemoryContext rulescxt;
711  MemoryContext oldcxt;
712  HeapTuple rewrite_tuple;
713  Relation rewrite_desc;
714  TupleDesc rewrite_tupdesc;
715  SysScanDesc rewrite_scan;
717  RuleLock *rulelock;
718  int numlocks;
719  RewriteRule **rules;
720  int maxlocks;
721 
722  /*
723  * Make the private context. Assume it'll not contain much data.
724  */
726  "relation rules",
728  relation->rd_rulescxt = rulescxt;
730  RelationGetRelationName(relation));
731 
732  /*
733  * allocate an array to hold the rewrite rules (the array is extended if
734  * necessary)
735  */
736  maxlocks = 4;
737  rules = (RewriteRule **)
738  MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks);
739  numlocks = 0;
740 
741  /*
742  * form a scan key
743  */
744  ScanKeyInit(&key,
745  Anum_pg_rewrite_ev_class,
746  BTEqualStrategyNumber, F_OIDEQ,
748 
749  /*
750  * open pg_rewrite and begin a scan
751  *
752  * Note: since we scan the rules using RewriteRelRulenameIndexId, we will
753  * be reading the rules in name order, except possibly during
754  * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn
755  * ensures that rules will be fired in name order.
756  */
757  rewrite_desc = table_open(RewriteRelationId, AccessShareLock);
758  rewrite_tupdesc = RelationGetDescr(rewrite_desc);
759  rewrite_scan = systable_beginscan(rewrite_desc,
760  RewriteRelRulenameIndexId,
761  true, NULL,
762  1, &key);
763 
764  while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
765  {
766  Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
767  bool isnull;
768  Datum rule_datum;
769  char *rule_str;
770  RewriteRule *rule;
771 
772  rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
773  sizeof(RewriteRule));
774 
775  rule->ruleId = rewrite_form->oid;
776 
777  rule->event = rewrite_form->ev_type - '0';
778  rule->enabled = rewrite_form->ev_enabled;
779  rule->isInstead = rewrite_form->is_instead;
780 
781  /*
782  * Must use heap_getattr to fetch ev_action and ev_qual. Also, the
783  * rule strings are often large enough to be toasted. To avoid
784  * leaking memory in the caller's context, do the detoasting here so
785  * we can free the detoasted version.
786  */
787  rule_datum = heap_getattr(rewrite_tuple,
788  Anum_pg_rewrite_ev_action,
789  rewrite_tupdesc,
790  &isnull);
791  Assert(!isnull);
792  rule_str = TextDatumGetCString(rule_datum);
793  oldcxt = MemoryContextSwitchTo(rulescxt);
794  rule->actions = (List *) stringToNode(rule_str);
795  MemoryContextSwitchTo(oldcxt);
796  pfree(rule_str);
797 
798  rule_datum = heap_getattr(rewrite_tuple,
799  Anum_pg_rewrite_ev_qual,
800  rewrite_tupdesc,
801  &isnull);
802  Assert(!isnull);
803  rule_str = TextDatumGetCString(rule_datum);
804  oldcxt = MemoryContextSwitchTo(rulescxt);
805  rule->qual = (Node *) stringToNode(rule_str);
806  MemoryContextSwitchTo(oldcxt);
807  pfree(rule_str);
808 
809  /*
810  * We want the rule's table references to be checked as though by the
811  * table owner, not the user referencing the rule. Therefore, scan
812  * through the rule's actions and set the checkAsUser field on all
813  * rtable entries. We have to look at the qual as well, in case it
814  * contains sublinks.
815  *
816  * The reason for doing this when the rule is loaded, rather than when
817  * it is stored, is that otherwise ALTER TABLE OWNER would have to
818  * grovel through stored rules to update checkAsUser fields. Scanning
819  * the rule tree during load is relatively cheap (compared to
820  * constructing it in the first place), so we do it here.
821  */
822  setRuleCheckAsUser((Node *) rule->actions, relation->rd_rel->relowner);
823  setRuleCheckAsUser(rule->qual, relation->rd_rel->relowner);
824 
825  if (numlocks >= maxlocks)
826  {
827  maxlocks *= 2;
828  rules = (RewriteRule **)
829  repalloc(rules, sizeof(RewriteRule *) * maxlocks);
830  }
831  rules[numlocks++] = rule;
832  }
833 
834  /*
835  * end the scan and close the pg_rewrite relation
836  */
837  systable_endscan(rewrite_scan);
838  table_close(rewrite_desc, AccessShareLock);
839 
840  /*
841  * there might not be any rules (if relhasrules is out-of-date)
842  */
843  if (numlocks == 0)
844  {
845  relation->rd_rules = NULL;
846  relation->rd_rulescxt = NULL;
847  MemoryContextDelete(rulescxt);
848  return;
849  }
850 
851  /*
852  * form a RuleLock and insert into relation
853  */
854  rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock));
855  rulelock->numLocks = numlocks;
856  rulelock->rules = rules;
857 
858  relation->rd_rules = rulelock;
859 }
860 
861 /*
862  * equalRuleLocks
863  *
864  * Determine whether two RuleLocks are equivalent
865  *
866  * Probably this should be in the rules code someplace...
867  */
868 static bool
870 {
871  int i;
872 
873  /*
874  * As of 7.3 we assume the rule ordering is repeatable, because
875  * RelationBuildRuleLock should read 'em in a consistent order. So just
876  * compare corresponding slots.
877  */
878  if (rlock1 != NULL)
879  {
880  if (rlock2 == NULL)
881  return false;
882  if (rlock1->numLocks != rlock2->numLocks)
883  return false;
884  for (i = 0; i < rlock1->numLocks; i++)
885  {
886  RewriteRule *rule1 = rlock1->rules[i];
887  RewriteRule *rule2 = rlock2->rules[i];
888 
889  if (rule1->ruleId != rule2->ruleId)
890  return false;
891  if (rule1->event != rule2->event)
892  return false;
893  if (rule1->enabled != rule2->enabled)
894  return false;
895  if (rule1->isInstead != rule2->isInstead)
896  return false;
897  if (!equal(rule1->qual, rule2->qual))
898  return false;
899  if (!equal(rule1->actions, rule2->actions))
900  return false;
901  }
902  }
903  else if (rlock2 != NULL)
904  return false;
905  return true;
906 }
907 
908 /*
909  * equalPolicy
910  *
911  * Determine whether two policies are equivalent
912  */
913 static bool
915 {
916  int i;
917  Oid *r1,
918  *r2;
919 
920  if (policy1 != NULL)
921  {
922  if (policy2 == NULL)
923  return false;
924 
925  if (policy1->polcmd != policy2->polcmd)
926  return false;
927  if (policy1->hassublinks != policy2->hassublinks)
928  return false;
929  if (strcmp(policy1->policy_name, policy2->policy_name) != 0)
930  return false;
931  if (ARR_DIMS(policy1->roles)[0] != ARR_DIMS(policy2->roles)[0])
932  return false;
933 
934  r1 = (Oid *) ARR_DATA_PTR(policy1->roles);
935  r2 = (Oid *) ARR_DATA_PTR(policy2->roles);
936 
937  for (i = 0; i < ARR_DIMS(policy1->roles)[0]; i++)
938  {
939  if (r1[i] != r2[i])
940  return false;
941  }
942 
943  if (!equal(policy1->qual, policy2->qual))
944  return false;
945  if (!equal(policy1->with_check_qual, policy2->with_check_qual))
946  return false;
947  }
948  else if (policy2 != NULL)
949  return false;
950 
951  return true;
952 }
953 
954 /*
955  * equalRSDesc
956  *
957  * Determine whether two RowSecurityDesc's are equivalent
958  */
959 static bool
961 {
962  ListCell *lc,
963  *rc;
964 
965  if (rsdesc1 == NULL && rsdesc2 == NULL)
966  return true;
967 
968  if ((rsdesc1 != NULL && rsdesc2 == NULL) ||
969  (rsdesc1 == NULL && rsdesc2 != NULL))
970  return false;
971 
972  if (list_length(rsdesc1->policies) != list_length(rsdesc2->policies))
973  return false;
974 
975  /* RelationBuildRowSecurity should build policies in order */
976  forboth(lc, rsdesc1->policies, rc, rsdesc2->policies)
977  {
980 
981  if (!equalPolicy(l, r))
982  return false;
983  }
984 
985  return true;
986 }
987 
988 /*
989  * RelationBuildDesc
990  *
991  * Build a relation descriptor. The caller must hold at least
992  * AccessShareLock on the target relid.
993  *
994  * The new descriptor is inserted into the hash table if insertIt is true.
995  *
996  * Returns NULL if no pg_class row could be found for the given relid
997  * (suggesting we are trying to access a just-deleted relation).
998  * Any other error is reported via elog.
999  */
1000 static Relation
1001 RelationBuildDesc(Oid targetRelId, bool insertIt)
1002 {
1003  Relation relation;
1004  Oid relid;
1005  HeapTuple pg_class_tuple;
1006  Form_pg_class relp;
1007 
1008  /*
1009  * This function and its subroutines can allocate a good deal of transient
1010  * data in CurrentMemoryContext. Traditionally we've just leaked that
1011  * data, reasoning that the caller's context is at worst of transaction
1012  * scope, and relcache loads shouldn't happen so often that it's essential
1013  * to recover transient data before end of statement/transaction. However
1014  * that's definitely not true when debug_discard_caches is active, and
1015  * perhaps it's not true in other cases.
1016  *
1017  * When debug_discard_caches is active or when forced to by
1018  * RECOVER_RELATION_BUILD_MEMORY=1, arrange to allocate the junk in a
1019  * temporary context that we'll free before returning. Make it a child of
1020  * caller's context so that it will get cleaned up appropriately if we
1021  * error out partway through.
1022  */
1023 #ifdef MAYBE_RECOVER_RELATION_BUILD_MEMORY
1024  MemoryContext tmpcxt = NULL;
1025  MemoryContext oldcxt = NULL;
1026 
1028  {
1030  "RelationBuildDesc workspace",
1032  oldcxt = MemoryContextSwitchTo(tmpcxt);
1033  }
1034 #endif
1035 
1036  /*
1037  * find the tuple in pg_class corresponding to the given relation id
1038  */
1039  pg_class_tuple = ScanPgRelation(targetRelId, true, false);
1040 
1041  /*
1042  * if no such tuple exists, return NULL
1043  */
1044  if (!HeapTupleIsValid(pg_class_tuple))
1045  {
1046 #ifdef MAYBE_RECOVER_RELATION_BUILD_MEMORY
1047  if (tmpcxt)
1048  {
1049  /* Return to caller's context, and blow away the temporary context */
1050  MemoryContextSwitchTo(oldcxt);
1051  MemoryContextDelete(tmpcxt);
1052  }
1053 #endif
1054  return NULL;
1055  }
1056 
1057  /*
1058  * get information from the pg_class_tuple
1059  */
1060  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1061  relid = relp->oid;
1062  Assert(relid == targetRelId);
1063 
1064  /*
1065  * allocate storage for the relation descriptor, and copy pg_class_tuple
1066  * to relation->rd_rel.
1067  */
1068  relation = AllocateRelationDesc(relp);
1069 
1070  /*
1071  * initialize the relation's relation id (relation->rd_id)
1072  */
1073  RelationGetRelid(relation) = relid;
1074 
1075  /*
1076  * Normal relations are not nailed into the cache. Since we don't flush
1077  * new relations, it won't be new. It could be temp though.
1078  */
1079  relation->rd_refcnt = 0;
1080  relation->rd_isnailed = false;
1085  switch (relation->rd_rel->relpersistence)
1086  {
1087  case RELPERSISTENCE_UNLOGGED:
1088  case RELPERSISTENCE_PERMANENT:
1089  relation->rd_backend = InvalidBackendId;
1090  relation->rd_islocaltemp = false;
1091  break;
1092  case RELPERSISTENCE_TEMP:
1093  if (isTempOrTempToastNamespace(relation->rd_rel->relnamespace))
1094  {
1095  relation->rd_backend = BackendIdForTempRelations();
1096  relation->rd_islocaltemp = true;
1097  }
1098  else
1099  {
1100  /*
1101  * If it's a temp table, but not one of ours, we have to use
1102  * the slow, grotty method to figure out the owning backend.
1103  *
1104  * Note: it's possible that rd_backend gets set to MyBackendId
1105  * here, in case we are looking at a pg_class entry left over
1106  * from a crashed backend that coincidentally had the same
1107  * BackendId we're using. We should *not* consider such a
1108  * table to be "ours"; this is why we need the separate
1109  * rd_islocaltemp flag. The pg_class entry will get flushed
1110  * if/when we clean out the corresponding temp table namespace
1111  * in preparation for using it.
1112  */
1113  relation->rd_backend =
1114  GetTempNamespaceBackendId(relation->rd_rel->relnamespace);
1115  Assert(relation->rd_backend != InvalidBackendId);
1116  relation->rd_islocaltemp = false;
1117  }
1118  break;
1119  default:
1120  elog(ERROR, "invalid relpersistence: %c",
1121  relation->rd_rel->relpersistence);
1122  break;
1123  }
1124 
1125  /*
1126  * initialize the tuple descriptor (relation->rd_att).
1127  */
1128  RelationBuildTupleDesc(relation);
1129 
1130  /*
1131  * Fetch rules and triggers that affect this relation
1132  */
1133  if (relation->rd_rel->relhasrules)
1134  RelationBuildRuleLock(relation);
1135  else
1136  {
1137  relation->rd_rules = NULL;
1138  relation->rd_rulescxt = NULL;
1139  }
1140 
1141  if (relation->rd_rel->relhastriggers)
1142  RelationBuildTriggers(relation);
1143  else
1144  relation->trigdesc = NULL;
1145 
1146  if (relation->rd_rel->relrowsecurity)
1147  RelationBuildRowSecurity(relation);
1148  else
1149  relation->rd_rsdesc = NULL;
1150 
1151  /* foreign key data is not loaded till asked for */
1152  relation->rd_fkeylist = NIL;
1153  relation->rd_fkeyvalid = false;
1154 
1155  /* partitioning data is not loaded till asked for */
1156  relation->rd_partkey = NULL;
1157  relation->rd_partkeycxt = NULL;
1158  relation->rd_partdesc = NULL;
1159  relation->rd_partdesc_nodetached = NULL;
1161  relation->rd_pdcxt = NULL;
1162  relation->rd_pddcxt = NULL;
1163  relation->rd_partcheck = NIL;
1164  relation->rd_partcheckvalid = false;
1165  relation->rd_partcheckcxt = NULL;
1166 
1167  /*
1168  * initialize access method information
1169  */
1170  switch (relation->rd_rel->relkind)
1171  {
1172  case RELKIND_INDEX:
1173  case RELKIND_PARTITIONED_INDEX:
1174  Assert(relation->rd_rel->relam != InvalidOid);
1175  RelationInitIndexAccessInfo(relation);
1176  break;
1177  case RELKIND_RELATION:
1178  case RELKIND_TOASTVALUE:
1179  case RELKIND_MATVIEW:
1180  Assert(relation->rd_rel->relam != InvalidOid);
1182  break;
1183  case RELKIND_SEQUENCE:
1184  Assert(relation->rd_rel->relam == InvalidOid);
1186  break;
1187  case RELKIND_VIEW:
1188  case RELKIND_COMPOSITE_TYPE:
1189  case RELKIND_FOREIGN_TABLE:
1190  case RELKIND_PARTITIONED_TABLE:
1191  Assert(relation->rd_rel->relam == InvalidOid);
1192  break;
1193  }
1194 
1195  /* extract reloptions if any */
1196  RelationParseRelOptions(relation, pg_class_tuple);
1197 
1198  /*
1199  * initialize the relation lock manager information
1200  */
1201  RelationInitLockInfo(relation); /* see lmgr.c */
1202 
1203  /*
1204  * initialize physical addressing information for the relation
1205  */
1206  RelationInitPhysicalAddr(relation);
1207 
1208  /* make sure relation is marked as having no open file yet */
1209  relation->rd_smgr = NULL;
1210 
1211  /*
1212  * now we can free the memory allocated for pg_class_tuple
1213  */
1214  heap_freetuple(pg_class_tuple);
1215 
1216  /*
1217  * Insert newly created relation into relcache hash table, if requested.
1218  *
1219  * There is one scenario in which we might find a hashtable entry already
1220  * present, even though our caller failed to find it: if the relation is a
1221  * system catalog or index that's used during relcache load, we might have
1222  * recursively created the same relcache entry during the preceding steps.
1223  * So allow RelationCacheInsert to delete any already-present relcache
1224  * entry for the same OID. The already-present entry should have refcount
1225  * zero (else somebody forgot to close it); in the event that it doesn't,
1226  * we'll elog a WARNING and leak the already-present entry.
1227  */
1228  if (insertIt)
1229  RelationCacheInsert(relation, true);
1230 
1231  /* It's fully valid */
1232  relation->rd_isvalid = true;
1233 
1234 #ifdef MAYBE_RECOVER_RELATION_BUILD_MEMORY
1235  if (tmpcxt)
1236  {
1237  /* Return to caller's context, and blow away the temporary context */
1238  MemoryContextSwitchTo(oldcxt);
1239  MemoryContextDelete(tmpcxt);
1240  }
1241 #endif
1242 
1243  return relation;
1244 }
1245 
1246 /*
1247  * Initialize the physical addressing info (RelFileNode) for a relcache entry
1248  *
1249  * Note: at the physical level, relations in the pg_global tablespace must
1250  * be treated as shared, even if relisshared isn't set. Hence we do not
1251  * look at relisshared here.
1252  */
1253 static void
1255 {
1256  Oid oldnode = relation->rd_node.relNode;
1257 
1258  /* these relations kinds never have storage */
1259  if (!RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
1260  return;
1261 
1262  if (relation->rd_rel->reltablespace)
1263  relation->rd_node.spcNode = relation->rd_rel->reltablespace;
1264  else
1265  relation->rd_node.spcNode = MyDatabaseTableSpace;
1266  if (relation->rd_node.spcNode == GLOBALTABLESPACE_OID)
1267  relation->rd_node.dbNode = InvalidOid;
1268  else
1269  relation->rd_node.dbNode = MyDatabaseId;
1270 
1271  if (relation->rd_rel->relfilenode)
1272  {
1273  /*
1274  * Even if we are using a decoding snapshot that doesn't represent the
1275  * current state of the catalog we need to make sure the filenode
1276  * points to the current file since the older file will be gone (or
1277  * truncated). The new file will still contain older rows so lookups
1278  * in them will work correctly. This wouldn't work correctly if
1279  * rewrites were allowed to change the schema in an incompatible way,
1280  * but those are prevented both on catalog tables and on user tables
1281  * declared as additional catalog tables.
1282  */
1285  && IsTransactionState())
1286  {
1287  HeapTuple phys_tuple;
1288  Form_pg_class physrel;
1289 
1290  phys_tuple = ScanPgRelation(RelationGetRelid(relation),
1291  RelationGetRelid(relation) != ClassOidIndexId,
1292  true);
1293  if (!HeapTupleIsValid(phys_tuple))
1294  elog(ERROR, "could not find pg_class entry for %u",
1295  RelationGetRelid(relation));
1296  physrel = (Form_pg_class) GETSTRUCT(phys_tuple);
1297 
1298  relation->rd_rel->reltablespace = physrel->reltablespace;
1299  relation->rd_rel->relfilenode = physrel->relfilenode;
1300  heap_freetuple(phys_tuple);
1301  }
1302 
1303  relation->rd_node.relNode = relation->rd_rel->relfilenode;
1304  }
1305  else
1306  {
1307  /* Consult the relation mapper */
1308  relation->rd_node.relNode =
1309  RelationMapOidToFilenode(relation->rd_id,
1310  relation->rd_rel->relisshared);
1311  if (!OidIsValid(relation->rd_node.relNode))
1312  elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1313  RelationGetRelationName(relation), relation->rd_id);
1314  }
1315 
1316  /*
1317  * For RelationNeedsWAL() to answer correctly on parallel workers, restore
1318  * rd_firstRelfilenodeSubid. No subtransactions start or end while in
1319  * parallel mode, so the specific SubTransactionId does not matter.
1320  */
1321  if (IsParallelWorker() && oldnode != relation->rd_node.relNode)
1322  {
1323  if (RelFileNodeSkippingWAL(relation->rd_node))
1325  else
1327  }
1328 }
1329 
1330 /*
1331  * Fill in the IndexAmRoutine for an index relation.
1332  *
1333  * relation's rd_amhandler and rd_indexcxt must be valid already.
1334  */
1335 static void
1337 {
1338  IndexAmRoutine *cached,
1339  *tmp;
1340 
1341  /*
1342  * Call the amhandler in current, short-lived memory context, just in case
1343  * it leaks anything (it probably won't, but let's be paranoid).
1344  */
1345  tmp = GetIndexAmRoutine(relation->rd_amhandler);
1346 
1347  /* OK, now transfer the data into relation's rd_indexcxt. */
1348  cached = (IndexAmRoutine *) MemoryContextAlloc(relation->rd_indexcxt,
1349  sizeof(IndexAmRoutine));
1350  memcpy(cached, tmp, sizeof(IndexAmRoutine));
1351  relation->rd_indam = cached;
1352 
1353  pfree(tmp);
1354 }
1355 
1356 /*
1357  * Initialize index-access-method support data for an index relation
1358  */
1359 void
1361 {
1362  HeapTuple tuple;
1363  Form_pg_am aform;
1364  Datum indcollDatum;
1365  Datum indclassDatum;
1366  Datum indoptionDatum;
1367  bool isnull;
1368  oidvector *indcoll;
1369  oidvector *indclass;
1370  int2vector *indoption;
1371  MemoryContext indexcxt;
1372  MemoryContext oldcontext;
1373  int indnatts;
1374  int indnkeyatts;
1375  uint16 amsupport;
1376 
1377  /*
1378  * Make a copy of the pg_index entry for the index. Since pg_index
1379  * contains variable-length and possibly-null fields, we have to do this
1380  * honestly rather than just treating it as a Form_pg_index struct.
1381  */
1382  tuple = SearchSysCache1(INDEXRELID,
1383  ObjectIdGetDatum(RelationGetRelid(relation)));
1384  if (!HeapTupleIsValid(tuple))
1385  elog(ERROR, "cache lookup failed for index %u",
1386  RelationGetRelid(relation));
1388  relation->rd_indextuple = heap_copytuple(tuple);
1389  relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
1390  MemoryContextSwitchTo(oldcontext);
1391  ReleaseSysCache(tuple);
1392 
1393  /*
1394  * Look up the index's access method, save the OID of its handler function
1395  */
1396  tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(relation->rd_rel->relam));
1397  if (!HeapTupleIsValid(tuple))
1398  elog(ERROR, "cache lookup failed for access method %u",
1399  relation->rd_rel->relam);
1400  aform = (Form_pg_am) GETSTRUCT(tuple);
1401  relation->rd_amhandler = aform->amhandler;
1402  ReleaseSysCache(tuple);
1403 
1404  indnatts = RelationGetNumberOfAttributes(relation);
1405  if (indnatts != IndexRelationGetNumberOfAttributes(relation))
1406  elog(ERROR, "relnatts disagrees with indnatts for index %u",
1407  RelationGetRelid(relation));
1408  indnkeyatts = IndexRelationGetNumberOfKeyAttributes(relation);
1409 
1410  /*
1411  * Make the private context to hold index access info. The reason we need
1412  * a context, and not just a couple of pallocs, is so that we won't leak
1413  * any subsidiary info attached to fmgr lookup records.
1414  */
1416  "index info",
1418  relation->rd_indexcxt = indexcxt;
1420  RelationGetRelationName(relation));
1421 
1422  /*
1423  * Now we can fetch the index AM's API struct
1424  */
1425  InitIndexAmRoutine(relation);
1426 
1427  /*
1428  * Allocate arrays to hold data. Opclasses are not used for included
1429  * columns, so allocate them for indnkeyatts only.
1430  */
1431  relation->rd_opfamily = (Oid *)
1432  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1433  relation->rd_opcintype = (Oid *)
1434  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1435 
1436  amsupport = relation->rd_indam->amsupport;
1437  if (amsupport > 0)
1438  {
1439  int nsupport = indnatts * amsupport;
1440 
1441  relation->rd_support = (RegProcedure *)
1442  MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
1443  relation->rd_supportinfo = (FmgrInfo *)
1444  MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
1445  }
1446  else
1447  {
1448  relation->rd_support = NULL;
1449  relation->rd_supportinfo = NULL;
1450  }
1451 
1452  relation->rd_indcollation = (Oid *)
1453  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1454 
1455  relation->rd_indoption = (int16 *)
1456  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(int16));
1457 
1458  /*
1459  * indcollation cannot be referenced directly through the C struct,
1460  * because it comes after the variable-width indkey field. Must extract
1461  * the datum the hard way...
1462  */
1463  indcollDatum = fastgetattr(relation->rd_indextuple,
1464  Anum_pg_index_indcollation,
1466  &isnull);
1467  Assert(!isnull);
1468  indcoll = (oidvector *) DatumGetPointer(indcollDatum);
1469  memcpy(relation->rd_indcollation, indcoll->values, indnkeyatts * sizeof(Oid));
1470 
1471  /*
1472  * indclass cannot be referenced directly through the C struct, because it
1473  * comes after the variable-width indkey field. Must extract the datum
1474  * the hard way...
1475  */
1476  indclassDatum = fastgetattr(relation->rd_indextuple,
1477  Anum_pg_index_indclass,
1479  &isnull);
1480  Assert(!isnull);
1481  indclass = (oidvector *) DatumGetPointer(indclassDatum);
1482 
1483  /*
1484  * Fill the support procedure OID array, as well as the info about
1485  * opfamilies and opclass input types. (aminfo and supportinfo are left
1486  * as zeroes, and are filled on-the-fly when used)
1487  */
1488  IndexSupportInitialize(indclass, relation->rd_support,
1489  relation->rd_opfamily, relation->rd_opcintype,
1490  amsupport, indnkeyatts);
1491 
1492  /*
1493  * Similarly extract indoption and copy it to the cache entry
1494  */
1495  indoptionDatum = fastgetattr(relation->rd_indextuple,
1496  Anum_pg_index_indoption,
1498  &isnull);
1499  Assert(!isnull);
1500  indoption = (int2vector *) DatumGetPointer(indoptionDatum);
1501  memcpy(relation->rd_indoption, indoption->values, indnkeyatts * sizeof(int16));
1502 
1503  (void) RelationGetIndexAttOptions(relation, false);
1504 
1505  /*
1506  * expressions, predicate, exclusion caches will be filled later
1507  */
1508  relation->rd_indexprs = NIL;
1509  relation->rd_indpred = NIL;
1510  relation->rd_exclops = NULL;
1511  relation->rd_exclprocs = NULL;
1512  relation->rd_exclstrats = NULL;
1513  relation->rd_amcache = NULL;
1514 }
1515 
1516 /*
1517  * IndexSupportInitialize
1518  * Initializes an index's cached opclass information,
1519  * given the index's pg_index.indclass entry.
1520  *
1521  * Data is returned into *indexSupport, *opFamily, and *opcInType,
1522  * which are arrays allocated by the caller.
1523  *
1524  * The caller also passes maxSupportNumber and maxAttributeNumber, since these
1525  * indicate the size of the arrays it has allocated --- but in practice these
1526  * numbers must always match those obtainable from the system catalog entries
1527  * for the index and access method.
1528  */
1529 static void
1531  RegProcedure *indexSupport,
1532  Oid *opFamily,
1533  Oid *opcInType,
1534  StrategyNumber maxSupportNumber,
1535  AttrNumber maxAttributeNumber)
1536 {
1537  int attIndex;
1538 
1539  for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++)
1540  {
1541  OpClassCacheEnt *opcentry;
1542 
1543  if (!OidIsValid(indclass->values[attIndex]))
1544  elog(ERROR, "bogus pg_index tuple");
1545 
1546  /* look up the info for this opclass, using a cache */
1547  opcentry = LookupOpclassInfo(indclass->values[attIndex],
1548  maxSupportNumber);
1549 
1550  /* copy cached data into relcache entry */
1551  opFamily[attIndex] = opcentry->opcfamily;
1552  opcInType[attIndex] = opcentry->opcintype;
1553  if (maxSupportNumber > 0)
1554  memcpy(&indexSupport[attIndex * maxSupportNumber],
1555  opcentry->supportProcs,
1556  maxSupportNumber * sizeof(RegProcedure));
1557  }
1558 }
1559 
1560 /*
1561  * LookupOpclassInfo
1562  *
1563  * This routine maintains a per-opclass cache of the information needed
1564  * by IndexSupportInitialize(). This is more efficient than relying on
1565  * the catalog cache, because we can load all the info about a particular
1566  * opclass in a single indexscan of pg_amproc.
1567  *
1568  * The information from pg_am about expected range of support function
1569  * numbers is passed in, rather than being looked up, mainly because the
1570  * caller will have it already.
1571  *
1572  * Note there is no provision for flushing the cache. This is OK at the
1573  * moment because there is no way to ALTER any interesting properties of an
1574  * existing opclass --- all you can do is drop it, which will result in
1575  * a useless but harmless dead entry in the cache. To support altering
1576  * opclass membership (not the same as opfamily membership!), we'd need to
1577  * be able to flush this cache as well as the contents of relcache entries
1578  * for indexes.
1579  */
1580 static OpClassCacheEnt *
1581 LookupOpclassInfo(Oid operatorClassOid,
1582  StrategyNumber numSupport)
1583 {
1584  OpClassCacheEnt *opcentry;
1585  bool found;
1586  Relation rel;
1587  SysScanDesc scan;
1588  ScanKeyData skey[3];
1589  HeapTuple htup;
1590  bool indexOK;
1591 
1592  if (OpClassCache == NULL)
1593  {
1594  /* First time through: initialize the opclass cache */
1595  HASHCTL ctl;
1596 
1597  /* Also make sure CacheMemoryContext exists */
1598  if (!CacheMemoryContext)
1600 
1601  ctl.keysize = sizeof(Oid);
1602  ctl.entrysize = sizeof(OpClassCacheEnt);
1603  OpClassCache = hash_create("Operator class cache", 64,
1604  &ctl, HASH_ELEM | HASH_BLOBS);
1605  }
1606 
1607  opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
1608  (void *) &operatorClassOid,
1609  HASH_ENTER, &found);
1610 
1611  if (!found)
1612  {
1613  /* Initialize new entry */
1614  opcentry->valid = false; /* until known OK */
1615  opcentry->numSupport = numSupport;
1616  opcentry->supportProcs = NULL; /* filled below */
1617  }
1618  else
1619  {
1620  Assert(numSupport == opcentry->numSupport);
1621  }
1622 
1623  /*
1624  * When aggressively testing cache-flush hazards, we disable the operator
1625  * class cache and force reloading of the info on each call. This models
1626  * no real-world behavior, since the cache entries are never invalidated
1627  * otherwise. However it can be helpful for detecting bugs in the cache
1628  * loading logic itself, such as reliance on a non-nailed index. Given
1629  * the limited use-case and the fact that this adds a great deal of
1630  * expense, we enable it only for high values of debug_discard_caches.
1631  */
1632 #ifdef DISCARD_CACHES_ENABLED
1633  if (debug_discard_caches > 2)
1634  opcentry->valid = false;
1635 #endif
1636 
1637  if (opcentry->valid)
1638  return opcentry;
1639 
1640  /*
1641  * Need to fill in new entry. First allocate space, unless we already did
1642  * so in some previous attempt.
1643  */
1644  if (opcentry->supportProcs == NULL && numSupport > 0)
1645  opcentry->supportProcs = (RegProcedure *)
1647  numSupport * sizeof(RegProcedure));
1648 
1649  /*
1650  * To avoid infinite recursion during startup, force heap scans if we're
1651  * looking up info for the opclasses used by the indexes we would like to
1652  * reference here.
1653  */
1654  indexOK = criticalRelcachesBuilt ||
1655  (operatorClassOid != OID_BTREE_OPS_OID &&
1656  operatorClassOid != INT2_BTREE_OPS_OID);
1657 
1658  /*
1659  * We have to fetch the pg_opclass row to determine its opfamily and
1660  * opcintype, which are needed to look up related operators and functions.
1661  * It'd be convenient to use the syscache here, but that probably doesn't
1662  * work while bootstrapping.
1663  */
1664  ScanKeyInit(&skey[0],
1665  Anum_pg_opclass_oid,
1666  BTEqualStrategyNumber, F_OIDEQ,
1667  ObjectIdGetDatum(operatorClassOid));
1668  rel = table_open(OperatorClassRelationId, AccessShareLock);
1669  scan = systable_beginscan(rel, OpclassOidIndexId, indexOK,
1670  NULL, 1, skey);
1671 
1672  if (HeapTupleIsValid(htup = systable_getnext(scan)))
1673  {
1674  Form_pg_opclass opclassform = (Form_pg_opclass) GETSTRUCT(htup);
1675 
1676  opcentry->opcfamily = opclassform->opcfamily;
1677  opcentry->opcintype = opclassform->opcintype;
1678  }
1679  else
1680  elog(ERROR, "could not find tuple for opclass %u", operatorClassOid);
1681 
1682  systable_endscan(scan);
1684 
1685  /*
1686  * Scan pg_amproc to obtain support procs for the opclass. We only fetch
1687  * the default ones (those with lefttype = righttype = opcintype).
1688  */
1689  if (numSupport > 0)
1690  {
1691  ScanKeyInit(&skey[0],
1692  Anum_pg_amproc_amprocfamily,
1693  BTEqualStrategyNumber, F_OIDEQ,
1694  ObjectIdGetDatum(opcentry->opcfamily));
1695  ScanKeyInit(&skey[1],
1696  Anum_pg_amproc_amproclefttype,
1697  BTEqualStrategyNumber, F_OIDEQ,
1698  ObjectIdGetDatum(opcentry->opcintype));
1699  ScanKeyInit(&skey[2],
1700  Anum_pg_amproc_amprocrighttype,
1701  BTEqualStrategyNumber, F_OIDEQ,
1702  ObjectIdGetDatum(opcentry->opcintype));
1703  rel = table_open(AccessMethodProcedureRelationId, AccessShareLock);
1704  scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK,
1705  NULL, 3, skey);
1706 
1707  while (HeapTupleIsValid(htup = systable_getnext(scan)))
1708  {
1709  Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);
1710 
1711  if (amprocform->amprocnum <= 0 ||
1712  (StrategyNumber) amprocform->amprocnum > numSupport)
1713  elog(ERROR, "invalid amproc number %d for opclass %u",
1714  amprocform->amprocnum, operatorClassOid);
1715 
1716  opcentry->supportProcs[amprocform->amprocnum - 1] =
1717  amprocform->amproc;
1718  }
1719 
1720  systable_endscan(scan);
1722  }
1723 
1724  opcentry->valid = true;
1725  return opcentry;
1726 }
1727 
1728 /*
1729  * Fill in the TableAmRoutine for a relation
1730  *
1731  * relation's rd_amhandler must be valid already.
1732  */
1733 static void
1735 {
1736  relation->rd_tableam = GetTableAmRoutine(relation->rd_amhandler);
1737 }
1738 
1739 /*
1740  * Initialize table access method support for a table like relation
1741  */
1742 void
1744 {
1745  HeapTuple tuple;
1746  Form_pg_am aform;
1747 
1748  if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
1749  {
1750  /*
1751  * Sequences are currently accessed like heap tables, but it doesn't
1752  * seem prudent to show that in the catalog. So just overwrite it
1753  * here.
1754  */
1755  relation->rd_amhandler = F_HEAP_TABLEAM_HANDLER;
1756  }
1757  else if (IsCatalogRelation(relation))
1758  {
1759  /*
1760  * Avoid doing a syscache lookup for catalog tables.
1761  */
1762  Assert(relation->rd_rel->relam == HEAP_TABLE_AM_OID);
1763  relation->rd_amhandler = F_HEAP_TABLEAM_HANDLER;
1764  }
1765  else
1766  {
1767  /*
1768  * Look up the table access method, save the OID of its handler
1769  * function.
1770  */
1771  Assert(relation->rd_rel->relam != InvalidOid);
1772  tuple = SearchSysCache1(AMOID,
1773  ObjectIdGetDatum(relation->rd_rel->relam));
1774  if (!HeapTupleIsValid(tuple))
1775  elog(ERROR, "cache lookup failed for access method %u",
1776  relation->rd_rel->relam);
1777  aform = (Form_pg_am) GETSTRUCT(tuple);
1778  relation->rd_amhandler = aform->amhandler;
1779  ReleaseSysCache(tuple);
1780  }
1781 
1782  /*
1783  * Now we can fetch the table AM's API struct
1784  */
1785  InitTableAmRoutine(relation);
1786 }
1787 
1788 /*
1789  * formrdesc
1790  *
1791  * This is a special cut-down version of RelationBuildDesc(),
1792  * used while initializing the relcache.
1793  * The relation descriptor is built just from the supplied parameters,
1794  * without actually looking at any system table entries. We cheat
1795  * quite a lot since we only need to work for a few basic system
1796  * catalogs.
1797  *
1798  * The catalogs this is used for can't have constraints (except attnotnull),
1799  * default values, rules, or triggers, since we don't cope with any of that.
1800  * (Well, actually, this only matters for properties that need to be valid
1801  * during bootstrap or before RelationCacheInitializePhase3 runs, and none of
1802  * these properties matter then...)
1803  *
1804  * NOTE: we assume we are already switched into CacheMemoryContext.
1805  */
1806 static void
1807 formrdesc(const char *relationName, Oid relationReltype,
1808  bool isshared,
1809  int natts, const FormData_pg_attribute *attrs)
1810 {
1811  Relation relation;
1812  int i;
1813  bool has_not_null;
1814 
1815  /*
1816  * allocate new relation desc, clear all fields of reldesc
1817  */
1818  relation = (Relation) palloc0(sizeof(RelationData));
1819 
1820  /* make sure relation is marked as having no open file yet */
1821  relation->rd_smgr = NULL;
1822 
1823  /*
1824  * initialize reference count: 1 because it is nailed in cache
1825  */
1826  relation->rd_refcnt = 1;
1827 
1828  /*
1829  * all entries built with this routine are nailed-in-cache; none are for
1830  * new or temp relations.
1831  */
1832  relation->rd_isnailed = true;
1837  relation->rd_backend = InvalidBackendId;
1838  relation->rd_islocaltemp = false;
1839 
1840  /*
1841  * initialize relation tuple form
1842  *
1843  * The data we insert here is pretty incomplete/bogus, but it'll serve to
1844  * get us launched. RelationCacheInitializePhase3() will read the real
1845  * data from pg_class and replace what we've done here. Note in
1846  * particular that relowner is left as zero; this cues
1847  * RelationCacheInitializePhase3 that the real data isn't there yet.
1848  */
1850 
1851  namestrcpy(&relation->rd_rel->relname, relationName);
1852  relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1853  relation->rd_rel->reltype = relationReltype;
1854 
1855  /*
1856  * It's important to distinguish between shared and non-shared relations,
1857  * even at bootstrap time, to make sure we know where they are stored.
1858  */
1859  relation->rd_rel->relisshared = isshared;
1860  if (isshared)
1861  relation->rd_rel->reltablespace = GLOBALTABLESPACE_OID;
1862 
1863  /* formrdesc is used only for permanent relations */
1864  relation->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
1865 
1866  /* ... and they're always populated, too */
1867  relation->rd_rel->relispopulated = true;
1868 
1869  relation->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
1870  relation->rd_rel->relpages = 0;
1871  relation->rd_rel->reltuples = -1;
1872  relation->rd_rel->relallvisible = 0;
1873  relation->rd_rel->relkind = RELKIND_RELATION;
1874  relation->rd_rel->relnatts = (int16) natts;
1875  relation->rd_rel->relam = HEAP_TABLE_AM_OID;
1876 
1877  /*
1878  * initialize attribute tuple form
1879  *
1880  * Unlike the case with the relation tuple, this data had better be right
1881  * because it will never be replaced. The data comes from
1882  * src/include/catalog/ headers via genbki.pl.
1883  */
1884  relation->rd_att = CreateTemplateTupleDesc(natts);
1885  relation->rd_att->tdrefcount = 1; /* mark as refcounted */
1886 
1887  relation->rd_att->tdtypeid = relationReltype;
1888  relation->rd_att->tdtypmod = -1; /* just to be sure */
1889 
1890  /*
1891  * initialize tuple desc info
1892  */
1893  has_not_null = false;
1894  for (i = 0; i < natts; i++)
1895  {
1896  memcpy(TupleDescAttr(relation->rd_att, i),
1897  &attrs[i],
1899  has_not_null |= attrs[i].attnotnull;
1900  /* make sure attcacheoff is valid */
1901  TupleDescAttr(relation->rd_att, i)->attcacheoff = -1;
1902  }
1903 
1904  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
1905  TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
1906 
1907  /* mark not-null status */
1908  if (has_not_null)
1909  {
1910  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
1911 
1912  constr->has_not_null = true;
1913  relation->rd_att->constr = constr;
1914  }
1915 
1916  /*
1917  * initialize relation id from info in att array (my, this is ugly)
1918  */
1919  RelationGetRelid(relation) = TupleDescAttr(relation->rd_att, 0)->attrelid;
1920 
1921  /*
1922  * All relations made with formrdesc are mapped. This is necessarily so
1923  * because there is no other way to know what filenode they currently
1924  * have. In bootstrap mode, add them to the initial relation mapper data,
1925  * specifying that the initial filenode is the same as the OID.
1926  */
1927  relation->rd_rel->relfilenode = InvalidOid;
1930  RelationGetRelid(relation),
1931  isshared, true);
1932 
1933  /*
1934  * initialize the relation lock manager information
1935  */
1936  RelationInitLockInfo(relation); /* see lmgr.c */
1937 
1938  /*
1939  * initialize physical addressing information for the relation
1940  */
1941  RelationInitPhysicalAddr(relation);
1942 
1943  /*
1944  * initialize the table am handler
1945  */
1946  relation->rd_rel->relam = HEAP_TABLE_AM_OID;
1947  relation->rd_tableam = GetHeapamTableAmRoutine();
1948 
1949  /*
1950  * initialize the rel-has-index flag, using hardwired knowledge
1951  */
1953  {
1954  /* In bootstrap mode, we have no indexes */
1955  relation->rd_rel->relhasindex = false;
1956  }
1957  else
1958  {
1959  /* Otherwise, all the rels formrdesc is used for have indexes */
1960  relation->rd_rel->relhasindex = true;
1961  }
1962 
1963  /*
1964  * add new reldesc to relcache
1965  */
1966  RelationCacheInsert(relation, false);
1967 
1968  /* It's fully valid */
1969  relation->rd_isvalid = true;
1970 }
1971 
1972 
1973 /* ----------------------------------------------------------------
1974  * Relation Descriptor Lookup Interface
1975  * ----------------------------------------------------------------
1976  */
1977 
1978 /*
1979  * RelationIdGetRelation
1980  *
1981  * Look up a reldesc by OID; build one if it is not already in the cache.
1982  *
1983  * Returns NULL if no pg_class row could be found for the given relid
1984  * (suggesting we are trying to access a just-deleted relation).
1985  * Any other error is reported via elog.
      * NULL is also returned for a cached entry that is treated as dropped.
1986  *
1987  * NB: caller should already have at least AccessShareLock on the
1988  * relation ID, else there are nasty race conditions.
1989  *
1990  * NB: relation ref count is incremented, or set to 1 if new entry.
1991  * Caller should eventually decrement count. (Usually,
1992  * that happens by calling RelationClose().)
      *
      * NOTE(review): the signature line and a few statements of this function
      * are not visible in this extract; confirm details against the full file.
1993  */
1994 Relation
1996 {
1997  Relation rd;
1998 
1999  /* Make sure we're in an xact, even if this ends up being a cache hit */
2001 
2002  /*
2003  * First, try to find the reldesc in the cache.
2004  */
2005  RelationIdCacheLookup(relationId, rd);
2006 
2007  if (RelationIsValid(rd))
2008  {
2009  /* Return NULL for dropped relations; such an entry must be invalid. */
2011  {
2012  Assert(!rd->rd_isvalid);
2013  return NULL;
2014  }
2015 
2017  /* Revalidate the cache entry if it has been marked invalid. */
2018  if (!rd->rd_isvalid)
2019  {
2020  /*
2021  * Indexes only have a limited number of possible schema changes,
2022  * and we don't want to use the full-blown procedure because it's
2023  * a headache for indexes that the reload itself depends on.
      * Everything else goes through RelationClearRelation(rd, true).
2024  */
2025  if (rd->rd_rel->relkind == RELKIND_INDEX ||
2026  rd->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
2028  else
2029  RelationClearRelation(rd, true);
2030 
2031  /*
2032  * Normally entries need to be valid here, but before the relcache
2033  * has been initialized, not enough infrastructure exists to
2034  * perform pg_class lookups. The structure of such entries doesn't
2035  * change, but we still want to update the rd_rel entry. So
2036  * rd_isvalid = false is left in place for a later lookup.
2037  */
2038  Assert(rd->rd_isvalid ||
2040  }
2041  return rd;
2042  }
2043 
2044  /*
2045  * No reldesc in the cache, so have RelationBuildDesc() build one and add
2046  * it. The result is returned as-is, so a failed build yields NULL.
2047  */
2048  rd = RelationBuildDesc(relationId, true);
2049  if (RelationIsValid(rd))
2051  return rd;
2052 }
2053 
2054 /* ----------------------------------------------------------------
2055  * cache invalidation support routines
2056  * ----------------------------------------------------------------
2057  */
2058 
2059 /*
2060  * RelationIncrementReferenceCount
2061  * Increments relation reference count (rel->rd_refcnt) by one.
2062  *
2063  * Note: bootstrap mode has its own weird ideas about relation refcount
2064  * behavior; we ought to fix it someday, but for now, just disable
2065  * reference count ownership tracking in bootstrap mode.
      *
      * NOTE(review): the signature and the statements surrounding the
      * increment are not visible in this extract.
2066  */
2067 void
2069 {
2071  rel->rd_refcnt += 1;
2074 }
2075 
2076 /*
2077  * RelationDecrementReferenceCount
2078  * Decrements relation reference count (rel->rd_refcnt) by one.
      *
      * The count must be positive on entry; this is checked by assertion
      * only.
      *
      * NOTE(review): the signature and the statement(s) following the
      * decrement are not visible in this extract.
2079  */
2080 void
2082 {
2083  Assert(rel->rd_refcnt > 0);
2084  rel->rd_refcnt -= 1;
2087 }
2088 
2089 /*
2090  * RelationClose - close an open relation
2091  *
2092  * Actually, we just decrement the refcount.
2093  *
2094  * NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
2095  * will be freed as soon as their refcount goes to zero. In combination
2096  * with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
2097  * to catch references to already-released relcache entries. It slows
2098  * things down quite a bit, however.
      *
      * NOTE(review): the signature and several statements (including the
      * bodies of the two partition-context tests) are not visible in this
      * extract.
2099  */
2100 void
2102 {
2103  /* Note: no locking manipulations needed */
2105 
2106  /*
2107  * If the relation is no longer open in this session, we can clean up any
2108  * stale partition descriptors it has. This is unlikely, so check to see
2109  * if there are child contexts before expending a call to mcxt.c.
      * Both rd_pdcxt and rd_pddcxt are examined the same way.
2110  */
2111  if (RelationHasReferenceCountZero(relation))
2112  {
2113  if (relation->rd_pdcxt != NULL &&
2114  relation->rd_pdcxt->firstchild != NULL)
2116 
2117  if (relation->rd_pddcxt != NULL &&
2118  relation->rd_pddcxt->firstchild != NULL)
2120  }
2121 
     /*
      * Debug build only: discard the entry as soon as it is unreferenced,
      * provided it was not created in the current transaction.
      */
2122 #ifdef RELCACHE_FORCE_RELEASE
2123  if (RelationHasReferenceCountZero(relation) &&
2124  relation->rd_createSubid == InvalidSubTransactionId &&
2126  RelationClearRelation(relation, false);
2127 #endif
2128 }
2129 
2130 /*
2131  * RelationReloadIndexInfo - reload minimal information for an open index
2132  *
2133  * This function is used only for indexes. A relcache inval on an index
2134  * can mean that its pg_class or pg_index row changed. There are only
2135  * very limited changes that are allowed to an existing index's schema,
2136  * so we can update the relcache entry without a complete rebuild; which
2137  * is fortunate because we can't rebuild an index entry that is "nailed"
2138  * and/or in active use. We support full replacement of the pg_class row,
2139  * as well as updates of a few simple fields of the pg_index row.
2140  *
2141  * We can't necessarily reread the catalog rows right away; we might be
2142  * in a failed transaction when we receive the SI notification. If so,
2143  * RelationClearRelation just marks the entry as invalid by setting
2144  * rd_isvalid to false. This routine is called to fix the entry when it
2145  * is next needed.
2146  *
2147  * We assume that at the time we are called, we have at least AccessShareLock
2148  * on the target index. (Note: in the calls from RelationClearRelation,
2149  * this is legitimate because we know the rel has positive refcount.)
2150  *
2151  * If the target index is an index on pg_class or pg_index, we'd better have
2152  * previously gotten at least AccessShareLock on its underlying catalog,
2153  * else we are at risk of deadlock against someone trying to exclusive-lock
2154  * the heap and index in that order. This is ensured in current usage by
2155  * only applying this to indexes being opened or having positive refcount.
      *
      * On return the entry is marked valid again. A missing pg_class or
      * pg_index row is reported with elog(ERROR).
      *
      * NOTE(review): the signature line and a few other lines are not visible
      * in this extract.
2156  */
2157 static void
2159 {
2160  bool indexOK;
2161  HeapTuple pg_class_tuple;
2162  Form_pg_class relp;
2163 
2164  /* Should be called only for invalidated, live indexes */
2165  Assert((relation->rd_rel->relkind == RELKIND_INDEX ||
2166  relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) &&
2167  !relation->rd_isvalid &&
2169 
2170  /* Ensure it's closed at smgr level */
2171  RelationCloseSmgr(relation);
2172 
2173  /* Must free any AM cached data upon relcache flush */
2174  if (relation->rd_amcache)
2175  pfree(relation->rd_amcache);
2176  relation->rd_amcache = NULL;
2177 
2178  /*
2179  * If it's a shared index, we might be called before backend startup has
2180  * finished selecting a database, in which case we have no way to read
2181  * pg_class yet. However, a shared index can never have any significant
2182  * schema updates, so it's okay to ignore the invalidation signal. Just
2183  * mark it valid and return without doing anything more.
2184  */
2185  if (relation->rd_rel->relisshared && !criticalRelcachesBuilt)
2186  {
2187  relation->rd_isvalid = true;
2188  return;
2189  }
2190 
2191  /*
2192  * Read the pg_class row
2193  *
2194  * Don't try to use an indexscan of pg_class_oid_index to reload the info
2195  * for pg_class_oid_index ...
2196  */
2197  indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
2198  pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK, false);
2199  if (!HeapTupleIsValid(pg_class_tuple))
2200  elog(ERROR, "could not find pg_class tuple for index %u",
2201  RelationGetRelid(relation));
2202  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
     /* Overwrite the cached pg_class row in place; rd_rel itself is not moved */
2203  memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2204  /* Reload reloptions in case they changed */
2205  if (relation->rd_options)
2206  pfree(relation->rd_options);
2207  RelationParseRelOptions(relation, pg_class_tuple);
2208  /* done with pg_class tuple */
2209  heap_freetuple(pg_class_tuple);
2210  /* We must recalculate physical address in case it changed */
2211  RelationInitPhysicalAddr(relation);
2212 
2213  /*
2214  * For a non-system index, there are fields of the pg_index row that are
2215  * allowed to change, so re-read that row and update the relcache entry.
2216  * Most of the info derived from pg_index (such as support function lookup
2217  * info) cannot change, and indeed the whole point of this routine is to
2218  * update the relcache entry without clobbering that data; so wholesale
2219  * replacement is not appropriate.
2220  */
2221  if (!IsSystemRelation(relation))
2222  {
2223  HeapTuple tuple;
2225 
2226  tuple = SearchSysCache1(INDEXRELID,
2227  ObjectIdGetDatum(RelationGetRelid(relation)));
2228  if (!HeapTupleIsValid(tuple))
2229  elog(ERROR, "cache lookup failed for index %u",
2230  RelationGetRelid(relation));
2231  index = (Form_pg_index) GETSTRUCT(tuple);
2232 
2233  /*
2234  * Basically, let's just copy all the bool fields. There are one or
2235  * two of these that can't actually change in the current code, but
2236  * it's not worth it to track exactly which ones they are. None of
2237  * the array fields are allowed to change, though.
2238  */
2239  relation->rd_index->indisunique = index->indisunique;
2240  relation->rd_index->indisprimary = index->indisprimary;
2241  relation->rd_index->indisexclusion = index->indisexclusion;
2242  relation->rd_index->indimmediate = index->indimmediate;
2243  relation->rd_index->indisclustered = index->indisclustered;
2244  relation->rd_index->indisvalid = index->indisvalid;
2245  relation->rd_index->indcheckxmin = index->indcheckxmin;
2246  relation->rd_index->indisready = index->indisready;
2247  relation->rd_index->indislive = index->indislive;
2248 
2249  /* Copy xmin too, as that is needed to make sense of indcheckxmin */
2251  HeapTupleHeaderGetXmin(tuple->t_data));
2252 
     /* Done with the syscache entry */
2253  ReleaseSysCache(tuple);
2254  }
2255 
2256  /* Okay, now it's valid again */
2257  relation->rd_isvalid = true;
2258 }
2259 
2260 /*
2261  * RelationReloadNailed - reload minimal information for nailed relations.
2262  *
2263  * The structure of a nailed relation can never change (which is good, because
2264  * we rely on knowing their structure to be able to read catalog content). But
2265  * some parts, e.g. pg_class.relfrozenxid, are still important to have
2266  * accurate content for. Therefore those need to be reloaded after the arrival
2267  * of invalidations.
      *
      * The entry is always marked invalid first; it is refreshed immediately
      * only when we are in a transaction and rd_refcnt is greater than 1.
      *
      * NOTE(review): the signature line and the condition guarding the
      * non-index reload block are not visible in this extract.
2268  */
2269 static void
2271 {
2272  Assert(relation->rd_isnailed);
2273 
2274  /*
2275  * Redo RelationInitPhysicalAddr in case it is a mapped relation whose
2276  * mapping changed.
2277  */
2278  RelationInitPhysicalAddr(relation);
2279 
2280  /* flag as needing to be revalidated */
2281  relation->rd_isvalid = false;
2282 
2283  /*
2284  * Can only reread catalog contents if in a transaction. If the relation
2285  * is currently open (not counting the nailed refcount), do so
2286  * immediately. Otherwise we've already marked the entry as possibly
2287  * invalid, and it'll be fixed when next opened.
2288  */
2289  if (!IsTransactionState() || relation->rd_refcnt <= 1)
2290  return;
2291 
2292  if (relation->rd_rel->relkind == RELKIND_INDEX)
2293  {
2294  /*
2295  * If it's a nailed-but-not-mapped index, then we need to re-read the
2296  * pg_class row to see if its relfilenode changed.
2297  */
2298  RelationReloadIndexInfo(relation);
2299  }
2300  else
2301  {
2302  /*
2303  * Reload a non-index entry. We can't easily do so if relcaches
2304  * aren't yet built, but that's fine because at that stage the
2305  * attributes that need to be current (like relfrozenxid) aren't yet
2306  * accessed. To ensure the entry will later be revalidated, we leave
2307  * it in invalid state, but allow use (cf. RelationIdGetRelation()).
2308  */
2310  {
2311  HeapTuple pg_class_tuple;
2312  Form_pg_class relp;
2313 
2314  /*
2315  * NB: Mark the entry as valid before starting to scan, to avoid
2316  * self-recursion when re-building pg_class.
2317  */
2318  relation->rd_isvalid = true;
2319 
     /*
      * NOTE(review): unlike RelationReloadIndexInfo, the tuple is not
      * tested with HeapTupleIsValid before GETSTRUCT. Presumably a
      * nailed relation's pg_class row always exists -- confirm.
      */
2320  pg_class_tuple = ScanPgRelation(RelationGetRelid(relation),
2321  true, false);
2322  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2323  memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2324  heap_freetuple(pg_class_tuple);
2325 
2326  /*
2327  * Again mark as valid, to protect against concurrently arriving
2328  * invalidations.
2329  */
2330  relation->rd_isvalid = true;
2331  }
2332  }
2333 }
2334 
2335 /*
2336  * RelationDestroyRelation
2337  *
2338  * Physically delete a relation cache entry and all subsidiary data.
2339  * Caller must already have unhooked the entry from the hash table.
      *
      * remember_tupdesc: when true, and the tuple descriptor's refcount drops
      * to zero here, the descriptor is not freed immediately (see the comment
      * in the body); when false it is freed with FreeTupleDesc.
      *
      * NOTE(review): a few statements of this function are not visible in
      * this extract.
2340  */
2341 static void
2342 RelationDestroyRelation(Relation relation, bool remember_tupdesc)
2343 {
2345 
2346  /*
2347  * Make sure smgr and lower levels close the relation's files, if they
2348  * weren't closed already. (This was probably done by caller, but let's
2349  * just be real sure.)
2350  */
2351  RelationCloseSmgr(relation);
2352 
2353  /*
2354  * Free all the subsidiary data structures of the relcache entry, then the
2355  * entry itself.
2356  */
2357  if (relation->rd_rel)
2358  pfree(relation->rd_rel);
2359  /* can't use DecrTupleDescRefCount here */
2360  Assert(relation->rd_att->tdrefcount > 0);
2361  if (--relation->rd_att->tdrefcount == 0)
2362  {
2363  /*
2364  * If we rebuilt a relcache entry during a transaction then it's
2365  * possible we did that because the TupDesc changed as the result of
2366  * an ALTER TABLE that ran at less than AccessExclusiveLock. It's
2367  * possible someone copied that TupDesc, in which case the copy would
2368  * point to freed memory. So if we rebuild an entry we keep the
2369  * TupDesc around until end of transaction, to be safe.
2370  */
2371  if (remember_tupdesc)
2373  else
2374  FreeTupleDesc(relation->rd_att);
2375  }
     /* Release the remaining per-entry lists, bitmapsets and allocations. */
2376  FreeTriggerDesc(relation->trigdesc);
2377  list_free_deep(relation->rd_fkeylist);
2378  list_free(relation->rd_indexlist);
2379  list_free(relation->rd_statlist);
2380  bms_free(relation->rd_indexattr);
2381  bms_free(relation->rd_keyattr);
2382  bms_free(relation->rd_pkattr);
2383  bms_free(relation->rd_idattr);
2384  if (relation->rd_pubactions)
2385  pfree(relation->rd_pubactions);
2386  if (relation->rd_options)
2387  pfree(relation->rd_options);
2388  if (relation->rd_indextuple)
2389  pfree(relation->rd_indextuple);
2390  if (relation->rd_amcache)
2391  pfree(relation->rd_amcache);
2392  if (relation->rd_fdwroutine)
2393  pfree(relation->rd_fdwroutine);
     /* Substructures kept in their own memory contexts go with the context. */
2394  if (relation->rd_indexcxt)
2395  MemoryContextDelete(relation->rd_indexcxt);
2396  if (relation->rd_rulescxt)
2397  MemoryContextDelete(relation->rd_rulescxt);
2398  if (relation->rd_rsdesc)
2399  MemoryContextDelete(relation->rd_rsdesc->rscxt);
2400  if (relation->rd_partkeycxt)
2402  if (relation->rd_pdcxt)
2403  MemoryContextDelete(relation->rd_pdcxt);
2404  if (relation->rd_pddcxt)
2405  MemoryContextDelete(relation->rd_pddcxt);
2406  if (relation->rd_partcheckcxt)
     /* Finally, free the entry itself. */
2408  pfree(relation);
2409 }
2410 
2411 /*
2412  * RelationClearRelation
2413  *
2414  * Physically blow away a relation cache entry, or reset it and rebuild
2415  * it from scratch (that is, from catalog entries). The latter path is
2416  * used when we are notified of a change to an open relation (one with
2417  * refcount > 0).
2418  *
2419  * NB: when rebuilding, we'd better hold some lock on the relation,
2420  * else the catalog data we need to read could be changing under us.
2421  * Also, a rel to be rebuilt had better have refcnt > 0. This is because
2422  * a sinval reset could happen while we're accessing the catalogs, and
2423  * the rel would get blown away underneath us by RelationCacheInvalidate
2424  * if it has zero refcnt.
2425  *
2426  * The "rebuild" parameter is redundant in current usage because it has
2427  * to match the relation's refcnt status, but we keep it as a crosscheck
2428  * that we're doing what the caller expects.
      *
      * Several cases return early, leaving the entry in place: nailed
      * relations, entries marked as dropped, open indexes with support
      * info, and rebuild requests arriving outside a transaction.
2429  */
2430 static void
2431 RelationClearRelation(Relation relation, bool rebuild)
2432 {
2433  /*
2434  * As per notes above, a rel to be rebuilt MUST have refcnt > 0; while of
2435  * course it would be an equally bad idea to blow away one with nonzero
2436  * refcnt, since that would leave someone somewhere with a dangling
2437  * pointer. All callers are expected to have verified that this holds.
2438  */
2439  Assert(rebuild ?
2440  !RelationHasReferenceCountZero(relation) :
2441  RelationHasReferenceCountZero(relation));
2442 
2443  /*
2444  * Make sure smgr and lower levels close the relation's files, if they
2445  * weren't closed already. If the relation is not getting deleted, the
2446  * next smgr access should reopen the files automatically. This ensures
2447  * that the low-level file access state is updated after, say, a vacuum
2448  * truncation.
2449  */
2450  RelationCloseSmgr(relation);
2451 
2452  /* Free AM cached data, if any */
2453  if (relation->rd_amcache)
2454  pfree(relation->rd_amcache);
2455  relation->rd_amcache = NULL;
2456 
2457  /*
2458  * Treat nailed-in system relations separately, they always need to be
2459  * accessible, so we can't blow them away.
2460  */
2461  if (relation->rd_isnailed)
2462  {
2463  RelationReloadNailed(relation);
2464  return;
2465  }
2466 
2467  /* Mark it invalid until we've finished rebuild */
2468  relation->rd_isvalid = false;
2469 
2470  /*
      * Entries marked as dropped are only invalidated, never destroyed or
      * rebuilt here. See RelationForgetRelation().
      */
2471  if (relation->rd_droppedSubid != InvalidSubTransactionId)
2472  return;
2473 
2474  /*
2475  * Even non-system indexes should not be blown away if they are open and
2476  * have valid index support information. This avoids problems with active
2477  * use of the index support information. As with nailed indexes, we
2478  * re-read the pg_class row to handle possible physical relocation of the
2479  * index, and we check for pg_index updates too.
2480  */
2481  if ((relation->rd_rel->relkind == RELKIND_INDEX ||
2482  relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) &&
2483  relation->rd_refcnt > 0 &&
2484  relation->rd_indexcxt != NULL)
2485  {
2486  if (IsTransactionState())
2487  RelationReloadIndexInfo(relation);
2488  return;
2489  }
2490 
2491  /*
2492  * If we're really done with the relcache entry, blow it away. But if
2493  * someone is still using it, reconstruct the whole deal without moving
2494  * the physical RelationData record (so that the someone's pointer is
2495  * still valid).
2496  */
2497  if (!rebuild)
2498  {
2499  /* Remove it from the hash table */
2500  RelationCacheDelete(relation);
2501 
2502  /* And release storage */
2503  RelationDestroyRelation(relation, false);
2504  }
2505  else if (!IsTransactionState())
2506  {
2507  /*
2508  * If we're not inside a valid transaction, we can't do any catalog
2509  * access so it's not possible to rebuild yet. Just exit, leaving
2510  * rd_isvalid = false so that the rebuild will occur when the entry is
2511  * next opened.
2512  *
2513  * Note: it's possible that we come here during subtransaction abort,
2514  * and the reason for wanting to rebuild is that the rel is open in
2515  * the outer transaction. In that case it might seem unsafe to not
2516  * rebuild immediately, since whatever code has the rel already open
2517  * will keep on using the relcache entry as-is. However, in such a
2518  * case the outer transaction should be holding a lock that's
2519  * sufficient to prevent any significant change in the rel's schema,
2520  * so the existing entry contents should be good enough for its
2521  * purposes; at worst we might be behind on statistics updates or the
2522  * like. (See also CheckTableNotInUse() and its callers.) These same
2523  * remarks also apply to the cases above where we exit without having
2524  * done RelationReloadIndexInfo() yet.
2525  */
2526  return;
2527  }
2528  else
2529  {
2530  /*
2531  * Our strategy for rebuilding an open relcache entry is to build a
2532  * new entry from scratch, swap its contents with the old entry, and
2533  * finally delete the new entry (along with any infrastructure swapped
2534  * over from the old entry). This is to avoid trouble in case an
2535  * error causes us to lose control partway through. The old entry
2536  * will still be marked !rd_isvalid, so we'll try to rebuild it again
2537  * on next access. Meanwhile it's not any less valid than it was
2538  * before, so any code that might expect to continue accessing it
2539  * isn't hurt by the rebuild failure. (Consider for example a
2540  * subtransaction that ALTERs a table and then gets canceled partway
2541  * through the cache entry rebuild. The outer transaction should
2542  * still see the not-modified cache entry as valid.) The worst
2543  * consequence of an error is leaking the necessarily-unreferenced new
2544  * entry, and this shouldn't happen often enough for that to be a big
2545  * problem.
2546  *
2547  * When rebuilding an open relcache entry, we must preserve ref count,
2548  * rd_*Subid, and rd_toastoid state. Also attempt to preserve the
2549  * pg_class entry (rd_rel), tupledesc, rewrite-rule, partition key,
2550  * and partition descriptor substructures in place, because various
2551  * places assume that these structures won't move while they are
2552  * working with an open relcache entry. (Note: the refcount
2553  * mechanism for tupledescs might someday allow us to remove this hack
2554  * for the tupledesc.)
2555  *
2556  * Note that this process does not touch CurrentResourceOwner; which
2557  * is good because whatever ref counts the entry may have do not
2558  * necessarily belong to that resource owner.
2559  */
2560  Relation newrel;
2561  Oid save_relid = RelationGetRelid(relation);
2562  bool keep_tupdesc;
2563  bool keep_rules;
2564  bool keep_policies;
2565  bool keep_partkey;
2566 
2567  /* Build temporary entry, but don't link it into hashtable */
2568  newrel = RelationBuildDesc(save_relid, false);
2569  if (newrel == NULL)
2570  {
2571  /*
2572  * We can validly get here, if we're using a historic snapshot in
2573  * which a relation, accessed from outside logical decoding, is
2574  * still invisible. In that case it's fine to just mark the
2575  * relation as invalid and return - it'll fully get reloaded by
2576  * the cache reset at the end of logical decoding (or at the next
2577  * access). During normal processing we don't want to ignore this
2578  * case as it shouldn't happen there, as explained below.
2579  */
2580  if (HistoricSnapshotActive())
2581  return;
2582 
2583  /*
2584  * This shouldn't happen as dropping a relation is intended to be
2585  * impossible if still referenced (cf. CheckTableNotInUse()). But
2586  * if we get here anyway, we can't just delete the relcache entry,
2587  * as it possibly could get accessed later (as e.g. the error
2588  * might get trapped and handled via a subtransaction rollback).
2589  */
2590  elog(ERROR, "relation %u deleted while still in use", save_relid);
2591  }
2592 
     /* Decide which old substructures are logically unchanged and can stay. */
2593  keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
2594  keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
2595  keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc);
2596  /* partkey is immutable once set up, so we can always keep it */
2597  keep_partkey = (relation->rd_partkey != NULL);
2598 
2599  /*
2600  * Perform swapping of the relcache entry contents. Within this
2601  * process the old entry is momentarily invalid, so there *must* be no
2602  * possibility of CHECK_FOR_INTERRUPTS within this sequence. Do it in
2603  * all-in-line code for safety.
2604  *
2605  * Since the vast majority of fields should be swapped, our method is
2606  * to swap the whole structures and then re-swap those few fields we
2607  * didn't want swapped.
2608  */
2609 #define SWAPFIELD(fldtype, fldname) \
2610  do { \
2611  fldtype _tmp = newrel->fldname; \
2612  newrel->fldname = relation->fldname; \
2613  relation->fldname = _tmp; \
2614  } while (0)
2615 
2616  /* swap all Relation struct fields */
2617  {
2618  RelationData tmpstruct;
2619 
2620  memcpy(&tmpstruct, newrel, sizeof(RelationData));
2621  memcpy(newrel, relation, sizeof(RelationData));
2622  memcpy(relation, &tmpstruct, sizeof(RelationData));
2623  }
2624 
     /*
      * From here on, each SWAPFIELD undoes the whole-struct swap for one
      * field, i.e. it puts the old entry's value back into "relation".
      */
2625  /* rd_smgr must not be swapped, due to back-links from smgr level */
2626  SWAPFIELD(SMgrRelation, rd_smgr);
2627  /* rd_refcnt must be preserved */
2628  SWAPFIELD(int, rd_refcnt);
2629  /* isnailed shouldn't change */
2630  Assert(newrel->rd_isnailed == relation->rd_isnailed);
2631  /* creation sub-XIDs must be preserved */
2632  SWAPFIELD(SubTransactionId, rd_createSubid);
2633  SWAPFIELD(SubTransactionId, rd_newRelfilenodeSubid);
2634  SWAPFIELD(SubTransactionId, rd_firstRelfilenodeSubid);
2635  SWAPFIELD(SubTransactionId, rd_droppedSubid);
2636  /* un-swap rd_rel pointers, swap contents instead */
2637  SWAPFIELD(Form_pg_class, rd_rel);
2638  /* ... but actually, we don't have to update newrel->rd_rel */
2639  memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
2640  /* preserve old tupledesc, rules, policies if no logical change */
2641  if (keep_tupdesc)
2642  SWAPFIELD(TupleDesc, rd_att);
2643  if (keep_rules)
2644  {
2645  SWAPFIELD(RuleLock *, rd_rules);
2646  SWAPFIELD(MemoryContext, rd_rulescxt);
2647  }
2648  if (keep_policies)
2649  SWAPFIELD(RowSecurityDesc *, rd_rsdesc);
2650  /* toast OID override must be preserved */
2651  SWAPFIELD(Oid, rd_toastoid);
2652  /* pgstat_info must be preserved */
2653  SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
2654  /* preserve old partition key if we have one */
2655  if (keep_partkey)
2656  {
2657  SWAPFIELD(PartitionKey, rd_partkey);
2658  SWAPFIELD(MemoryContext, rd_partkeycxt);
2659  }
2660  if (newrel->rd_pdcxt != NULL || newrel->rd_pddcxt != NULL)
2661  {
2662  /*
2663  * We are rebuilding a partitioned relation with a non-zero
2664  * reference count, so we must keep the old partition descriptor
2665  * around, in case there's a PartitionDirectory with a pointer to
2666  * it. This means we can't free the old rd_pdcxt yet. (This is
2667  * necessary because RelationGetPartitionDesc hands out direct
2668  * pointers to the relcache's data structure, unlike our usual
2669  * practice which is to hand out copies. We'd have the same
2670  * problem with rd_partkey, except that we always preserve that
2671  * once created.)
2672  *
2673  * To ensure that it's not leaked completely, re-attach it to the
2674  * new reldesc, or make it a child of the new reldesc's rd_pdcxt
2675  * in the unlikely event that there is one already. (Compare hack
2676  * in RelationBuildPartitionDesc.) RelationClose will clean up
2677  * any such contexts once the reference count reaches zero.
2678  *
2679  * In the case where the reference count is zero, this code is not
2680  * reached, which should be OK because in that case there should
2681  * be no PartitionDirectory with a pointer to the old entry.
2682  *
2683  * Note that newrel and relation have already been swapped, so the
2684  * "old" partition descriptor is actually the one hanging off of
2685  * newrel.
      *
      * NOTE(review): this branch is entered when either of newrel's
      * contexts is non-NULL, yet each MemoryContextSetParent call
      * below passes newrel's context without a NULL test of its own.
      * Presumably safe because the freshly built entry's contexts
      * are NULL (the "probably never happens" case) -- confirm.
2686  */
2687  relation->rd_partdesc = NULL; /* ensure rd_partdesc is invalid */
2688  relation->rd_partdesc_nodetached = NULL;
2690  if (relation->rd_pdcxt != NULL) /* probably never happens */
2691  MemoryContextSetParent(newrel->rd_pdcxt, relation->rd_pdcxt);
2692  else
2693  relation->rd_pdcxt = newrel->rd_pdcxt;
2694  if (relation->rd_pddcxt != NULL)
2695  MemoryContextSetParent(newrel->rd_pddcxt, relation->rd_pddcxt);
2696  else
2697  relation->rd_pddcxt = newrel->rd_pddcxt;
2698  /* drop newrel's pointers so we don't destroy it below */
2699  newrel->rd_partdesc = NULL;
2700  newrel->rd_partdesc_nodetached = NULL;
2702  newrel->rd_pdcxt = NULL;
2703  newrel->rd_pddcxt = NULL;
2704  }
2705 
2706 #undef SWAPFIELD
2707 
2708  /*
      * And now we can throw away the temporary entry. If the tuple
      * descriptor changed, ask for the old one to be remembered rather
      * than freed at once (see RelationDestroyRelation).
      */
2709  RelationDestroyRelation(newrel, !keep_tupdesc);
2710  }
2711 }
2712 
2713 /*
2714  * RelationFlushRelation
2715  *
2716  * Rebuild the relation if it is open (refcount > 0), else blow it away.
2717  * This is used when we receive a cache invalidation event for the rel.
      *
      * NOTE(review): the signature line, part of the first condition and the
      * statements around the first RelationClearRelation call are not visible
      * in this extract.
2718  */
2719 static void
2721 {
2722  if (relation->rd_createSubid != InvalidSubTransactionId ||
2724  {
2725  /*
2726  * New relcache entries are always rebuilt, not flushed; else we'd
2727  * forget the "new" status of the relation. Ditto for the
2728  * new-relfilenode status.
2729  *
2730  * The rel could have zero refcnt here, so temporarily increment the
2731  * refcnt to ensure it's safe to rebuild it. We can assume that the
2732  * current transaction has some lock on the rel already.
2733  */
2735  RelationClearRelation(relation, true);
2737  }
2738  else
2739  {
2740  /*
2741  * Pre-existing rels can be dropped from the relcache if not open.
      * Otherwise they are rebuilt; "rebuild" mirrors the refcount state.
2742  */
2743  bool rebuild = !RelationHasReferenceCountZero(relation);
2744 
2745  RelationClearRelation(relation, rebuild);
2746  }
2747 }
2748 
2749 /*
2750  * RelationForgetRelation - caller reports that it dropped the relation
      *
      * Does nothing if the relation is not in the cache. Raises an error if
      * the entry still has a nonzero reference count. Otherwise the entry is
      * handed to RelationClearRelation(relation, false).
      *
      * NOTE(review): the signature line and a few statements are not visible
      * in this extract.
2751  */
2752 void
2754 {
2755  Relation relation;
2756 
2757  RelationIdCacheLookup(rid, relation);
2758 
2759  if (!PointerIsValid(relation))
2760  return; /* not in cache, nothing to do */
2761 
2762  if (!RelationHasReferenceCountZero(relation))
2763  elog(ERROR, "relation %u is still open", rid);
2764 
2766  if (relation->rd_createSubid != InvalidSubTransactionId ||
2768  {
2769  /*
2770  * In the event of subtransaction rollback, we must not forget
2771  * rd_*Subid. Mark the entry "dropped" so RelationClearRelation()
2772  * invalidates it in lieu of destroying it. (If we're in a top
2773  * transaction, we could opt to destroy the entry.)
2774  */
2776  }
2777 
2778  RelationClearRelation(relation, false);
2779 }
2780 
2781 /*
2782  * RelationCacheInvalidateEntry
2783  *
2784  * This routine is invoked for SI cache flush messages.
2785  *
2786  * Any relcache entry matching the relid must be flushed. (Note: caller has
2787  * already determined that the relid belongs to our database or is a shared
2788  * relation.)
2789  *
2790  * We used to skip local relations, on the grounds that they could
2791  * not be targets of cross-backend SI update messages; but it seems
2792  * safer to process them, so that our *own* SI update messages will
2793  * have the same effects during CommandCounterIncrement for both
2794  * local and nonlocal relations.
      *
      * If no entry for the relid is cached, nothing is done.
      *
      * NOTE(review): the signature line and one statement preceding the
      * flush call are not visible in this extract.
2795  */
2796 void
2798 {
2799  Relation relation;
2800 
2801  RelationIdCacheLookup(relationId, relation);
2802 
2803  if (PointerIsValid(relation))
2804  {
2806  RelationFlushRelation(relation);
2807  }
2808 }
2809 
2810 /*
2811  * RelationCacheInvalidate
2812  * Blow away cached relation descriptors that have zero reference counts,
2813  * and rebuild those with positive reference counts. Also reset the smgr
2814  * relation cache and re-read relation mapping data.
2815  *
2816  * This is currently used only to recover from SI message buffer overflow,
2817  * so we do not touch relations having new-in-transaction relfilenodes; they
2818  * cannot be targets of cross-backend SI updates (and our own updates now go
2819  * through a separate linked list that isn't limited by the SI message
2820  * buffer size).
2821  *
2822  * We do this in two phases: the first pass deletes deletable items, and
2823  * the second one rebuilds the rebuildable items. This is essential for
2824  * safety, because hash_seq_search only copes with concurrent deletion of
2825  * the element it is currently visiting. If a second SI overflow were to
2826  * occur while we are walking the table, resulting in recursive entry to
2827  * this routine, we could crash because the inner invocation blows away
2828  * the entry next to be visited by the outer scan. But this way is OK,
2829  * because (a) during the first pass we won't process any more SI messages,
2830  * so hash_seq_search will complete safely; (b) during the second pass we
2831  * only hold onto pointers to nondeletable entries.
2832  *
2833  * The two-phase approach also makes it easy to update relfilenodes for
2834  * mapped relations before we do anything else, and to ensure that the
2835  * second pass processes nailed-in-cache items before other nondeletable
2836  * items. This should ensure that system catalogs are up to date before
2837  * we attempt to use them to reload information about other open relations.
2838  */
2839 void
2841 {
      /*
       * NOTE(review): the embedded numbering skips 2840, 2842, 2852, 2871 and
       * 2874.  As a result the function's name/parameter line, the
       * declaration of "status", the statement announced by the "Reload
       * relation mapping data" comment, the second operand of the "||" test
       * in the loop, and one further statement are not visible in this
       * listing.  Consult the upstream file before changing any logic here.
       */
2843  RelIdCacheEnt *idhentry;
2844  Relation relation;
2845  List *rebuildFirstList = NIL; /* pg_class, then pg_class_oid_index */
2846  List *rebuildList = NIL; /* other nailed rels, then everything else */
2847  ListCell *l;
2848 
2849  /*
2850  * Reload relation mapping data before starting to reconstruct cache.
2851  */
2853 
2854  /* Phase 1: drop unreferenced entries, queue referenced ones for rebuild */
2855  hash_seq_init(&status, RelationIdCache);
2856 
2857  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2858  {
2859  relation = idhentry->reldesc;
2860 
2861  /* Must close all smgr references to avoid leaving dangling ptrs */
2862  RelationCloseSmgr(relation);
2863 
2864  /*
2865  * Ignore new relations; no other backend will manipulate them before
2866  * we commit. Likewise, before replacing a relation's relfilenode, we
2867  * shall have acquired AccessExclusiveLock and drained any applicable
2868  * pending invalidations.
2869  */
2870  if (relation->rd_createSubid != InvalidSubTransactionId ||
2872  continue;
2873 
2875 
2876  if (RelationHasReferenceCountZero(relation))
2877  {
2878  /* Delete this entry immediately */
2879  Assert(!relation->rd_isnailed); /* nailed entries are expected to keep a refcount of 1 */
2880  RelationClearRelation(relation, false);
2881  }
2882  else
2883  {
2884  /*
2885  * If it's a mapped relation, immediately update its rd_node in
2886  * case its relfilenode changed. We must do this during phase 1
2887  * in case the relation is consulted during rebuild of other
2888  * relcache entries in phase 2. It's safe since consulting the
2889  * map doesn't involve any access to relcache entries.
2890  */
2891  if (RelationIsMapped(relation))
2892  RelationInitPhysicalAddr(relation);
2893 
2894  /*
2895  * Add this entry to list of stuff to rebuild in second pass.
2896  * pg_class goes to the front of rebuildFirstList while
2897  * pg_class_oid_index goes to the back of rebuildFirstList, so
2898  * they are done first and second respectively. Other nailed
2899  * relations go to the front of rebuildList, so they'll be done
2900  * next in no particular order; and everything else goes to the
2901  * back of rebuildList.
2902  */
2903  if (RelationGetRelid(relation) == RelationRelationId)
2904  rebuildFirstList = lcons(relation, rebuildFirstList);
2905  else if (RelationGetRelid(relation) == ClassOidIndexId)
2906  rebuildFirstList = lappend(rebuildFirstList, relation);
2907  else if (relation->rd_isnailed)
2908  rebuildList = lcons(relation, rebuildList);
2909  else
2910  rebuildList = lappend(rebuildList, relation);
2911  }
2912  }
2913 
2914  /*
2915  * Now zap any remaining smgr cache entries. This must happen before we
2916  * start to rebuild entries, since that may involve catalog fetches which
2917  * will re-open catalog files.
2918  */
2919  smgrcloseall();
2920 
2921  /* Phase 2: rebuild the items found to need rebuild in phase 1 */
2922  foreach(l, rebuildFirstList) /* pg_class, then pg_class_oid_index */
2923  {
2924  relation = (Relation) lfirst(l);
2925  RelationClearRelation(relation, true);
2926  }
2927  list_free(rebuildFirstList);
2928  foreach(l, rebuildList) /* nailed entries were pushed to the front in phase 1 */
2929  {
2930  relation = (Relation) lfirst(l);
2931  RelationClearRelation(relation, true);
2932  }
2933  list_free(rebuildList);
2934 }
2935 
2936 /*
2937  * RelationCloseSmgrByOid - close a relcache entry's smgr link
2938  *
2939  * Needed in some cases where we are changing a relation's physical mapping.
2940  * The link will be automatically reopened on next use.
2941  */
2942 void
2944 {
      /*
       * NOTE(review): the line carrying the function name and its parameter
       * list (embedded number 2943) is missing from this listing; the body
       * reads a parameter called relationId.
       */
2945  Relation relation;
2946 
2947  RelationIdCacheLookup(relationId, relation); /* "relation" is tested just below: invalid means no cache entry for this OID */
2948 
2949  if (!PointerIsValid(relation))
2950  return; /* not in cache, nothing to do */
2951 
2952  RelationCloseSmgr(relation); /* only the smgr link is closed; nothing else in the entry is touched here */
2953 }
2954 
2955 static void
2957 {
      /*
       * Record tuple descriptor "td" in EOXactTupleDescArray.  The array is
       * allocated with room for 16 entries on first use, and the second
       * branch below doubles its capacity.  AtEOXact_RelationCache() later
       * calls FreeTupleDesc() on every recorded entry and pfree()s the array.
       *
       * NOTE(review): embedded numbers 2956, 2962, 2965-2966, 2969 and 2973
       * are missing from this listing.  The function name/parameter line,
       * the assignment of oldcxt, whatever initialises the length counters
       * after the first palloc, and the condition guarding the growth branch
       * are therefore not visible here -- confirm against upstream.
       */
2958  if (EOXactTupleDescArray == NULL)
2959  {
2960  MemoryContext oldcxt;
2961 
2963 
2964  EOXactTupleDescArray = (TupleDesc *) palloc(16 * sizeof(TupleDesc));
2967  MemoryContextSwitchTo(oldcxt); /* restore the caller's memory context */
2968  }
2970  {
2971  int32 newlen = EOXactTupleDescArrayLen * 2; /* double the capacity */
2972 
2974 
2975  EOXactTupleDescArray = (TupleDesc *) repalloc(EOXactTupleDescArray,
2976  newlen * sizeof(TupleDesc));
2977  EOXactTupleDescArrayLen = newlen;
2978  }
2979 
      /* Append td and advance the next-free index. */
2980  EOXactTupleDescArray[NextEOXactTupleDescNum++] = td;
2981 }
2982 
2983 #ifdef USE_ASSERT_CHECKING
2984 static void
2985 AssertPendingSyncConsistency(Relation relation)
2986 {
      /*
       * Cross-check a single relcache entry: the verdict computed below from
       * the entry's own fields must equal what RelFileNodeSkippingWAL()
       * reports for the entry's rd_node.  A second assertion covers entries
       * whose rd_droppedSubid is set: they must be marked invalid.
       *
       * NOTE(review): embedded numbers 2991 and 2998 are missing from this
       * listing, so the final operand of each boolean expression is not
       * visible here -- confirm against upstream.
       */
2987  bool relcache_verdict =
2988  RelationIsPermanent(relation) &&
2989  ((relation->rd_createSubid != InvalidSubTransactionId &&
2990  RELKIND_HAS_STORAGE(relation->rd_rel->relkind)) ||
2992 
2993  Assert(relcache_verdict == RelFileNodeSkippingWAL(relation->rd_node));
2994 
2995  if (relation->rd_droppedSubid != InvalidSubTransactionId)
2996  Assert(!relation->rd_isvalid &&
2997  (relation->rd_createSubid != InvalidSubTransactionId ||
2999 }
3000 
3001 /*
3002  * AssertPendingSyncs_RelationCache
3003  *
3004  * Assert that relcache.c and storage.c agree on whether to skip WAL.
3005  */
3006 void
3008 {
      /*
       * NOTE(review): embedded numbers 3007, 3009, 3024, 3037 and 3057 are
       * missing from this listing.  The function name/parameter line, the
       * declaration of "status", a statement before the lock scan, the
       * right-hand side of the locktag_type comparison, and a statement
       * after the final loop are therefore not visible here.
       */
3010  LOCALLOCK *locallock;
3011  Relation *rels; /* relations opened by the lock scan; closed again at the end */
3012  int maxrels; /* allocated capacity of rels[] */
3013  int nrels; /* number of rels[] slots in use */
3014  RelIdCacheEnt *idhentry;
3015  int i;
3016 
3017  /*
3018  * Open every relation that this transaction has locked. If, for some
3019  * relation, storage.c is skipping WAL and relcache.c is not skipping WAL,
3020  * a CommandCounterIncrement() typically yields a local invalidation
3021  * message that destroys the relcache entry. By recreating such entries
3022  * here, we detect the problem.
3023  */
3025  maxrels = 1;
3026  rels = palloc(maxrels * sizeof(*rels));
3027  nrels = 0;
3028  hash_seq_init(&status, GetLockMethodLocalHash());
3029  while ((locallock = (LOCALLOCK *) hash_seq_search(&status)) != NULL)
3030  {
3031  Oid relid;
3032  Relation r;
3033 
3034  if (locallock->nLocks <= 0)
3035  continue; /* skip local lock entries with no held count */
3036  if ((LockTagType) locallock->tag.lock.locktag_type !=
3038  continue;
      /*
       * NOTE(review): relid is declared as Oid, while ObjectIdGetDatum()
       * is (in PostgreSQL's API) an Oid-to-Datum conversion.  The value
       * presumably round-trips unchanged, but a plain assignment would
       * state the intent more directly -- confirm before changing.
       */
3039  relid = ObjectIdGetDatum(locallock->tag.lock.locktag_field2);
3040  r = RelationIdGetRelation(relid);
3041  if (!RelationIsValid(r))
3042  continue; /* could not open it; nothing to remember */
3043  if (nrels >= maxrels)
3044  {
      /* rels[] is full: double its capacity */
3045  maxrels *= 2;
3046  rels = repalloc(rels, maxrels * sizeof(*rels));
3047  }
3048  rels[nrels++] = r;
3049  }
3050 
      /* Check every entry now present in the relcache hash table. */
3051  hash_seq_init(&status, RelationIdCache);
3052  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3053  AssertPendingSyncConsistency(idhentry->reldesc);
3054 
      /* Close everything opened by the lock scan above. */
3055  for (i = 0; i < nrels; i++)
3056  RelationClose(rels[i]);
3058 }
3059 #endif
3060 
3061 /*
3062  * AtEOXact_RelationCache
3063  *
3064  * Clean up the relcache at main-transaction commit or abort.
3065  *
3066  * Note: this must be called *before* processing invalidation messages.
3067  * In the case of abort, we don't want to try to rebuild any invalidated
3068  * cache entries (since we can't safely do database accesses). Therefore
3069  * we must reset refcnts before handling pending invalidations.
3070  *
3071  * As of PostgreSQL 8.1, relcache refcnts should get released by the
3072  * ResourceOwner mechanism. This routine just does a debugging
3073  * cross-check that no pins remain. However, we also need to do special
3074  * cleanup when the current transaction created any relations or made use
3075  * of forced index lists.
3076  */
3077 void
3079 {
      /*
       * NOTE(review): embedded numbers 3078, 3080, 3095 and 3128-3129 are
       * missing from this listing.  The function name/parameter line (the
       * body uses a parameter called isCommit), the declaration of
       * "status", the condition that selects between the two scan
       * strategies, and two statements before the closing brace are
       * therefore not visible here.
       */
3081  RelIdCacheEnt *idhentry;
3082  int i;
3083 
3084  /*
3085  * Unless the eoxact_list[] overflowed, we only need to examine the rels
3086  * listed in it. Otherwise fall back on a hash_seq_search scan.
3087  *
3088  * For simplicity, eoxact_list[] entries are not deleted till end of
3089  * top-level transaction, even though we could remove them at
3090  * subtransaction end in some cases, or remove relations from the list if
3091  * they are cleared for other reasons. Therefore we should expect the
3092  * case that list entries are not found in the hashtable; if not, there's
3093  * nothing to do for them.
3094  */
3096  {
      /* Full scan: visit every entry in the relcache hash table. */
3097  hash_seq_init(&status, RelationIdCache);
3098  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3099  {
3100  AtEOXact_cleanup(idhentry->reldesc, isCommit);
3101  }
3102  }
3103  else
3104  {
      /* Targeted scan: look up only the OIDs recorded in eoxact_list[]. */
3105  for (i = 0; i < eoxact_list_len; i++)
3106  {
3107  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
3108  (void *) &eoxact_list[i],
3109  HASH_FIND,
3110  NULL);
3111  if (idhentry != NULL) /* entry may already be gone; see comment above */
3112  AtEOXact_cleanup(idhentry->reldesc, isCommit);
3113  }
3114  }
3115 
      /* Free every recorded tuple descriptor, then the array itself. */
3116  if (EOXactTupleDescArrayLen > 0)
3117  {
3118  Assert(EOXactTupleDescArray != NULL);
3119  for (i = 0; i < NextEOXactTupleDescNum; i++)
3120  FreeTupleDesc(EOXactTupleDescArray[i]);
3121  pfree(EOXactTupleDescArray);
3122  EOXactTupleDescArray = NULL;
3123  }
3124 
3125  /* Now we're out of the transaction and can clear the lists */
3126  eoxact_list_len = 0;
3127  eoxact_list_overflowed = false;
3130 }
3131 
3132 /*
3133  * AtEOXact_cleanup
3134  *
3135  * Clean up a single rel at main-transaction commit or abort
3136  *
3137  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
3138  * bother to prevent duplicate entries in eoxact_list[].
3139  */
3140 static void
3141 AtEOXact_cleanup(Relation relation, bool isCommit)
3142 {
      /*
       * NOTE(review): embedded numbers 3160, 3180-3181 and 3187-3190 are
       * missing from this listing.  The condition guarding the refcount
       * assertion, both arms of the isCommit ternary, and the statements
       * that reset the subids are therefore not visible here.
       */
3143  bool clear_relcache = false;
3144 
3145  /*
3146  * The relcache entry's ref count should be back to its normal
3147  * not-in-a-transaction state: 0 unless it's nailed in cache.
3148  *
3149  * In bootstrap mode, this is NOT true, so don't check it --- the
3150  * bootstrap code expects relations to stay open across start/commit
3151  * transaction calls. (That seems bogus, but it's not worth fixing.)
3152  *
3153  * Note: ideally this check would be applied to every relcache entry, not
3154  * just those that have eoxact work to do. But it's not worth forcing a
3155  * scan of the whole relcache just for this. (Moreover, doing so would
3156  * mean that assert-enabled testing never tests the hash_search code path
3157  * above, which seems a bad idea.)
3158  */
3159 #ifdef USE_ASSERT_CHECKING
3161  {
3162  int expected_refcnt;
3163 
3164  expected_refcnt = relation->rd_isnailed ? 1 : 0; /* nailed entries keep one reference */
3165  Assert(relation->rd_refcnt == expected_refcnt);
3166  }
3167 #endif
3168 
3169  /*
3170  * Is the relation live after this transaction ends?
3171  *
3172  * During commit, clear the relcache entry if it is preserved after
3173  * relation drop, in order not to orphan the entry. During rollback,
3174  * clear the relcache entry if the relation is created in the current
3175  * transaction since it isn't interesting any longer once we are out of
3176  * the transaction.
3177  */
3178  clear_relcache =
3179  (isCommit ?
3182 
3183  /*
3184  * Since we are now out of the transaction, reset the subids to zero. That
3185  * also lets RelationClearRelation() drop the relcache entry.
3186  */
3191 
3192  if (clear_relcache)
3193  {
3194  if (RelationHasReferenceCountZero(relation))
3195  {
      /* No references remain, so the entry can be removed outright. */
3196  RelationClearRelation(relation, false);
3197  return;
3198  }
3199  else
3200  {
3201  /*
3202  * Hmm, somewhere there's a (leaked?) reference to the relation.
3203  * We daren't remove the entry for fear of dereferencing a
3204  * dangling pointer later. Bleat, and mark it as not belonging to
3205  * the current transaction. Hopefully it'll get cleaned up
3206  * eventually. This must be just a WARNING to avoid
3207  * error-during-error-recovery loops.
3208  */
3209  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3210  RelationGetRelationName(relation));
3211  }
3212  }
3213 }
3214 
3215 /*
3216  * AtEOSubXact_RelationCache
3217  *
3218  * Clean up the relcache at sub-transaction commit or abort.
3219  *
3220  * Note: this must be called *before* processing invalidation messages.
3221  */
3222 void
3224  SubTransactionId parentSubid)
3225 {
      /*
       * NOTE(review): embedded numbers 3223, 3226 and 3235 are missing from
       * this listing.  The function name with its leading parameters (the
       * body uses isCommit and mySubid), the declaration of "status", and
       * the condition that selects between the two scan strategies are
       * therefore not visible here.
       */
3227  RelIdCacheEnt *idhentry;
3228  int i;
3229 
3230  /*
3231  * Unless the eoxact_list[] overflowed, we only need to examine the rels
3232  * listed in it. Otherwise fall back on a hash_seq_search scan. Same
3233  * logic as in AtEOXact_RelationCache.
3234  */
3236  {
      /* Full scan: visit every entry in the relcache hash table. */
3237  hash_seq_init(&status, RelationIdCache);
3238  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3239  {
3240  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3241  mySubid, parentSubid);
3242  }
3243  }
3244  else
3245  {
      /* Targeted scan: look up only the OIDs recorded in eoxact_list[]. */
3246  for (i = 0; i < eoxact_list_len; i++)
3247  {
3248  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
3249  (void *) &eoxact_list[i],
3250  HASH_FIND,
3251  NULL);
3252  if (idhentry != NULL) /* a listed OID may no longer have a cache entry */
3253  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3254  mySubid, parentSubid);
3255  }
3256  }
3257 
3258  /* Don't reset the list; we still need more cleanup later */
3259 }
3260 
3261 /*
3262  * AtEOSubXact_cleanup
3263  *
3264  * Clean up a single rel at subtransaction commit or abort
3265  *
3266  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
3267  * bother to prevent duplicate entries in eoxact_list[].
3268  */
3269 static void
3270 AtEOSubXact_cleanup(Relation relation, bool isCommit,
3271  SubTransactionId mySubid, SubTransactionId parentSubid)
3272 {
      /*
       * NOTE(review): embedded numbers 3288, 3294-3297, 3325, 3333 and 3341
       * are missing from this listing.  The second operand of the Assert,
       * the statements under "allow the entry to be removed", and the body
       * of each bare "else" near the end are therefore not visible here.
       */
3273  /*
3274  * Is it a relation created in the current subtransaction?
3275  *
3276  * During subcommit, mark it as belonging to the parent, instead, as long
3277  * as it has not been dropped. Otherwise simply delete the relcache entry
3278  * --- it isn't interesting any longer.
3279  */
3280  if (relation->rd_createSubid == mySubid)
3281  {
3282  /*
3283  * Valid rd_droppedSubid means the corresponding relation is dropped
3284  * but the relcache entry is preserved for at-commit pending sync. We
3285  * need to drop it explicitly here not to make the entry orphan.
3286  */
3287  Assert(relation->rd_droppedSubid == mySubid ||
3289  if (isCommit && relation->rd_droppedSubid == InvalidSubTransactionId)
3290  relation->rd_createSubid = parentSubid; /* subcommit of a live rel: hand it to the parent */
3291  else if (RelationHasReferenceCountZero(relation))
3292  {
3293  /* allow the entry to be removed */
3298  RelationClearRelation(relation, false);
3299  return;
3300  }
3301  else
3302  {
3303  /*
3304  * Hmm, somewhere there's a (leaked?) reference to the relation.
3305  * We daren't remove the entry for fear of dereferencing a
3306  * dangling pointer later. Bleat, and transfer it to the parent
3307  * subtransaction so we can try again later. This must be just a
3308  * WARNING to avoid error-during-error-recovery loops.
3309  */
3310  relation->rd_createSubid = parentSubid;
3311  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3312  RelationGetRelationName(relation));
3313  }
3314  }
3315 
3316  /*
3317  * Likewise, update or drop any new-relfilenode-in-subtransaction record
3318  * or drop record.
3319  */
      /* Each field below is handed to the parent on subcommit. */
3320  if (relation->rd_newRelfilenodeSubid == mySubid)
3321  {
3322  if (isCommit)
3323  relation->rd_newRelfilenodeSubid = parentSubid;
3324  else
3326  }
3327 
3328  if (relation->rd_firstRelfilenodeSubid == mySubid)
3329  {
3330  if (isCommit)
3331  relation->rd_firstRelfilenodeSubid = parentSubid;
3332  else
3334  }
3335 
3336  if (relation->rd_droppedSubid == mySubid)
3337  {
3338  if (isCommit)
3339  relation->rd_droppedSubid = parentSubid;
3340  else
3342  }
3343 }
3344 
3345 
3346 /*
3347  * RelationBuildLocalRelation
3348  * Build a relcache entry for an about-to-be-created relation,
3349  * and enter it into the relcache.
3350  */
3351 Relation
3353  Oid relnamespace,
3354  TupleDesc tupDesc,
3355  Oid relid,
3356  Oid accessmtd,
3357  Oid relfilenode,
3358  Oid reltablespace,
3359  bool shared_relation,
3360  bool mapped_relation,
3361  char relpersistence,
3362  char relkind)
3363 {
      /*
       * NOTE(review): embedded numbers 3352, 3412, 3414, 3430-3433, 3467,
       * 3484, 3489, 3537, 3552 and 3577 are missing from this listing.
       * Among other things, the function name with its first parameter
       * (the body uses "relname"), the assignment of oldcxt, the allocation
       * of rel->rd_rel, and the statement that pins the returned entry are
       * not visible here.  Consult upstream before changing any logic.
       */
3364  Relation rel;
3365  MemoryContext oldcxt;
3366  int natts = tupDesc->natts;
3367  int i;
3368  bool has_not_null; /* true if any source attribute is marked attnotnull */
3369  bool nailit; /* true if relid is one of the catalogs listed in the switch below */
3370 
3371  AssertArg(natts >= 0);
3372 
3373  /*
3374  * check for creation of a rel that must be nailed in cache.
3375  *
3376  * XXX this list had better match the relations specially handled in
3377  * RelationCacheInitializePhase2/3.
      *
      * NOTE(review): RelationCacheInitializePhase2 also calls formrdesc()
      * for pg_shseclabel and pg_subscription, which have no case label
      * here -- confirm whether that omission is intentional.
3378  */
3379  switch (relid)
3380  {
3381  case DatabaseRelationId:
3382  case AuthIdRelationId:
3383  case AuthMemRelationId:
3384  case RelationRelationId:
3385  case AttributeRelationId:
3386  case ProcedureRelationId:
3387  case TypeRelationId:
3388  nailit = true;
3389  break;
3390  default:
3391  nailit = false;
3392  break;
3393  }
3394 
3395  /*
3396  * check that hardwired list of shared rels matches what's in the
3397  * bootstrap .bki file. If you get a failure here during initdb, you
3398  * probably need to fix IsSharedRelation() to match whatever you've done
3399  * to the set of shared relations.
3400  */
3401  if (shared_relation != IsSharedRelation(relid))
3402  elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)",
3403  relname, relid);
3404 
3405  /* Shared relations had better be mapped, too */
3406  Assert(mapped_relation || !shared_relation);
3407 
3408  /*
3409  * switch to the cache context to create the relcache entry.
3410  */
3411  if (!CacheMemoryContext)
3413 
3415 
3416  /*
3417  * allocate a new relation descriptor and fill in basic state fields.
3418  */
3419  rel = (Relation) palloc0(sizeof(RelationData));
3420 
3421  /* make sure relation is marked as having no open file yet */
3422  rel->rd_smgr = NULL;
3423 
3424  /* mark it nailed if appropriate */
3425  rel->rd_isnailed = nailit;
3426 
3427  rel->rd_refcnt = nailit ? 1 : 0; /* same baseline refcount that AtEOXact_cleanup() asserts */
3428 
3429  /* it's being created in this transaction */
3434 
3435  /*
3436  * create a new tuple descriptor from the one passed in. We do this
3437  * partly to copy it into the cache context, and partly because the new
3438  * relation can't have any defaults or constraints yet; they have to be
3439  * added in later steps, because they require additions to multiple system
3440  * catalogs. We can copy attnotnull constraints here, however.
3441  */
3442  rel->rd_att = CreateTupleDescCopy(tupDesc);
3443  rel->rd_att->tdrefcount = 1; /* mark as refcounted */
3444  has_not_null = false;
3445  for (i = 0; i < natts; i++)
3446  {
3447  Form_pg_attribute satt = TupleDescAttr(tupDesc, i); /* source attribute */
3448  Form_pg_attribute datt = TupleDescAttr(rel->rd_att, i); /* attribute in the copy */
3449 
      /* Carry these three per-attribute flags over to the copy explicitly. */
3450  datt->attidentity = satt->attidentity;
3451  datt->attgenerated = satt->attgenerated;
3452  datt->attnotnull = satt->attnotnull;
3453  has_not_null |= satt->attnotnull;
3454  }
3455 
3456  if (has_not_null)
3457  {
      /* A zeroed TupleConstr with only has_not_null set. */
3458  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
3459 
3460  constr->has_not_null = true;
3461  rel->rd_att->constr = constr;
3462  }
3463 
3464  /*
3465  * initialize relation tuple form (caller may add/override data later)
3466  */
3468 
3469  namestrcpy(&rel->rd_rel->relname, relname);
3470  rel->rd_rel->relnamespace = relnamespace;
3471 
3472  rel->rd_rel->relkind = relkind;
3473  rel->rd_rel->relnatts = natts;
3474  rel->rd_rel->reltype = InvalidOid;
3475  /* needed when bootstrapping: */
3476  rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;
3477 
3478  /* set up persistence and relcache fields dependent on it */
3479  rel->rd_rel->relpersistence = relpersistence;
3480  switch (relpersistence)
3481  {
3482  case RELPERSISTENCE_UNLOGGED:
3483  case RELPERSISTENCE_PERMANENT:
3485  rel->rd_islocaltemp = false;
3486  break;
3487  case RELPERSISTENCE_TEMP:
3488  Assert(isTempOrTempToastNamespace(relnamespace));
3490  rel->rd_islocaltemp = true;
3491  break;
3492  default:
3493  elog(ERROR, "invalid relpersistence: %c", relpersistence);
3494  break;
3495  }
3496 
3497  /* if it's a materialized view, it's not populated initially */
3498  if (relkind == RELKIND_MATVIEW)
3499  rel->rd_rel->relispopulated = false;
3500  else
3501  rel->rd_rel->relispopulated = true;
3502 
3503  /* set replica identity -- system catalogs and non-tables don't have one */
3504  if (!IsCatalogNamespace(relnamespace) &&
3505  (relkind == RELKIND_RELATION ||
3506  relkind == RELKIND_MATVIEW ||
3507  relkind == RELKIND_PARTITIONED_TABLE))
3508  rel->rd_rel->relreplident = REPLICA_IDENTITY_DEFAULT;
3509  else
3510  rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
3511 
3512  /*
3513  * Insert relation physical and logical identifiers (OIDs) into the right
3514  * places. For a mapped relation, we set relfilenode to zero and rely on
3515  * RelationInitPhysicalAddr to consult the map.
3516  */
3517  rel->rd_rel->relisshared = shared_relation;
3518 
3519  RelationGetRelid(rel) = relid; /* RelationGetRelid() is used as an lvalue here */
3520 
3521  for (i = 0; i < natts; i++)
3522  TupleDescAttr(rel->rd_att, i)->attrelid = relid;
3523 
3524  rel->rd_rel->reltablespace = reltablespace;
3525 
3526  if (mapped_relation)
3527  {
3528  rel->rd_rel->relfilenode = InvalidOid;
3529  /* Add it to the active mapping information */
3530  RelationMapUpdateMap(relid, relfilenode, shared_relation, true);
3531  }
3532  else
3533  rel->rd_rel->relfilenode = relfilenode;
3534 
3535  RelationInitLockInfo(rel); /* see lmgr.c */
3536 
3538 
3539  rel->rd_rel->relam = accessmtd;
3540 
3541  /*
3542  * RelationInitTableAccessMethod will do syscache lookups, so we mustn't
3543  * run it in CacheMemoryContext. Fortunately, the remaining steps don't
3544  * require a long-lived current context.
3545  */
3546  MemoryContextSwitchTo(oldcxt);
3547 
3548  if (relkind == RELKIND_RELATION ||
3549  relkind == RELKIND_SEQUENCE ||
3550  relkind == RELKIND_TOASTVALUE ||
3551  relkind == RELKIND_MATVIEW)
3553 
3554  /*
3555  * Okay to insert into the relcache hash table.
3556  *
3557  * Ordinarily, there should certainly not be an existing hash entry for
3558  * the same OID; but during bootstrap, when we create a "real" relcache
3559  * entry for one of the bootstrap relations, we'll be overwriting the
3560  * phony one created with formrdesc. So allow that to happen for nailed
3561  * rels.
3562  */
3563  RelationCacheInsert(rel, nailit);
3564 
3565  /*
3566  * Flag relation as needing eoxact cleanup (to clear rd_createSubid). We
3567  * can't do this before storing relid in it.
3568  */
3569  EOXactListAdd(rel);
3570 
3571  /* It's fully valid */
3572  rel->rd_isvalid = true;
3573 
3574  /*
3575  * Caller expects us to pin the returned entry.
3576  */
3578 
3579  return rel;
3580 }
3581 
3582 
3583 /*
3584  * RelationSetNewRelfilenode
3585  *
3586  * Assign a new relfilenode (physical file name), and possibly a new
3587  * persistence setting, to the relation.
3588  *
3589  * This allows a full rewrite of the relation to be done with transactional
3590  * safety (since the filenode assignment can be rolled back). Note however
3591  * that there is no simple way to access the relation's old data for the
3592  * remainder of the current transaction. This limits the usefulness to cases
3593  * such as TRUNCATE or rebuilding an index from scratch.
3594  *
3595  * Caller must already hold exclusive lock on the relation.
3596  */
3597 void
3598 RelationSetNewRelfilenode(Relation relation, char persistence)
3599 {
      /*
       * NOTE(review): embedded numbers 3696 and 3731 are missing from this
       * listing.  The first line of the call under "Do the deed" and the
       * statement announced by the "Make the pg_class row change or
       * relation map change visible" comment are therefore not visible.
       */
3600  Oid newrelfilenode;
3601  Relation pg_class;
3602  HeapTuple tuple;
3603  Form_pg_class classform;
      /*
       * minmulti and freezeXid start out invalid.  They are passed by
       * address only to table_relation_set_new_filenode() in the table-like
       * branch below, so for indexes and sequences they keep these values.
       */
3604  MultiXactId minmulti = InvalidMultiXactId;
3605  TransactionId freezeXid = InvalidTransactionId;
3606  RelFileNode newrnode;
3607 
3608  /* Allocate a new relfilenode */
3609  newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL,
3610  persistence);
3611 
3612  /*
3613  * Get a writable copy of the pg_class tuple for the given relation.
3614  */
3615  pg_class = table_open(RelationRelationId, RowExclusiveLock);
3616 
3617  tuple = SearchSysCacheCopy1(RELOID,
3618  ObjectIdGetDatum(RelationGetRelid(relation)));
3619  if (!HeapTupleIsValid(tuple))
3620  elog(ERROR, "could not find tuple for relation %u",
3621  RelationGetRelid(relation));
3622  classform = (Form_pg_class) GETSTRUCT(tuple);
3623 
3624  /*
3625  * Schedule unlinking of the old storage at transaction commit.
3626  */
3627  RelationDropStorage(relation);
3628 
3629  /*
3630  * Create storage for the main fork of the new relfilenode. If it's a
3631  * table-like object, call into the table AM to do so, which'll also
3632  * create the table's init fork if needed.
3633  *
3634  * NOTE: If relevant for the AM, any conflict in relfilenode value will be
3635  * caught here, if GetNewRelFileNode messes up for any reason.
3636  */
      /* Start from the current rd_node and replace only relNode. */
3637  newrnode = relation->rd_node;
3638  newrnode.relNode = newrelfilenode;
3639 
3640  switch (relation->rd_rel->relkind)
3641  {
3642  case RELKIND_INDEX:
3643  case RELKIND_SEQUENCE:
3644  {
3645  /* handle these directly, at least for now */
3646  SMgrRelation srel;
3647 
3648  srel = RelationCreateStorage(newrnode, persistence);
3649  smgrclose(srel);
3650  }
3651  break;
3652 
3653  case RELKIND_RELATION:
3654  case RELKIND_TOASTVALUE:
3655  case RELKIND_MATVIEW:
3656  table_relation_set_new_filenode(relation, &newrnode,
3657  persistence,
3658  &freezeXid, &minmulti);
3659  break;
3660 
3661  default:
3662  /* we shouldn't be called for anything else */
3663  elog(ERROR, "relation \"%s\" does not have storage",
3664  RelationGetRelationName(relation));
3665  break;
3666  }
3667 
3668  /*
3669  * If we're dealing with a mapped index, pg_class.relfilenode doesn't
3670  * change; instead we have to send the update to the relation mapper.
3671  *
3672  * For mapped indexes, we don't actually change the pg_class entry at all;
3673  * this is essential when reindexing pg_class itself. That leaves us with
3674  * possibly-inaccurate values of relpages etc, but those will be fixed up
3675  * later.
3676  */
3677  if (RelationIsMapped(relation))
3678  {
3679  /* This case is only supported for indexes */
3680  Assert(relation->rd_rel->relkind == RELKIND_INDEX);
3681 
3682  /* Since we're not updating pg_class, these had better not change */
3683  Assert(classform->relfrozenxid == freezeXid);
3684  Assert(classform->relminmxid == minmulti);
3685  Assert(classform->relpersistence == persistence);
3686 
3687  /*
3688  * In some code paths it's possible that the tuple update we'd
3689  * otherwise do here is the only thing that would assign an XID for
3690  * the current transaction. However, we must have an XID to delete
3691  * files, so make sure one is assigned.
3692  */
3693  (void) GetCurrentTransactionId();
3694 
3695  /* Do the deed */
3697  newrelfilenode,
3698  relation->rd_rel->relisshared,
3699  false);
3700 
3701  /* Since we're not updating pg_class, must trigger inval manually */
3702  CacheInvalidateRelcache(relation);
3703  }
3704  else
3705  {
3706  /* Normal case, update the pg_class entry */
3707  classform->relfilenode = newrelfilenode;
3708 
3709  /* relpages etc. never change for sequences */
3710  if (relation->rd_rel->relkind != RELKIND_SEQUENCE)
3711  {
3712  classform->relpages = 0; /* it's empty until further notice */
3713  classform->reltuples = -1;
3714  classform->relallvisible = 0;
3715  }
3716  classform->relfrozenxid = freezeXid;
3717  classform->relminmxid = minmulti;
3718  classform->relpersistence = persistence;
3719 
3720  CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
3721  }
3722 
      /* The tuple was obtained with SearchSysCacheCopy1(), so free it here. */
3723  heap_freetuple(tuple);
3724 
3725  table_close(pg_class, RowExclusiveLock);
3726 
3727  /*
3728  * Make the pg_class row change or relation map change visible. This will
3729  * cause the relcache entry to get updated, too.
3730  */
3732 
3733  RelationAssumeNewRelfilenode(relation);
3734 }
3735 
3736 /*
3737  * RelationAssumeNewRelfilenode
3738  *
3739  * Code that modifies pg_class.reltablespace or pg_class.relfilenode must call
3740  * this. The call shall precede any code that might insert WAL records whose
3741  * replay would modify bytes in the new RelFileNode, and the call shall follow
3742  * any WAL modifying bytes in the prior RelFileNode. See struct RelationData.
3743  * Ideally, call this as near as possible to the CommandCounterIncrement()
3744  * that makes the pg_class change visible (before it or after it); that
3745  * minimizes the chance of future development adding a forbidden WAL insertion
3746  * between RelationAssumeNewRelfilenode() and CommandCounterIncrement().
3747  */
3748 void
3750 {
      /*
       * NOTE(review): embedded numbers 3749 and 3751-3752 are missing from
       * this listing.  The function name/parameter line (the body uses a
       * parameter called relation) and the statements that precede the
       * assignment below are therefore not visible here; in particular,
       * whatever sets rd_newRelfilenodeSubid and guards the assignment to
       * rd_firstRelfilenodeSubid cannot be confirmed from this view.
       */
3753  relation->rd_firstRelfilenodeSubid = relation->rd_newRelfilenodeSubid;
3754 
3755  /* Flag relation as needing eoxact cleanup (to clear these fields) */
3756  EOXactListAdd(relation);
3757 }
3758 
3759 
3760 /*
3761  * RelationCacheInitialize
3762  *
3763  * This initializes the relation descriptor cache. At the time
3764  * that this is invoked, we can't do database access yet (mainly
3765  * because the transaction subsystem is not up); all we are doing
3766  * is making an empty cache hashtable. This must be done before
3767  * starting the initialization transaction, because otherwise
3768  * AtEOXact_RelationCache would crash if that transaction aborts
3769  * before we can get the relcache set up.
3770  */
3771 
3772 #define INITRELCACHESIZE 400 /* size argument passed to hash_create() for the relcache hash table */
3773 
3774 void
3776 {
      /*
       * NOTE(review): embedded numbers 3775, 3783 and 3796 are missing from
       * this listing.  The function name/parameter line, the statement
       * governed by "if (!CacheMemoryContext)", and the statement announced
       * by the "relation mapper" comment are therefore not visible here.
       */
3777  HASHCTL ctl;
3778 
3779  /*
3780  * make sure cache memory context exists
3781  */
3782  if (!CacheMemoryContext)
3784 
3785  /*
3786  * create hashtable that indexes the relcache
3787  */
3788  ctl.keysize = sizeof(Oid); /* entries are keyed by relation OID */
3789  ctl.entrysize = sizeof(RelIdCacheEnt);
3790  RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
3791  &ctl, HASH_ELEM | HASH_BLOBS);
3792 
3793  /*
3794  * relation mapper needs to be initialized too
3795  */
3797 }
3798 
3799 /*
3800  * RelationCacheInitializePhase2
3801  *
3802  * This is called to prepare for access to shared catalogs during startup.
3803  * We must at least set up nailed reldescs for pg_database, pg_authid,
3804  * pg_auth_members, pg_shseclabel, and pg_subscription. Ideally we'd like
3805  * to have reldescs for their indexes, too. We attempt to load this
3806  * information from the shared relcache init file. If that's missing or
3807  * broken, just make phony entries for the catalogs themselves.
3808  * RelationCacheInitializePhase3 will clean up as needed.
3809  */
3810 void
3812 {
      /*
       * NOTE(review): embedded numbers 3811, 3818, 3824 and 3830 are missing
       * from this listing.  The function name/parameter line, the statement
       * announced by the "relation mapper" comment, the condition guarding
       * the early return, and the assignment of oldcxt are therefore not
       * visible here.
       */
3813  MemoryContext oldcxt;
3814 
3815  /*
3816  * relation mapper needs to be initialized too
3817  */
3819 
3820  /*
3821  * In bootstrap mode, the shared catalogs aren't there yet anyway, so do
3822  * nothing.
3823  */
3825  return;
3826 
3827  /*
3828  * switch to cache memory context
3829  */
3831 
3832  /*
3833  * Try to load the shared relcache cache file. If unsuccessful, bootstrap
3834  * the cache with pre-made descriptors for the critical shared catalogs.
3835  */
3836  if (!load_relcache_init_file(true))
3837  {
      /*
       * One formrdesc() call per critical shared catalog.  The third
       * argument is true for all of them; RelationCacheInitializePhase3
       * passes false for the per-database catalogs, so it is presumably
       * the is-shared flag -- confirm against formrdesc().
       */
3838  formrdesc("pg_database", DatabaseRelation_Rowtype_Id, true,
3839  Natts_pg_database, Desc_pg_database);
3840  formrdesc("pg_authid", AuthIdRelation_Rowtype_Id, true,
3841  Natts_pg_authid, Desc_pg_authid);
3842  formrdesc("pg_auth_members", AuthMemRelation_Rowtype_Id, true,
3843  Natts_pg_auth_members, Desc_pg_auth_members);
3844  formrdesc("pg_shseclabel", SharedSecLabelRelation_Rowtype_Id, true,
3845  Natts_pg_shseclabel, Desc_pg_shseclabel);
3846  formrdesc("pg_subscription", SubscriptionRelation_Rowtype_Id, true,
3847  Natts_pg_subscription, Desc_pg_subscription);
3848 
3849 #define NUM_CRITICAL_SHARED_RELS 5 /* fix if you change list above */
3850  }
3851 
3852  MemoryContextSwitchTo(oldcxt); /* restore the caller's memory context */
3853 }
3854 
3855 /*
3856  * RelationCacheInitializePhase3
3857  *
3858  * This is called as soon as the catcache and transaction system
3859  * are functional and we have determined MyDatabaseId. At this point
3860  * we can actually read data from the database's system catalogs.
3861  * We first try to read pre-computed relcache entries from the local
3862  * relcache init file. If that's missing or broken, make phony entries
3863  * for the minimum set of nailed-in-cache relations. Then (unless
3864  * bootstrapping) make sure we have entries for the critical system
3865  * indexes. Once we've done all this, we have enough infrastructure to
3866  * open any system catalog or use any catcache. The last step is to
3867  * rewrite the cache files if needed.
3868  */
3869 void
3871 {
3873  RelIdCacheEnt *idhentry;
3874  MemoryContext oldcxt;
3875  bool needNewCacheFile = !criticalSharedRelcachesBuilt;
3876 
3877  /*
3878  * relation mapper needs initialized too
3879  */
3881 
3882  /*
3883  * switch to cache memory context
3884  */
3886 
3887  /*
3888  * Try to load the local relcache cache file. If unsuccessful, bootstrap
3889  * the cache with pre-made descriptors for the critical "nailed-in" system
3890  * catalogs.
3891  */
3892  if (IsBootstrapProcessingMode() ||
3893  !load_relcache_init_file(false))
3894  {
3895  needNewCacheFile = true;
3896 
3897  formrdesc("pg_class", RelationRelation_Rowtype_Id, false,
3898  Natts_pg_class, Desc_pg_class);
3899  formrdesc("pg_attribute", AttributeRelation_Rowtype_Id, false,
3900  Natts_pg_attribute, Desc_pg_attribute);
3901  formrdesc("pg_proc", ProcedureRelation_Rowtype_Id, false,
3902  Natts_pg_proc, Desc_pg_proc);
3903  formrdesc("pg_type", TypeRelation_Rowtype_Id, false,
3904  Natts_pg_type, Desc_pg_type);
3905 
3906 #define NUM_CRITICAL_LOCAL_RELS 4 /* fix if you change list above */
3907  }
3908 
3909  MemoryContextSwitchTo(oldcxt);
3910 
3911  /* In bootstrap mode, the faked-up formrdesc info is all we'll have */
3913  return;
3914 
3915  /*
3916  * If we didn't get the critical system indexes loaded into relcache, do
3917  * so now. These are critical because the catcache and/or opclass cache
3918  * depend on them for fetches done during relcache load. Thus, we have an
3919  * infinite-recursion problem. We can break the recursion by doing
3920  * heapscans instead of indexscans at certain key spots. To avoid hobbling
3921  * performance, we only want to do that until we have the critical indexes
3922  * loaded into relcache. Thus, the flag criticalRelcachesBuilt is used to
3923  * decide whether to do heapscan or indexscan at the key spots, and we set
3924  * it true after we've loaded the critical indexes.
3925  *
3926  * The critical indexes are marked as "nailed in cache", partly to make it
3927  * easy for load_relcache_init_file to count them, but mainly because we
3928  * cannot flush and rebuild them once we've set criticalRelcachesBuilt to
3929  * true. (NOTE: perhaps it would be possible to reload them by
3930  * temporarily setting criticalRelcachesBuilt to false again. For now,
3931  * though, we just nail 'em in.)
3932  *
3933  * RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical
3934  * in the same way as the others, because the critical catalogs don't
3935  * (currently) have any rules or triggers, and so these indexes can be
3936  * rebuilt without inducing recursion. However they are used during
3937  * relcache load when a rel does have rules or triggers, so we choose to
3938  * nail them for performance reasons.
3939  */
3941  {
3942  load_critical_index(ClassOidIndexId,
3943  RelationRelationId);
3944  load_critical_index(AttributeRelidNumIndexId,
3945  AttributeRelationId);
3946  load_critical_index(IndexRelidIndexId,
3947  IndexRelationId);
3948  load_critical_index(OpclassOidIndexId,
3949  OperatorClassRelationId);
3950  load_critical_index(AccessMethodProcedureIndexId,
3951  AccessMethodProcedureRelationId);
3952  load_critical_index(RewriteRelRulenameIndexId,
3953  RewriteRelationId);
3954  load_critical_index(TriggerRelidNameIndexId,
3955  TriggerRelationId);
3956 
3957 #define NUM_CRITICAL_LOCAL_INDEXES 7 /* fix if you change list above */
3958 
3959  criticalRelcachesBuilt = true;
3960  }
3961 
3962  /*
3963  * Process critical shared indexes too.
3964  *
3965  * DatabaseNameIndexId isn't critical for relcache loading, but rather for
3966  * initial lookup of MyDatabaseId, without which we'll never find any
3967  * non-shared catalogs at all. Autovacuum calls InitPostgres with a
3968  * database OID, so it instead depends on DatabaseOidIndexId. We also
3969  * need to nail up some indexes on pg_authid and pg_auth_members for use
3970  * during client authentication. SharedSecLabelObjectIndexId isn't
3971  * critical for the core system, but authentication hooks might be
3972  * interested in it.
3973  */
3975  {
3976  load_critical_index(DatabaseNameIndexId,
3977  DatabaseRelationId);
3978  load_critical_index(DatabaseOidIndexId,
3979  DatabaseRelationId);
3980  load_critical_index(AuthIdRolnameIndexId,
3981  AuthIdRelationId);
3982  load_critical_index(AuthIdOidIndexId,
3983  AuthIdRelationId);
3984  load_critical_index(AuthMemMemRoleIndexId,
3985  AuthMemRelationId);
3986  load_critical_index(SharedSecLabelObjectIndexId,
3987  SharedSecLabelRelationId);
3988 
3989 #define NUM_CRITICAL_SHARED_INDEXES 6 /* fix if you change list above */
3990 
3992  }
3993 
3994  /*
3995  * Now, scan all the relcache entries and update anything that might be
3996  * wrong in the results from formrdesc or the relcache cache file. If we
3997  * faked up relcache entries using formrdesc, then read the real pg_class
3998  * rows and replace the fake entries with them. Also, if any of the
3999  * relcache entries have rules, triggers, or security policies, load that
4000  * info the hard way since it isn't recorded in the cache file.
4001  *
4002  * Whenever we access the catalogs to read data, there is a possibility of
4003  * a shared-inval cache flush causing relcache entries to be removed.
4004  * Since hash_seq_search only guarantees to still work after the *current*
4005  * entry is removed, it's unsafe to continue the hashtable scan afterward.
4006  * We handle this by restarting the scan from scratch after each access.
4007  * This is theoretically O(N^2), but the number of entries that actually
4008  * need to be fixed is small enough that it doesn't matter.
4009  */
4010  hash_seq_init(&status, RelationIdCache);
4011 
4012  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
4013  {
4014  Relation relation = idhentry->reldesc;
4015  bool restart = false;
4016 
4017  /*
4018  * Make sure *this* entry doesn't get flushed while we work with it.
4019  */
4021 
4022  /*
4023  * If it's a faked-up entry, read the real pg_class tuple.
4024  */
4025  if (relation->rd_rel->relowner == InvalidOid)
4026  {
4027  HeapTuple htup;
4028  Form_pg_class relp;
4029 
4030  htup = SearchSysCache1(RELOID,
4031  ObjectIdGetDatum(RelationGetRelid(relation)));
4032  if (!HeapTupleIsValid(htup))
4033  elog(FATAL, "cache lookup failed for relation %u",
4034  RelationGetRelid(relation));
4035  relp = (Form_pg_class) GETSTRUCT(htup);
4036 
4037  /*
4038  * Copy tuple to relation->rd_rel. (See notes in
4039  * AllocateRelationDesc())
4040  */
4041  memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
4042 
4043  /* Update rd_options while we have the tuple */
4044  if (relation->rd_options)
4045  pfree(relation->rd_options);
4046  RelationParseRelOptions(relation, htup);
4047 
4048  /*
4049  * Check the values in rd_att were set up correctly. (We cannot
4050  * just copy them over now: formrdesc must have set up the rd_att
4051  * data correctly to start with, because it may already have been
4052  * copied into one or more catcache entries.)
4053  */
4054  Assert(relation->rd_att->tdtypeid == relp->reltype);
4055  Assert(relation->rd_att->tdtypmod == -1);
4056 
4057  ReleaseSysCache(htup);
4058 
4059  /* relowner had better be OK now, else we'll loop forever */
4060  if (relation->rd_rel->relowner == InvalidOid)
4061  elog(ERROR, "invalid relowner in pg_class entry for \"%s\"",
4062  RelationGetRelationName(relation));
4063 
4064  restart = true;
4065  }
4066 
4067  /*
4068  * Fix data that isn't saved in relcache cache file.
4069  *
4070  * relhasrules or relhastriggers could possibly be wrong or out of
4071  * date. If we don't actually find any rules or triggers, clear the
4072  * local copy of the flag so that we don't get into an infinite loop
4073  * here. We don't make any attempt to fix the pg_class entry, though.
4074  */
4075  if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
4076  {
4077  RelationBuildRuleLock(relation);
4078  if (relation->rd_rules == NULL)
4079  relation->rd_rel->relhasrules = false;
4080  restart = true;
4081  }
4082  if (relation->rd_rel->relhastriggers && relation->trigdesc == NULL)
4083  {
4084  RelationBuildTriggers(relation);
4085  if (relation->trigdesc == NULL)
4086  relation->rd_rel->relhastriggers = false;
4087  restart = true;
4088  }
4089 
4090  /*
4091  * Re-load the row security policies if the relation has them, since
4092  * they are not preserved in the cache. Note that we can never NOT
4093  * have a policy while relrowsecurity is true,
4094  * RelationBuildRowSecurity will create a single default-deny policy
4095  * if there is no policy defined in pg_policy.
4096  */
4097  if (relation->rd_rel->relrowsecurity && relation->rd_rsdesc == NULL)
4098  {
4099  RelationBuildRowSecurity(relation);
4100 
4101  Assert(relation->rd_rsdesc != NULL);
4102  restart = true;
4103  }
4104 
4105  /* Reload tableam data if needed */
4106  if (relation->rd_tableam == NULL &&
4107  (relation->rd_rel->relkind == RELKIND_RELATION ||
4108  relation->rd_rel->relkind == RELKIND_SEQUENCE ||
4109  relation->rd_rel->relkind == RELKIND_TOASTVALUE ||
4110  relation->rd_rel->relkind == RELKIND_MATVIEW))
4111  {
4113  Assert(relation->rd_tableam != NULL);
4114 
4115  restart = true;
4116  }
4117 
4118  /* Release hold on the relation */
4120 
4121  /* Now, restart the hashtable scan if needed */
4122  if (restart)
4123  {
4124  hash_seq_term(&status);
4125  hash_seq_init(&status, RelationIdCache);
4126  }
4127  }
4128 
4129  /*
4130  * Lastly, write out new relcache cache files if needed. We don't bother
4131  * to distinguish cases where only one of the two needs an update.
4132  */
4133  if (needNewCacheFile)
4134  {
4135  /*
4136  * Force all the catcaches to finish initializing and thereby open the
4137  * catalogs and indexes they use. This will preload the relcache with
4138  * entries for all the most important system catalogs and indexes, so
4139  * that the init files will be most useful for future backends.
4140  */
4142 
4143  /* now write the files */
4145  write_relcache_init_file(false);
4146  }
4147 }
4148 
4149 /*
4150  * Load one critical system index into the relcache
4151  *
4152  * indexoid is the OID of the target index, heapoid is the OID of the catalog
4153  * it belongs to.
4154  */
4155 static void
4156 load_critical_index(Oid indexoid, Oid heapoid)
4157 {
4158  Relation ird;
4159 
4160  /*
4161  * We must lock the underlying catalog before locking the index to avoid
4162  * deadlock, since RelationBuildDesc might well need to read the catalog,
4163  * and if anyone else is exclusive-locking this catalog and index they'll
4164  * be doing it in that order.
4165  */
4166  LockRelationOid(heapoid, AccessShareLock);
4167  LockRelationOid(indexoid, AccessShareLock);
4168  ird = RelationBuildDesc(indexoid, true);
4169  if (ird == NULL)
4170  elog(PANIC, "could not open critical system index %u", indexoid);
4171  ird->rd_isnailed = true;
4172  ird->rd_refcnt = 1;
4175 
4176  (void) RelationGetIndexAttOptions(ird, false);
4177 }
4178 
4179 /*
4180  * GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
4181  * GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
4182  *
4183  * We need this kluge because we have to be able to access non-fixed-width
4184  * fields of pg_class and pg_index before we have the standard catalog caches
4185  * available. We use predefined data that's set up in just the same way as
4186  * the bootstrapped reldescs used by formrdesc(). The resulting tupdesc is
4187  * not 100% kosher: it does not have the correct rowtype OID in tdtypeid, nor
4188  * does it have a TupleConstr field. But it's good enough for the purpose of
4189  * extracting fields.
4190  */
4191 static TupleDesc
4193 {
4194  TupleDesc result;
4195  MemoryContext oldcxt;
4196  int i;
4197 
4199 
4200  result = CreateTemplateTupleDesc(natts);
4201  result->tdtypeid = RECORDOID; /* not right, but we don't care */
4202  result->tdtypmod = -1;
4203 
4204  for (i = 0; i < natts; i++)
4205  {
4206  memcpy(TupleDescAttr(result, i), &attrs[i], ATTRIBUTE_FIXED_PART_SIZE);
4207  /* make sure attcacheoff is valid */
4208  TupleDescAttr(result, i)->attcacheoff = -1;
4209  }
4210 
4211  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
4212  TupleDescAttr(result, 0)->attcacheoff = 0;
4213 
4214  /* Note: we don't bother to set up a TupleConstr entry */
4215 
4216  MemoryContextSwitchTo(oldcxt);
4217 
4218  return result;
4219 }
4220 
4221 static TupleDesc
4223 {
4224  static TupleDesc pgclassdesc = NULL;
4225 
4226  /* Already done? */
4227  if (pgclassdesc == NULL)
4228  pgclassdesc = BuildHardcodedDescriptor(Natts_pg_class,
4229  Desc_pg_class);
4230 
4231  return pgclassdesc;
4232 }
4233 
4234 static TupleDesc
4236 {
4237  static TupleDesc pgindexdesc = NULL;
4238 
4239  /* Already done? */
4240  if (pgindexdesc == NULL)
4241  pgindexdesc = BuildHardcodedDescriptor(Natts_pg_index,
4242  Desc_pg_index);
4243 
4244  return pgindexdesc;
4245 }
4246 
4247 /*
4248  * Load any default attribute value definitions for the relation.
4249  *
4250  * ndef is the number of attributes that were marked atthasdef.
4251  *
4252  * Note: we don't make it a hard error to be missing some pg_attrdef records.
4253  * We can limp along as long as nothing needs to use the default value. Code
4254  * that fails to find an expected AttrDefault record should throw an error.
4255  */
4256 static void
4257 AttrDefaultFetch(Relation relation, int ndef)
4258 {
4259  AttrDefault *attrdef;
4260  Relation adrel;
4261  SysScanDesc adscan;
4262  ScanKeyData skey;
4263  HeapTuple htup;
4264  int found = 0;
4265 
4266  /* Allocate array with room for as many entries as expected */
4267  attrdef = (AttrDefault *)
4269  ndef * sizeof(AttrDefault));
4270 
4271  /* Search pg_attrdef for relevant entries */
4272  ScanKeyInit(&skey,
4273  Anum_pg_attrdef_adrelid,
4274  BTEqualStrategyNumber, F_OIDEQ,
4275  ObjectIdGetDatum(RelationGetRelid(relation)));
4276 
4277  adrel = table_open(AttrDefaultRelationId, AccessShareLock);
4278  adscan = systable_beginscan(adrel, AttrDefaultIndexId, true,
4279  NULL, 1, &skey);
4280 
4281  while (HeapTupleIsValid(htup = systable_getnext(adscan)))
4282  {
4283  Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
4284  Datum val;
4285  bool isnull;
4286 
4287  /* protect limited size of array */
4288  if (found >= ndef)
4289  {
4290  elog(WARNING, "unexpected pg_attrdef record found for attribute %d of relation \"%s\"",
4291  adform->adnum, RelationGetRelationName(relation));
4292  break;
4293  }
4294 
4295  val = fastgetattr(htup,
4296  Anum_pg_attrdef_adbin,
4297  adrel->rd_att, &isnull);
4298  if (isnull)
4299  elog(WARNING, "null adbin for attribute %d of relation \"%s\"",
4300  adform->adnum, RelationGetRelationName(relation));
4301  else
4302  {
4303  /* detoast and convert to cstring in caller's context */
4304  char *s = TextDatumGetCString(val);
4305 
4306  attrdef[found].adnum = adform->adnum;
4307  attrdef[found].adbin = MemoryContextStrdup(CacheMemoryContext, s);
4308  pfree(s);
4309  found++;
4310  }
4311  }
4312 
4313  systable_endscan(adscan);
4314  table_close(adrel, AccessShareLock);
4315 
4316  if (found != ndef)
4317  elog(WARNING, "%d pg_attrdef record(s) missing for relation \"%s\"",
4318  ndef - found, RelationGetRelationName(relation));
4319 
4320  /*
4321  * Sort the AttrDefault entries by adnum, for the convenience of
4322  * equalTupleDescs(). (Usually, they already will be in order, but this
4323  * might not be so if systable_getnext isn't using an index.)
4324  */
4325  if (found > 1)
4326  qsort(attrdef, found, sizeof(AttrDefault), AttrDefaultCmp);
4327 
4328  /* Install array only after it's fully valid */
4329  relation->rd_att->constr->defval = attrdef;
4330  relation->rd_att->constr->num_defval = found;
4331 }
4332 
4333 /*
4334  * qsort comparator to sort AttrDefault entries by adnum
4335  */
4336 static int
4337 AttrDefaultCmp(const void *a, const void *b)
4338 {
4339  const AttrDefault *ada = (const AttrDefault *) a;
4340  const AttrDefault *adb = (const AttrDefault *) b;
4341 
4342  return ada->adnum - adb->adnum;
4343 }
4344 
4345 /*
4346  * Load any check constraints for the relation.
4347  *
4348  * As with defaults, if we don't find the expected number of them, just warn
4349  * here. The executor should throw an error if an INSERT/UPDATE is attempted.
4350  */
4351 static void
4353 {
4354  ConstrCheck *check;
4355  int ncheck = relation->rd_rel->relchecks;
4356  Relation conrel;
4357  SysScanDesc conscan;
4358  ScanKeyData skey[1];
4359  HeapTuple htup;
4360  int found = 0;
4361 
4362  /* Allocate array with room for as many entries as expected */
4363  check = (ConstrCheck *)
4365  ncheck * sizeof(ConstrCheck));
4366 
4367  /* Search pg_constraint for relevant entries */
4368  ScanKeyInit(&skey[0],
4369  Anum_pg_constraint_conrelid,
4370  BTEqualStrategyNumber, F_OIDEQ,
4371  ObjectIdGetDatum(RelationGetRelid(relation)));
4372 
4373  conrel = table_open(ConstraintRelationId, AccessShareLock);
4374  conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
4375  NULL, 1, skey);
4376 
4377  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4378  {
4380  Datum val;
4381  bool isnull;
4382 
4383  /* We want check constraints only */
4384  if (conform->contype != CONSTRAINT_CHECK)
4385  continue;
4386 
4387  /* protect limited size of array */
4388  if (found >= ncheck)
4389  {
4390  elog(WARNING, "unexpected pg_constraint record found for relation \"%s\"",
4391  RelationGetRelationName(relation));
4392  break;
4393  }
4394 
4395  check[found].ccvalid = conform->convalidated;
4396  check[found].ccnoinherit = conform->connoinherit;
4398  NameStr(conform->conname));
4399 
4400  /* Grab and test conbin is actually set */
4401  val = fastgetattr(htup,
4402  Anum_pg_constraint_conbin,
4403  conrel->rd_att, &isnull);
4404  if (isnull)
4405  elog(WARNING, "null conbin for relation \"%s\"",
4406  RelationGetRelationName(relation));
4407  else
4408  {
4409  /* detoast and convert to cstring in caller's context */
4410  char *s = TextDatumGetCString(val);
4411 
4412  check[found].ccbin = MemoryContextStrdup(CacheMemoryContext, s);
4413  pfree(s);
4414  found++;
4415  }
4416  }
4417 
4418  systable_endscan(conscan);
4419  table_close(conrel, AccessShareLock);
4420 
4421  if (found != ncheck)
4422  elog(WARNING, "%d pg_constraint record(s) missing for relation \"%s\"",
4423  ncheck - found, RelationGetRelationName(relation));
4424 
4425  /*
4426  * Sort the records by name. This ensures that CHECKs are applied in a
4427  * deterministic order, and it also makes equalTupleDescs() faster.
4428  */
4429  if (found > 1)
4430  qsort(check, found, sizeof(ConstrCheck), CheckConstraintCmp);
4431 
4432  /* Install array only after it's fully valid */
4433  relation->rd_att->constr->check = check;
4434  relation->rd_att->constr->num_check = found;
4435 }
4436 
4437 /*
4438  * qsort comparator to sort ConstrCheck entries by name
4439  */
4440 static int
4441 CheckConstraintCmp(const void *a, const void *b)
4442 {
4443  const ConstrCheck *ca = (const ConstrCheck *) a;
4444  const ConstrCheck *cb = (const ConstrCheck *) b;
4445 
4446  return strcmp(ca->ccname, cb->ccname);
4447 }
4448 
4449 /*
4450  * RelationGetFKeyList -- get a list of foreign key info for the relation
4451  *
4452  * Returns a list of ForeignKeyCacheInfo structs, one per FK constraining
4453  * the given relation. This data is a direct copy of relevant fields from
4454  * pg_constraint. The list items are in no particular order.
4455  *
4456  * CAUTION: the returned list is part of the relcache's data, and could
4457  * vanish in a relcache entry reset. Callers must inspect or copy it
4458  * before doing anything that might trigger a cache flush, such as
4459  * system catalog accesses. copyObject() can be used if desired.
4460  * (We define it this way because current callers want to filter and
4461  * modify the list entries anyway, so copying would be a waste of time.)
4462  */
4463 List *
4465 {
4466  List *result;
4467  Relation conrel;
4468  SysScanDesc conscan;
4469  ScanKeyData skey;
4470  HeapTuple htup;
4471  List *oldlist;
4472  MemoryContext oldcxt;
4473 
4474  /* Quick exit if we already computed the list. */
4475  if (relation->rd_fkeyvalid)
4476  return relation->rd_fkeylist;
4477 
4478  /* Fast path: non-partitioned tables without triggers can't have FKs */
4479  if (!relation->rd_rel->relhastriggers &&
4480  relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
4481  return NIL;
4482 
4483  /*
4484  * We build the list we intend to return (in the caller's context) while
4485  * doing the scan. After successfully completing the scan, we copy that
4486  * list into the relcache entry. This avoids cache-context memory leakage
4487  * if we get some sort of error partway through.
4488  */
4489  result = NIL;
4490 
4491  /* Prepare to scan pg_constraint for entries having conrelid = this rel. */
4492  ScanKeyInit(&skey,
4493  Anum_pg_constraint_conrelid,
4494  BTEqualStrategyNumber, F_OIDEQ,
4495  ObjectIdGetDatum(RelationGetRelid(relation)));
4496 
4497  conrel = table_open(ConstraintRelationId, AccessShareLock);
4498  conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
4499  NULL, 1, &skey);
4500 
4501  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4502  {
4503  Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup);
4504  ForeignKeyCacheInfo *info;
4505 
4506  /* consider only foreign keys */
4507  if (constraint->contype != CONSTRAINT_FOREIGN)
4508  continue;
4509 
4510  info = makeNode(ForeignKeyCacheInfo);
4511  info->conoid = constraint->oid;
4512  info->conrelid = constraint->conrelid;
4513  info->confrelid = constraint->confrelid;
4514 
4515  DeconstructFkConstraintRow(htup, &info->nkeys,
4516  info->conkey,
4517  info->confkey,
4518  info->conpfeqop,
4519  NULL, NULL);
4520 
4521  /* Add FK's node to the result list */
4522  result = lappend(result, info);
4523  }
4524 
4525  systable_endscan(conscan);
4526  table_close(conrel, AccessShareLock);
4527 
4528  /* Now save a copy of the completed list in the relcache entry. */
4530  oldlist = relation->rd_fkeylist;
4531  relation->rd_fkeylist = copyObject(result);
4532  relation->rd_fkeyvalid = true;
4533  MemoryContextSwitchTo(oldcxt);
4534 
4535  /* Don't leak the old list, if there is one */
4536  list_free_deep(oldlist);
4537 
4538  return result;
4539 }
4540 
4541 /*
4542  * RelationGetIndexList -- get a list of OIDs of indexes on this relation
4543  *
4544  * The index list is created only if someone requests it. We scan pg_index
4545  * to find relevant indexes, and add the list to the relcache entry so that
4546  * we won't have to compute it again. Note that shared cache inval of a
4547  * relcache entry will delete the old list and set rd_indexvalid to false,
4548  * so that we must recompute the index list on next request. This handles
4549  * creation or deletion of an index.
4550  *
4551  * Indexes that are marked not indislive are omitted from the returned list.
4552  * Such indexes are expected to be dropped momentarily, and should not be
4553  * touched at all by any caller of this function.
4554  *
4555  * The returned list is guaranteed to be sorted in order by OID. This is
4556  * needed by the executor, since for index types that we obtain exclusive
4557  * locks on when updating the index, all backends must lock the indexes in
4558  * the same order or we will get deadlocks (see ExecOpenIndices()). Any
4559  * consistent ordering would do, but ordering by OID is easy.
4560  *
4561  * Since shared cache inval causes the relcache's copy of the list to go away,
4562  * we return a copy of the list palloc'd in the caller's context. The caller
4563  * may list_free() the returned list after scanning it. This is necessary
4564  * since the caller will typically be doing syscache lookups on the relevant
4565  * indexes, and syscache lookup could cause SI messages to be processed!
4566  *
4567  * In exactly the same way, we update rd_pkindex, which is the OID of the
4568  * relation's primary key index if any, else InvalidOid; and rd_replidindex,
4569  * which is the pg_class OID of an index to be used as the relation's
4570  * replication identity index, or InvalidOid if there is no such index.
4571  */
4572 List *
4574 {
4575  Relation indrel;
4576  SysScanDesc indscan;
4577  ScanKeyData skey;
4578  HeapTuple htup;
4579  List *result;
4580  List *oldlist;
4581  char replident = relation->rd_rel->relreplident;
4582  Oid pkeyIndex = InvalidOid;
4583  Oid candidateIndex = InvalidOid;
4584  MemoryContext oldcxt;
4585 
4586  /* Quick exit if we already computed the list. */
4587  if (relation->rd_indexvalid)
4588  return list_copy(relation->rd_indexlist);
4589 
4590  /*
4591  * We build the list we intend to return (in the caller's context) while
4592  * doing the scan. After successfully completing the scan, we copy that
4593  * list into the relcache entry. This avoids cache-context memory leakage
4594  * if we get some sort of error partway through.
4595  */
4596  result = NIL;
4597 
4598  /* Prepare to scan pg_index for entries having indrelid = this rel. */
4599  ScanKeyInit(&skey,
4600  Anum_pg_index_indrelid,
4601  BTEqualStrategyNumber, F_OIDEQ,
4602  ObjectIdGetDatum(RelationGetRelid(relation)));
4603 
4604  indrel = table_open(IndexRelationId, AccessShareLock);
4605  indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true,
4606  NULL, 1, &skey);
4607 
4608  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4609  {
4611 
4612  /*
4613  * Ignore any indexes that are currently being dropped. This will
4614  * prevent them from being searched, inserted into, or considered in
4615  * HOT-safety decisions. It's unsafe to touch such an index at all
4616  * since its catalog entries could disappear at any instant.
4617  */
4618  if (!index->indislive)
4619  continue;
4620 
4621  /* add index's OID to result list */
4622  result = lappend_oid(result, index->indexrelid);
4623 
4624  /*
4625  * Invalid, non-unique, non-immediate or predicate indexes aren't
4626  * interesting for either oid indexes or replication identity indexes,
4627  * so don't check them.
4628  */
4629  if (!index->indisvalid || !index->indisunique ||
4630  !index->indimmediate ||
4631  !heap_attisnull(htup, Anum_pg_index_indpred, NULL))
4632  continue;
4633 
4634  /* remember primary key index if any */
4635  if (index->indisprimary)
4636  pkeyIndex = index->indexrelid;
4637 
4638  /* remember explicitly chosen replica index */
4639  if (index->indisreplident)
4640  candidateIndex = index->indexrelid;
4641  }
4642 
4643  systable_endscan(indscan);
4644 
4645  table_close(indrel, AccessShareLock);
4646 
4647  /* Sort the result list into OID order, per API spec. */
4648  list_sort(result, list_oid_cmp);
4649 
4650  /* Now save a copy of the completed list in the relcache entry. */
4652  oldlist = relation->rd_indexlist;
4653  relation->rd_indexlist = list_copy(result);
4654  relation->rd_pkindex = pkeyIndex;
4655  if (replident == REPLICA_IDENTITY_DEFAULT && OidIsValid(pkeyIndex))
4656  relation->rd_replidindex = pkeyIndex;
4657  else if (replident == REPLICA_IDENTITY_INDEX && OidIsValid(candidateIndex))
4658  relation->rd_replidindex = candidateIndex;
4659  else
4660  relation->rd_replidindex = InvalidOid;
4661  relation->rd_indexvalid = true;
4662  MemoryContextSwitchTo(oldcxt);
4663 
4664  /* Don't leak the old list, if there is one */
4665  list_free(oldlist);
4666 
4667  return result;
4668 }
4669 
4670 /*
4671  * RelationGetStatExtList
4672  * get a list of OIDs of statistics objects on this relation
4673  *
4674  * The statistics list is created only if someone requests it, in a way
4675  * similar to RelationGetIndexList(). We scan pg_statistic_ext to find
4676  * relevant statistics, and add the list to the relcache entry so that we
4677  * won't have to compute it again. Note that shared cache inval of a
4678  * relcache entry will delete the old list and set rd_statvalid to 0,
4679  * so that we must recompute the statistics list on next request. This
4680  * handles creation or deletion of a statistics object.
4681  *
4682  * The returned list is guaranteed to be sorted in order by OID, although
4683  * this is not currently needed.
4684  *
4685  * Since shared cache inval causes the relcache's copy of the list to go away,
4686  * we return a copy of the list palloc'd in the caller's context. The caller
4687  * may list_free() the returned list after scanning it. This is necessary
4688  * since the caller will typically be doing syscache lookups on the relevant
4689  * statistics, and syscache lookup could cause SI messages to be processed!
4690  */
4691 List *
4693 {
4694  Relation indrel;
4695  SysScanDesc indscan;
4696  ScanKeyData skey;
4697  HeapTuple htup;
4698  List *result;
4699  List *oldlist;
4700  MemoryContext oldcxt;
4701 
4702  /* Quick exit if we already computed the list. */
4703  if (relation->rd_statvalid != 0)
4704  return list_copy(relation->rd_statlist);
4705 
4706  /*
4707  * We build the list we intend to return (in the caller's context) while
4708  * doing the scan. After successfully completing the scan, we copy that
4709  * list into the relcache entry. This avoids cache-context memory leakage
4710  * if we get some sort of error partway through.
4711  */
4712  result = NIL;
4713 
4714  /*
4715  * Prepare to scan pg_statistic_ext for entries having stxrelid = this
4716  * rel.
4717  */
4718  ScanKeyInit(&skey,
4719  Anum_pg_statistic_ext_stxrelid,
4720  BTEqualStrategyNumber, F_OIDEQ,
4721  ObjectIdGetDatum(RelationGetRelid(relation)));
4722 
4723  indrel = table_open(StatisticExtRelationId, AccessShareLock);
4724  indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true,
4725  NULL, 1, &skey);
4726 
4727  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4728  {
4729  Oid oid = ((Form_pg_statistic_ext) GETSTRUCT(htup))->oid;
4730 
4731  result = lappend_oid(result, oid);
4732  }
4733 
4734  systable_endscan(indscan);
4735 
4736  table_close(indrel, AccessShareLock);
4737 
4738  /* Sort the result list into OID order, per API spec. */
4739  list_sort(result, list_oid_cmp);
4740 
4741  /* Now save a copy of the completed list in the relcache entry. */
4743  oldlist = relation->rd_statlist;
4744  relation->rd_statlist = list_copy(result);
4745 
4746  relation->rd_statvalid = true;
4747  MemoryContextSwitchTo(oldcxt);
4748 
4749  /* Don't leak the old list, if there is one */
4750  list_free(oldlist);
4751 
4752  return result;
4753 }
4754 
4755 /*
4756  * RelationGetPrimaryKeyIndex -- get OID of the relation's primary key index
4757  *
4758  * Returns InvalidOid if there is no such index.
4759  */
4760 Oid
4762 {
4763  List *ilist;
4764 
4765  if (!relation->rd_indexvalid)
4766  {
4767  /* RelationGetIndexList does the heavy lifting. */
4768  ilist = RelationGetIndexList(relation);
4769  list_free(ilist);
4770  Assert(relation->rd_indexvalid);
4771  }
4772 
4773  return relation->rd_pkindex;
4774 }
4775 
4776 /*
4777  * RelationGetReplicaIndex -- get OID of the relation's replica identity index
4778  *
4779  * Returns InvalidOid if there is no such index.
4780  */
4781 Oid
4783 {
4784  List *ilist;
4785 
4786  if (!relation->rd_indexvalid)
4787  {
4788  /* RelationGetIndexList does the heavy lifting. */
4789  ilist = RelationGetIndexList(relation);
4790  list_free(ilist);
4791  Assert(relation->rd_indexvalid);
4792  }
4793 
4794  return relation->rd_replidindex;
4795 }
4796 
4797 /*
4798  * RelationGetIndexExpressions -- get the index expressions for an index
4799  *
4800  * We cache the result of transforming pg_index.indexprs into a node tree.
4801  * If the rel is not an index or has no expressional columns, we return NIL.
4802  * Otherwise, the returned tree is copied into the caller's memory context.
4803  * (We don't want to return a pointer to the relcache copy, since it could
4804  * disappear due to relcache invalidation.)
4805  */
4806 List *
4808 {
4809  List *result;
4810  Datum exprsDatum;
4811  bool isnull;
4812  char *exprsString;
4813  MemoryContext oldcxt;
4814 
4815  /* Quick exit if we already computed the result. */
4816  if (relation->rd_indexprs)
4817  return copyObject(relation->rd_indexprs);
4818 
4819  /* Quick exit if there is nothing to do. */
4820  if (relation->rd_indextuple == NULL ||
4821  heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs, NULL))
4822  return NIL;
4823 
4824  /*
4825  * We build the tree we intend to return in the caller's context. After
4826  * successfully completing the work, we copy it into the relcache entry.
4827  * This avoids problems if we get some sort of error partway through.
4828  */
4829  exprsDatum = heap_getattr(relation->rd_indextuple,
4830  Anum_pg_index_indexprs,
4832  &isnull);
4833  Assert(!isnull);
4834  exprsString = TextDatumGetCString(exprsDatum);
4835  result = (List *) stringToNode(exprsString);
4836  pfree(exprsString);
4837 
4838  /*
4839  * Run the expressions through eval_const_expressions. This is not just an
4840  * optimization, but is necessary, because the planner will be comparing
4841  * them to similarly-processed qual clauses, and may fail to detect valid
4842  * matches without this. We must not use canonicalize_qual, however,
4843  * since these aren't qual expressions.
4844  */
4845  result = (List *) eval_const_expressions(NULL, (Node *) result);
4846 
4847  /* May as well fix opfuncids too */
4848  fix_opfuncids((Node *) result);
4849 
4850  /* Now save a copy of the completed tree in the relcache entry. */
4851  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4852  relation->rd_indexprs = copyObject(result);
4853  MemoryContextSwitchTo(oldcxt);
4854 
4855  return result;
4856 }
4857 
4858 /*
4859  * RelationGetDummyIndexExpressions -- get dummy expressions for an index
4860  *
4861  * Return a list of dummy expressions (just Const nodes) with the same
4862  * types/typmods/collations as the index's real expressions. This is
4863  * useful in situations where we don't want to run any user-defined code.
4864  */
4865 List *
4867 {
4868  List *result;
4869  Datum exprsDatum;
4870  bool isnull;
4871  char *exprsString;
4872  List *rawExprs;
4873  ListCell *lc;
4874 
4875  /* Quick exit if there is nothing to do. */
4876  if (relation->rd_indextuple == NULL ||
4877  heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs, NULL))
4878  return NIL;
4879 
4880  /* Extract raw node tree(s) from index tuple. */
4881  exprsDatum = heap_getattr(relation->rd_indextuple,
4882  Anum_pg_index_indexprs,
4884  &isnull);
4885  Assert(!isnull);
4886  exprsString = TextDatumGetCString(exprsDatum);
4887  rawExprs = (List *) stringToNode(exprsString);
4888  pfree(exprsString);
4889 
4890  /* Construct null Consts; the typlen and typbyval are arbitrary. */
4891  result = NIL;
4892  foreach(lc, rawExprs)
4893  {
4894  Node *rawExpr = (Node *) lfirst(lc);
4895 
4896  result = lappend(result,
4897  makeConst(exprType(rawExpr),
4898  exprTypmod(rawExpr),
4899  exprCollation(rawExpr),
4900  1,
4901  (Datum) 0,
4902  true,
4903  true));
4904  }
4905 
4906  return result;
4907 }
4908 
4909 /*
4910  * RelationGetIndexPredicate -- get the index predicate for an index
4911  *
4912  * We cache the result of transforming pg_index.indpred into an implicit-AND
4913  * node tree (suitable for use in planning).
4914  * If the rel is not an index or has no predicate, we return NIL.
4915  * Otherwise, the returned tree is copied into the caller's memory context.
4916  * (We don't want to return a pointer to the relcache copy, since it could
4917  * disappear due to relcache invalidation.)
4918  */
4919 List *
4921 {
4922  List *result;
4923  Datum predDatum;
4924  bool isnull;
4925  char *predString;
4926  MemoryContext oldcxt;
4927 
4928  /* Quick exit if we already computed the result. */
4929  if (relation->rd_indpred)
4930  return copyObject(relation->rd_indpred);
4931 
4932  /* Quick exit if there is nothing to do. */
4933  if (relation->rd_indextuple == NULL ||
4934  heap_attisnull(relation->rd_indextuple, Anum_pg_index_indpred, NULL))
4935  return NIL;
4936 
4937  /*
4938  * We build the tree we intend to return in the caller's context. After
4939  * successfully completing the work, we copy it into the relcache entry.
4940  * This avoids problems if we get some sort of error partway through.
4941  */
4942  predDatum = heap_getattr(relation->rd_indextuple,
4943  Anum_pg_index_indpred,
4945  &isnull);
4946  Assert(!isnull);
4947  predString = TextDatumGetCString(predDatum);
4948  result = (List *) stringToNode(predString);
4949  pfree(predString);
4950 
4951  /*
4952  * Run the expression through const-simplification and canonicalization.
4953  * This is not just an optimization, but is necessary, because the planner
4954  * will be comparing it to similarly-processed qual clauses, and may fail
4955  * to detect valid matches without this. This must match the processing
4956  * done to qual clauses in preprocess_expression()! (We can skip the
4957  * stuff involving subqueries, however, since we don't allow any in index
4958  * predicates.)
4959  */
4960  result = (List *) eval_const_expressions(NULL, (Node *) result);
4961 
4962  result = (List *) canonicalize_qual((Expr *) result, false);
4963 
4964  /* Also convert to implicit-AND format */
4965  result = make_ands_implicit((Expr *) result);
4966 
4967  /* May as well fix opfuncids too */
4968  fix_opfuncids((Node *) result);
4969 
4970  /* Now save a copy of the completed tree in the relcache entry. */
4971  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4972  relation->rd_indpred = copyObject(result);
4973  MemoryContextSwitchTo(oldcxt);
4974 
4975  return result;
4976 }
4977 
4978 /*
4979  * RelationGetIndexAttrBitmap -- get a bitmap of index attribute numbers
4980  *
4981  * The result has a bit set for each attribute used anywhere in the index
4982  * definitions of all the indexes on this relation. (This includes not only
4983  * simple index keys, but attributes used in expressions and partial-index
4984  * predicates.)
4985  *
4986  * Depending on attrKind, a bitmap covering the attnums for all index columns,
4987  * for all potential foreign key columns, or for all columns in the configured
4988  * replica identity index is returned.
4989  *
4990  * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
4991  * we can include system attributes (e.g., OID) in the bitmap representation.
4992  *
4993  * Caller had better hold at least RowExclusiveLock on the target relation
4994  * to ensure it is safe (deadlock-free) for us to take locks on the relation's
4995  * indexes. Note that since the introduction of CREATE INDEX CONCURRENTLY,
4996  * that lock level doesn't guarantee a stable set of indexes, so we have to
4997  * be prepared to retry here in case of a change in the set of indexes.
4998  *
4999  * The returned result is palloc'd in the caller's memory context and should
5000  * be bms_free'd when not needed anymore.
5001  */
5002 Bitmapset *
5004 {
5005  Bitmapset *indexattrs; /* indexed columns */
5006  Bitmapset *uindexattrs; /* columns in unique indexes */
5007  Bitmapset *pkindexattrs; /* columns in the primary index */
5008  Bitmapset *idindexattrs; /* columns in the replica identity */
5009  List *indexoidlist;
5010  List *newindexoidlist;
5011  Oid relpkindex;
5012  Oid relreplindex;
5013  ListCell *l;
5014  MemoryContext oldcxt;
5015 
5016  /* Quick exit if we already computed the result. */
5017  if (relation->rd_indexattr != NULL)
5018  {
5019  switch (attrKind)
5020  {
5021  case INDEX_ATTR_BITMAP_ALL:
5022  return bms_copy(relation->rd_indexattr);
5023  case INDEX_ATTR_BITMAP_KEY:
5024  return bms_copy(relation->rd_keyattr);
5026  return bms_copy(relation->rd_pkattr);
5028  return bms_copy(relation->rd_idattr);
5029  default:
5030  elog(ERROR, "unknown attrKind %u", attrKind);
5031  }
5032  }
5033 
5034  /* Fast path if definitely no indexes */
5035  if (!RelationGetForm(relation)->relhasindex)
5036  return NULL;
5037 
5038  /*
5039  * Get cached list of index OIDs. If we have to start over, we do so here.
5040  */
5041 restart:
5042  indexoidlist = RelationGetIndexList(relation);
5043 
5044  /* Fall out if no indexes (but relhasindex was set) */
5045  if (indexoidlist == NIL)
5046  return NULL;
5047 
5048  /*
5049  * Copy the rd_pkindex and rd_replidindex values computed by
5050  * RelationGetIndexList before proceeding. This is needed because a
5051  * relcache flush could occur inside index_open below, resetting the
5052  * fields managed by RelationGetIndexList. We need to do the work with
5053  * stable values of these fields.
5054  */
5055  relpkindex = relation->rd_pkindex;
5056  relreplindex = relation->rd_replidindex;
5057 
5058  /*
5059  * For each index, add referenced attributes to indexattrs.
5060  *
5061  * Note: we consider all indexes returned by RelationGetIndexList, even if
5062  * they are not indisready or indisvalid. This is important because an
5063  * index for which CREATE INDEX CONCURRENTLY has just started must be
5064  * included in HOT-safety decisions (see README.HOT). If a DROP INDEX
5065  * CONCURRENTLY is far enough along that we should ignore the index, it
5066  * won't be returned at all by RelationGetIndexList.
5067  */
5068  indexattrs = NULL;
5069  uindexattrs = NULL;
5070  pkindexattrs = NULL;
5071  idindexattrs = NULL;
5072  foreach(l, indexoidlist)
5073  {
5074  Oid indexOid = lfirst_oid(l);
5075  Relation indexDesc;
5076  Datum datum;
5077  bool isnull;
5078  Node *indexExpressions;
5079  Node *indexPredicate;
5080  int i;
5081  bool isKey; /* candidate key */
5082  bool isPK; /* primary key */
5083  bool isIDKey; /* replica identity index */
5084 
5085  indexDesc = index_open(indexOid, AccessShareLock);
5086 
5087  /*
5088  * Extract index expressions and index predicate. Note: Don't use
5089  * RelationGetIndexExpressions()/RelationGetIndexPredicate(), because
5090  * those might run constant expressions evaluation, which needs a
5091  * snapshot, which we might not have here. (Also, it's probably more
5092  * sound to collect the bitmaps before any transformations that might
5093  * eliminate columns, but the practical impact of this is limited.)
5094  */
5095 
5096  datum = heap_getattr(indexDesc->rd_indextuple, Anum_pg_index_indexprs,
5097  GetPgIndexDescriptor(), &isnull);
5098  if (!isnull)
5099  indexExpressions = stringToNode(TextDatumGetCString(datum));
5100  else
5101  indexExpressions = NULL;
5102 
5103  datum = heap_getattr(indexDesc->rd_indextuple, Anum_pg_index_indpred,
5104  GetPgIndexDescriptor(), &isnull);
5105  if (!isnull)
5106  indexPredicate = stringToNode(TextDatumGetCString(datum));
5107  else
5108  indexPredicate = NULL;
5109 
5110  /* Can this index be referenced by a foreign key? */
5111  isKey = indexDesc->rd_index->indisunique &&
5112  indexExpressions == NULL &&
5113  indexPredicate == NULL;
5114 
5115  /* Is this a primary key? */
5116  isPK = (indexOid == relpkindex);
5117 
5118  /* Is this index the configured (or default) replica identity? */
5119  isIDKey = (indexOid == relreplindex);
5120 
5121  /* Collect simple attribute references */
5122  for (i = 0; i < indexDesc->rd_index->indnatts; i++)
5123  {
5124  int attrnum = indexDesc->rd_index->indkey.values[i];
5125 
5126  /*
5127  * Since we have covering indexes with non-key columns, we must
5128  * handle them accurately here. non-key columns must be added into
5129  * indexattrs, since they are in index, and HOT-update shouldn't
5130  * miss them. Obviously, non-key columns couldn't be referenced by
5131  * foreign key or identity key. Hence we do not include them into
5132  * uindexattrs, pkindexattrs and idindexattrs bitmaps.
5133  */
5134  if (attrnum != 0)
5135  {
5136  indexattrs = bms_add_member(indexattrs,
5138 
5139  if (isKey && i < indexDesc->rd_index->indnkeyatts)
5140  uindexattrs = bms_add_member(uindexattrs,
5142 
5143  if (isPK && i < indexDesc->rd_index->indnkeyatts)
5144  pkindexattrs = bms_add_member(pkindexattrs,
5146 
5147  if (isIDKey && i < indexDesc->rd_index->indnkeyatts)
5148  idindexattrs = bms_add_member(idindexattrs,
5150  }
5151  }
5152 
5153  /* Collect all attributes used in expressions, too */
5154  pull_varattnos(indexExpressions, 1, &indexattrs);
5155 
5156  /* Collect all attributes in the index predicate, too */
5157  pull_varattnos(indexPredicate, 1, &indexattrs);
5158 
5159  index_close(indexDesc, AccessShareLock);
5160  }
5161 
5162  /*
5163  * During one of the index_opens in the above loop, we might have received
5164  * a relcache flush event on this relcache entry, which might have been
5165  * signaling a change in the rel's index list. If so, we'd better start
5166  * over to ensure we deliver up-to-date attribute bitmaps.
5167  */
5168  newindexoidlist = RelationGetIndexList(relation);
5169  if (equal(indexoidlist, newindexoidlist) &&
5170  relpkindex == relation->rd_pkindex &&
5171  relreplindex == relation->rd_replidindex)
5172  {
5173  /* Still the same index set, so proceed */
5174  list_free(newindexoidlist);
5175  list_free(indexoidlist);
5176  }
5177  else
5178  {
5179  /* Gotta do it over ... might as well not leak memory */
5180  list_free(newindexoidlist);
5181  list_free(indexoidlist);
5182  bms_free(uindexattrs);
5183  bms_free(pkindexattrs);
5184  bms_free(idindexattrs);
5185  bms_free(indexattrs);
5186 
5187  goto restart;
5188  }
5189 
5190  /* Don't leak the old values of these bitmaps, if any */
5191  bms_free(relation->rd_indexattr);
5192  relation->rd_indexattr = NULL;
5193  bms_free(relation->rd_keyattr);
5194  relation->rd_keyattr = NULL;
5195  bms_free(relation->rd_pkattr);
5196  relation->rd_pkattr = NULL;
5197  bms_free(relation->rd_idattr);
5198  relation->rd_idattr = NULL;
5199 
5200  /*
5201  * Now save copies of the bitmaps in the relcache entry. We intentionally
5202  * set rd_indexattr last, because that's the one that signals validity of
5203  * the values; if we run out of memory before making that copy, we won't
5204  * leave the relcache entry looking like the other ones are valid but
5205  * empty.
5206  */
5208  relation->rd_keyattr = bms_copy(uindexattrs);
5209  relation->rd_pkattr = bms_copy(pkindexattrs);
5210  relation->rd_idattr = bms_copy(idindexattrs);
5211  relation->rd_indexattr = bms_copy(indexattrs);
5212  MemoryContextSwitchTo(oldcxt);
5213 
5214  /* We return our original working copy for caller to play with */
5215  switch (attrKind)
5216  {
5217  case INDEX_ATTR_BITMAP_ALL:
5218  return indexattrs;
5219  case INDEX_ATTR_BITMAP_KEY:
5220  return uindexattrs;
5222  return pkindexattrs;
5224  return idindexattrs;
5225  default:
5226  elog(ERROR, "unknown attrKind %u", attrKind);
5227  return NULL;
5228  }
5229 }
5230 
5231 /*
5232  * RelationGetIdentityKeyBitmap -- get a bitmap of replica identity attribute
5233  * numbers
5234  *
5235  * A bitmap of index attribute numbers for the configured replica identity
5236  * index is returned.
5237  *
5238  * See also comments of RelationGetIndexAttrBitmap().
5239  *
5240  * This is a special purpose function used during logical replication. Here,
5241  * unlike RelationGetIndexAttrBitmap(), we don't acquire a lock on the required
5242  * index as we build the cache entry using a historic snapshot and all the
5243  * later changes are absorbed while decoding WAL. Due to this reason, we don't
5244  * need to retry here in case of a change in the set of indexes.
5245  */
5246 Bitmapset *
5248 {
5249  Bitmapset *idindexattrs = NULL; /* columns in the replica identity */
5250  Relation indexDesc;
5251  int i;
5252  Oid replidindex;
5253  MemoryContext oldcxt;
5254 
5255  /* Quick exit if we already computed the result */
5256  if (relation->rd_idattr != NULL)
5257  return bms_copy(relation->rd_idattr);
5258 
5259  /* Fast path if definitely no indexes */
5260  if (!RelationGetForm(relation)->relhasindex)
5261  return NULL;
5262 
5263  /* Historic snapshot must be set. */
5265 
5266  replidindex = RelationGetReplicaIndex(relation);
5267 
5268  /* Fall out if there is no replica identity index */
5269  if (!OidIsValid(replidindex))
5270  return NULL;
5271 
5272  /* Look up the description for the replica identity index */
5273  indexDesc = RelationIdGetRelation(replidindex);
5274 
5275  if (!RelationIsValid(indexDesc))
5276  elog(ERROR, "could not open relation with OID %u",
5277  relation->rd_replidindex);
5278 
5279  /* Add referenced attributes to idindexattrs */
5280  for (i = 0; i < indexDesc->rd_index->indnatts; i++)
5281  {
5282  int attrnum = indexDesc->rd_index->indkey.values[i];
5283 
5284  /*
5285  * We don't include non-key columns into idindexattrs bitmaps. See
5286  * RelationGetIndexAttrBitmap.
5287  */
5288  if (attrnum != 0)
5289  {
5290  if (i < indexDesc->rd_index->indnkeyatts)
5291  idindexattrs = bms_add_member(idindexattrs,
5293  }
5294  }
5295 
5296  RelationClose(indexDesc);
5297 
5298  /* Don't leak the old values of these bitmaps, if any */
5299  bms_free(relation->rd_idattr);
5300  relation->rd_idattr = NULL;
5301 
5302  /* Now save copy of the bitmap in the relcache entry */
5304  relation->rd_idattr = bms_copy(idindexattrs);
5305  MemoryContextSwitchTo(oldcxt);
5306 
5307  /* We return our original working copy for caller to play with */
5308  return idindexattrs;
5309 }
5310 
5311 /*
5312  * RelationGetExclusionInfo -- get info about index's exclusion constraint
5313  *
5314  * This should be called only for an index that is known to have an
5315  * associated exclusion constraint. It returns arrays (palloc'd in caller's
5316  * context) of the exclusion operator OIDs, their underlying functions'
5317  * OIDs, and their strategy numbers in the index's opclasses. We cache
5318  * all this information since it requires a fair amount of work to get.
5319  */
5320 void
5322  Oid **operators,
5323  Oid **procs,
5324  uint16 **strategies)
5325 {
5326  int indnkeyatts;
5327  Oid *ops;
5328  Oid *funcs;
5329  uint16 *strats;
5330  Relation conrel;
5331  SysScanDesc conscan;
5332  ScanKeyData skey[1];
5333  HeapTuple htup;
5334  bool found;
5335  MemoryContext oldcxt;
5336  int i;
5337 
5338  indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation);
5339 
5340  /* Allocate result space in caller context */
5341  *operators = ops = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5342  *procs = funcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5343  *strategies = strats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
5344 
5345  /* Quick exit if we have the data cached already */
5346  if (indexRelation->rd_exclstrats != NULL)
5347  {
5348  memcpy(ops, indexRelation->rd_exclops, sizeof(Oid) * indnkeyatts);
5349  memcpy(funcs, indexRelation->rd_exclprocs, sizeof(Oid) * indnkeyatts);
5350  memcpy(strats, indexRelation->rd_exclstrats, sizeof(uint16) * indnkeyatts);
5351  return;
5352  }
5353 
5354  /*
5355  * Search pg_constraint for the constraint associated with the index. To
5356  * make this not too painfully slow, we use the index on conrelid; that
5357  * will hold the parent relation's OID not the index's own OID.
5358  *
5359  * Note: if we wanted to rely on the constraint name matching the index's
5360  * name, we could just do a direct lookup using pg_constraint's unique
5361  * index. For the moment it doesn't seem worth requiring that.
5362  */
5363  ScanKeyInit(&skey[0],
5364  Anum_pg_constraint_conrelid,
5365  BTEqualStrategyNumber, F_OIDEQ,
5366  ObjectIdGetDatum(indexRelation->rd_index->indrelid));
5367 
5368  conrel = table_open(ConstraintRelationId, AccessShareLock);
5369  conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
5370  NULL, 1, skey);
5371  found = false;
5372 
5373  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
5374  {
5376  Datum val;
5377  bool isnull;
5378  ArrayType *arr;
5379  int nelem;
5380 
5381  /* We want the exclusion constraint owning the index */
5382  if (conform->contype != CONSTRAINT_EXCLUSION ||
5383  conform->conindid != RelationGetRelid(indexRelation))
5384  continue;
5385 
5386  /* There should be only one */
5387  if (found)
5388  elog(ERROR, "unexpected exclusion constraint record found for rel %s",
5389  RelationGetRelationName(indexRelation));
5390  found = true;
5391 
5392  /* Extract the operator OIDS from conexclop */
5393  val = fastgetattr(htup,
5394  Anum_pg_constraint_conexclop,
5395  conrel->rd_att, &isnull);
5396  if (isnull)
5397  elog(ERROR, "null conexclop for rel %s",
5398  RelationGetRelationName(indexRelation));
5399 
5400  arr = DatumGetArrayTypeP(val); /* ensure not toasted */
5401  nelem = ARR_DIMS(arr)[0];
5402  if (ARR_NDIM(arr) != 1 ||
5403  nelem != indnkeyatts ||
5404  ARR_HASNULL(arr) ||
5405  ARR_ELEMTYPE(arr) != OIDOID)
5406  elog(ERROR, "conexclop is not a 1-D Oid array");
5407 
5408  memcpy(ops, ARR_DATA_PTR(arr), sizeof(Oid) * indnkeyatts);
5409  }
5410 
5411  systable_endscan(conscan);
5412  table_close(conrel, AccessShareLock);
5413 
5414  if (!found)
5415  elog(ERROR, "exclusion constraint record missing for rel %s",
5416  RelationGetRelationName(indexRelation));
5417 
5418  /* We need the func OIDs and strategy numbers too */
5419  for (i = 0; i < indnkeyatts; i++)
5420  {
5421  funcs[i] = get_opcode(ops[i]);
5422  strats[i] = get_op_opfamily_strategy(ops[i],
5423  indexRelation->rd_opfamily[i]);
5424  /* shouldn't fail, since it was checked at index creation */
5425  if (strats[i] == InvalidStrategy)
5426  elog(ERROR, "could not find strategy for operator %u in family %u",
5427  ops[i], indexRelation->rd_opfamily[i]);
5428  }
5429 
5430  /* Save a copy of the results in the relcache entry. */
5431  oldcxt = MemoryContextSwitchTo(indexRelation->rd_indexcxt);
5432  indexRelation->rd_exclops = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5433  indexRelation->rd_exclprocs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5434  indexRelation->rd_exclstrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
5435  memcpy(indexRelation->rd_exclops, ops, sizeof(Oid) * indnkeyatts);
5436  memcpy(indexRelation->rd_exclprocs, funcs, sizeof(Oid) * indnkeyatts);
5437  memcpy(indexRelation->rd_exclstrats, strats, sizeof(uint16) * indnkeyatts);
5438  MemoryContextSwitchTo(oldcxt);
5439 }
5440 
5441 /*
5442  * Get publication actions for the given relation.
5443  */
5444 struct PublicationActions *
5446 {
5447  List *puboids;
5448  ListCell *lc;
5449  MemoryContext oldcxt;
5450  PublicationActions *pubactions = palloc0(sizeof(PublicationActions));
5451 
5452  /*
5453  * If not publishable, it publishes no actions. (pgoutput_change() will
5454  * ignore it.)
5455  */
5456  if (!is_publishable_relation(relation))
5457  return pubactions;
5458 
5459  if (relation->rd_pubactions)
5460  return memcpy(pubactions, relation->rd_pubactions,
5461  sizeof(PublicationActions));
5462 
5463  /* Fetch the publication membership info. */
5464  puboids = GetRelationPublications(RelationGetRelid(relation));
5465  if (relation->rd_rel->relispartition)
5466  {
5467  /* Add publications that the ancestors are in too. */
5468  List *ancestors = get_partition_ancestors(RelationGetRelid(relation));
5469  ListCell *lc;
5470 
5471  foreach(lc, ancestors)
5472  {
5473  Oid ancestor = lfirst_oid(lc);
5474 
5475  puboids = list_concat_unique_oid(puboids,
5476  GetRelationPublications(ancestor));
5477  }
5478  }
5479  puboids = list_concat_unique_oid(puboids, GetAllTablesPublications());
5480 
5481  foreach(lc, puboids)
5482  {
5483  Oid pubid = lfirst_oid(lc);
5484  HeapTuple tup;
5485  Form_pg_publication pubform;
5486 
5488 
5489  if (!HeapTupleIsValid(tup))
5490  elog(ERROR, "cache lookup failed for publication %u", pubid);
5491 
5492  pubform = (Form_pg_publication) GETSTRUCT(tup);
5493 
5494  pubactions->pubinsert |= pubform->pubinsert;
5495  pubactions->pubupdate |= pubform->pubupdate;
5496  pubactions->pubdelete |= pubform->pubdelete;
5497  pubactions->pubtruncate |= pubform->pubtruncate;
5498 
5499  ReleaseSysCache(tup);
5500 
5501  /*
5502  * If we know everything is replicated, there is no point to check for
5503  * other publications.
5504  */
5505  if (pubactions->pubinsert && pubactions->pubupdate &&
5506  pubactions->pubdelete && pubactions->pubtruncate)
5507  break;
5508  }
5509 
5510  if (relation->rd_pubactions)
5511  {
5512  pfree(relation->rd_pubactions);
5513  relation->rd_pubactions = NULL;
5514  }
5515 
5516  /* Now save copy of the actions in the relcache entry. */
5518  relation->rd_pubactions = palloc(sizeof(PublicationActions));
5519  memcpy(relation->rd_pubactions, pubactions, sizeof(PublicationActions));
5520  MemoryContextSwitchTo(oldcxt);
5521 
5522  return pubactions;
5523 }
5524 
5525 /*
5526  * RelationGetIndexRawAttOptions -- get AM/opclass-specific options for the index
5527  */
5528 Datum *
5530 {
5531  Oid indexrelid = RelationGetRelid(indexrel);
5532  int16 natts = RelationGetNumberOfAttributes(indexrel);
5533  Datum *options = NULL;
5534  int16 attnum;
5535 
5536  for (attnum = 1; attnum <= natts; attnum++)
5537  {
5538  if (indexrel->rd_indam->amoptsprocnum == 0)
5539  continue;
5540 
5541  if (!OidIsValid(index_getprocid(indexrel, attnum,
5542  indexrel->rd_indam->amoptsprocnum)))
5543  continue;
5544 
5545  if (!options)
5546  options = palloc0(sizeof(Datum) * natts);
5547 
5548  options[attnum - 1] = get_attoptions(indexrelid, attnum);
5549  }
5550 
5551  return options;
5552 }
5553 
5554 static bytea **
5555 CopyIndexAttOptions(bytea **srcopts, int natts)
5556 {
5557  bytea **opts = palloc(sizeof(*opts) * natts);
5558 
5559  for (int i = 0; i < natts; i++)
5560  {
5561  bytea *opt = srcopts[i];
5562 
5563  opts[i] = !opt ? NULL : (bytea *)
5564  DatumGetPointer(datumCopy(PointerGetDatum(opt), false, -1));
5565  }
5566 
5567  return opts;
5568 }
5569 
5570 /*
5571  * RelationGetIndexAttOptions
5572  * get AM/opclass-specific options for an index parsed into a binary form
5573  */
5574 bytea **
5576 {
5577  MemoryContext oldcxt;
5578  bytea **opts = relation->rd_opcoptions;
5579  Oid relid = RelationGetRelid(relation);
5580  int natts = RelationGetNumberOfAttributes(relation); /* XXX
5581  * IndexRelationGetNumberOfKeyAttributes */
5582  int i;
5583 
5584  /* Try to copy cached options. */
5585  if (opts)
5586  return copy ? CopyIndexAttOptions(opts, natts) : opts;
5587 
5588  /* Get and parse opclass options. */
5589  opts = palloc0(sizeof(*opts) * natts);
5590 
5591  for (i = 0; i < natts; i++)
5592  {
5593  if (criticalRelcachesBuilt && relid != AttributeRelidNumIndexId)
5594  {
5595  Datum attoptions = get_attoptions(relid, i + 1);
5596 
5597  opts[i] = index_opclass_options(relation, i + 1, attoptions, false);
5598 
5599  if (attoptions != (Datum) 0)
5600  pfree(DatumGetPointer(attoptions));
5601  }
5602  }
5603 
5604  /* Copy parsed options to the cache. */
5605  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
5606  relation->rd_opcoptions = CopyIndexAttOptions(opts, natts);
5607  MemoryContextSwitchTo(oldcxt);
5608 
5609  if (copy)
5610  return opts;
5611 
5612  for (i = 0; i < natts; i++)
5613  {
5614  if (opts[i])
5615  pfree(opts[i]);
5616  }
5617 
5618  pfree(opts);
5619 
5620  return relation->rd_opcoptions;
5621 }
5622 
5623 /*
5624  * Routines to support ereport() reports of relation-related errors
5625  *
5626  * These could have been put into elog.c, but it seems like a module layering
5627  * violation to have elog.c calling relcache or syscache stuff --- and we
5628  * definitely don't want elog.h including rel.h. So we put them here.
5629  */
5630 
5631 /*
5632  * errtable --- stores schema_name and table_name of a table
5633  * within the current errordata.
5634  */
5635 int
5637 {
5641 
5642  return 0; /* return value does not matter */
5643 }
5644 
5645 /*
5646  * errtablecol --- stores schema_name, table_name and column_name
5647  * of a table column within the current errordata.
5648  *
5649  * The column is specified by attribute number --- for most callers, this is
5650  * easier and less error-prone than getting the column name for themselves.
5651  */
5652 int
5654 {
5656  const char *colname;
5657 
5658  /* Use reldesc if it's a user attribute, else consult the catalogs */
5659  if (attnum > 0 && attnum <= reldesc->natts)
5660  colname = NameStr(TupleDescAttr(reldesc, attnum - 1)->attname);
5661  else
5662  colname = get_attname(RelationGetRelid(rel), attnum, false);
5663 
5664  return errtablecolname(rel, colname);
5665 }
5666 
5667 /*
5668  * errtablecolname --- stores schema_name, table_name and column_name
5669  * of a table column within the current errordata, where the column name is
5670  * given directly rather than extracted from the relation's catalog data.
5671  *
5672  * Don't use this directly unless errtablecol() is inconvenient for some
5673  * reason. This might possibly be needed during intermediate states in ALTER
5674  * TABLE, for instance.
5675  */
5676 int
5677 errtablecolname(Relation rel, const char *colname)
5678 {
5679  errtable(rel);
5681 
5682  return 0; /* return value does not matter */
5683 }
5684 
5685 /*
5686  * errtableconstraint --- stores schema_name, table_name and constraint_name
5687  * of a table-related constraint within the current errordata.
5688  */
5689 int
5690 errtableconstraint(Relation rel, const char *conname)
5691 {
5692  errtable(rel);
5694 
5695  return 0; /* return value does not matter */
5696 }
5697 
5698 
5699 /*
5700  * load_relcache_init_file, write_relcache_init_file
5701  *
5702  * In late 1992, we started regularly having databases with more than
5703  * a thousand classes in them. With this number of classes, it became
5704  * critical to do indexed lookups on the system catalogs.
5705  *
5706  * Bootstrapping these lookups is very hard. We want to be able to
5707  * use an index on pg_attribute, for example, but in order to do so,
5708  * we must have read pg_attribute for the attributes in the index,
5709  * which implies that we need to use the index.
5710  *
5711  * In order to get around the problem, we do the following:
5712  *
5713  * + When the database system is initialized (at initdb time), we
5714  * don't use indexes. We do sequential scans.
5715  *
5716  * + When the backend is started up in normal mode, we load an image
5717  * of the appropriate relation descriptors, in internal format,
5718  * from an initialization file in the data/base/... directory.
5719  *
5720  * + If the initialization file isn't there, then we create the
5721  * relation descriptors using sequential scans and write 'em to
5722  * the initialization file for use by subsequent backends.
5723  *
5724  * As of Postgres 9.0, there is one local initialization file in each
5725  * database, plus one shared initialization file for shared catalogs.
5726  *
5727  * We could dispense with the initialization files and just build the
5728  * critical reldescs the hard way on every backend startup, but that
5729  * slows down backend startup noticeably.
5730  *
5731  * We can in fact go further, and save more relcache entries than
5732  * just the ones that are absolutely critical; this allows us to speed
5733  * up backend startup by not having to build such entries the hard way.
5734  * Presently, all the catalog and index entries that are referred to
5735  * by catcaches are stored in the initialization files.
5736  *
5737  * The same mechanism that detects when catcache and relcache entries
5738  * need to be invalidated (due to catalog updates) also arranges to
5739  * unlink the initialization files when the contents may be out of date.
5740  * The files will then be rebuilt during the next backend startup.
5741  */
5742 
5743 /*
5744  * load_relcache_init_file -- attempt to load cache from the shared
5745  * or local cache init file
5746  *
5747  * If successful, return true and set criticalRelcachesBuilt or
5748  * criticalSharedRelcachesBuilt to true.
5749  * If not successful, return false.
5750  *
5751  * NOTE: we assume we are already switched into CacheMemoryContext.
5752  */
5753 static bool
5755 {
5756  FILE *fp;
5757  char initfilename[MAXPGPATH];
5758  Relation *rels;
5759  int relno,
5760  num_rels,
5761  max_rels,
5762  nailed_rels,
5763  nailed_indexes,
5764  magic;
5765  int i;
5766 
5767  if (shared)
5768  snprintf(initfilename, sizeof(initfilename), "global/%s",
5770  else
5771  snprintf(initfilename, sizeof(initfilename), "%s/%s",
5773 
5774  fp = AllocateFile(initfilename, PG_BINARY_R);
5775  if (fp == NULL)
5776  return false;
5777 
5778  /*
5779  * Read the relcache entries from the file. Note we will not enter
5780  * any of them into the cache if the read fails partway through; this
5781  * helps to guard against broken init files.
5782  */
5783  max_rels = 100;
5784  rels = (Relation *) palloc(max_rels * sizeof(Relation));
5785  num_rels = 0;
5786  nailed_rels = nailed_indexes = 0;
5787 
5788  /* check for correct magic number (compatible version) */
5789  if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
5790  goto read_failed;
5791  if (magic != RELCACHE_INIT_FILEMAGIC)
5792  goto read_failed;
5793 
5794  for (relno = 0;; relno++)
5795  {
5796  Size len;
5797  size_t nread;
5798  Relation rel;
5799  Form_pg_class relform;
5800  bool has_not_null;
5801 
5802  /* first read the relation descriptor length */
5803  nread = fread(&len, 1, sizeof(len), fp);
5804  if (nread != sizeof(len))
5805  {
5806  if (nread == 0)
5807  break; /* end of file */
5808  goto read_failed;
5809  }
5810 
5811  /* safety check for incompatible relcache layout */
5812  if (len != sizeof(RelationData))
5813  goto read_failed;
5814 
5815  /* allocate another relcache header */
5816  if (num_rels >= max_rels)
5817  {
5818  max_rels *= 2;
5819  rels = (Relation *) repalloc(rels, max_rels * sizeof(Relation));
5820  }
5821 
5822  rel = rels[num_rels++] = (Relation) palloc(len);
5823 
5824  /* then, read the Relation structure */
5825  if (fread(rel, 1, len, fp) != len)
5826  goto read_failed;
5827 
5828  /* next read the relation tuple form */
5829  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5830  goto read_failed;
5831 
5832  relform = (Form_pg_class) palloc(len);
5833  if (fread(relform, 1, len, fp) != len)
5834  goto read_failed;
5835 
5836  rel->rd_rel = relform;
5837 
5838  /* initialize attribute tuple forms */
5839  rel->rd_att = CreateTemplateTupleDesc(relform->relnatts);
5840  rel->rd_att->tdrefcount = 1; /* mark as refcounted */
5841 
5842  rel->rd_att->tdtypeid = relform->reltype ? relform->reltype : RECORDOID;
5843  rel->rd_att->tdtypmod = -1; /* just to be sure */
5844 
5845  /* next read all the attribute tuple form data entries */
5846  has_not_null = false;
5847  for (i = 0; i < relform->relnatts; i++)
5848  {
5849  Form_pg_attribute attr = TupleDescAttr(rel->rd_att, i);
5850 
5851  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5852  goto read_failed;
5853  if (len != ATTRIBUTE_FIXED_PART_SIZE)
5854  goto read_failed;
5855  if (fread(attr, 1, len, fp) != len)
5856  goto read_failed;
5857 
5858  has_not_null |= attr->attnotnull;
5859  }
5860 
5861  /* next read the access method specific field */
5862  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5863  goto read_failed;
5864  if (len > 0)
5865  {
5866  rel->rd_options = palloc(len);
5867  if (fread(rel->rd_options, 1, len, fp) != len)
5868  goto read_failed;
5869  if (len != VARSIZE(rel->rd_options))
5870  goto read_failed; /* sanity check */
5871  }
5872  else
5873  {
5874  rel->rd_options = NULL;
5875  }
5876 
5877  /* mark not-null status */
5878  if (has_not_null)
5879  {
5880  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
5881 
5882  constr->has_not_null = true;
5883  rel->rd_att->constr = constr;
5884  }
5885 
5886  /*
5887  * If it's an index, there's more to do. Note we explicitly ignore
5888  * partitioned indexes here.
5889  */
5890  if (rel->rd_rel->relkind == RELKIND_INDEX)
5891  {
5892  MemoryContext indexcxt;
5893  Oid *opfamily;
5894  Oid *opcintype;
5895  RegProcedure *support;
5896  int nsupport;
5897  int16 *indoption;
5898  Oid *indcollation;
5899 
5900  /* Count nailed indexes to ensure we have 'em all */
5901  if (rel->rd_isnailed)
5902  nailed_indexes++;
5903 
5904  /* next, read the pg_index tuple */
5905  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5906  goto read_failed;
5907 
5908  rel->rd_indextuple = (HeapTuple) palloc(len);
5909  if (fread(rel->rd_indextuple, 1, len, fp) != len)
5910  goto read_failed;
5911 
5912  /* Fix up internal pointers in the tuple -- see heap_copytuple */
5913  rel->rd_indextuple->t_data = (HeapTupleHeader) ((char *) rel->rd_indextuple + HEAPTUPLESIZE);
5915 
5916  /*
5917  * prepare index info context --- parameters should match
5918  * RelationInitIndexAccessInfo
5919  */
5921  "index info",
5923  rel->rd_indexcxt = indexcxt;
5926 
5927  /*
5928  * Now we can fetch the index AM's API struct. (We can't store
5929  * that in the init file, since it contains function pointers that
5930  * might vary across server executions. Fortunately, it should be
5931  * safe to call the amhandler even while bootstrapping indexes.)
5932  */
5933  InitIndexAmRoutine(rel);
5934 
5935  /* next, read the vector of opfamily OIDs */
5936  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5937  goto read_failed;
5938 
5939  opfamily = (Oid *) MemoryContextAlloc(indexcxt, len);
5940  if (fread(opfamily, 1, len, fp) != len)
5941  goto read_failed;
5942 
5943  rel->rd_opfamily = opfamily;
5944 
5945  /* next, read the vector of opcintype OIDs */
5946  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5947  goto read_failed;
5948 
5949  opcintype = (Oid *) MemoryContextAlloc(indexcxt, len);
5950  if (fread(opcintype, 1, len, fp) != len)
5951  goto read_failed;
5952 
5953  rel->rd_opcintype = opcintype;
5954 
5955  /* next, read the vector of support procedure OIDs */
5956  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5957  goto read_failed;
5958  support = (RegProcedure *) MemoryContextAlloc(indexcxt, len);
5959  if (fread(support, 1, len, fp) != len)
5960  goto read_failed;
5961 
5962  rel->