PostgreSQL Source Code  git master
relcache.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * relcache.c
4  * POSTGRES relation descriptor cache code
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/cache/relcache.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  * RelationCacheInitialize - initialize relcache (to empty)
18  * RelationCacheInitializePhase2 - initialize shared-catalog entries
19  * RelationCacheInitializePhase3 - finish initializing relcache
20  * RelationIdGetRelation - get a reldesc by relation id
21  * RelationClose - close an open relation
22  *
23  * NOTES
24  * The following code contains many undocumented hacks. Please be
25  * careful....
26  */
27 #include "postgres.h"
28 
29 #include <sys/file.h>
30 #include <fcntl.h>
31 #include <unistd.h>
32 
33 #include "access/htup_details.h"
34 #include "access/multixact.h"
35 #include "access/nbtree.h"
36 #include "access/reloptions.h"
37 #include "access/sysattr.h"
38 #include "access/table.h"
39 #include "access/tableam.h"
40 #include "access/tupdesc_details.h"
41 #include "access/xact.h"
42 #include "access/xlog.h"
43 #include "catalog/catalog.h"
44 #include "catalog/indexing.h"
45 #include "catalog/namespace.h"
46 #include "catalog/pg_am.h"
47 #include "catalog/pg_amproc.h"
48 #include "catalog/pg_attrdef.h"
50 #include "catalog/pg_authid.h"
51 #include "catalog/pg_constraint.h"
52 #include "catalog/pg_database.h"
53 #include "catalog/pg_namespace.h"
54 #include "catalog/pg_opclass.h"
55 #include "catalog/pg_proc.h"
56 #include "catalog/pg_publication.h"
57 #include "catalog/pg_rewrite.h"
58 #include "catalog/pg_shseclabel.h"
61 #include "catalog/pg_tablespace.h"
62 #include "catalog/pg_trigger.h"
63 #include "catalog/pg_type.h"
64 #include "catalog/schemapg.h"
65 #include "catalog/storage.h"
66 #include "commands/policy.h"
67 #include "commands/trigger.h"
68 #include "miscadmin.h"
69 #include "nodes/makefuncs.h"
70 #include "nodes/nodeFuncs.h"
71 #include "optimizer/optimizer.h"
72 #include "rewrite/rewriteDefine.h"
73 #include "rewrite/rowsecurity.h"
74 #include "storage/lmgr.h"
75 #include "storage/smgr.h"
76 #include "utils/array.h"
77 #include "utils/builtins.h"
78 #include "utils/datum.h"
79 #include "utils/fmgroids.h"
80 #include "utils/inval.h"
81 #include "utils/lsyscache.h"
82 #include "utils/memutils.h"
83 #include "utils/relmapper.h"
84 #include "utils/resowner_private.h"
85 #include "utils/snapmgr.h"
86 #include "utils/syscache.h"
87 
88 #define RELCACHE_INIT_FILEMAGIC 0x573266 /* version ID value */
89 
90 /*
91  * Default policy for whether to apply RECOVER_RELATION_BUILD_MEMORY:
92  * do so in clobber-cache builds but not otherwise. This choice can be
93  * overridden at compile time with -DRECOVER_RELATION_BUILD_MEMORY=1 or =0.
94  */
95 #ifndef RECOVER_RELATION_BUILD_MEMORY
96 #if defined(CLOBBER_CACHE_ALWAYS) || defined(CLOBBER_CACHE_RECURSIVELY)
97 #define RECOVER_RELATION_BUILD_MEMORY 1
98 #else
99 #define RECOVER_RELATION_BUILD_MEMORY 0
100 #endif
101 #endif
102 
103 /*
104  * hardcoded tuple descriptors, contents generated by genbki.pl
     *
     * Each array is file-local and const, and holds Natts_<catalog> entries of
     * FormData_pg_attribute initialized from the matching Schema_<catalog>
     * macro.  One array exists for each of: pg_class, pg_attribute, pg_proc,
     * pg_type, pg_database, pg_authid, pg_auth_members, pg_index,
     * pg_shseclabel and pg_subscription.
     *
     * NOTE(review): the Schema_* macros presumably come from catalog/schemapg.h,
     * and these arrays are presumably what gets passed as the "attrs" argument
     * of formrdesc() -- confirm against the rest of the file.
105  */
106 static const FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
107 static const FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
108 static const FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
109 static const FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
110 static const FormData_pg_attribute Desc_pg_database[Natts_pg_database] = {Schema_pg_database};
111 static const FormData_pg_attribute Desc_pg_authid[Natts_pg_authid] = {Schema_pg_authid};
112 static const FormData_pg_attribute Desc_pg_auth_members[Natts_pg_auth_members] = {Schema_pg_auth_members};
113 static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
114 static const FormData_pg_attribute Desc_pg_shseclabel[Natts_pg_shseclabel] = {Schema_pg_shseclabel};
115 static const FormData_pg_attribute Desc_pg_subscription[Natts_pg_subscription] = {Schema_pg_subscription};
116 
117 /*
118  * Hash tables that index the relation cache
119  *
120  * We used to index the cache by both name and OID, but now there
121  * is only an index by OID.
122  */
123 typedef struct relidcacheent
124 {
127 } RelIdCacheEnt;
128 
130 
131 /*
132  * This flag is false until we have prepared the critical relcache entries
133  * that are needed to do indexscans on the tables read by relcache building.
134  */
136 
137 /*
138  * This flag is false until we have prepared the critical relcache entries
139  * for shared catalogs (which are the tables needed for login).
140  */
142 
143 /*
144  * This counter counts relcache inval events received since backend startup
145  * (but only for rels that are actually in cache). Presently, we use it only
146  * to detect whether data about to be written by write_relcache_init_file()
147  * might already be obsolete.
148  */
149 static long relcacheInvalsReceived = 0L;
150 
151 /*
152  * eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
153  * cleanup work. This list intentionally has limited size; if it overflows,
154  * we fall back to scanning the whole hashtable. There is no value in a very
155  * large list because (1) at some point, a hash_seq_search scan is faster than
156  * retail lookups, and (2) the value of this is to reduce EOXact work for
157  * short transactions, which can't have dirtied all that many tables anyway.
158  * EOXactListAdd() does not bother to prevent duplicate list entries, so the
159  * cleanup processing must be idempotent.
160  */
161 #define MAX_EOXACT_LIST 32
163 static int eoxact_list_len = 0;
164 static bool eoxact_list_overflowed = false;
165 
/*
 * EOXactListAdd(rel)
 *
 * Remember (rel)->rd_id in eoxact_list[].  If the list already holds
 * MAX_EOXACT_LIST entries, nothing is stored and eoxact_list_overflowed is
 * set to true instead.  No check is made for an OID that is already listed.
 *
 * The body is wrapped in do { ... } while (0) so that the macro can be used
 * as a single statement.
 */
166 #define EOXactListAdd(rel) \
167  do { \
168  if (eoxact_list_len < MAX_EOXACT_LIST) \
169  eoxact_list[eoxact_list_len++] = (rel)->rd_id; \
170  else \
171  eoxact_list_overflowed = true; \
172  } while (0)
173 
174 /*
175  * EOXactTupleDescArray stores TupleDescs that (might) need AtEOXact
176  * cleanup work. The array expands as needed; there is no hashtable because
177  * we don't need to access individual items except at EOXact.
178  */
180 static int NextEOXactTupleDescNum = 0;
181 static int EOXactTupleDescArrayLen = 0;
182 
183 /*
184  * macros to manipulate the lookup hashtable
185  */
/*
 * RelationCacheInsert(RELATION, replace_allowed)
 *
 * Enter RELATION into RelationIdCache, keyed by RELATION->rd_id, using
 * hash_search() with HASH_ENTER.
 *
 * If no entry for that OID existed, the new hash entry simply points at
 * RELATION.
 *
 * If an entry already existed, the macro asserts that replace_allowed is
 * true, makes the entry point at RELATION, and then deals with the previous
 * descriptor: when RelationHasReferenceCountZero() holds for it, it is
 * passed to RelationDestroyRelation(old, false); otherwise a WARNING about
 * leaking a still-referenced entry is logged, except when
 * IsBootstrapProcessingMode() is true.  The hash entry is repointed before
 * the old descriptor is destroyed.
 *
 * RELATION is expanded more than once, so the argument should be free of
 * side effects.
 */
186 #define RelationCacheInsert(RELATION, replace_allowed) \
187 do { \
188  RelIdCacheEnt *hentry; bool found; \
189  hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
190  (void *) &((RELATION)->rd_id), \
191  HASH_ENTER, &found); \
192  if (found) \
193  { \
194  /* see comments in RelationBuildDesc and RelationBuildLocalRelation */ \
195  Relation _old_rel = hentry->reldesc; \
196  Assert(replace_allowed); \
197  hentry->reldesc = (RELATION); \
198  if (RelationHasReferenceCountZero(_old_rel)) \
199  RelationDestroyRelation(_old_rel, false); \
200  else if (!IsBootstrapProcessingMode()) \
201  elog(WARNING, "leaking still-referenced relcache entry for \"%s\"", \
202  RelationGetRelationName(_old_rel)); \
203  } \
204  else \
205  hentry->reldesc = (RELATION); \
206 } while(0)
207 
/*
 * RelationIdCacheLookup(ID, RELATION)
 *
 * Look up ID in RelationIdCache using hash_search() with HASH_FIND.
 * RELATION is assigned the entry's reldesc when an entry is found, and NULL
 * otherwise.
 *
 * The macro takes the address of ID, so ID must be an lvalue; RELATION is
 * assigned to, so it must be an assignable expression.
 */
208 #define RelationIdCacheLookup(ID, RELATION) \
209 do { \
210  RelIdCacheEnt *hentry; \
211  hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
212  (void *) &(ID), \
213  HASH_FIND, NULL); \
214  if (hentry) \
215  RELATION = hentry->reldesc; \
216  else \
217  RELATION = NULL; \
218 } while(0)
219 
/*
 * RelationCacheDelete(RELATION)
 *
 * Remove the RelationIdCache entry keyed by RELATION->rd_id, using
 * hash_search() with HASH_REMOVE.  If no such entry was present, a WARNING
 * naming the OID is logged.
 *
 * The macro only touches the hash table; it does nothing to the relation
 * descriptor itself.
 */
220 #define RelationCacheDelete(RELATION) \
221 do { \
222  RelIdCacheEnt *hentry; \
223  hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
224  (void *) &((RELATION)->rd_id), \
225  HASH_REMOVE, NULL); \
226  if (hentry == NULL) \
227  elog(WARNING, "failed to delete relcache entry for OID %u", \
228  (RELATION)->rd_id); \
229 } while(0)
230 
231 
232 /*
233  * Special cache for opclass-related information
234  *
235  * Note: only default support procs get cached, ie, those with
236  * lefttype = righttype = opcintype.
237  */
238 typedef struct opclasscacheent
239 {
240  Oid opclassoid; /* lookup key: OID of opclass */
241  bool valid; /* set true after successful fill-in */
242  StrategyNumber numSupport; /* max # of support procs (from pg_am) */
243  Oid opcfamily; /* OID of opclass's family */
244  Oid opcintype; /* OID of opclass's declared input type */
245  RegProcedure *supportProcs; /* OIDs of support procedures */
247 
248 static HTAB *OpClassCache = NULL;
249 
250 
251 /* non-export function prototypes */
252 
253 static void RelationDestroyRelation(Relation relation, bool remember_tupdesc);
254 static void RelationClearRelation(Relation relation, bool rebuild);
255 
256 static void RelationReloadIndexInfo(Relation relation);
257 static void RelationReloadNailed(Relation relation);
258 static void RelationFlushRelation(Relation relation);
260 static void AtEOXact_cleanup(Relation relation, bool isCommit);
261 static void AtEOSubXact_cleanup(Relation relation, bool isCommit,
262  SubTransactionId mySubid, SubTransactionId parentSubid);
263 static bool load_relcache_init_file(bool shared);
264 static void write_relcache_init_file(bool shared);
265 static void write_item(const void *data, Size len, FILE *fp);
266 
267 static void formrdesc(const char *relationName, Oid relationReltype,
268  bool isshared, int natts, const FormData_pg_attribute *attrs);
269 
270 static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic);
272 static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
273 static void RelationBuildTupleDesc(Relation relation);
274 static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
275 static void RelationInitPhysicalAddr(Relation relation);
276 static void load_critical_index(Oid indexoid, Oid heapoid);
277 static TupleDesc GetPgClassDescriptor(void);
278 static TupleDesc GetPgIndexDescriptor(void);
279 static void AttrDefaultFetch(Relation relation);
280 static void CheckConstraintFetch(Relation relation);
281 static int CheckConstraintCmp(const void *a, const void *b);
282 static void InitIndexAmRoutine(Relation relation);
283 static void IndexSupportInitialize(oidvector *indclass,
284  RegProcedure *indexSupport,
285  Oid *opFamily,
286  Oid *opcInType,
287  StrategyNumber maxSupportNumber,
288  AttrNumber maxAttributeNumber);
289 static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
290  StrategyNumber numSupport);
291 static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
292 static void unlink_initfile(const char *initfilename, int elevel);
293 
294 
295 /*
296  * ScanPgRelation
297  *
298  * This is used by RelationBuildDesc to find a pg_class
299  * tuple matching targetRelId. The caller must hold at least
300  * AccessShareLock on the target relid to prevent concurrent-update
301  * scenarios; it isn't guaranteed that all scans used to build the
302  * relcache entry will use the same snapshot. If, for example,
303  * an attribute were to be added after scanning pg_class and before
304  * scanning pg_attribute, relnatts wouldn't match.
305  *
     * Parameters:
     *   targetRelId        - OID to match against pg_class.oid
     *   indexOK            - false forces a heap scan; true allows use of
     *                        ClassOidIndexId, but only once
     *                        criticalRelcachesBuilt is set
     *   force_non_historic - true reads with
     *                        GetNonHistoricCatalogSnapshot(), false with
     *                        GetCatalogSnapshot()
     *
     * Returns the first tuple produced by the scan, copied with
     * heap_copytuple; if the scan produces no tuple, the returned value fails
     * HeapTupleIsValid().  Raises FATAL if MyDatabaseId is not a valid OID.
     *
306  * NB: the returned tuple has been copied into palloc'd storage
307  * and must eventually be freed with heap_freetuple.
308  */
309 static HeapTuple
310 ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic)
311 {
312  HeapTuple pg_class_tuple;
313  Relation pg_class_desc;
314  SysScanDesc pg_class_scan;
315  ScanKeyData key[1];
316  Snapshot snapshot;
317 
318  /*
319  * If something goes wrong during backend startup, we might find ourselves
320  * trying to read pg_class before we've selected a database. That ain't
321  * gonna work, so bail out with a useful error message. If this happens,
322  * it probably means a relcache entry that needs to be nailed isn't.
323  */
324  if (!OidIsValid(MyDatabaseId))
325  elog(FATAL, "cannot read pg_class without having selected a database");
326 
327  /*
328  * form a scan key: pg_class.oid = targetRelId
329  */
330  ScanKeyInit(&key[0],
331  Anum_pg_class_oid,
332  BTEqualStrategyNumber, F_OIDEQ,
333  ObjectIdGetDatum(targetRelId));
334 
335  /*
336  * Open pg_class and fetch a tuple. Force heap scan if we haven't yet
337  * built the critical relcache entries (this includes initdb and startup
338  * without a pg_internal.init file). The caller can also force a heap
339  * scan by setting indexOK == false.
340  */
341  pg_class_desc = table_open(RelationRelationId, AccessShareLock);
342 
343  /*
344  * The caller might need a tuple that's newer than the one the historic
345  * snapshot would return; currently the only case requiring this is looking
346  * up the relfilenode of non mapped system relations during decoding.
347  */
348  if (force_non_historic)
349  snapshot = GetNonHistoricCatalogSnapshot(RelationRelationId);
350  else
351  snapshot = GetCatalogSnapshot(RelationRelationId);
352 
     /* the index is used only if both the caller and the startup state allow it */
353  pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
354  indexOK && criticalRelcachesBuilt,
355  snapshot,
356  1, key);
357 
     /* only the first tuple returned by the scan is examined */
358  pg_class_tuple = systable_getnext(pg_class_scan);
359 
360  /*
361  * Must copy tuple before releasing buffer.
362  */
363  if (HeapTupleIsValid(pg_class_tuple))
364  pg_class_tuple = heap_copytuple(pg_class_tuple);
365 
366  /* all done */
367  systable_endscan(pg_class_scan);
368  table_close(pg_class_desc, AccessShareLock);
369 
370  return pg_class_tuple;
371 }
372 
373 /*
374  * AllocateRelationDesc
375  *
376  * This is used to allocate memory for a new relation descriptor
377  * and initialize the rd_rel field from the given pg_class tuple.
378  */
379 static Relation
381 {
382  Relation relation;
383  MemoryContext oldcxt;
384  Form_pg_class relationForm;
385 
386  /* Relcache entries must live in CacheMemoryContext */
388 
389  /*
390  * allocate and zero space for new relation descriptor
391  */
392  relation = (Relation) palloc0(sizeof(RelationData));
393 
394  /* make sure relation is marked as having no open file yet */
395  relation->rd_smgr = NULL;
396 
397  /*
398  * Copy the relation tuple form
399  *
400  * We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The
401  * variable-length fields (relacl, reloptions) are NOT stored in the
402  * relcache --- there'd be little point in it, since we don't copy the
403  * tuple's nulls bitmap and hence wouldn't know if the values are valid.
404  * Bottom line is that relacl *cannot* be retrieved from the relcache. Get
405  * it from the syscache if you need it. The same goes for the original
406  * form of reloptions (however, we do store the parsed form of reloptions
407  * in rd_options).
408  */
409  relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
410 
411  memcpy(relationForm, relp, CLASS_TUPLE_SIZE);
412 
413  /* initialize relation tuple form */
414  relation->rd_rel = relationForm;
415 
416  /* and allocate attribute tuple form storage */
417  relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts);
418  /* which we mark as a reference-counted tupdesc */
419  relation->rd_att->tdrefcount = 1;
420 
421  MemoryContextSwitchTo(oldcxt);
422 
423  return relation;
424 }
425 
426 /*
427  * RelationParseRelOptions
428  * Convert pg_class.reloptions into pre-parsed rd_options
429  *
430  * tuple is the real pg_class tuple (not rd_rel!) for relation
431  *
432  * Note: rd_rel and (if an index) rd_indam must be valid already
433  */
434 static void
436 {
437  bytea *options;
438  amoptions_function amoptsfn;
439 
440  relation->rd_options = NULL;
441 
442  /*
443  * Look up any AM-specific parse function; fall out if relkind should not
444  * have options.
445  */
446  switch (relation->rd_rel->relkind)
447  {
448  case RELKIND_RELATION:
449  case RELKIND_TOASTVALUE:
450  case RELKIND_VIEW:
451  case RELKIND_MATVIEW:
452  case RELKIND_PARTITIONED_TABLE:
453  amoptsfn = NULL;
454  break;
455  case RELKIND_INDEX:
456  case RELKIND_PARTITIONED_INDEX:
457  amoptsfn = relation->rd_indam->amoptions;
458  break;
459  default:
460  return;
461  }
462 
463  /*
464  * Fetch reloptions from tuple; have to use a hardwired descriptor because
465  * we might not have any other for pg_class yet (consider executing this
466  * code for pg_class itself)
467  */
468  options = extractRelOptions(tuple, GetPgClassDescriptor(), amoptsfn);
469 
470  /*
471  * Copy parsed data into CacheMemoryContext. To guard against the
472  * possibility of leaks in the reloptions code, we want to do the actual
473  * parsing in the caller's memory context and copy the results into
474  * CacheMemoryContext after the fact.
475  */
476  if (options)
477  {
479  VARSIZE(options));
480  memcpy(relation->rd_options, options, VARSIZE(options));
481  pfree(options);
482  }
483 }
484 
485 /*
486  * RelationBuildTupleDesc
487  *
488  * Form the relation's tuple descriptor from information in
489  * the pg_attribute, pg_attrdef & pg_constraint system catalogs.
490  */
491 static void
493 {
494  HeapTuple pg_attribute_tuple;
495  Relation pg_attribute_desc;
496  SysScanDesc pg_attribute_scan;
497  ScanKeyData skey[2];
498  int need;
499  TupleConstr *constr;
500  AttrDefault *attrdef = NULL;
501  AttrMissing *attrmiss = NULL;
502  int ndef = 0;
503 
504  /* copy some fields from pg_class row to rd_att */
505  relation->rd_att->tdtypeid = relation->rd_rel->reltype;
506  relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
507 
509  sizeof(TupleConstr));
510  constr->has_not_null = false;
511  constr->has_generated_stored = false;
512 
513  /*
514  * Form a scan key that selects only user attributes (attnum > 0).
515  * (Eliminating system attribute rows at the index level is lots faster
516  * than fetching them.)
517  */
518  ScanKeyInit(&skey[0],
519  Anum_pg_attribute_attrelid,
520  BTEqualStrategyNumber, F_OIDEQ,
522  ScanKeyInit(&skey[1],
523  Anum_pg_attribute_attnum,
524  BTGreaterStrategyNumber, F_INT2GT,
525  Int16GetDatum(0));
526 
527  /*
528  * Open pg_attribute and begin a scan. Force heap scan if we haven't yet
529  * built the critical relcache entries (this includes initdb and startup
530  * without a pg_internal.init file).
531  */
532  pg_attribute_desc = table_open(AttributeRelationId, AccessShareLock);
533  pg_attribute_scan = systable_beginscan(pg_attribute_desc,
536  NULL,
537  2, skey);
538 
539  /*
540  * add attribute data to relation->rd_att
541  */
542  need = RelationGetNumberOfAttributes(relation);
543 
544  while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
545  {
546  Form_pg_attribute attp;
547  int attnum;
548 
549  attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
550 
551  attnum = attp->attnum;
552  if (attnum <= 0 || attnum > RelationGetNumberOfAttributes(relation))
553  elog(ERROR, "invalid attribute number %d for %s",
554  attp->attnum, RelationGetRelationName(relation));
555 
556 
557  memcpy(TupleDescAttr(relation->rd_att, attnum - 1),
558  attp,
560 
561  /* Update constraint/default info */
562  if (attp->attnotnull)
563  constr->has_not_null = true;
564  if (attp->attgenerated == ATTRIBUTE_GENERATED_STORED)
565  constr->has_generated_stored = true;
566 
567  /* If the column has a default, fill it into the attrdef array */
568  if (attp->atthasdef)
569  {
570  if (attrdef == NULL)
571  attrdef = (AttrDefault *)
574  sizeof(AttrDefault));
575  attrdef[ndef].adnum = attnum;
576  attrdef[ndef].adbin = NULL;
577 
578  ndef++;
579  }
580 
581  /* Likewise for a missing value */
582  if (attp->atthasmissing)
583  {
584  Datum missingval;
585  bool missingNull;
586 
587  /* Do we have a missing value? */
588  missingval = heap_getattr(pg_attribute_tuple,
589  Anum_pg_attribute_attmissingval,
590  pg_attribute_desc->rd_att,
591  &missingNull);
592  if (!missingNull)
593  {
594  /* Yes, fetch from the array */
595  MemoryContext oldcxt;
596  bool is_null;
597  int one = 1;
598  Datum missval;
599 
600  if (attrmiss == NULL)
601  attrmiss = (AttrMissing *)
603  relation->rd_rel->relnatts *
604  sizeof(AttrMissing));
605 
606  missval = array_get_element(missingval,
607  1,
608  &one,
609  -1,
610  attp->attlen,
611  attp->attbyval,
612  attp->attalign,
613  &is_null);
614  Assert(!is_null);
615  if (attp->attbyval)
616  {
617  /* for copy by val just copy the datum direct */
618  attrmiss[attnum - 1].am_value = missval;
619  }
620  else
621  {
622  /* otherwise copy in the correct context */
624  attrmiss[attnum - 1].am_value = datumCopy(missval,
625  attp->attbyval,
626  attp->attlen);
627  MemoryContextSwitchTo(oldcxt);
628  }
629  attrmiss[attnum - 1].am_present = true;
630  }
631  }
632  need--;
633  if (need == 0)
634  break;
635  }
636 
637  /*
638  * end the scan and close the attribute relation
639  */
640  systable_endscan(pg_attribute_scan);
641  table_close(pg_attribute_desc, AccessShareLock);
642 
643  if (need != 0)
644  elog(ERROR, "catalog is missing %d attribute(s) for relid %u",
645  need, RelationGetRelid(relation));
646 
647  /*
648  * The attcacheoff values we read from pg_attribute should all be -1
649  * ("unknown"). Verify this if assert checking is on. They will be
650  * computed when and if needed during tuple access.
651  */
652 #ifdef USE_ASSERT_CHECKING
653  {
654  int i;
655 
656  for (i = 0; i < RelationGetNumberOfAttributes(relation); i++)
657  Assert(TupleDescAttr(relation->rd_att, i)->attcacheoff == -1);
658  }
659 #endif
660 
661  /*
662  * However, we can easily set the attcacheoff value for the first
663  * attribute: it must be zero. This eliminates the need for special cases
664  * for attnum=1 that used to exist in fastgetattr() and index_getattr().
665  */
666  if (RelationGetNumberOfAttributes(relation) > 0)
667  TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
668 
669  /*
670  * Set up constraint/default info
671  */
672  if (constr->has_not_null || ndef > 0 ||
673  attrmiss || relation->rd_rel->relchecks)
674  {
675  relation->rd_att->constr = constr;
676 
677  if (ndef > 0) /* DEFAULTs */
678  {
679  if (ndef < RelationGetNumberOfAttributes(relation))
680  constr->defval = (AttrDefault *)
681  repalloc(attrdef, ndef * sizeof(AttrDefault));
682  else
683  constr->defval = attrdef;
684  constr->num_defval = ndef;
685  AttrDefaultFetch(relation);
686  }
687  else
688  constr->num_defval = 0;
689 
690  constr->missing = attrmiss;
691 
692  if (relation->rd_rel->relchecks > 0) /* CHECKs */
693  {
694  constr->num_check = relation->rd_rel->relchecks;
695  constr->check = (ConstrCheck *)
697  constr->num_check * sizeof(ConstrCheck));
698  CheckConstraintFetch(relation);
699  }
700  else
701  constr->num_check = 0;
702  }
703  else
704  {
705  pfree(constr);
706  relation->rd_att->constr = NULL;
707  }
708 }
709 
710 /*
711  * RelationBuildRuleLock
712  *
713  * Form the relation's rewrite rules from information in
714  * the pg_rewrite system catalog.
715  *
716  * Note: The rule parsetrees are potentially very complex node structures.
717  * To allow these trees to be freed when the relcache entry is flushed,
718  * we make a private memory context to hold the RuleLock information for
719  * each relcache entry that has associated rules. The context is used
720  * just for rule info, not for any other subsidiary data of the relcache
721  * entry, because that keeps the update logic in RelationClearRelation()
722  * manageable. The other subsidiary data structures are simple enough
723  * to be easy to free explicitly, anyway.
724  */
725 static void
727 {
728  MemoryContext rulescxt;
729  MemoryContext oldcxt;
730  HeapTuple rewrite_tuple;
731  Relation rewrite_desc;
732  TupleDesc rewrite_tupdesc;
733  SysScanDesc rewrite_scan;
735  RuleLock *rulelock;
736  int numlocks;
737  RewriteRule **rules;
738  int maxlocks;
739 
740  /*
741  * Make the private context. Assume it'll not contain much data.
742  */
744  "relation rules",
746  relation->rd_rulescxt = rulescxt;
748  RelationGetRelationName(relation));
749 
750  /*
751  * allocate an array to hold the rewrite rules (the array is extended if
752  * necessary)
753  */
754  maxlocks = 4;
755  rules = (RewriteRule **)
756  MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks);
757  numlocks = 0;
758 
759  /*
760  * form a scan key
761  */
762  ScanKeyInit(&key,
763  Anum_pg_rewrite_ev_class,
764  BTEqualStrategyNumber, F_OIDEQ,
766 
767  /*
768  * open pg_rewrite and begin a scan
769  *
770  * Note: since we scan the rules using RewriteRelRulenameIndexId, we will
771  * be reading the rules in name order, except possibly during
772  * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn
773  * ensures that rules will be fired in name order.
774  */
775  rewrite_desc = table_open(RewriteRelationId, AccessShareLock);
776  rewrite_tupdesc = RelationGetDescr(rewrite_desc);
777  rewrite_scan = systable_beginscan(rewrite_desc,
779  true, NULL,
780  1, &key);
781 
782  while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
783  {
784  Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
785  bool isnull;
786  Datum rule_datum;
787  char *rule_str;
788  RewriteRule *rule;
789 
790  rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
791  sizeof(RewriteRule));
792 
793  rule->ruleId = rewrite_form->oid;
794 
795  rule->event = rewrite_form->ev_type - '0';
796  rule->enabled = rewrite_form->ev_enabled;
797  rule->isInstead = rewrite_form->is_instead;
798 
799  /*
800  * Must use heap_getattr to fetch ev_action and ev_qual. Also, the
801  * rule strings are often large enough to be toasted. To avoid
802  * leaking memory in the caller's context, do the detoasting here so
803  * we can free the detoasted version.
804  */
805  rule_datum = heap_getattr(rewrite_tuple,
806  Anum_pg_rewrite_ev_action,
807  rewrite_tupdesc,
808  &isnull);
809  Assert(!isnull);
810  rule_str = TextDatumGetCString(rule_datum);
811  oldcxt = MemoryContextSwitchTo(rulescxt);
812  rule->actions = (List *) stringToNode(rule_str);
813  MemoryContextSwitchTo(oldcxt);
814  pfree(rule_str);
815 
816  rule_datum = heap_getattr(rewrite_tuple,
817  Anum_pg_rewrite_ev_qual,
818  rewrite_tupdesc,
819  &isnull);
820  Assert(!isnull);
821  rule_str = TextDatumGetCString(rule_datum);
822  oldcxt = MemoryContextSwitchTo(rulescxt);
823  rule->qual = (Node *) stringToNode(rule_str);
824  MemoryContextSwitchTo(oldcxt);
825  pfree(rule_str);
826 
827  /*
828  * We want the rule's table references to be checked as though by the
829  * table owner, not the user referencing the rule. Therefore, scan
830  * through the rule's actions and set the checkAsUser field on all
831  * rtable entries. We have to look at the qual as well, in case it
832  * contains sublinks.
833  *
834  * The reason for doing this when the rule is loaded, rather than when
835  * it is stored, is that otherwise ALTER TABLE OWNER would have to
836  * grovel through stored rules to update checkAsUser fields. Scanning
837  * the rule tree during load is relatively cheap (compared to
838  * constructing it in the first place), so we do it here.
839  */
840  setRuleCheckAsUser((Node *) rule->actions, relation->rd_rel->relowner);
841  setRuleCheckAsUser(rule->qual, relation->rd_rel->relowner);
842 
843  if (numlocks >= maxlocks)
844  {
845  maxlocks *= 2;
846  rules = (RewriteRule **)
847  repalloc(rules, sizeof(RewriteRule *) * maxlocks);
848  }
849  rules[numlocks++] = rule;
850  }
851 
852  /*
853  * end the scan and close the pg_rewrite relation
854  */
855  systable_endscan(rewrite_scan);
856  table_close(rewrite_desc, AccessShareLock);
857 
858  /*
859  * there might not be any rules (if relhasrules is out-of-date)
860  */
861  if (numlocks == 0)
862  {
863  relation->rd_rules = NULL;
864  relation->rd_rulescxt = NULL;
865  MemoryContextDelete(rulescxt);
866  return;
867  }
868 
869  /*
870  * form a RuleLock and insert into relation
871  */
872  rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock));
873  rulelock->numLocks = numlocks;
874  rulelock->rules = rules;
875 
876  relation->rd_rules = rulelock;
877 }
878 
879 /*
880  * equalRuleLocks
881  *
882  * Determine whether two RuleLocks are equivalent
883  *
884  * Probably this should be in the rules code someplace...
885  */
886 static bool
888 {
889  int i;
890 
891  /*
892  * As of 7.3 we assume the rule ordering is repeatable, because
893  * RelationBuildRuleLock should read 'em in a consistent order. So just
894  * compare corresponding slots.
895  */
896  if (rlock1 != NULL)
897  {
898  if (rlock2 == NULL)
899  return false;
900  if (rlock1->numLocks != rlock2->numLocks)
901  return false;
902  for (i = 0; i < rlock1->numLocks; i++)
903  {
904  RewriteRule *rule1 = rlock1->rules[i];
905  RewriteRule *rule2 = rlock2->rules[i];
906 
907  if (rule1->ruleId != rule2->ruleId)
908  return false;
909  if (rule1->event != rule2->event)
910  return false;
911  if (rule1->enabled != rule2->enabled)
912  return false;
913  if (rule1->isInstead != rule2->isInstead)
914  return false;
915  if (!equal(rule1->qual, rule2->qual))
916  return false;
917  if (!equal(rule1->actions, rule2->actions))
918  return false;
919  }
920  }
921  else if (rlock2 != NULL)
922  return false;
923  return true;
924 }
925 
926 /*
927  * equalPolicy
928  *
929  * Determine whether two policies are equivalent
930  */
931 static bool
933 {
934  int i;
935  Oid *r1,
936  *r2;
937 
938  if (policy1 != NULL)
939  {
940  if (policy2 == NULL)
941  return false;
942 
943  if (policy1->polcmd != policy2->polcmd)
944  return false;
945  if (policy1->hassublinks != policy2->hassublinks)
946  return false;
947  if (strcmp(policy1->policy_name, policy2->policy_name) != 0)
948  return false;
949  if (ARR_DIMS(policy1->roles)[0] != ARR_DIMS(policy2->roles)[0])
950  return false;
951 
952  r1 = (Oid *) ARR_DATA_PTR(policy1->roles);
953  r2 = (Oid *) ARR_DATA_PTR(policy2->roles);
954 
955  for (i = 0; i < ARR_DIMS(policy1->roles)[0]; i++)
956  {
957  if (r1[i] != r2[i])
958  return false;
959  }
960 
961  if (!equal(policy1->qual, policy2->qual))
962  return false;
963  if (!equal(policy1->with_check_qual, policy2->with_check_qual))
964  return false;
965  }
966  else if (policy2 != NULL)
967  return false;
968 
969  return true;
970 }
971 
972 /*
973  * equalRSDesc
974  *
975  * Determine whether two RowSecurityDesc's are equivalent
976  */
977 static bool
979 {
980  ListCell *lc,
981  *rc;
982 
983  if (rsdesc1 == NULL && rsdesc2 == NULL)
984  return true;
985 
986  if ((rsdesc1 != NULL && rsdesc2 == NULL) ||
987  (rsdesc1 == NULL && rsdesc2 != NULL))
988  return false;
989 
990  if (list_length(rsdesc1->policies) != list_length(rsdesc2->policies))
991  return false;
992 
993  /* RelationBuildRowSecurity should build policies in order */
994  forboth(lc, rsdesc1->policies, rc, rsdesc2->policies)
995  {
998 
999  if (!equalPolicy(l, r))
1000  return false;
1001  }
1002 
1003  return true;
1004 }
1005 
1006 /*
1007  * RelationBuildDesc
1008  *
1009  * Build a relation descriptor. The caller must hold at least
1010  * AccessShareLock on the target relid.
1011  *
1012  * The new descriptor is inserted into the hash table if insertIt is true.
1013  *
1014  * Returns NULL if no pg_class row could be found for the given relid
1015  * (suggesting we are trying to access a just-deleted relation).
1016  * Any other error is reported via elog.
 *
 * targetRelId is the OID handed to ScanPgRelation; the tuple that comes
 * back is asserted to carry the same OID.  The returned entry starts with
 * rd_refcnt = 0 and rd_isnailed = false, and rd_isvalid is set to true as
 * the last step before returning.
 *
 * NOTE(review): the embedded numbering skips 1042, 1044, 1091-1092, 1186
 * and 1190.  The statement that creates tmpcxt, and at least one statement
 * in each of the persistence and access-method sections, are therefore
 * missing from this copy -- confirm against the upstream file.
1017  */
1018 static Relation
1019 RelationBuildDesc(Oid targetRelId, bool insertIt)
1020 {
1021  Relation relation;
1022  Oid relid;
1023  HeapTuple pg_class_tuple;
1024  Form_pg_class relp;
1025 
1026  /*
1027  * This function and its subroutines can allocate a good deal of transient
1028  * data in CurrentMemoryContext. Traditionally we've just leaked that
1029  * data, reasoning that the caller's context is at worst of transaction
1030  * scope, and relcache loads shouldn't happen so often that it's essential
1031  * to recover transient data before end of statement/transaction. However
1032  * that's definitely not true in clobber-cache test builds, and perhaps
1033  * it's not true in other cases. If RECOVER_RELATION_BUILD_MEMORY is not
1034  * zero, arrange to allocate the junk in a temporary context that we'll
1035  * free before returning. Make it a child of caller's context so that it
1036  * will get cleaned up appropriately if we error out partway through.
1037  */
1038 #if RECOVER_RELATION_BUILD_MEMORY
1039  MemoryContext tmpcxt;
1040  MemoryContext oldcxt;
1041 
     /* NOTE(review): lines 1042 and 1044 of this call are absent here */
1043  "RelationBuildDesc workspace",
1045  oldcxt = MemoryContextSwitchTo(tmpcxt);
1046 #endif
1047 
1048  /*
1049  * find the tuple in pg_class corresponding to the given relation id
1050  */
1051  pg_class_tuple = ScanPgRelation(targetRelId, true, false);
1052 
1053  /*
1054  * if no such tuple exists, return NULL
1055  */
1056  if (!HeapTupleIsValid(pg_class_tuple))
1057  {
1058 #if RECOVER_RELATION_BUILD_MEMORY
1059  /* Return to caller's context, and blow away the temporary context */
1060  MemoryContextSwitchTo(oldcxt);
1061  MemoryContextDelete(tmpcxt);
1062 #endif
1063  return NULL;
1064  }
1065 
1066  /*
1067  * get information from the pg_class_tuple
1068  */
1069  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1070  relid = relp->oid;
1071  Assert(relid == targetRelId);
1072 
1073  /*
1074  * allocate storage for the relation descriptor, and copy pg_class_tuple
1075  * to relation->rd_rel.
1076  */
1077  relation = AllocateRelationDesc(relp);
1078 
1079  /*
1080  * initialize the relation's relation id (relation->rd_id)
1081  */
1082  RelationGetRelid(relation) = relid;
1083 
1084  /*
1085  * normal relations are not nailed into the cache; nor can a pre-existing
1086  * relation be new. It could be temp though. (Actually, it could be new
1087  * too, but it's okay to forget that fact if forced to flush the entry.)
1088  */
1089  relation->rd_refcnt = 0;
1090  relation->rd_isnailed = false;
     /* NOTE(review): lines 1091-1092 are absent from this copy */
     /* derive rd_backend and rd_islocaltemp from the persistence code */
1093  switch (relation->rd_rel->relpersistence)
1094  {
1095  case RELPERSISTENCE_UNLOGGED:
1096  case RELPERSISTENCE_PERMANENT:
1097  relation->rd_backend = InvalidBackendId;
1098  relation->rd_islocaltemp = false;
1099  break;
1100  case RELPERSISTENCE_TEMP:
1101  if (isTempOrTempToastNamespace(relation->rd_rel->relnamespace))
1102  {
1103  relation->rd_backend = BackendIdForTempRelations();
1104  relation->rd_islocaltemp = true;
1105  }
1106  else
1107  {
1108  /*
1109  * If it's a temp table, but not one of ours, we have to use
1110  * the slow, grotty method to figure out the owning backend.
1111  *
1112  * Note: it's possible that rd_backend gets set to MyBackendId
1113  * here, in case we are looking at a pg_class entry left over
1114  * from a crashed backend that coincidentally had the same
1115  * BackendId we're using. We should *not* consider such a
1116  * table to be "ours"; this is why we need the separate
1117  * rd_islocaltemp flag. The pg_class entry will get flushed
1118  * if/when we clean out the corresponding temp table namespace
1119  * in preparation for using it.
1120  */
1121  relation->rd_backend =
1122  GetTempNamespaceBackendId(relation->rd_rel->relnamespace);
1123  Assert(relation->rd_backend != InvalidBackendId);
1124  relation->rd_islocaltemp = false;
1125  }
1126  break;
1127  default:
1128  elog(ERROR, "invalid relpersistence: %c",
1129  relation->rd_rel->relpersistence);
1130  break;
1131  }
1132 
1133  /*
1134  * initialize the tuple descriptor (relation->rd_att).
1135  */
1136  RelationBuildTupleDesc(relation);
1137 
1138  /*
1139  * Fetch rules and triggers that affect this relation
1140  */
1141  if (relation->rd_rel->relhasrules)
1142  RelationBuildRuleLock(relation);
1143  else
1144  {
1145  relation->rd_rules = NULL;
1146  relation->rd_rulescxt = NULL;
1147  }
1148 
1149  if (relation->rd_rel->relhastriggers)
1150  RelationBuildTriggers(relation);
1151  else
1152  relation->trigdesc = NULL;
1153 
     /* row security descriptor is built only when pg_class says it is on */
1154  if (relation->rd_rel->relrowsecurity)
1155  RelationBuildRowSecurity(relation);
1156  else
1157  relation->rd_rsdesc = NULL;
1158 
1159  /* foreign key data is not loaded till asked for */
1160  relation->rd_fkeylist = NIL;
1161  relation->rd_fkeyvalid = false;
1162 
1163  /* partitioning data is not loaded till asked for */
1164  relation->rd_partkey = NULL;
1165  relation->rd_partkeycxt = NULL;
1166  relation->rd_partdesc = NULL;
1167  relation->rd_pdcxt = NULL;
1168  relation->rd_partcheck = NIL;
1169  relation->rd_partcheckvalid = false;
1170  relation->rd_partcheckcxt = NULL;
1171 
1172  /*
1173  * initialize access method information
1174  */
1175  switch (relation->rd_rel->relkind)
1176  {
1177  case RELKIND_INDEX:
1178  case RELKIND_PARTITIONED_INDEX:
1179  Assert(relation->rd_rel->relam != InvalidOid);
1180  RelationInitIndexAccessInfo(relation);
1181  break;
1182  case RELKIND_RELATION:
1183  case RELKIND_TOASTVALUE:
1184  case RELKIND_MATVIEW:
1185  Assert(relation->rd_rel->relam != InvalidOid);
     /* NOTE(review): line 1186 is absent from this copy */
1187  break;
1188  case RELKIND_SEQUENCE:
1189  Assert(relation->rd_rel->relam == InvalidOid);
     /* NOTE(review): line 1190 is absent from this copy */
1191  break;
1192  case RELKIND_VIEW:
1193  case RELKIND_COMPOSITE_TYPE:
1194  case RELKIND_FOREIGN_TABLE:
1195  case RELKIND_PARTITIONED_TABLE:
1196  Assert(relation->rd_rel->relam == InvalidOid);
1197  break;
1198  }
1199 
1200  /* extract reloptions if any */
1201  RelationParseRelOptions(relation, pg_class_tuple);
1202 
1203  /*
1204  * initialize the relation lock manager information
1205  */
1206  RelationInitLockInfo(relation); /* see lmgr.c */
1207 
1208  /*
1209  * initialize physical addressing information for the relation
1210  */
1211  RelationInitPhysicalAddr(relation);
1212 
1213  /* make sure relation is marked as having no open file yet */
1214  relation->rd_smgr = NULL;
1215 
1216  /*
1217  * now we can free the memory allocated for pg_class_tuple
1218  */
1219  heap_freetuple(pg_class_tuple);
1220 
1221  /*
1222  * Insert newly created relation into relcache hash table, if requested.
1223  *
1224  * There is one scenario in which we might find a hashtable entry already
1225  * present, even though our caller failed to find it: if the relation is a
1226  * system catalog or index that's used during relcache load, we might have
1227  * recursively created the same relcache entry during the preceding steps.
1228  * So allow RelationCacheInsert to delete any already-present relcache
1229  * entry for the same OID. The already-present entry should have refcount
1230  * zero (else somebody forgot to close it); in the event that it doesn't,
1231  * we'll elog a WARNING and leak the already-present entry.
1232  */
1233  if (insertIt)
1234  RelationCacheInsert(relation, true);
1235 
1236  /* It's fully valid */
1237  relation->rd_isvalid = true;
1238 
1239 #if RECOVER_RELATION_BUILD_MEMORY
1240  /* Return to caller's context, and blow away the temporary context */
1241  MemoryContextSwitchTo(oldcxt);
1242  MemoryContextDelete(tmpcxt);
1243 #endif
1244 
1245  return relation;
1246 }
1247 
1248 /*
1249  * Initialize the physical addressing info (RelFileNode) for a relcache entry
1250  *
1251  * Note: at the physical level, relations in the pg_global tablespace must
1252  * be treated as shared, even if relisshared isn't set. Hence we do not
1253  * look at relisshared here.
 *
 * Fills rd_node.spcNode, rd_node.dbNode and rd_node.relNode.  Relation
 * kinds that fail RELKIND_HAS_STORAGE are left untouched.  When
 * rd_rel->relfilenode is zero the relation mapper is consulted instead,
 * and a failed mapping lookup is reported with elog(ERROR).
 *
 * NOTE(review): the embedded numbering skips 1256 and 1283-1284, so the
 * declarator line and the opening of the condition that guards the
 * pg_class re-read are missing from this copy -- confirm against the
 * upstream file.
1254  */
1255 static void
1257 {
1258  /* these relation kinds never have storage */
1259  if (!RELKIND_HAS_STORAGE(relation->rd_rel->relkind))
1260  return;
1261 
     /* a zero reltablespace stands for the database's default tablespace */
1262  if (relation->rd_rel->reltablespace)
1263  relation->rd_node.spcNode = relation->rd_rel->reltablespace;
1264  else
1265  relation->rd_node.spcNode = MyDatabaseTableSpace;
     /* relations in the global tablespace carry no database OID */
1266  if (relation->rd_node.spcNode == GLOBALTABLESPACE_OID)
1267  relation->rd_node.dbNode = InvalidOid;
1268  else
1269  relation->rd_node.dbNode = MyDatabaseId;
1270 
1271  if (relation->rd_rel->relfilenode)
1272  {
1273  /*
1274  * Even if we are using a decoding snapshot that doesn't represent the
1275  * current state of the catalog we need to make sure the filenode
1276  * points to the current file since the older file will be gone (or
1277  * truncated). The new file will still contain older rows so lookups
1278  * in them will work correctly. This wouldn't work correctly if
1279  * rewrites were allowed to change the schema in an incompatible way,
1280  * but those are prevented both on catalog tables and on user tables
1281  * declared as additional catalog tables.
1282  */
     /* NOTE(review): lines 1283-1284 (start of this condition) are absent */
1285  && IsTransactionState())
1286  {
1287  HeapTuple phys_tuple;
1288  Form_pg_class physrel;
1289 
1290  phys_tuple = ScanPgRelation(RelationGetRelid(relation),
1291  RelationGetRelid(relation) != ClassOidIndexId,
1292  true);
1293  if (!HeapTupleIsValid(phys_tuple))
1294  elog(ERROR, "could not find pg_class entry for %u",
1295  RelationGetRelid(relation));
1296  physrel = (Form_pg_class) GETSTRUCT(phys_tuple);
1297 
     /* only the two physical-location fields are refreshed */
1298  relation->rd_rel->reltablespace = physrel->reltablespace;
1299  relation->rd_rel->relfilenode = physrel->relfilenode;
1300  heap_freetuple(phys_tuple);
1301  }
1302 
1303  relation->rd_node.relNode = relation->rd_rel->relfilenode;
1304  }
1305  else
1306  {
1307  /* Consult the relation mapper */
1308  relation->rd_node.relNode =
1309  RelationMapOidToFilenode(relation->rd_id,
1310  relation->rd_rel->relisshared);
1311  if (!OidIsValid(relation->rd_node.relNode))
1312  elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1313  RelationGetRelationName(relation), relation->rd_id);
1314  }
1315 }
1316 
1317 /*
1318  * Fill in the IndexAmRoutine for an index relation.
1319  *
1320  * relation's rd_amhandler and rd_indexcxt must be valid already.
 *
 * On return relation->rd_indam points at a copy of the routine struct that
 * was allocated in rd_indexcxt; the struct handed back by
 * GetIndexAmRoutine is pfree'd here.
 *
 * NOTE(review): the embedded numbering skips 1323, so the declarator line
 * (function name and the "relation" parameter) is missing from this copy.
1321  */
1322 static void
1324 {
1325  IndexAmRoutine *cached,
1326  *tmp;
1327 
1328  /*
1329  * Call the amhandler in current, short-lived memory context, just in case
1330  * it leaks anything (it probably won't, but let's be paranoid).
1331  */
1332  tmp = GetIndexAmRoutine(relation->rd_amhandler);
1333 
1334  /* OK, now transfer the data into relation's rd_indexcxt. */
1335  cached = (IndexAmRoutine *) MemoryContextAlloc(relation->rd_indexcxt,
1336  sizeof(IndexAmRoutine));
1337  memcpy(cached, tmp, sizeof(IndexAmRoutine));
1338  relation->rd_indam = cached;
1339 
     /* the temporary struct is no longer needed once copied */
1340  pfree(tmp);
1341 }
1342 
1343 /*
1344  * Initialize index-access-method support data for an index relation
 *
 * Fills, in this order: rd_indextuple and rd_index (a private copy of the
 * pg_index row), rd_amhandler (from the pg_am row named by rd_rel->relam),
 * rd_indexcxt, rd_indam (via InitIndexAmRoutine), then the per-column
 * arrays rd_opfamily, rd_opcintype, rd_support, rd_supportinfo,
 * rd_indcollation and rd_indoption.  The expression, predicate, exclusion
 * and AM caches are reset to empty at the end.
 *
 * A failed syscache lookup, or a relnatts/indnatts disagreement, is
 * reported with elog(ERROR).
 *
 * NOTE(review): the embedded numbering skips 1347, 1374, 1402, 1404, 1406,
 * 1452, 1465 and 1484.  The declarator line, the statement that sets
 * oldcontext, the creation of indexcxt, and one argument of each
 * fastgetattr call are therefore missing from this copy -- confirm against
 * the upstream file.
1345  */
1346 void
1348 {
1349  HeapTuple tuple;
1350  Form_pg_am aform;
1351  Datum indcollDatum;
1352  Datum indclassDatum;
1353  Datum indoptionDatum;
1354  bool isnull;
1355  oidvector *indcoll;
1356  oidvector *indclass;
1357  int2vector *indoption;
1358  MemoryContext indexcxt;
1359  MemoryContext oldcontext;
1360  int indnatts;
1361  int indnkeyatts;
1362  uint16 amsupport;
1363 
1364  /*
1365  * Make a copy of the pg_index entry for the index. Since pg_index
1366  * contains variable-length and possibly-null fields, we have to do this
1367  * honestly rather than just treating it as a Form_pg_index struct.
1368  */
1369  tuple = SearchSysCache1(INDEXRELID,
1370  ObjectIdGetDatum(RelationGetRelid(relation)));
1371  if (!HeapTupleIsValid(tuple))
1372  elog(ERROR, "cache lookup failed for index %u",
1373  RelationGetRelid(relation));
     /* NOTE(review): line 1374 is absent; oldcontext is not set in view */
1375  relation->rd_indextuple = heap_copytuple(tuple);
1376  relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
1377  MemoryContextSwitchTo(oldcontext);
1378  ReleaseSysCache(tuple);
1379 
1380  /*
1381  * Look up the index's access method, save the OID of its handler function
1382  */
1383  tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(relation->rd_rel->relam));
1384  if (!HeapTupleIsValid(tuple))
1385  elog(ERROR, "cache lookup failed for access method %u",
1386  relation->rd_rel->relam);
1387  aform = (Form_pg_am) GETSTRUCT(tuple);
1388  relation->rd_amhandler = aform->amhandler;
1389  ReleaseSysCache(tuple);
1390 
     /* cross-check the column count recorded in pg_class and pg_index */
1391  indnatts = RelationGetNumberOfAttributes(relation);
1392  if (indnatts != IndexRelationGetNumberOfAttributes(relation))
1393  elog(ERROR, "relnatts disagrees with indnatts for index %u",
1394  RelationGetRelid(relation));
1395  indnkeyatts = IndexRelationGetNumberOfKeyAttributes(relation);
1396 
1397  /*
1398  * Make the private context to hold index access info. The reason we need
1399  * a context, and not just a couple of pallocs, is so that we won't leak
1400  * any subsidiary info attached to fmgr lookup records.
1401  */
     /* NOTE(review): lines 1402, 1404 and 1406 are absent from this copy */
1403  "index info",
1405  relation->rd_indexcxt = indexcxt;
1407  RelationGetRelationName(relation));
1408 
1409  /*
1410  * Now we can fetch the index AM's API struct
1411  */
1412  InitIndexAmRoutine(relation);
1413 
1414  /*
1415  * Allocate arrays to hold data. Opclasses are not used for included
1416  * columns, so allocate them for indnkeyatts only.
1417  */
1418  relation->rd_opfamily = (Oid *)
1419  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1420  relation->rd_opcintype = (Oid *)
1421  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1422 
1423  amsupport = relation->rd_indam->amsupport;
1424  if (amsupport > 0)
1425  {
     /*
      * Sized by indnatts, although the fill step further down is given
      * indnkeyatts as its column count.
      */
1426  int nsupport = indnatts * amsupport;
1427 
1428  relation->rd_support = (RegProcedure *)
1429  MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
1430  relation->rd_supportinfo = (FmgrInfo *)
1431  MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
1432  }
1433  else
1434  {
1435  relation->rd_support = NULL;
1436  relation->rd_supportinfo = NULL;
1437  }
1438 
1439  relation->rd_indcollation = (Oid *)
1440  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(Oid));
1441 
1442  relation->rd_indoption = (int16 *)
1443  MemoryContextAllocZero(indexcxt, indnkeyatts * sizeof(int16));
1444 
1445  /*
1446  * indcollation cannot be referenced directly through the C struct,
1447  * because it comes after the variable-width indkey field. Must extract
1448  * the datum the hard way...
1449  */
1450  indcollDatum = fastgetattr(relation->rd_indextuple,
1451  Anum_pg_index_indcollation,
1453  &isnull);
1454  Assert(!isnull);
1455  indcoll = (oidvector *) DatumGetPointer(indcollDatum);
1456  memcpy(relation->rd_indcollation, indcoll->values, indnkeyatts * sizeof(Oid));
1457 
1458  /*
1459  * indclass cannot be referenced directly through the C struct, because it
1460  * comes after the variable-width indkey field. Must extract the datum
1461  * the hard way...
1462  */
1463  indclassDatum = fastgetattr(relation->rd_indextuple,
1464  Anum_pg_index_indclass,
1466  &isnull);
1467  Assert(!isnull);
1468  indclass = (oidvector *) DatumGetPointer(indclassDatum);
1469 
1470  /*
1471  * Fill the support procedure OID array, as well as the info about
1472  * opfamilies and opclass input types. (aminfo and supportinfo are left
1473  * as zeroes, and are filled on-the-fly when used)
1474  */
1475  IndexSupportInitialize(indclass, relation->rd_support,
1476  relation->rd_opfamily, relation->rd_opcintype,
1477  amsupport, indnkeyatts);
1478 
1479  /*
1480  * Similarly extract indoption and copy it to the cache entry
1481  */
1482  indoptionDatum = fastgetattr(relation->rd_indextuple,
1483  Anum_pg_index_indoption,
1485  &isnull);
1486  Assert(!isnull);
1487  indoption = (int2vector *) DatumGetPointer(indoptionDatum);
1488  memcpy(relation->rd_indoption, indoption->values, indnkeyatts * sizeof(int16));
1489 
1490  /*
1491  * expressions, predicate, exclusion caches will be filled later
1492  */
1493  relation->rd_indexprs = NIL;
1494  relation->rd_indpred = NIL;
1495  relation->rd_exclops = NULL;
1496  relation->rd_exclprocs = NULL;
1497  relation->rd_exclstrats = NULL;
1498  relation->rd_amcache = NULL;
1499 }
1500 
1501 /*
1502  * IndexSupportInitialize
1503  * Initializes an index's cached opclass information,
1504  * given the index's pg_index.indclass entry.
1505  *
1506  * Data is returned into *indexSupport, *opFamily, and *opcInType,
1507  * which are arrays allocated by the caller.
1508  *
1509  * The caller also passes maxSupportNumber and maxAttributeNumber, since these
1510  * indicate the size of the arrays it has allocated --- but in practice these
1511  * numbers must always match those obtainable from the system catalog entries
1512  * for the index and access method.
 *
 * indclass->values[i] supplies the opclass OID for column i; an invalid
 * OID there is reported as "bogus pg_index tuple".  indexSupport is
 * treated as a flat array holding maxSupportNumber slots per column, and
 * is not touched at all when maxSupportNumber is zero.
 *
 * NOTE(review): the embedded numbering skips 1515, so the first declarator
 * line (function name and the indclass parameter) is missing from this
 * copy.
1513  */
1514 static void
1516  RegProcedure *indexSupport,
1517  Oid *opFamily,
1518  Oid *opcInType,
1519  StrategyNumber maxSupportNumber,
1520  AttrNumber maxAttributeNumber)
1521 {
1522  int attIndex;
1523 
1524  for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++)
1525  {
1526  OpClassCacheEnt *opcentry;
1527 
1528  if (!OidIsValid(indclass->values[attIndex]))
1529  elog(ERROR, "bogus pg_index tuple");
1530 
1531  /* look up the info for this opclass, using a cache */
1532  opcentry = LookupOpclassInfo(indclass->values[attIndex],
1533  maxSupportNumber);
1534 
1535  /* copy cached data into relcache entry */
1536  opFamily[attIndex] = opcentry->opcfamily;
1537  opcInType[attIndex] = opcentry->opcintype;
     /* each column owns a run of maxSupportNumber consecutive slots */
1538  if (maxSupportNumber > 0)
1539  memcpy(&indexSupport[attIndex * maxSupportNumber],
1540  opcentry->supportProcs,
1541  maxSupportNumber * sizeof(RegProcedure));
1542  }
1543 }
1544 
1545 /*
1546  * LookupOpclassInfo
1547  *
1548  * This routine maintains a per-opclass cache of the information needed
1549  * by IndexSupportInitialize(). This is more efficient than relying on
1550  * the catalog cache, because we can load all the info about a particular
1551  * opclass in a single indexscan of pg_amproc.
1552  *
1553  * The information from pg_am about expected range of support function
1554  * numbers is passed in, rather than being looked up, mainly because the
1555  * caller will have it already.
1556  *
1557  * Note there is no provision for flushing the cache. This is OK at the
1558  * moment because there is no way to ALTER any interesting properties of an
1559  * existing opclass --- all you can do is drop it, which will result in
1560  * a useless but harmless dead entry in the cache. To support altering
1561  * opclass membership (not the same as opfamily membership!), we'd need to
1562  * be able to flush this cache as well as the contents of relcache entries
1563  * for indexes.
 *
 * Returns the OpClassCache entry for operatorClassOid.  The entry is
 * created with HASH_ENTER when absent, and its opcfamily, opcintype and
 * supportProcs fields are loaded from pg_opclass and pg_amproc unless it
 * is already marked valid.  An entry that already exists must have been
 * created with the same numSupport (checked by Assert).
 *
 * A missing pg_opclass row, or a pg_amproc row whose amprocnum lies
 * outside 1..numSupport, is reported with elog(ERROR).
 *
 * NOTE(review): the embedded numbering skips 1590, 1605, 1665 and 1703.
 * The body of the "if (!CacheMemoryContext)" test, the head of the
 * supportProcs allocation call, and a statement after each
 * systable_endscan are therefore missing from this copy -- confirm against
 * the upstream file.
1564  */
1565 static OpClassCacheEnt *
1566 LookupOpclassInfo(Oid operatorClassOid,
1567  StrategyNumber numSupport)
1568 {
1569  OpClassCacheEnt *opcentry;
1570  bool found;
1571  Relation rel;
1572  SysScanDesc scan;
1573  ScanKeyData skey[3];
1574  HeapTuple htup;
1575  bool indexOK;
1576 
1577  if (OpClassCache == NULL)
1578  {
1579  /* First time through: initialize the opclass cache */
1580  HASHCTL ctl;
1581 
1582  MemSet(&ctl, 0, sizeof(ctl));
1583  ctl.keysize = sizeof(Oid);
1584  ctl.entrysize = sizeof(OpClassCacheEnt);
1585  OpClassCache = hash_create("Operator class cache", 64,
1586  &ctl, HASH_ELEM | HASH_BLOBS);
1587 
1588  /* Also make sure CacheMemoryContext exists */
1589  if (!CacheMemoryContext)
     /* NOTE(review): line 1590 (the body of this test) is absent here */
1591  }
1592 
1593  opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
1594  (void *) &operatorClassOid,
1595  HASH_ENTER, &found);
1596 
1597  if (!found)
1598  {
1599  /* Need to allocate memory for new entry */
1600  opcentry->valid = false; /* until known OK */
1601  opcentry->numSupport = numSupport;
1602 
1603  if (numSupport > 0)
1604  opcentry->supportProcs = (RegProcedure *)
1606  numSupport * sizeof(RegProcedure));
1607  else
1608  opcentry->supportProcs = NULL;
1609  }
1610  else
1611  {
1612  Assert(numSupport == opcentry->numSupport);
1613  }
1614 
1615  /*
1616  * When testing for cache-flush hazards, we intentionally disable the
1617  * operator class cache and force reloading of the info on each call. This
1618  * is helpful because we want to test the case where a cache flush occurs
1619  * while we are loading the info, and it's very hard to provoke that if
1620  * this happens only once per opclass per backend.
1621  */
1622 #if defined(CLOBBER_CACHE_ALWAYS)
1623  opcentry->valid = false;
1624 #endif
1625 
     /* fast path: a previous call already loaded this entry */
1626  if (opcentry->valid)
1627  return opcentry;
1628 
1629  /*
1630  * Need to fill in new entry.
1631  *
1632  * To avoid infinite recursion during startup, force heap scans if we're
1633  * looking up info for the opclasses used by the indexes we would like to
1634  * reference here.
1635  */
1636  indexOK = criticalRelcachesBuilt ||
1637  (operatorClassOid != OID_BTREE_OPS_OID &&
1638  operatorClassOid != INT2_BTREE_OPS_OID);
1639 
1640  /*
1641  * We have to fetch the pg_opclass row to determine its opfamily and
1642  * opcintype, which are needed to look up related operators and functions.
1643  * It'd be convenient to use the syscache here, but that probably doesn't
1644  * work while bootstrapping.
1645  */
1646  ScanKeyInit(&skey[0],
1647  Anum_pg_opclass_oid,
1648  BTEqualStrategyNumber, F_OIDEQ,
1649  ObjectIdGetDatum(operatorClassOid));
1650  rel = table_open(OperatorClassRelationId, AccessShareLock);
1651  scan = systable_beginscan(rel, OpclassOidIndexId, indexOK,
1652  NULL, 1, skey);
1653 
1654  if (HeapTupleIsValid(htup = systable_getnext(scan)))
1655  {
1656  Form_pg_opclass opclassform = (Form_pg_opclass) GETSTRUCT(htup);
1657 
1658  opcentry->opcfamily = opclassform->opcfamily;
1659  opcentry->opcintype = opclassform->opcintype;
1660  }
1661  else
1662  elog(ERROR, "could not find tuple for opclass %u", operatorClassOid);
1663 
1664  systable_endscan(scan);
     /* NOTE(review): line 1665 is absent from this copy */
1666 
1667  /*
1668  * Scan pg_amproc to obtain support procs for the opclass. We only fetch
1669  * the default ones (those with lefttype = righttype = opcintype).
1670  */
1671  if (numSupport > 0)
1672  {
1673  ScanKeyInit(&skey[0],
1674  Anum_pg_amproc_amprocfamily,
1675  BTEqualStrategyNumber, F_OIDEQ,
1676  ObjectIdGetDatum(opcentry->opcfamily));
1677  ScanKeyInit(&skey[1],
1678  Anum_pg_amproc_amproclefttype,
1679  BTEqualStrategyNumber, F_OIDEQ,
1680  ObjectIdGetDatum(opcentry->opcintype));
1681  ScanKeyInit(&skey[2],
1682  Anum_pg_amproc_amprocrighttype,
1683  BTEqualStrategyNumber, F_OIDEQ,
1684  ObjectIdGetDatum(opcentry->opcintype));
1685  rel = table_open(AccessMethodProcedureRelationId, AccessShareLock);
1686  scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK,
1687  NULL, 3, skey);
1688 
1689  while (HeapTupleIsValid(htup = systable_getnext(scan)))
1690  {
1691  Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);
1692 
     /* amprocnum is 1-based and must fit the supportProcs array */
1693  if (amprocform->amprocnum <= 0 ||
1694  (StrategyNumber) amprocform->amprocnum > numSupport)
1695  elog(ERROR, "invalid amproc number %d for opclass %u",
1696  amprocform->amprocnum, operatorClassOid);
1697 
1698  opcentry->supportProcs[amprocform->amprocnum - 1] =
1699  amprocform->amproc;
1700  }
1701 
1702  systable_endscan(scan);
     /* NOTE(review): line 1703 is absent from this copy */
1704  }
1705 
1706  opcentry->valid = true;
1707  return opcentry;
1708 }
1709 
1710 /*
1711  * Fill in the TableAmRoutine for a relation
1712  *
1713  * relation's rd_amhandler must be valid already.
 *
 * Stores the pointer returned by GetTableAmRoutine directly in
 * relation->rd_tableam; no copy is made here.
 *
 * NOTE(review): the embedded numbering skips 1716, so the declarator line
 * (function name and the "relation" parameter) is missing from this copy.
1714  */
1715 static void
1717 {
1718  relation->rd_tableam = GetTableAmRoutine(relation->rd_amhandler);
1719 }
1720 
1721 /*
1722  * Initialize table access method support for a table like relation
 *
 * Chooses relation->rd_amhandler and then calls InitTableAmRoutine:
 *   - sequences get HEAP_TABLE_AM_HANDLER_OID unconditionally;
 *   - catalog relations get HEAP_TABLE_AM_HANDLER_OID without a lookup,
 *     after asserting that rd_rel->relam is HEAP_TABLE_AM_OID;
 *   - everything else takes the handler from the AMOID syscache entry for
 *     rd_rel->relam, and a failed lookup is reported with elog(ERROR).
 *
 * NOTE(review): the embedded numbering skips 1725, so the declarator line
 * (function name and the "relation" parameter) is missing from this copy.
1723  */
1724 void
1726 {
1727  HeapTuple tuple;
1728  Form_pg_am aform;
1729 
1730  if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
1731  {
1732  /*
1733  * Sequences are currently accessed like heap tables, but it doesn't
1734  * seem prudent to show that in the catalog. So just overwrite it
1735  * here.
1736  */
1737  relation->rd_amhandler = HEAP_TABLE_AM_HANDLER_OID;
1738  }
1739  else if (IsCatalogRelation(relation))
1740  {
1741  /*
1742  * Avoid doing a syscache lookup for catalog tables.
1743  */
1744  Assert(relation->rd_rel->relam == HEAP_TABLE_AM_OID);
1745  relation->rd_amhandler = HEAP_TABLE_AM_HANDLER_OID;
1746  }
1747  else
1748  {
1749  /*
1750  * Look up the table access method, save the OID of its handler
1751  * function.
1752  */
1753  Assert(relation->rd_rel->relam != InvalidOid);
1754  tuple = SearchSysCache1(AMOID,
1755  ObjectIdGetDatum(relation->rd_rel->relam));
1756  if (!HeapTupleIsValid(tuple))
1757  elog(ERROR, "cache lookup failed for access method %u",
1758  relation->rd_rel->relam);
1759  aform = (Form_pg_am) GETSTRUCT(tuple);
1760  relation->rd_amhandler = aform->amhandler;
1761  ReleaseSysCache(tuple);
1762  }
1763 
1764  /*
1765  * Now we can fetch the table AM's API struct
1766  */
1767  InitTableAmRoutine(relation);
1768 }
1769 
1770 /*
1771  * formrdesc
1772  *
1773  * This is a special cut-down version of RelationBuildDesc(),
1774  * used while initializing the relcache.
1775  * The relation descriptor is built just from the supplied parameters,
1776  * without actually looking at any system table entries. We cheat
1777  * quite a lot since we only need to work for a few basic system
1778  * catalogs.
1779  *
1780  * The catalogs this is used for can't have constraints (except attnotnull),
1781  * default values, rules, or triggers, since we don't cope with any of that.
1782  * (Well, actually, this only matters for properties that need to be valid
1783  * during bootstrap or before RelationCacheInitializePhase3 runs, and none of
1784  * these properties matter then...)
1785  *
1786  * NOTE: we assume we are already switched into CacheMemoryContext.
 *
 * relationName is copied into rd_rel->relname.  relationReltype is stored
 * both as rd_rel->reltype and as the tuple descriptor's tdtypeid.
 * isshared sets rd_rel->relisshared and, when true, puts the relation in
 * GLOBALTABLESPACE_OID.  attrs must hold natts entries; the first one
 * supplies the relation OID through its attrelid, so natts is assumed to
 * be at least 1.
 *
 * The finished entry is nailed (rd_isnailed = true, rd_refcnt = 1), is
 * added through RelationCacheInsert, and is marked valid last.
 *
 * NOTE(review): the embedded numbering skips 1815-1816, 1829, 1878,
 * 1908-1909 and 1932.  Among the missing text is whatever allocates
 * rd_rel, the size argument of the attribute memcpy, the head of the
 * relation-mapper call and the condition of the relhasindex test --
 * confirm against the upstream file.
1787  */
1788 static void
1789 formrdesc(const char *relationName, Oid relationReltype,
1790  bool isshared,
1791  int natts, const FormData_pg_attribute *attrs)
1792 {
1793  Relation relation;
1794  int i;
1795  bool has_not_null;
1796 
1797  /*
1798  * allocate new relation desc, clear all fields of reldesc
1799  */
1800  relation = (Relation) palloc0(sizeof(RelationData));
1801 
1802  /* make sure relation is marked as having no open file yet */
1803  relation->rd_smgr = NULL;
1804 
1805  /*
1806  * initialize reference count: 1 because it is nailed in cache
1807  */
1808  relation->rd_refcnt = 1;
1809 
1810  /*
1811  * all entries built with this routine are nailed-in-cache; none are for
1812  * new or temp relations.
1813  */
1814  relation->rd_isnailed = true;
     /* NOTE(review): lines 1815-1816 are absent from this copy */
1817  relation->rd_backend = InvalidBackendId;
1818  relation->rd_islocaltemp = false;
1819 
1820  /*
1821  * initialize relation tuple form
1822  *
1823  * The data we insert here is pretty incomplete/bogus, but it'll serve to
1824  * get us launched. RelationCacheInitializePhase3() will read the real
1825  * data from pg_class and replace what we've done here. Note in
1826  * particular that relowner is left as zero; this cues
1827  * RelationCacheInitializePhase3 that the real data isn't there yet.
1828  */
     /* NOTE(review): line 1829 is absent; rd_rel is not allocated in view */
1830 
1831  namestrcpy(&relation->rd_rel->relname, relationName);
1832  relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1833  relation->rd_rel->reltype = relationReltype;
1834 
1835  /*
1836  * It's important to distinguish between shared and non-shared relations,
1837  * even at bootstrap time, to make sure we know where they are stored.
1838  */
1839  relation->rd_rel->relisshared = isshared;
1840  if (isshared)
1841  relation->rd_rel->reltablespace = GLOBALTABLESPACE_OID;
1842 
1843  /* formrdesc is used only for permanent relations */
1844  relation->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
1845 
1846  /* ... and they're always populated, too */
1847  relation->rd_rel->relispopulated = true;
1848 
1849  relation->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
1850  relation->rd_rel->relpages = 0;
1851  relation->rd_rel->reltuples = 0;
1852  relation->rd_rel->relallvisible = 0;
1853  relation->rd_rel->relkind = RELKIND_RELATION;
1854  relation->rd_rel->relnatts = (int16) natts;
1855  relation->rd_rel->relam = HEAP_TABLE_AM_OID;
1856 
1857  /*
1858  * initialize attribute tuple form
1859  *
1860  * Unlike the case with the relation tuple, this data had better be right
1861  * because it will never be replaced. The data comes from
1862  * src/include/catalog/ headers via genbki.pl.
1863  */
1864  relation->rd_att = CreateTemplateTupleDesc(natts);
1865  relation->rd_att->tdrefcount = 1; /* mark as refcounted */
1866 
1867  relation->rd_att->tdtypeid = relationReltype;
1868  relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
1869 
1870  /*
1871  * initialize tuple desc info
1872  */
1873  has_not_null = false;
1874  for (i = 0; i < natts; i++)
1875  {
1876  memcpy(TupleDescAttr(relation->rd_att, i),
1877  &attrs[i],
1879  has_not_null |= attrs[i].attnotnull;
1880  /* make sure attcacheoff is valid */
1881  TupleDescAttr(relation->rd_att, i)->attcacheoff = -1;
1882  }
1883 
1884  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
1885  TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
1886 
1887  /* mark not-null status */
1888  if (has_not_null)
1889  {
1890  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
1891 
1892  constr->has_not_null = true;
1893  relation->rd_att->constr = constr;
1894  }
1895 
1896  /*
1897  * initialize relation id from info in att array (my, this is ugly)
1898  */
1899  RelationGetRelid(relation) = TupleDescAttr(relation->rd_att, 0)->attrelid;
1900 
1901  /*
1902  * All relations made with formrdesc are mapped. This is necessarily so
1903  * because there is no other way to know what filenode they currently
1904  * have. In bootstrap mode, add them to the initial relation mapper data,
1905  * specifying that the initial filenode is the same as the OID.
1906  */
1907  relation->rd_rel->relfilenode = InvalidOid;
     /* NOTE(review): lines 1908-1909 (head of this call) are absent */
1910  RelationGetRelid(relation),
1911  isshared, true);
1912 
1913  /*
1914  * initialize the relation lock manager information
1915  */
1916  RelationInitLockInfo(relation); /* see lmgr.c */
1917 
1918  /*
1919  * initialize physical addressing information for the relation
1920  */
1921  RelationInitPhysicalAddr(relation);
1922 
1923  /*
1924  * initialize the table am handler
1925  */
     /* relam was already given this same value further up */
1926  relation->rd_rel->relam = HEAP_TABLE_AM_OID;
1927  relation->rd_tableam = GetHeapamTableAmRoutine();
1928 
1929  /*
1930  * initialize the rel-has-index flag, using hardwired knowledge
1931  */
     /* NOTE(review): line 1932 (the condition of this test) is absent */
1933  {
1934  /* In bootstrap mode, we have no indexes */
1935  relation->rd_rel->relhasindex = false;
1936  }
1937  else
1938  {
1939  /* Otherwise, all the rels formrdesc is used for have indexes */
1940  relation->rd_rel->relhasindex = true;
1941  }
1942 
1943  /*
1944  * add new reldesc to relcache
1945  */
1946  RelationCacheInsert(relation, false);
1947 
1948  /* It's fully valid */
1949  relation->rd_isvalid = true;
1950 }
1951 
1952 
1953 /* ----------------------------------------------------------------
1954  * Relation Descriptor Lookup Interface
1955  * ----------------------------------------------------------------
1956  */
1957 
1958 /*
1959  * RelationIdGetRelation
1960  *
1961  * Lookup a reldesc by OID; make one if not already in cache.
1962  *
1963  * Returns NULL if no pg_class row could be found for the given relid
1964  * (suggesting we are trying to access a just-deleted relation).
1965  * Any other error is reported via elog.
1966  *
1967  * NB: caller should already have at least AccessShareLock on the
1968  * relation ID, else there are nasty race conditions.
1969  *
1970  * NB: relation ref count is incremented, or set to 1 if new entry.
1971  * Caller should eventually decrement count. (Usually,
1972  * that happens by calling RelationClose().)
 *
 * On a cache hit whose entry is not marked valid, the entry is refreshed
 * before being returned; non-index relations go through
 * RelationClearRelation(rd, true).  On a miss, the entry is built and
 * inserted by RelationBuildDesc(relationId, true).
 *
 * NOTE(review): the embedded numbering skips 1975, 1980, 1989, 2000, 2012
 * and 2023.  The declarator line, the statement under the "Make sure
 * we're in an xact" comment, both reference-count increments promised
 * above, the index branch of the revalidation and the tail of the Assert
 * are therefore missing from this copy -- confirm against the upstream
 * file.
1973  */
1974 Relation
1976 {
1977  Relation rd;
1978 
1979  /* Make sure we're in an xact, even if this ends up being a cache hit */
1981 
1982  /*
1983  * first try to find reldesc in the cache
1984  */
1985  RelationIdCacheLookup(relationId, rd);
1986 
1987  if (RelationIsValid(rd))
1988  {
     /* NOTE(review): line 1989 is absent from this copy */
1990  /* revalidate cache entry if necessary */
1991  if (!rd->rd_isvalid)
1992  {
1993  /*
1994  * Indexes only have a limited number of possible schema changes,
1995  * and we don't want to use the full-blown procedure because it's
1996  * a headache for indexes that reload itself depends on.
1997  */
1998  if (rd->rd_rel->relkind == RELKIND_INDEX ||
1999  rd->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
     /* NOTE(review): line 2000 (the index branch) is absent here */
2001  else
2002  RelationClearRelation(rd, true);
2003 
2004  /*
2005  * Normally entries need to be valid here, but before the relcache
2006  * has been initialized, not enough infrastructure exists to
2007  * perform pg_class lookups. The structure of such entries doesn't
2008  * change, but we still want to update the rd_rel entry. So
2009  * rd_isvalid = false is left in place for a later lookup.
2010  */
2011  Assert(rd->rd_isvalid ||
2013  }
2014  return rd;
2015  }
2016 
2017  /*
2018  * no reldesc in the cache, so have RelationBuildDesc() build one and add
2019  * it.
2020  */
2021  rd = RelationBuildDesc(relationId, true);
2022  if (RelationIsValid(rd))
     /* NOTE(review): line 2023 (the body of this test) is absent here */
2024  return rd;
2025 }
2026 
2027 /* ----------------------------------------------------------------
2028  * cache invalidation support routines
2029  * ----------------------------------------------------------------
2030  */
2031 
2032 /*
2033  * RelationIncrementReferenceCount
2034  * Increments relation reference count.
2035  *
2036  * Note: bootstrap mode has its own weird ideas about relation refcount
2037  * behavior; we ought to fix it someday, but for now, just disable
2038  * reference count ownership tracking in bootstrap mode.
2039  */
2040 void
2042 {
2044  rel->rd_refcnt += 1;
2047 }
2048 
2049 /*
2050  * RelationDecrementReferenceCount
2051  * Decrements relation reference count.
2052  */
2053 void
2055 {
2056  Assert(rel->rd_refcnt > 0);
2057  rel->rd_refcnt -= 1;
2060 }
2061 
2062 /*
2063  * RelationClose - close an open relation
2064  *
2065  * Actually, we just decrement the refcount.
2066  *
2067  * NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
2068  * will be freed as soon as their refcount goes to zero. In combination
2069  * with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
2070  * to catch references to already-released relcache entries. It slows
2071  * things down quite a bit, however.
2072  */
2073 void
2075 {
2076  /* Note: no locking manipulations needed */
2078 
2079  /*
2080  * If the relation is no longer open in this session, we can clean up any
2081  * stale partition descriptors it has. This is unlikely, so check to see
2082  * if there are child contexts before expending a call to mcxt.c.
2083  */
2084  if (RelationHasReferenceCountZero(relation) &&
2085  relation->rd_pdcxt != NULL &&
2086  relation->rd_pdcxt->firstchild != NULL)
2088 
2089 #ifdef RELCACHE_FORCE_RELEASE
2090  if (RelationHasReferenceCountZero(relation) &&
2091  relation->rd_createSubid == InvalidSubTransactionId &&
2093  RelationClearRelation(relation, false);
2094 #endif
2095 }
2096 
2097 /*
2098  * RelationReloadIndexInfo - reload minimal information for an open index
2099  *
2100  * This function is used only for indexes. A relcache inval on an index
2101  * can mean that its pg_class or pg_index row changed. There are only
2102  * very limited changes that are allowed to an existing index's schema,
2103  * so we can update the relcache entry without a complete rebuild; which
2104  * is fortunate because we can't rebuild an index entry that is "nailed"
2105  * and/or in active use. We support full replacement of the pg_class row,
2106  * as well as updates of a few simple fields of the pg_index row.
2107  *
2108  * We can't necessarily reread the catalog rows right away; we might be
2109  * in a failed transaction when we receive the SI notification. If so,
2110  * RelationClearRelation just marks the entry as invalid by setting
2111  * rd_isvalid to false. This routine is called to fix the entry when it
2112  * is next needed.
2113  *
2114  * We assume that at the time we are called, we have at least AccessShareLock
2115  * on the target index. (Note: in the calls from RelationClearRelation,
2116  * this is legitimate because we know the rel has positive refcount.)
2117  *
2118  * If the target index is an index on pg_class or pg_index, we'd better have
2119  * previously gotten at least AccessShareLock on its underlying catalog,
2120  * else we are at risk of deadlock against someone trying to exclusive-lock
2121  * the heap and index in that order. This is ensured in current usage by
2122  * only applying this to indexes being opened or having positive refcount.
2123  */
2124 static void
2126 {
2127  bool indexOK;
2128  HeapTuple pg_class_tuple;
2129  Form_pg_class relp;
2130 
2131  /* Should be called only for invalidated indexes */
2132  Assert((relation->rd_rel->relkind == RELKIND_INDEX ||
2133  relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) &&
2134  !relation->rd_isvalid);
2135 
2136  /* Ensure it's closed at smgr level */
2137  RelationCloseSmgr(relation);
2138 
2139  /* Must free any AM cached data upon relcache flush */
2140  if (relation->rd_amcache)
2141  pfree(relation->rd_amcache);
2142  relation->rd_amcache = NULL;
2143 
2144  /*
2145  * If it's a shared index, we might be called before backend startup has
2146  * finished selecting a database, in which case we have no way to read
2147  * pg_class yet. However, a shared index can never have any significant
2148  * schema updates, so it's okay to ignore the invalidation signal. Just
2149  * mark it valid and return without doing anything more.
2150  */
2151  if (relation->rd_rel->relisshared && !criticalRelcachesBuilt)
2152  {
2153  relation->rd_isvalid = true;
2154  return;
2155  }
2156 
2157  /*
2158  * Read the pg_class row
2159  *
2160  * Don't try to use an indexscan of pg_class_oid_index to reload the info
2161  * for pg_class_oid_index ...
2162  */
2163  indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
2164  pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK, false);
2165  if (!HeapTupleIsValid(pg_class_tuple))
2166  elog(ERROR, "could not find pg_class tuple for index %u",
2167  RelationGetRelid(relation));
2168  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2169  memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2170  /* Reload reloptions in case they changed */
2171  if (relation->rd_options)
2172  pfree(relation->rd_options);
2173  RelationParseRelOptions(relation, pg_class_tuple);
2174  /* done with pg_class tuple */
2175  heap_freetuple(pg_class_tuple);
2176  /* We must recalculate physical address in case it changed */
2177  RelationInitPhysicalAddr(relation);
2178 
2179  /*
2180  * For a non-system index, there are fields of the pg_index row that are
2181  * allowed to change, so re-read that row and update the relcache entry.
2182  * Most of the info derived from pg_index (such as support function lookup
2183  * info) cannot change, and indeed the whole point of this routine is to
2184  * update the relcache entry without clobbering that data; so wholesale
2185  * replacement is not appropriate.
2186  */
2187  if (!IsSystemRelation(relation))
2188  {
2189  HeapTuple tuple;
2191 
2192  tuple = SearchSysCache1(INDEXRELID,
2193  ObjectIdGetDatum(RelationGetRelid(relation)));
2194  if (!HeapTupleIsValid(tuple))
2195  elog(ERROR, "cache lookup failed for index %u",
2196  RelationGetRelid(relation));
2197  index = (Form_pg_index) GETSTRUCT(tuple);
2198 
2199  /*
2200  * Basically, let's just copy all the bool fields. There are one or
2201  * two of these that can't actually change in the current code, but
2202  * it's not worth it to track exactly which ones they are. None of
2203  * the array fields are allowed to change, though.
2204  */
2205  relation->rd_index->indisunique = index->indisunique;
2206  relation->rd_index->indisprimary = index->indisprimary;
2207  relation->rd_index->indisexclusion = index->indisexclusion;
2208  relation->rd_index->indimmediate = index->indimmediate;
2209  relation->rd_index->indisclustered = index->indisclustered;
2210  relation->rd_index->indisvalid = index->indisvalid;
2211  relation->rd_index->indcheckxmin = index->indcheckxmin;
2212  relation->rd_index->indisready = index->indisready;
2213  relation->rd_index->indislive = index->indislive;
2214 
2215  /* Copy xmin too, as that is needed to make sense of indcheckxmin */
2217  HeapTupleHeaderGetXmin(tuple->t_data));
2218 
2219  ReleaseSysCache(tuple);
2220  }
2221 
2222  /* Okay, now it's valid again */
2223  relation->rd_isvalid = true;
2224 }
2225 
2226 /*
2227  * RelationReloadNailed - reload minimal information for nailed relations.
2228  *
2229  * The structure of a nailed relation can never change (which is good, because
2230  * we rely on knowing their structure to be able to read catalog content). But
2231  * some parts, e.g. pg_class.relfrozenxid, are still important to have
2232  * accurate content for. Therefore those need to be reloaded after the arrival
2233  * of invalidations.
2234  */
2235 static void
2237 {
2238  Assert(relation->rd_isnailed);
2239 
2240  /*
2241  * Redo RelationInitPhysicalAddr in case it is a mapped relation whose
2242  * mapping changed.
2243  */
2244  RelationInitPhysicalAddr(relation);
2245 
2246  /* flag as needing to be revalidated */
2247  relation->rd_isvalid = false;
2248 
2249  /*
2250  * Can only reread catalog contents if in a transaction. If the relation
2251  * is currently open (not counting the nailed refcount), do so
2252  * immediately. Otherwise we've already marked the entry as possibly
2253  * invalid, and it'll be fixed when next opened.
2254  */
2255  if (!IsTransactionState() || relation->rd_refcnt <= 1)
2256  return;
2257 
2258  if (relation->rd_rel->relkind == RELKIND_INDEX)
2259  {
2260  /*
2261  * If it's a nailed-but-not-mapped index, then we need to re-read the
2262  * pg_class row to see if its relfilenode changed.
2263  */
2264  RelationReloadIndexInfo(relation);
2265  }
2266  else
2267  {
2268  /*
2269  * Reload a non-index entry. We can't easily do so if relcaches
2270  * aren't yet built, but that's fine because at that stage the
2271  * attributes that need to be current (like relfrozenxid) aren't yet
2272  * accessed. To ensure the entry will later be revalidated, we leave
2273  * it in invalid state, but allow use (cf. RelationIdGetRelation()).
2274  */
2276  {
2277  HeapTuple pg_class_tuple;
2278  Form_pg_class relp;
2279 
2280  /*
2281  * NB: Mark the entry as valid before starting to scan, to avoid
2282  * self-recursion when re-building pg_class.
2283  */
2284  relation->rd_isvalid = true;
2285 
2286  pg_class_tuple = ScanPgRelation(RelationGetRelid(relation),
2287  true, false);
2288  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2289  memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2290  heap_freetuple(pg_class_tuple);
2291 
2292  /*
2293  * Again mark as valid, to protect against concurrently arriving
2294  * invalidations.
2295  */
2296  relation->rd_isvalid = true;
2297  }
2298  }
2299 }
2300 
2301 /*
2302  * RelationDestroyRelation
2303  *
2304  * Physically delete a relation cache entry and all subsidiary data.
2305  * Caller must already have unhooked the entry from the hash table.
2306  */
2307 static void
2308 RelationDestroyRelation(Relation relation, bool remember_tupdesc)
2309 {
2311 
2312  /*
2313  * Make sure smgr and lower levels close the relation's files, if they
2314  * weren't closed already. (This was probably done by caller, but let's
2315  * just be real sure.)
2316  */
2317  RelationCloseSmgr(relation);
2318 
2319  /*
2320  * Free all the subsidiary data structures of the relcache entry, then the
2321  * entry itself.
2322  */
2323  if (relation->rd_rel)
2324  pfree(relation->rd_rel);
2325  /* can't use DecrTupleDescRefCount here */
2326  Assert(relation->rd_att->tdrefcount > 0);
2327  if (--relation->rd_att->tdrefcount == 0)
2328  {
2329  /*
2330  * If we Rebuilt a relcache entry during a transaction then its
2331  * possible we did that because the TupDesc changed as the result of
2332  * an ALTER TABLE that ran at less than AccessExclusiveLock. It's
2333  * possible someone copied that TupDesc, in which case the copy would
2334  * point to free'd memory. So if we rebuild an entry we keep the
2335  * TupDesc around until end of transaction, to be safe.
2336  */
2337  if (remember_tupdesc)
2339  else
2340  FreeTupleDesc(relation->rd_att);
2341  }
2342  FreeTriggerDesc(relation->trigdesc);
2343  list_free_deep(relation->rd_fkeylist);
2344  list_free(relation->rd_indexlist);
2345  bms_free(relation->rd_indexattr);
2346  bms_free(relation->rd_keyattr);
2347  bms_free(relation->rd_pkattr);
2348  bms_free(relation->rd_idattr);
2349  if (relation->rd_pubactions)
2350  pfree(relation->rd_pubactions);
2351  if (relation->rd_options)
2352  pfree(relation->rd_options);
2353  if (relation->rd_indextuple)
2354  pfree(relation->rd_indextuple);
2355  if (relation->rd_amcache)
2356  pfree(relation->rd_amcache);
2357  if (relation->rd_fdwroutine)
2358  pfree(relation->rd_fdwroutine);
2359  if (relation->rd_indexcxt)
2360  MemoryContextDelete(relation->rd_indexcxt);
2361  if (relation->rd_rulescxt)
2362  MemoryContextDelete(relation->rd_rulescxt);
2363  if (relation->rd_rsdesc)
2364  MemoryContextDelete(relation->rd_rsdesc->rscxt);
2365  if (relation->rd_partkeycxt)
2367  if (relation->rd_pdcxt)
2368  MemoryContextDelete(relation->rd_pdcxt);
2369  if (relation->rd_partcheckcxt)
2371  pfree(relation);
2372 }
2373 
2374 /*
2375  * RelationClearRelation
2376  *
2377  * Physically blow away a relation cache entry, or reset it and rebuild
2378  * it from scratch (that is, from catalog entries). The latter path is
2379  * used when we are notified of a change to an open relation (one with
2380  * refcount > 0).
2381  *
2382  * NB: when rebuilding, we'd better hold some lock on the relation,
2383  * else the catalog data we need to read could be changing under us.
2384  * Also, a rel to be rebuilt had better have refcnt > 0. This is because
2385  * a sinval reset could happen while we're accessing the catalogs, and
2386  * the rel would get blown away underneath us by RelationCacheInvalidate
2387  * if it has zero refcnt.
2388  *
2389  * The "rebuild" parameter is redundant in current usage because it has
2390  * to match the relation's refcnt status, but we keep it as a crosscheck
2391  * that we're doing what the caller expects.
2392  */
2393 static void
2394 RelationClearRelation(Relation relation, bool rebuild)
2395 {
2396  /*
2397  * As per notes above, a rel to be rebuilt MUST have refcnt > 0; while of
2398  * course it would be an equally bad idea to blow away one with nonzero
2399  * refcnt, since that would leave someone somewhere with a dangling
2400  * pointer. All callers are expected to have verified that this holds.
2401  */
2402  Assert(rebuild ?
2403  !RelationHasReferenceCountZero(relation) :
2404  RelationHasReferenceCountZero(relation));
2405 
2406  /*
2407  * Make sure smgr and lower levels close the relation's files, if they
2408  * weren't closed already. If the relation is not getting deleted, the
2409  * next smgr access should reopen the files automatically. This ensures
2410  * that the low-level file access state is updated after, say, a vacuum
2411  * truncation.
2412  */
2413  RelationCloseSmgr(relation);
2414 
2415  /* Free AM cached data, if any */
2416  if (relation->rd_amcache)
2417  pfree(relation->rd_amcache);
2418  relation->rd_amcache = NULL;
2419 
2420  /*
2421  * Treat nailed-in system relations separately, they always need to be
2422  * accessible, so we can't blow them away.
2423  */
2424  if (relation->rd_isnailed)
2425  {
2426  RelationReloadNailed(relation);
2427  return;
2428  }
2429 
2430  /*
2431  * Even non-system indexes should not be blown away if they are open and
2432  * have valid index support information. This avoids problems with active
2433  * use of the index support information. As with nailed indexes, we
2434  * re-read the pg_class row to handle possible physical relocation of the
2435  * index, and we check for pg_index updates too.
2436  */
2437  if ((relation->rd_rel->relkind == RELKIND_INDEX ||
2438  relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) &&
2439  relation->rd_refcnt > 0 &&
2440  relation->rd_indexcxt != NULL)
2441  {
2442  relation->rd_isvalid = false; /* needs to be revalidated */
2443  if (IsTransactionState())
2444  RelationReloadIndexInfo(relation);
2445  return;
2446  }
2447 
2448  /* Mark it invalid until we've finished rebuild */
2449  relation->rd_isvalid = false;
2450 
2451  /*
2452  * If we're really done with the relcache entry, blow it away. But if
2453  * someone is still using it, reconstruct the whole deal without moving
2454  * the physical RelationData record (so that the someone's pointer is
2455  * still valid).
2456  */
2457  if (!rebuild)
2458  {
2459  /* Remove it from the hash table */
2460  RelationCacheDelete(relation);
2461 
2462  /* And release storage */
2463  RelationDestroyRelation(relation, false);
2464  }
2465  else if (!IsTransactionState())
2466  {
2467  /*
2468  * If we're not inside a valid transaction, we can't do any catalog
2469  * access so it's not possible to rebuild yet. Just exit, leaving
2470  * rd_isvalid = false so that the rebuild will occur when the entry is
2471  * next opened.
2472  *
2473  * Note: it's possible that we come here during subtransaction abort,
2474  * and the reason for wanting to rebuild is that the rel is open in
2475  * the outer transaction. In that case it might seem unsafe to not
2476  * rebuild immediately, since whatever code has the rel already open
2477  * will keep on using the relcache entry as-is. However, in such a
2478  * case the outer transaction should be holding a lock that's
2479  * sufficient to prevent any significant change in the rel's schema,
2480  * so the existing entry contents should be good enough for its
2481  * purposes; at worst we might be behind on statistics updates or the
2482  * like. (See also CheckTableNotInUse() and its callers.) These same
2483  * remarks also apply to the cases above where we exit without having
2484  * done RelationReloadIndexInfo() yet.
2485  */
2486  return;
2487  }
2488  else
2489  {
2490  /*
2491  * Our strategy for rebuilding an open relcache entry is to build a
2492  * new entry from scratch, swap its contents with the old entry, and
2493  * finally delete the new entry (along with any infrastructure swapped
2494  * over from the old entry). This is to avoid trouble in case an
2495  * error causes us to lose control partway through. The old entry
2496  * will still be marked !rd_isvalid, so we'll try to rebuild it again
2497  * on next access. Meanwhile it's not any less valid than it was
2498  * before, so any code that might expect to continue accessing it
2499  * isn't hurt by the rebuild failure. (Consider for example a
2500  * subtransaction that ALTERs a table and then gets canceled partway
2501  * through the cache entry rebuild. The outer transaction should
2502  * still see the not-modified cache entry as valid.) The worst
2503  * consequence of an error is leaking the necessarily-unreferenced new
2504  * entry, and this shouldn't happen often enough for that to be a big
2505  * problem.
2506  *
2507  * When rebuilding an open relcache entry, we must preserve ref count,
2508  * rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state. Also
2509  * attempt to preserve the pg_class entry (rd_rel), tupledesc,
2510  * rewrite-rule, partition key, and partition descriptor substructures
2511  * in place, because various places assume that these structures won't
2512  * move while they are working with an open relcache entry. (Note:
2513  * the refcount mechanism for tupledescs might someday allow us to
2514  * remove this hack for the tupledesc.)
2515  *
2516  * Note that this process does not touch CurrentResourceOwner; which
2517  * is good because whatever ref counts the entry may have do not
2518  * necessarily belong to that resource owner.
2519  */
2520  Relation newrel;
2521  Oid save_relid = RelationGetRelid(relation);
2522  bool keep_tupdesc;
2523  bool keep_rules;
2524  bool keep_policies;
2525  bool keep_partkey;
2526 
2527  /* Build temporary entry, but don't link it into hashtable */
2528  newrel = RelationBuildDesc(save_relid, false);
2529  if (newrel == NULL)
2530  {
2531  /*
2532  * We can validly get here, if we're using a historic snapshot in
2533  * which a relation, accessed from outside logical decoding, is
2534  * still invisible. In that case it's fine to just mark the
2535  * relation as invalid and return - it'll fully get reloaded by
2536  * the cache reset at the end of logical decoding (or at the next
2537  * access). During normal processing we don't want to ignore this
2538  * case as it shouldn't happen there, as explained below.
2539  */
2540  if (HistoricSnapshotActive())
2541  return;
2542 
2543  /*
2544  * This shouldn't happen as dropping a relation is intended to be
2545  * impossible if still referenced (cf. CheckTableNotInUse()). But
2546  * if we get here anyway, we can't just delete the relcache entry,
2547  * as it possibly could get accessed later (as e.g. the error
2548  * might get trapped and handled via a subtransaction rollback).
2549  */
2550  elog(ERROR, "relation %u deleted while still in use", save_relid);
2551  }
2552 
2553  keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
2554  keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
2555  keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc);
2556  /* partkey is immutable once set up, so we can always keep it */
2557  keep_partkey = (relation->rd_partkey != NULL);
2558 
2559  /*
2560  * Perform swapping of the relcache entry contents. Within this
2561  * process the old entry is momentarily invalid, so there *must* be no
2562  * possibility of CHECK_FOR_INTERRUPTS within this sequence. Do it in
2563  * all-in-line code for safety.
2564  *
2565  * Since the vast majority of fields should be swapped, our method is
2566  * to swap the whole structures and then re-swap those few fields we
2567  * didn't want swapped.
2568  */
2569 #define SWAPFIELD(fldtype, fldname) \
2570  do { \
2571  fldtype _tmp = newrel->fldname; \
2572  newrel->fldname = relation->fldname; \
2573  relation->fldname = _tmp; \
2574  } while (0)
2575 
2576  /* swap all Relation struct fields */
2577  {
2578  RelationData tmpstruct;
2579 
2580  memcpy(&tmpstruct, newrel, sizeof(RelationData));
2581  memcpy(newrel, relation, sizeof(RelationData));
2582  memcpy(relation, &tmpstruct, sizeof(RelationData));
2583  }
2584 
2585  /* rd_smgr must not be swapped, due to back-links from smgr level */
2586  SWAPFIELD(SMgrRelation, rd_smgr);
2587  /* rd_refcnt must be preserved */
2588  SWAPFIELD(int, rd_refcnt);
2589  /* isnailed shouldn't change */
2590  Assert(newrel->rd_isnailed == relation->rd_isnailed);
2591  /* creation sub-XIDs must be preserved */
2592  SWAPFIELD(SubTransactionId, rd_createSubid);
2593  SWAPFIELD(SubTransactionId, rd_newRelfilenodeSubid);
2594  /* un-swap rd_rel pointers, swap contents instead */
2595  SWAPFIELD(Form_pg_class, rd_rel);
2596  /* ... but actually, we don't have to update newrel->rd_rel */
2597  memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
2598  /* preserve old tupledesc, rules, policies if no logical change */
2599  if (keep_tupdesc)
2600  SWAPFIELD(TupleDesc, rd_att);
2601  if (keep_rules)
2602  {
2603  SWAPFIELD(RuleLock *, rd_rules);
2604  SWAPFIELD(MemoryContext, rd_rulescxt);
2605  }
2606  if (keep_policies)
2607  SWAPFIELD(RowSecurityDesc *, rd_rsdesc);
2608  /* toast OID override must be preserved */
2609  SWAPFIELD(Oid, rd_toastoid);
2610  /* pgstat_info must be preserved */
2611  SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
2612  /* preserve old partition key if we have one */
2613  if (keep_partkey)
2614  {
2615  SWAPFIELD(PartitionKey, rd_partkey);
2616  SWAPFIELD(MemoryContext, rd_partkeycxt);
2617  }
2618  if (newrel->rd_pdcxt != NULL)
2619  {
2620  /*
2621  * We are rebuilding a partitioned relation with a non-zero
2622  * reference count, so we must keep the old partition descriptor
2623  * around, in case there's a PartitionDirectory with a pointer to
2624  * it. This means we can't free the old rd_pdcxt yet. (This is
2625  * necessary because RelationGetPartitionDesc hands out direct
2626  * pointers to the relcache's data structure, unlike our usual
2627  * practice which is to hand out copies. We'd have the same
2628  * problem with rd_partkey, except that we always preserve that
2629  * once created.)
2630  *
2631  * To ensure that it's not leaked completely, re-attach it to the
2632  * new reldesc, or make it a child of the new reldesc's rd_pdcxt
2633  * in the unlikely event that there is one already. (Compare hack
2634  * in RelationBuildPartitionDesc.) RelationClose will clean up
2635  * any such contexts once the reference count reaches zero.
2636  *
2637  * In the case where the reference count is zero, this code is not
2638  * reached, which should be OK because in that case there should
2639  * be no PartitionDirectory with a pointer to the old entry.
2640  *
2641  * Note that newrel and relation have already been swapped, so the
2642  * "old" partition descriptor is actually the one hanging off of
2643  * newrel.
2644  */
2645  relation->rd_partdesc = NULL; /* ensure rd_partdesc is invalid */
2646  if (relation->rd_pdcxt != NULL) /* probably never happens */
2647  MemoryContextSetParent(newrel->rd_pdcxt, relation->rd_pdcxt);
2648  else
2649  relation->rd_pdcxt = newrel->rd_pdcxt;
2650  /* drop newrel's pointers so we don't destroy it below */
2651  newrel->rd_partdesc = NULL;
2652  newrel->rd_pdcxt = NULL;
2653  }
2654 
2655 #undef SWAPFIELD
2656 
2657  /* And now we can throw away the temporary entry */
2658  RelationDestroyRelation(newrel, !keep_tupdesc);
2659  }
2660 }
2661 
2662 /*
2663  * RelationFlushRelation
2664  *
2665  * Rebuild the relation if it is open (refcount > 0), else blow it away.
2666  * This is used when we receive a cache invalidation event for the rel.
2667  */
2668 static void
2670 {
2671  if (relation->rd_createSubid != InvalidSubTransactionId ||
2673  {
2674  /*
2675  * New relcache entries are always rebuilt, not flushed; else we'd
2676  * forget the "new" status of the relation, which is a useful
2677  * optimization to have. Ditto for the new-relfilenode status.
2678  *
2679  * The rel could have zero refcnt here, so temporarily increment the
2680  * refcnt to ensure it's safe to rebuild it. We can assume that the
2681  * current transaction has some lock on the rel already.
2682  */
2684  RelationClearRelation(relation, true);
2686  }
2687  else
2688  {
2689  /*
2690  * Pre-existing rels can be dropped from the relcache if not open.
2691  */
2692  bool rebuild = !RelationHasReferenceCountZero(relation);
2693 
2694  RelationClearRelation(relation, rebuild);
2695  }
2696 }
2697 
2698 /*
2699  * RelationForgetRelation - unconditionally remove a relcache entry
2700  *
2701  * External interface for destroying a relcache entry when we
2702  * drop the relation.
2703  */
2704 void
2706 {
2707  Relation relation;
2708 
2709  RelationIdCacheLookup(rid, relation);
2710 
2711  if (!PointerIsValid(relation))
2712  return; /* not in cache, nothing to do */
2713 
2714  if (!RelationHasReferenceCountZero(relation))
2715  elog(ERROR, "relation %u is still open", rid);
2716 
2717  /* Unconditionally destroy the relcache entry */
2718  RelationClearRelation(relation, false);
2719 }
2720 
2721 /*
2722  * RelationCacheInvalidateEntry
2723  *
2724  * This routine is invoked for SI cache flush messages.
2725  *
2726  * Any relcache entry matching the relid must be flushed. (Note: caller has
2727  * already determined that the relid belongs to our database or is a shared
2728  * relation.)
2729  *
2730  * We used to skip local relations, on the grounds that they could
2731  * not be targets of cross-backend SI update messages; but it seems
2732  * safer to process them, so that our *own* SI update messages will
2733  * have the same effects during CommandCounterIncrement for both
2734  * local and nonlocal relations.
2735  */
2736 void
2738 {
2739  Relation relation;
2740 
2741  RelationIdCacheLookup(relationId, relation);
2742 
2743  if (PointerIsValid(relation))
2744  {
2746  RelationFlushRelation(relation);
2747  }
2748 }
2749 
2750 /*
2751  * RelationCacheInvalidate
2752  * Blow away cached relation descriptors that have zero reference counts,
2753  * and rebuild those with positive reference counts. Also reset the smgr
2754  * relation cache and re-read relation mapping data.
2755  *
2756  * This is currently used only to recover from SI message buffer overflow,
2757  * so we do not touch new-in-transaction relations; they cannot be targets
2758  * of cross-backend SI updates (and our own updates now go through a
2759  * separate linked list that isn't limited by the SI message buffer size).
2760  * Likewise, we need not discard new-relfilenode-in-transaction hints,
2761  * since any invalidation of those would be a local event.
2762  *
2763  * We do this in two phases: the first pass deletes deletable items, and
2764  * the second one rebuilds the rebuildable items. This is essential for
2765  * safety, because hash_seq_search only copes with concurrent deletion of
2766  * the element it is currently visiting. If a second SI overflow were to
2767  * occur while we are walking the table, resulting in recursive entry to
2768  * this routine, we could crash because the inner invocation blows away
2769  * the entry next to be visited by the outer scan. But this way is OK,
2770  * because (a) during the first pass we won't process any more SI messages,
2771  * so hash_seq_search will complete safely; (b) during the second pass we
2772  * only hold onto pointers to nondeletable entries.
2773  *
2774  * The two-phase approach also makes it easy to update relfilenodes for
2775  * mapped relations before we do anything else, and to ensure that the
2776  * second pass processes nailed-in-cache items before other nondeletable
2777  * items. This should ensure that system catalogs are up to date before
2778  * we attempt to use them to reload information about other open relations.
2779  */
2780 void
2782 {
/*
 * Overview of the visible body: phase 1 walks every entry of
 * RelationIdCache, closes its smgr reference, and then either drops the
 * entry at once (reference count zero) or queues it on rebuildFirstList
 * or rebuildList. After smgrcloseall(), phase 2 rebuilds the queued
 * entries, processing rebuildFirstList before rebuildList.
 *
 * NOTE(review): this listing omits some lines of the function: the
 * name/parameter line between "void" and "{", the declaration of "status"
 * (used by hash_seq_init() below), the statement following the "Reload
 * relation mapping data" comment, the second operand of the "||" test,
 * and one statement just after the "continue". Confirm them against the
 * original file before relying on this text.
 */
2784  RelIdCacheEnt *idhentry;
2785  Relation relation;
2786  List *rebuildFirstList = NIL;
2787  List *rebuildList = NIL;
2788  ListCell *l;
2789 
2790  /*
2791  * Reload relation mapping data before starting to reconstruct cache.
2792  */
2794 
2795  /* Phase 1: scan the whole cache; drop or queue each entry */
2796  hash_seq_init(&status, RelationIdCache);
2797 
2798  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2799  {
2800  relation = idhentry->reldesc;
2801 
2802  /* Must close all smgr references to avoid leaving dangling ptrs */
2803  RelationCloseSmgr(relation);
2804 
2805  /*
2806  * Ignore new relations; no other backend will manipulate them before
2807  * we commit. Likewise, before replacing a relation's relfilenode, we
2808  * shall have acquired AccessExclusiveLock and drained any applicable
2809  * pending invalidations.
2810  */
2811  if (relation->rd_createSubid != InvalidSubTransactionId ||
2813  continue;
2814 
2816 
2817  if (RelationHasReferenceCountZero(relation))
2818  {
2819  /* Delete this entry immediately */
2820  Assert(!relation->rd_isnailed);
2821  RelationClearRelation(relation, false);
2822  }
2823  else
2824  {
2825  /*
2826  * If it's a mapped relation, immediately update its rd_node in
2827  * case its relfilenode changed. We must do this during phase 1
2828  * in case the relation is consulted during rebuild of other
2829  * relcache entries in phase 2. It's safe since consulting the
2830  * map doesn't involve any access to relcache entries.
2831  */
2832  if (RelationIsMapped(relation))
2833  RelationInitPhysicalAddr(relation);
2834 
2835  /*
2836  * Add this entry to list of stuff to rebuild in second pass.
2837  * pg_class goes to the front of rebuildFirstList while
2838  * pg_class_oid_index goes to the back of rebuildFirstList, so
2839  * they are done first and second respectively. Other nailed
2840  * relations go to the front of rebuildList, so they'll be done
2841  * next in no particular order; and everything else goes to the
2842  * back of rebuildList.
2843  */
2844  if (RelationGetRelid(relation) == RelationRelationId)
2845  rebuildFirstList = lcons(relation, rebuildFirstList);
2846  else if (RelationGetRelid(relation) == ClassOidIndexId)
2847  rebuildFirstList = lappend(rebuildFirstList, relation);
2848  else if (relation->rd_isnailed)
2849  rebuildList = lcons(relation, rebuildList);
2850  else
2851  rebuildList = lappend(rebuildList, relation);
2852  }
2853  }
2854 
2855  /*
2856  * Now zap any remaining smgr cache entries. This must happen before we
2857  * start to rebuild entries, since that may involve catalog fetches which
2858  * will re-open catalog files.
2859  */
2860  smgrcloseall();
2861 
2862  /* Phase 2: rebuild the items found to need rebuild in phase 1 */
2863  foreach(l, rebuildFirstList)
2864  {
2865  relation = (Relation) lfirst(l);
2866  RelationClearRelation(relation, true);
2867  }
2868  list_free(rebuildFirstList);
2869  foreach(l, rebuildList)
2870  {
2871  relation = (Relation) lfirst(l);
2872  RelationClearRelation(relation, true);
2873  }
2874  list_free(rebuildList);
2875 }
2876 
2877 /*
2878  * RelationCloseSmgrByOid - close a relcache entry's smgr link
2879  *
2880  * Needed in some cases where we are changing a relation's physical mapping.
2881  * The link will be automatically reopened on next use.
 *
 * relationId is looked up in the relcache; when no entry is found the
 * call returns without doing anything.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * omitted from this listing; the name is taken from this header comment.
2882  */
2883 void
2885 {
2886  Relation relation;
2887 
 /*
  * "relation" is passed by name and is not assigned anywhere else before
  * being tested, so RelationIdCacheLookup presumably is a macro that
  * stores the lookup result into it -- confirm against its definition.
  */
2888  RelationIdCacheLookup(relationId, relation);
2889 
2890  if (!PointerIsValid(relation))
2891  return; /* not in cache, nothing to do */
2892 
2893  RelationCloseSmgr(relation);
2894 }
2895 
/*
 * Append the tuple descriptor "td" to EOXactTupleDescArray.
 *
 * AtEOXact_RelationCache() later calls FreeTupleDesc() on every saved
 * descriptor and then frees the array. The array is created on first use
 * with room for 16 entries; the second branch doubles it with repalloc().
 *
 * NOTE(review): this listing omits several lines of the function: the
 * name/parameter line, the statement that assigns oldcxt, two statements
 * after the palloc() call, the condition introducing the second branch,
 * and one statement after the declaration of newlen. The description above
 * covers only the visible code.
 */
2896 static void
2898 {
2899  if (EOXactTupleDescArray == NULL)
2900  {
2901  MemoryContext oldcxt;
2902 
2904 
2905  EOXactTupleDescArray = (TupleDesc *) palloc(16 * sizeof(TupleDesc));
2908  MemoryContextSwitchTo(oldcxt);
2909  }
2911  {
2912  int32 newlen = EOXactTupleDescArrayLen * 2;
2913 
2915 
2916  EOXactTupleDescArray = (TupleDesc *) repalloc(EOXactTupleDescArray,
2917  newlen * sizeof(TupleDesc));
2918  EOXactTupleDescArrayLen = newlen;
2919  }
2920 
 /* store td in the next free slot and advance the slot counter */
2921  EOXactTupleDescArray[NextEOXactTupleDescNum++] = td;
2922 }
2923 
2924 /*
2925  * AtEOXact_RelationCache
2926  *
2927  * Clean up the relcache at main-transaction commit or abort.
2928  *
2929  * Note: this must be called *before* processing invalidation messages.
2930  * In the case of abort, we don't want to try to rebuild any invalidated
2931  * cache entries (since we can't safely do database accesses). Therefore
2932  * we must reset refcnts before handling pending invalidations.
2933  *
2934  * As of PostgreSQL 8.1, relcache refcnts should get released by the
2935  * ResourceOwner mechanism. This routine just does a debugging
2936  * cross-check that no pins remain. However, we also need to do special
2937  * cleanup when the current transaction created any relations or made use
2938  * of forced index lists.
 *
 * isCommit is handed unchanged to AtEOXact_cleanup() for every relation
 * examined. After the per-relation pass, any tuple descriptors saved in
 * EOXactTupleDescArray are freed and the eoxact list state is reset.
 *
 * NOTE(review): this listing omits the function name/parameter line, the
 * declaration of "status", the condition selecting the full hash scan, and
 * two statements just before the closing brace.
2939  */
2940 void
2942 {
2944  RelIdCacheEnt *idhentry;
2945  int i;
2946 
2947  /*
2948  * Unless the eoxact_list[] overflowed, we only need to examine the rels
2949  * listed in it. Otherwise fall back on a hash_seq_search scan.
2950  *
2951  * For simplicity, eoxact_list[] entries are not deleted till end of
2952  * top-level transaction, even though we could remove them at
2953  * subtransaction end in some cases, or remove relations from the list if
2954  * they are cleared for other reasons. Therefore we should expect the
2955  * case that list entries are not found in the hashtable; if not, there's
2956  * nothing to do for them.
2957  */
2959  {
2960  hash_seq_init(&status, RelationIdCache);
2961  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2962  {
2963  AtEOXact_cleanup(idhentry->reldesc, isCommit);
2964  }
2965  }
2966  else
2967  {
2968  for (i = 0; i < eoxact_list_len; i++)
2969  {
2970  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
2971  (void *) &eoxact_list[i],
2972  HASH_FIND,
2973  NULL);
2974  if (idhentry != NULL)
2975  AtEOXact_cleanup(idhentry->reldesc, isCommit);
2976  }
2977  }
2978 
 /* Free every saved tuple descriptor, then the array that held them */
2979  if (EOXactTupleDescArrayLen > 0)
2980  {
2981  Assert(EOXactTupleDescArray != NULL);
2982  for (i = 0; i < NextEOXactTupleDescNum; i++)
2983  FreeTupleDesc(EOXactTupleDescArray[i]);
2984  pfree(EOXactTupleDescArray);
2985  EOXactTupleDescArray = NULL;
2986  }
2987 
2988  /* Now we're out of the transaction and can clear the lists */
2989  eoxact_list_len = 0;
2990  eoxact_list_overflowed = false;
2993 }
2994 
2995 /*
2996  * AtEOXact_cleanup
2997  *
2998  * Clean up a single rel at main-transaction commit or abort
2999  *
3000  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
3001  * bother to prevent duplicate entries in eoxact_list[].
 *
 * "relation" is the relcache entry to process; "isCommit" selects the
 * commit path (true) or the abort path (false). On abort of a relation
 * created in this transaction whose reference count is zero, the entry is
 * removed with RelationClearRelation() and the function returns early.
 *
 * NOTE(review): this listing omits a few lines of the body: the condition
 * that precedes the assert-checking block, the statement executed when
 * isCommit is true, one statement before the WARNING, and the statement
 * following the final "Likewise" comment.
3002  */
3003 static void
3004 AtEOXact_cleanup(Relation relation, bool isCommit)
3005 {
3006  /*
3007  * The relcache entry's ref count should be back to its normal
3008  * not-in-a-transaction state: 0 unless it's nailed in cache.
3009  *
3010  * In bootstrap mode, this is NOT true, so don't check it --- the
3011  * bootstrap code expects relations to stay open across start/commit
3012  * transaction calls. (That seems bogus, but it's not worth fixing.)
3013  *
3014  * Note: ideally this check would be applied to every relcache entry, not
3015  * just those that have eoxact work to do. But it's not worth forcing a
3016  * scan of the whole relcache just for this. (Moreover, doing so would
3017  * mean that assert-enabled testing never tests the hash_search code path
3018  * above, which seems a bad idea.)
3019  */
3020 #ifdef USE_ASSERT_CHECKING
3022  {
3023  int expected_refcnt;
3024 
3025  expected_refcnt = relation->rd_isnailed ? 1 : 0;
3026  Assert(relation->rd_refcnt == expected_refcnt);
3027  }
3028 #endif
3029 
3030  /*
3031  * Is it a relation created in the current transaction?
3032  *
3033  * During commit, reset the flag to zero, since we are now out of the
3034  * creating transaction. During abort, simply delete the relcache entry
3035  * --- it isn't interesting any longer. (NOTE: if we have forgotten the
3036  * new-ness of a new relation due to a forced cache flush, the entry will
3037  * get deleted anyway by shared-cache-inval processing of the aborted
3038  * pg_class insertion.)
3039  */
3040  if (relation->rd_createSubid != InvalidSubTransactionId)
3041  {
3042  if (isCommit)
3044  else if (RelationHasReferenceCountZero(relation))
3045  {
3046  RelationClearRelation(relation, false);
3047  return;
3048  }
3049  else
3050  {
3051  /*
3052  * Hmm, somewhere there's a (leaked?) reference to the relation.
3053  * We daren't remove the entry for fear of dereferencing a
3054  * dangling pointer later. Bleat, and mark it as not belonging to
3055  * the current transaction. Hopefully it'll get cleaned up
3056  * eventually. This must be just a WARNING to avoid
3057  * error-during-error-recovery loops.
3058  */
3060  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3061  RelationGetRelationName(relation));
3062  }
3063  }
3064 
3065  /*
3066  * Likewise, reset the hint about the relfilenode being new.
3067  */
3069 }
3070 
3071 /*
3072  * AtEOSubXact_RelationCache
3073  *
3074  * Clean up the relcache at sub-transaction commit or abort.
3075  *
3076  * Note: this must be called *before* processing invalidation messages.
 *
 * isCommit, mySubid and parentSubid are handed unchanged to
 * AtEOSubXact_cleanup() for every relation examined. Unlike the
 * main-transaction variant, the eoxact list is left untouched on exit.
 *
 * NOTE(review): this listing omits the first line of the parameter list
 * (including the function name), the declaration of "status", and the
 * condition selecting the full hash scan.
3077  */
3078 void
3080  SubTransactionId parentSubid)
3081 {
3083  RelIdCacheEnt *idhentry;
3084  int i;
3085 
3086  /*
3087  * Unless the eoxact_list[] overflowed, we only need to examine the rels
3088  * listed in it. Otherwise fall back on a hash_seq_search scan. Same
3089  * logic as in AtEOXact_RelationCache.
3090  */
3092  {
3093  hash_seq_init(&status, RelationIdCache);
3094  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3095  {
3096  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3097  mySubid, parentSubid);
3098  }
3099  }
3100  else
3101  {
3102  for (i = 0; i < eoxact_list_len; i++)
3103  {
 /* entries no longer present in the hash table are simply skipped */
3104  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
3105  (void *) &eoxact_list[i],
3106  HASH_FIND,
3107  NULL);
3108  if (idhentry != NULL)
3109  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3110  mySubid, parentSubid);
3111  }
3112  }
3113 
3114  /* Don't reset the list; we still need more cleanup later */
3115 }
3116 
3117 /*
3118  * AtEOSubXact_cleanup
3119  *
3120  * Clean up a single rel at subtransaction commit or abort
3121  *
3122  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
3123  * bother to prevent duplicate entries in eoxact_list[].
 *
 * "relation" is the relcache entry to process. Only fields equal to
 * "mySubid" are touched: on commit they are re-labelled with
 * "parentSubid"; on abort a relation created in this subtransaction is
 * removed if its reference count is zero (the function then returns
 * early), otherwise it is handed to the parent with a WARNING.
 *
 * NOTE(review): the statement under the final "else" (the abort case for
 * rd_newRelfilenodeSubid) is omitted from this listing.
3124  */
3125 static void
3126 AtEOSubXact_cleanup(Relation relation, bool isCommit,
3127  SubTransactionId mySubid, SubTransactionId parentSubid)
3128 {
3129  /*
3130  * Is it a relation created in the current subtransaction?
3131  *
3132  * During subcommit, mark it as belonging to the parent, instead. During
3133  * subabort, simply delete the relcache entry.
3134  */
3135  if (relation->rd_createSubid == mySubid)
3136  {
3137  if (isCommit)
3138  relation->rd_createSubid = parentSubid;
3139  else if (RelationHasReferenceCountZero(relation))
3140  {
3141  RelationClearRelation(relation, false);
3142  return;
3143  }
3144  else
3145  {
3146  /*
3147  * Hmm, somewhere there's a (leaked?) reference to the relation.
3148  * We daren't remove the entry for fear of dereferencing a
3149  * dangling pointer later. Bleat, and transfer it to the parent
3150  * subtransaction so we can try again later. This must be just a
3151  * WARNING to avoid error-during-error-recovery loops.
3152  */
3153  relation->rd_createSubid = parentSubid;
3154  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3155  RelationGetRelationName(relation));
3156  }
3157  }
3158 
3159  /*
3160  * Likewise, update or drop any new-relfilenode-in-subtransaction hint.
3161  */
3162  if (relation->rd_newRelfilenodeSubid == mySubid)
3163  {
3164  if (isCommit)
3165  relation->rd_newRelfilenodeSubid = parentSubid;
3166  else
3168  }
3169 }
3170 
3171 
3172 /*
3173  * RelationBuildLocalRelation
3174  * Build a relcache entry for an about-to-be-created relation,
3175  * and enter it into the relcache.
 *
 * The entry is allocated zeroed with palloc0(), given its own copy of
 * tupDesc (the identity, generated and not-null column flags are copied
 * across explicitly), filled in from the remaining arguments, inserted
 * into the relcache hash table, and registered with EOXactListAdd() for
 * end-of-transaction cleanup. The new entry is returned.
 *
 * An ERROR is raised if shared_relation disagrees with
 * IsSharedRelation(relid), or if relpersistence is not one of the values
 * handled by the switch below.
 *
 * NOTE(review): this listing omits some lines of the function, among them
 * the name and first parameter ("relname" is used below but not declared
 * in the visible parameter list), the assignment of oldcxt, and the
 * statements that should follow several of the comments. The summary
 * above covers only the visible code.
3176  */
3177 Relation
3179  Oid relnamespace,
3180  TupleDesc tupDesc,
3181  Oid relid,
3182  Oid accessmtd,
3183  Oid relfilenode,
3184  Oid reltablespace,
3185  bool shared_relation,
3186  bool mapped_relation,
3187  char relpersistence,
3188  char relkind)
3189 {
3190  Relation rel;
3191  MemoryContext oldcxt;
3192  int natts = tupDesc->natts;
3193  int i;
3194  bool has_not_null;
3195  bool nailit;
3196 
3197  AssertArg(natts >= 0);
3198 
3199  /*
3200  * check for creation of a rel that must be nailed in cache.
3201  *
3202  * XXX this list had better match the relations specially handled in
3203  * RelationCacheInitializePhase2/3.
3204  */
3205  switch (relid)
3206  {
3207  case DatabaseRelationId:
3208  case AuthIdRelationId:
3209  case AuthMemRelationId:
3210  case RelationRelationId:
3211  case AttributeRelationId:
3212  case ProcedureRelationId:
3213  case TypeRelationId:
3214  nailit = true;
3215  break;
3216  default:
3217  nailit = false;
3218  break;
3219  }
3220 
3221  /*
3222  * check that hardwired list of shared rels matches what's in the
3223  * bootstrap .bki file. If you get a failure here during initdb, you
3224  * probably need to fix IsSharedRelation() to match whatever you've done
3225  * to the set of shared relations.
3226  */
3227  if (shared_relation != IsSharedRelation(relid))
3228  elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)",
3229  relname, relid);
3230 
3231  /* Shared relations had better be mapped, too */
3232  Assert(mapped_relation || !shared_relation);
3233 
3234  /*
3235  * switch to the cache context to create the relcache entry.
3236  */
3237  if (!CacheMemoryContext)
3239 
3241 
3242  /*
3243  * allocate a new relation descriptor and fill in basic state fields.
3244  */
3245  rel = (Relation) palloc0(sizeof(RelationData));
3246 
3247  /* make sure relation is marked as having no open file yet */
3248  rel->rd_smgr = NULL;
3249 
3250  /* mark it nailed if appropriate */
3251  rel->rd_isnailed = nailit;
3252 
 /* a nailed entry starts with a reference count of 1, others with 0 */
3253  rel->rd_refcnt = nailit ? 1 : 0;
3254 
3255  /* it's being created in this transaction */
3258 
3259  /*
3260  * create a new tuple descriptor from the one passed in. We do this
3261  * partly to copy it into the cache context, and partly because the new
3262  * relation can't have any defaults or constraints yet; they have to be
3263  * added in later steps, because they require additions to multiple system
3264  * catalogs. We can copy attnotnull constraints here, however.
3265  */
3266  rel->rd_att = CreateTupleDescCopy(tupDesc);
3267  rel->rd_att->tdrefcount = 1; /* mark as refcounted */
3268  has_not_null = false;
3269  for (i = 0; i < natts; i++)
3270  {
3271  Form_pg_attribute satt = TupleDescAttr(tupDesc, i);
3272  Form_pg_attribute datt = TupleDescAttr(rel->rd_att, i);
3273 
3274  datt->attidentity = satt->attidentity;
3275  datt->attgenerated = satt->attgenerated;
3276  datt->attnotnull = satt->attnotnull;
3277  has_not_null |= satt->attnotnull;
3278  }
3279 
 /* attach a constraint record only when some column is NOT NULL */
3280  if (has_not_null)
3281  {
3282  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
3283 
3284  constr->has_not_null = true;
3285  rel->rd_att->constr = constr;
3286  }
3287 
3288  /*
3289  * initialize relation tuple form (caller may add/override data later)
3290  */
3292 
3293  namestrcpy(&rel->rd_rel->relname, relname);
3294  rel->rd_rel->relnamespace = relnamespace;
3295 
3296  rel->rd_rel->relkind = relkind;
3297  rel->rd_rel->relnatts = natts;
3298  rel->rd_rel->reltype = InvalidOid;
3299  /* needed when bootstrapping: */
3300  rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;
3301 
3302  /* set up persistence and relcache fields dependent on it */
3303  rel->rd_rel->relpersistence = relpersistence;
3304  switch (relpersistence)
3305  {
3306  case RELPERSISTENCE_UNLOGGED:
3307  case RELPERSISTENCE_PERMANENT:
3309  rel->rd_islocaltemp = false;
3310  break;
3311  case RELPERSISTENCE_TEMP:
3312  Assert(isTempOrTempToastNamespace(relnamespace));
3314  rel->rd_islocaltemp = true;
3315  break;
3316  default:
3317  elog(ERROR, "invalid relpersistence: %c", relpersistence);
3318  break;
3319  }
3320 
3321  /* if it's a materialized view, it's not populated initially */
3322  if (relkind == RELKIND_MATVIEW)
3323  rel->rd_rel->relispopulated = false;
3324  else
3325  rel->rd_rel->relispopulated = true;
3326 
3327  /* set replica identity -- system catalogs and non-tables don't have one */
3328  if (!IsCatalogNamespace(relnamespace) &&
3329  (relkind == RELKIND_RELATION ||
3330  relkind == RELKIND_MATVIEW ||
3331  relkind == RELKIND_PARTITIONED_TABLE))
3332  rel->rd_rel->relreplident = REPLICA_IDENTITY_DEFAULT;
3333  else
3334  rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
3335 
3336  /*
3337  * Insert relation physical and logical identifiers (OIDs) into the right
3338  * places. For a mapped relation, we set relfilenode to zero and rely on
3339  * RelationInitPhysicalAddr to consult the map.
3340  */
3341  rel->rd_rel->relisshared = shared_relation;
3342 
 /*
  * RelationGetRelid() appears on the left of an assignment here, so it
  * must expand to an assignable expression (evidently a macro).
  */
3343  RelationGetRelid(rel) = relid;
3344 
3345  for (i = 0; i < natts; i++)
3346  TupleDescAttr(rel->rd_att, i)->attrelid = relid;
3347 
3348  rel->rd_rel->reltablespace = reltablespace;
3349 
3350  if (mapped_relation)
3351  {
3352  rel->rd_rel->relfilenode = InvalidOid;
3353  /* Add it to the active mapping information */
3354  RelationMapUpdateMap(relid, relfilenode, shared_relation, true);
3355  }
3356  else
3357  rel->rd_rel->relfilenode = relfilenode;
3358 
3359  RelationInitLockInfo(rel); /* see lmgr.c */
3360 
3362 
3363  rel->rd_rel->relam = accessmtd;
3364 
3365  if (relkind == RELKIND_RELATION ||
3366  relkind == RELKIND_SEQUENCE ||
3367  relkind == RELKIND_TOASTVALUE ||
3368  relkind == RELKIND_MATVIEW)
3370 
3371  /*
3372  * Okay to insert into the relcache hash table.
3373  *
3374  * Ordinarily, there should certainly not be an existing hash entry for
3375  * the same OID; but during bootstrap, when we create a "real" relcache
3376  * entry for one of the bootstrap relations, we'll be overwriting the
3377  * phony one created with formrdesc. So allow that to happen for nailed
3378  * rels.
3379  */
3380  RelationCacheInsert(rel, nailit);
3381 
3382  /*
3383  * Flag relation as needing eoxact cleanup (to clear rd_createSubid). We
3384  * can't do this before storing relid in it.
3385  */
3386  EOXactListAdd(rel);
3387 
3388  /*
3389  * done building relcache entry.
3390  */
3391  MemoryContextSwitchTo(oldcxt);
3392 
3393  /* It's fully valid */
3394  rel->rd_isvalid = true;
3395 
3396  /*
3397  * Caller expects us to pin the returned entry.
3398  */
3400 
3401  return rel;
3402 }
3403 
3404 
3405 /*
3406  * RelationSetNewRelfilenode
3407  *
3408  * Assign a new relfilenode (physical file name), and possibly a new
3409  * persistence setting, to the relation.
3410  *
3411  * This allows a full rewrite of the relation to be done with transactional
3412  * safety (since the filenode assignment can be rolled back). Note however
3413  * that there is no simple way to access the relation's old data for the
3414  * remainder of the current transaction. This limits the usefulness to cases
3415  * such as TRUNCATE or rebuilding an index from scratch.
3416  *
3417  * Caller must already hold exclusive lock on the relation.
 *
 * "relation" is the relation receiving new storage. "persistence" is
 * passed to GetNewRelFileNode() and to the storage-creation call, and in
 * the non-mapped case is also stored into pg_class.relpersistence.
 *
 * An ERROR is raised if the relation's pg_class tuple cannot be found or
 * if its relkind is not one of those handled by the switch below.
 *
 * NOTE(review): this listing omits a few lines of the body: the first line
 * of the call under "Do the deed", and the statements that should follow
 * the last two block comments before EOXactListAdd().
3418  */
3419 void
3420 RelationSetNewRelfilenode(Relation relation, char persistence)
3421 {
3422  Oid newrelfilenode;
3423  Relation pg_class;
3424  HeapTuple tuple;
3425  Form_pg_class classform;
3426  MultiXactId minmulti = InvalidMultiXactId;
3427  TransactionId freezeXid = InvalidTransactionId;
3428  RelFileNode newrnode;
3429 
3430  /* Allocate a new relfilenode */
3431  newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL,
3432  persistence);
3433 
3434  /*
3435  * Get a writable copy of the pg_class tuple for the given relation.
3436  */
3437  pg_class = table_open(RelationRelationId, RowExclusiveLock);
3438 
3439  tuple = SearchSysCacheCopy1(RELOID,
3440  ObjectIdGetDatum(RelationGetRelid(relation)));
3441  if (!HeapTupleIsValid(tuple))
3442  elog(ERROR, "could not find tuple for relation %u",
3443  RelationGetRelid(relation));
3444  classform = (Form_pg_class) GETSTRUCT(tuple);
3445 
3446  /*
3447  * Schedule unlinking of the old storage at transaction commit.
3448  */
3449  RelationDropStorage(relation);
3450 
3451  /*
3452  * Create storage for the main fork of the new relfilenode. If it's a
3453  * table-like object, call into the table AM to do so, which'll also
3454  * create the table's init fork if needed.
3455  *
3456  * NOTE: If relevant for the AM, any conflict in relfilenode value will be
3457  * caught here, if GetNewRelFileNode messes up for any reason.
3458  */
 /* start from the current physical address and swap in the new node */
3459  newrnode = relation->rd_node;
3460  newrnode.relNode = newrelfilenode;
3461 
3462  switch (relation->rd_rel->relkind)
3463  {
3464  case RELKIND_INDEX:
3465  case RELKIND_SEQUENCE:
3466  {
3467  /* handle these directly, at least for now */
3468  SMgrRelation srel;
3469 
3470  srel = RelationCreateStorage(newrnode, persistence);
3471  smgrclose(srel);
3472  }
3473  break;
3474 
3475  case RELKIND_RELATION:
3476  case RELKIND_TOASTVALUE:
3477  case RELKIND_MATVIEW:
3478  table_relation_set_new_filenode(relation, &newrnode,
3479  persistence,
3480  &freezeXid, &minmulti);
3481  break;
3482 
3483  default:
3484  /* we shouldn't be called for anything else */
3485  elog(ERROR, "relation \"%s\" does not have storage",
3486  RelationGetRelationName(relation));
3487  break;
3488  }
3489 
3490  /*
3491  * If we're dealing with a mapped index, pg_class.relfilenode doesn't
3492  * change; instead we have to send the update to the relation mapper.
3493  *
3494  * For mapped indexes, we don't actually change the pg_class entry at all;
3495  * this is essential when reindexing pg_class itself. That leaves us with
3496  * possibly-inaccurate values of relpages etc, but those will be fixed up
3497  * later.
3498  */
3499  if (RelationIsMapped(relation))
3500  {
3501  /* This case is only supported for indexes */
3502  Assert(relation->rd_rel->relkind == RELKIND_INDEX);
3503 
3504  /* Since we're not updating pg_class, these had better not change */
3505  Assert(classform->relfrozenxid == freezeXid);
3506  Assert(classform->relminmxid == minmulti);
3507  Assert(classform->relpersistence == persistence);
3508 
3509  /*
3510  * In some code paths it's possible that the tuple update we'd
3511  * otherwise do here is the only thing that would assign an XID for
3512  * the current transaction. However, we must have an XID to delete
3513  * files, so make sure one is assigned.
3514  */
3515  (void) GetCurrentTransactionId();
3516 
3517  /* Do the deed */
3519  newrelfilenode,
3520  relation->rd_rel->relisshared,
3521  false);
3522 
3523  /* Since we're not updating pg_class, must trigger inval manually */
3524  CacheInvalidateRelcache(relation);
3525  }
3526  else
3527  {
3528  /* Normal case, update the pg_class entry */
3529  classform->relfilenode = newrelfilenode;
3530 
3531  /* relpages etc. never change for sequences */
3532  if (relation->rd_rel->relkind != RELKIND_SEQUENCE)
3533  {
3534  classform->relpages = 0; /* it's empty until further notice */
3535  classform->reltuples = 0;
3536  classform->relallvisible = 0;
3537  }
3538  classform->relfrozenxid = freezeXid;
3539  classform->relminmxid = minmulti;
3540  classform->relpersistence = persistence;
3541 
3542  CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
3543  }
3544 
 /* release the private tuple copy obtained from SearchSysCacheCopy1() */
3545  heap_freetuple(tuple);
3546 
3547  table_close(pg_class, RowExclusiveLock);
3548 
3549  /*
3550  * Make the pg_class row change or relation map change visible. This will
3551  * cause the relcache entry to get updated, too.
3552  */
3554 
3555  /*
3556  * Mark the rel as having been given a new relfilenode in the current
3557  * (sub) transaction. This is a hint that can be used to optimize later
3558  * operations on the rel in the same transaction.
3559  */
3561 
3562  /* Flag relation as needing eoxact cleanup (to remove the hint) */
3563  EOXactListAdd(relation);
3564 }
3565 
3566 
3567 /*
3568  * RelationCacheInitialize
3569  *
3570  * This initializes the relation descriptor cache. At the time
3571  * that this is invoked, we can't do database access yet (mainly
3572  * because the transaction subsystem is not up); all we are doing
3573  * is making an empty cache hashtable. This must be done before
3574  * starting the initialization transaction, because otherwise
3575  * AtEOXact_RelationCache would crash if that transaction aborts
3576  * before we can get the relcache set up.
 *
 * The hash table is keyed by Oid, holds RelIdCacheEnt entries, and is
 * stored in RelationIdCache.
 *
 * NOTE(review): this listing omits the function name/parameter line, the
 * statement executed when CacheMemoryContext is not yet set, and the
 * statement following the "relation mapper" comment.
3577  */
3578 
/* passed as the second argument to hash_create() for the relcache table */
3579 #define INITRELCACHESIZE 400
3580 
3581 void
3583 {
3584  HASHCTL ctl;
3585 
3586  /*
3587  * make sure cache memory context exists
3588  */
3589  if (!CacheMemoryContext)
3591 
3592  /*
3593  * create hashtable that indexes the relcache
3594  */
3595  MemSet(&ctl, 0, sizeof(ctl));
3596  ctl.keysize = sizeof(Oid);
3597  ctl.entrysize = sizeof(RelIdCacheEnt);
3598  RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
3599  &ctl, HASH_ELEM | HASH_BLOBS);
3600 
3601  /*
3602  * relation mapper needs to be initialized too
3603  */
3605 }
3606 
3607 /*
3608  * RelationCacheInitializePhase2
3609  *
3610  * This is called to prepare for access to shared catalogs during startup.
3611  * We must at least set up nailed reldescs for pg_database, pg_authid,
3612  * pg_auth_members, pg_shseclabel, and pg_subscription. Ideally we'd like
3613  * to have reldescs for their indexes, too. We attempt to load this
3614  * information from the shared relcache init file. If that's missing or
3615  * broken, just make phony entries for the catalogs themselves.
3616  * RelationCacheInitializePhase3 will clean up as needed.
 *
 * NOTE(review): this listing omits the function name/parameter line, the
 * statement following the "relation mapper" comment, the condition that
 * guards the early return, and the statement that assigns oldcxt.
3617  */
3618 void
3620 {
3621  MemoryContext oldcxt;
3622 
3623  /*
3624  * relation mapper needs to be initialized too
3625  */
3627 
3628  /*
3629  * In bootstrap mode, the shared catalogs aren't there yet anyway, so do
3630  * nothing.
3631  */
3633  return;
3634 
3635  /*
3636  * switch to cache memory context
3637  */
3639 
3640  /*
3641  * Try to load the shared relcache cache file. If unsuccessful, bootstrap
3642  * the cache with pre-made descriptors for the critical shared catalogs.
3643  */
3644  if (!load_relcache_init_file(true))
3645  {
3646  formrdesc("pg_database", DatabaseRelation_Rowtype_Id, true,
3647  Natts_pg_database, Desc_pg_database);
3648  formrdesc("pg_authid", AuthIdRelation_Rowtype_Id, true,
3649  Natts_pg_authid, Desc_pg_authid);
3650  formrdesc("pg_auth_members", AuthMemRelation_Rowtype_Id, true,
3651  Natts_pg_auth_members, Desc_pg_auth_members);
3652  formrdesc("pg_shseclabel", SharedSecLabelRelation_Rowtype_Id, true,
3653  Natts_pg_shseclabel, Desc_pg_shseclabel);
3654  formrdesc("pg_subscription", SubscriptionRelation_Rowtype_Id, true,
3655  Natts_pg_subscription, Desc_pg_subscription);
3656 
3657 #define NUM_CRITICAL_SHARED_RELS 5 /* fix if you change list above */
3658  }
3659 
3660  MemoryContextSwitchTo(oldcxt);
3661 }
3662 
3663 /*
3664  * RelationCacheInitializePhase3
3665  *
3666  * This is called as soon as the catcache and transaction system
3667  * are functional and we have determined MyDatabaseId. At this point
3668  * we can actually read data from the database's system catalogs.
3669  * We first try to read pre-computed relcache entries from the local
3670  * relcache init file. If that's missing or broken, make phony entries
3671  * for the minimum set of nailed-in-cache relations. Then (unless
3672  * bootstrapping) make sure we have entries for the critical system
3673  * indexes. Once we've done all this, we have enough infrastructure to
3674  * open any system catalog or use any catcache. The last step is to
3675  * rewrite the cache files if needed.
3676  */
3677 void
3679 {
3681  RelIdCacheEnt *idhentry;
3682  MemoryContext oldcxt;
3683  bool needNewCacheFile = !criticalSharedRelcachesBuilt;
3684 
3685  /*
3686  * relation mapper needs initialized too
3687  */
3689 
3690  /*
3691  * switch to cache memory context
3692  */
3694 
3695  /*
3696  * Try to load the local relcache cache file. If unsuccessful, bootstrap
3697  * the cache with pre-made descriptors for the critical "nailed-in" system
3698  * catalogs.
3699  */
3700  if (IsBootstrapProcessingMode() ||
3701  !load_relcache_init_file(false))
3702  {
3703  needNewCacheFile = true;
3704 
3705  formrdesc("pg_class", RelationRelation_Rowtype_Id, false,
3706  Natts_pg_class, Desc_pg_class);
3707  formrdesc("pg_attribute", AttributeRelation_Rowtype_Id, false,
3708  Natts_pg_attribute, Desc_pg_attribute);
3709  formrdesc("pg_proc", ProcedureRelation_Rowtype_Id, false,
3710  Natts_pg_proc, Desc_pg_proc);
3711  formrdesc("pg_type", TypeRelation_Rowtype_Id, false,
3712  Natts_pg_type, Desc_pg_type);
3713 
3714 #define NUM_CRITICAL_LOCAL_RELS 4 /* fix if you change list above */
3715  }
3716 
3717  MemoryContextSwitchTo(oldcxt);
3718 
3719  /* In bootstrap mode, the faked-up formrdesc info is all we'll have */
3721  return;
3722 
3723  /*
3724  * If we didn't get the critical system indexes loaded into relcache, do
3725  * so now. These are critical because the catcache and/or opclass cache
3726  * depend on them for fetches done during relcache load. Thus, we have an
3727  * infinite-recursion problem. We can break the recursion by doing
3728  * heapscans instead of indexscans at certain key spots. To avoid hobbling
3729  * performance, we only want to do that until we have the critical indexes
3730  * loaded into relcache. Thus, the flag criticalRelcachesBuilt is used to
3731  * decide whether to do heapscan or indexscan at the key spots, and we set
3732  * it true after we've loaded the critical indexes.
3733  *
3734  * The critical indexes are marked as "nailed in cache", partly to make it
3735  * easy for load_relcache_init_file to count them, but mainly because we
3736  * cannot flush and rebuild them once we've set criticalRelcachesBuilt to
3737  * true. (NOTE: perhaps it would be possible to reload them by
3738  * temporarily setting criticalRelcachesBuilt to false again. For now,
3739  * though, we just nail 'em in.)
3740  *
3741  * RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical
3742  * in the same way as the others, because the critical catalogs don't
3743  * (currently) have any rules or triggers, and so these indexes can be
3744  * rebuilt without inducing recursion. However they are used during
3745  * relcache load when a rel does have rules or triggers, so we choose to
3746  * nail them for performance reasons.
3747  */
3749  {
3751  RelationRelationId);
3753  AttributeRelationId);
3755  IndexRelationId);
3757  OperatorClassRelationId);
3759  AccessMethodProcedureRelationId);
3761  RewriteRelationId);
3763  TriggerRelationId);
3764 
3765 #define NUM_CRITICAL_LOCAL_INDEXES 7 /* fix if you change list above */
3766 
3767  criticalRelcachesBuilt = true;
3768  }
3769 
3770  /*
3771  * Process critical shared indexes too.
3772  *
3773  * DatabaseNameIndexId isn't critical for relcache loading, but rather for
3774  * initial lookup of MyDatabaseId, without which we'll never find any
3775  * non-shared catalogs at all. Autovacuum calls InitPostgres with a
3776  * database OID, so it instead depends on DatabaseOidIndexId. We also
3777  * need to nail up some indexes on pg_authid and pg_auth_members for use
3778  * during client authentication. SharedSecLabelObjectIndexId isn't
3779  * critical for the core system, but authentication hooks might be
3780  * interested in it.
3781  */
3783  {
3785  DatabaseRelationId);
3787  DatabaseRelationId);
3789  AuthIdRelationId);
3791  AuthIdRelationId);
3793  AuthMemRelationId);
3795  SharedSecLabelRelationId);
3796 
3797 #define NUM_CRITICAL_SHARED_INDEXES 6 /* fix if you change list above */
3798 
3800  }
3801 
3802  /*
3803  * Now, scan all the relcache entries and update anything that might be
3804  * wrong in the results from formrdesc or the relcache cache file. If we
3805  * faked up relcache entries using formrdesc, then read the real pg_class
3806  * rows and replace the fake entries with them. Also, if any of the
3807  * relcache entries have rules, triggers, or security policies, load that
3808  * info the hard way since it isn't recorded in the cache file.
3809  *
3810  * Whenever we access the catalogs to read data, there is a possibility of
3811  * a shared-inval cache flush causing relcache entries to be removed.
3812  * Since hash_seq_search only guarantees to still work after the *current*
3813  * entry is removed, it's unsafe to continue the hashtable scan afterward.
3814  * We handle this by restarting the scan from scratch after each access.
3815  * This is theoretically O(N^2), but the number of entries that actually
3816  * need to be fixed is small enough that it doesn't matter.
3817  */
3818  hash_seq_init(&status, RelationIdCache);
3819 
3820  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3821  {
3822  Relation relation = idhentry->reldesc;
3823  bool restart = false;
3824 
3825  /*
3826  * Make sure *this* entry doesn't get flushed while we work with it.
3827  */
3829 
3830  /*
3831  * If it's a faked-up entry, read the real pg_class tuple.
3832  */
3833  if (relation->rd_rel->relowner == InvalidOid)
3834  {
3835  HeapTuple htup;
3836  Form_pg_class relp;
3837 
3838  htup = SearchSysCache1(RELOID,
3839  ObjectIdGetDatum(RelationGetRelid(relation)));
3840  if (!HeapTupleIsValid(htup))
3841  elog(FATAL, "cache lookup failed for relation %u",
3842  RelationGetRelid(relation));
3843  relp = (Form_pg_class) GETSTRUCT(htup);
3844 
3845  /*
3846  * Copy tuple to relation->rd_rel. (See notes in
3847  * AllocateRelationDesc())
3848  */
3849  memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
3850 
3851  /* Update rd_options while we have the tuple */
3852  if (relation->rd_options)
3853  pfree(relation->rd_options);
3854  RelationParseRelOptions(relation, htup);
3855 
3856  /*
3857  * Check the values in rd_att were set up correctly. (We cannot
3858  * just copy them over now: formrdesc must have set up the rd_att
3859  * data correctly to start with, because it may already have been
3860  * copied into one or more catcache entries.)
3861  */
3862  Assert(relation->rd_att->tdtypeid == relp->reltype);
3863  Assert(relation->rd_att->tdtypmod == -1);
3864 
3865  ReleaseSysCache(htup);
3866 
3867  /* relowner had better be OK now, else we'll loop forever */
3868  if (relation->rd_rel->relowner == InvalidOid)
3869  elog(ERROR, "invalid relowner in pg_class entry for \"%s\"",
3870  RelationGetRelationName(relation));
3871 
3872  restart = true;
3873  }
3874 
3875  /*
3876  * Fix data that isn't saved in relcache cache file.
3877  *
3878  * relhasrules or relhastriggers could possibly be wrong or out of
3879  * date. If we don't actually find any rules or triggers, clear the
3880  * local copy of the flag so that we don't get into an infinite loop
3881  * here. We don't make any attempt to fix the pg_class entry, though.
3882  */
3883  if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
3884  {
3885  RelationBuildRuleLock(relation);
3886  if (relation->rd_rules == NULL)
3887  relation->rd_rel->relhasrules = false;
3888  restart = true;
3889  }
3890  if (relation->rd_rel->relhastriggers && relation->trigdesc == NULL)
3891  {
3892  RelationBuildTriggers(relation);
3893  if (relation->trigdesc == NULL)
3894  relation->rd_rel->relhastriggers = false;
3895  restart = true;
3896  }
3897 
3898  /*
3899  * Re-load the row security policies if the relation has them, since
3900  * they are not preserved in the cache. Note that we can never NOT
3901  * have a policy while relrowsecurity is true,
3902  * RelationBuildRowSecurity will create a single default-deny policy
3903  * if there is no policy defined in pg_policy.
3904  */
3905  if (relation->rd_rel->relrowsecurity && relation->rd_rsdesc == NULL)
3906  {
3907  RelationBuildRowSecurity(relation);
3908 
3909  Assert(relation->rd_rsdesc != NULL);
3910  restart = true;
3911  }
3912 
3913  /* Reload tableam data if needed */
3914  if (relation->rd_tableam == NULL &&
3915  (relation->rd_rel->relkind == RELKIND_RELATION ||
3916  relation->rd_rel->relkind == RELKIND_SEQUENCE ||
3917  relation->rd_rel->relkind == RELKIND_TOASTVALUE ||
3918  relation->rd_rel->relkind == RELKIND_MATVIEW))
3919  {
3921  Assert(relation->rd_tableam != NULL);
3922 
3923  restart = true;
3924  }
3925 
3926  /* Release hold on the relation */
3928 
3929  /* Now, restart the hashtable scan if needed */
3930  if (restart)
3931  {
3932  hash_seq_term(&status);
3933  hash_seq_init(&status, RelationIdCache);
3934  }
3935  }
3936 
3937  /*
3938  * Lastly, write out new relcache cache files if needed. We don't bother
3939  * to distinguish cases where only one of the two needs an update.
3940  */
3941  if (needNewCacheFile)
3942  {
3943  /*
3944  * Force all the catcaches to finish initializing and thereby open the
3945  * catalogs and indexes they use. This will preload the relcache with
3946  * entries for all the most important system catalogs and indexes, so
3947  * that the init files will be most useful for future backends.
3948  */
3950 
3951  /* now write the files */
3953  write_relcache_init_file(false);
3954  }
3955 }
3956 
3957 /*
3958  * Load one critical system index into the relcache
3959  *
3960  * indexoid is the OID of the target index, heapoid is the OID of the catalog
3961  * it belongs to.
3962  */
3963 static void
3964 load_critical_index(Oid indexoid, Oid heapoid)
3965 {
3966  Relation ird;
3967 
3968  /*
3969  * We must lock the underlying catalog before locking the index to avoid
3970  * deadlock, since RelationBuildDesc might well need to read the catalog,
3971  * and if anyone else is exclusive-locking this catalog and index they'll
3972  * be doing it in that order.
3973  */
3974  LockRelationOid(heapoid, AccessShareLock);
3975  LockRelationOid(indexoid, AccessShareLock);
3976  ird = RelationBuildDesc(indexoid, true);
3977  if (ird == NULL)
3978  elog(PANIC, "could not open critical system index %u", indexoid);
3979  ird->rd_isnailed = true;
3980  ird->rd_refcnt = 1;
3983 }
3984 
3985 /*
3986  * GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
3987  * GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
3988  *
3989  * We need this kluge because we have to be able to access non-fixed-width
3990  * fields of pg_class and pg_index before we have the standard catalog caches
3991  * available. We use predefined data that's set up in just the same way as
3992  * the bootstrapped reldescs used by formrdesc(). The resulting tupdesc is
3993  * not 100% kosher: it does not have the correct rowtype OID in tdtypeid, nor
3994  * does it have a TupleConstr field. But it's good enough for the purpose of
3995  * extracting fields.
3996  */
3997 static TupleDesc
3999 {
4000  TupleDesc result;
4001  MemoryContext oldcxt;
4002  int i;
4003 
4005 
4006  result = CreateTemplateTupleDesc(natts);
4007  result->tdtypeid = RECORDOID; /* not right, but we don't care */
4008  result->tdtypmod = -1;
4009 
4010  for (i = 0; i < natts; i++)
4011  {
4012  memcpy(TupleDescAttr(result, i), &attrs[i], ATTRIBUTE_FIXED_PART_SIZE);
4013  /* make sure attcacheoff is valid */
4014  TupleDescAttr(result, i)->attcacheoff = -1;
4015  }
4016 
4017  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
4018  TupleDescAttr(result, 0)->attcacheoff = 0;
4019 
4020  /* Note: we don't bother to set up a TupleConstr entry */
4021 
4022  MemoryContextSwitchTo(oldcxt);
4023 
4024  return result;
4025 }
4026 
4027 static TupleDesc
4029 {
4030  static TupleDesc pgclassdesc = NULL;
4031 
4032  /* Already done? */
4033  if (pgclassdesc == NULL)
4034  pgclassdesc = BuildHardcodedDescriptor(Natts_pg_class,
4035  Desc_pg_class);
4036 
4037  return pgclassdesc;
4038 }
4039 
4040 static TupleDesc
4042 {
4043  static TupleDesc pgindexdesc = NULL;
4044 
4045  /* Already done? */
4046  if (pgindexdesc == NULL)
4047  pgindexdesc = BuildHardcodedDescriptor(Natts_pg_index,
4048  Desc_pg_index);
4049 
4050  return pgindexdesc;
4051 }
4052 
4053 /*
4054  * Load any default attribute value definitions for the relation.
4055  */
4056 static void
4058 {
4059  AttrDefault *attrdef = relation->rd_att->constr->defval;
4060  int ndef = relation->rd_att->constr->num_defval;
4061  Relation adrel;
4062  SysScanDesc adscan;
4063  ScanKeyData skey;
4064  HeapTuple htup;
4065  Datum val;
4066  bool isnull;
4067  int found;
4068  int i;
4069 
4070  ScanKeyInit(&skey,
4071  Anum_pg_attrdef_adrelid,
4072  BTEqualStrategyNumber, F_OIDEQ,
4073  ObjectIdGetDatum(RelationGetRelid(relation)));
4074 
4075  adrel = table_open(AttrDefaultRelationId, AccessShareLock);
4076  adscan = systable_beginscan(adrel, AttrDefaultIndexId, true,
4077  NULL, 1, &skey);
4078  found = 0;
4079 
4080  while (HeapTupleIsValid(htup = systable_getnext(adscan)))
4081  {
4082  Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
4083  Form_pg_attribute attr = TupleDescAttr(relation->rd_att, adform->adnum - 1);
4084 
4085  for (i = 0; i < ndef; i++)
4086  {
4087  if (adform->adnum != attrdef[i].adnum)
4088  continue;
4089  if (attrdef[i].adbin != NULL)
4090  elog(WARNING, "multiple attrdef records found for attr %s of rel %s",
4091  NameStr(attr->attname),
4092  RelationGetRelationName(relation));
4093  else
4094  found++;
4095 
4096  val = fastgetattr(htup,
4097  Anum_pg_attrdef_adbin,
4098  adrel->rd_att, &isnull);
4099  if (isnull)
4100  elog(WARNING, "null adbin for attr %s of rel %s",
4101  NameStr(attr->attname),
4102  RelationGetRelationName(relation));
4103  else
4104  {
4105  /* detoast and convert to cstring in caller's context */
4106  char *s = TextDatumGetCString(val);
4107 
4109  pfree(s);
4110  }
4111  break;
4112  }
4113 
4114  if (i >= ndef)
4115  elog(WARNING, "unexpected attrdef record found for attr %d of rel %s",
4116  adform->adnum, RelationGetRelationName(relation));
4117  }
4118 
4119  systable_endscan(adscan);
4120  table_close(adrel, AccessShareLock);
4121 }
4122 
4123 /*
4124  * Load any check constraints for the relation.
4125  */
4126 static void
4128 {
4129  ConstrCheck *check = relation->rd_att->constr->check;
4130  int ncheck = relation->rd_att->constr->num_check;
4131  Relation conrel;
4132  SysScanDesc conscan;
4133  ScanKeyData skey[1];
4134  HeapTuple htup;
4135  int found = 0;
4136 
4137  ScanKeyInit(&skey[0],
4138  Anum_pg_constraint_conrelid,
4139  BTEqualStrategyNumber, F_OIDEQ,
4140  ObjectIdGetDatum(RelationGetRelid(relation)));
4141 
4142  conrel = table_open(ConstraintRelationId, AccessShareLock);
4143  conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
4144  NULL, 1, skey);
4145 
4146  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4147  {
4149  Datum val;
4150  bool isnull;
4151  char *s;
4152 
4153  /* We want check constraints only */
4154  if (conform->contype != CONSTRAINT_CHECK)
4155  continue;
4156 
4157  if (found >= ncheck)
4158  elog(ERROR, "unexpected constraint record found for rel %s",
4159  RelationGetRelationName(relation));
4160 
4161  check[found].ccvalid = conform->convalidated;
4162  check[found].ccnoinherit = conform->connoinherit;
4164  NameStr(conform->conname));
4165 
4166  /* Grab and test conbin is actually set */
4167  val = fastgetattr(htup,
4168  Anum_pg_constraint_conbin,
4169  conrel->rd_att, &isnull);
4170  if (isnull)
4171  elog(ERROR, "null conbin for rel %s",
4172  RelationGetRelationName(relation));
4173 
4174  /* detoast and convert to cstring in caller's context */
4175  s = TextDatumGetCString(val);
4176  check[found].ccbin = MemoryContextStrdup(CacheMemoryContext, s);
4177  pfree(s);
4178 
4179  found++;
4180  }
4181 
4182  systable_endscan(conscan);
4183  table_close(conrel, AccessShareLock);
4184 
4185  if (found != ncheck)
4186  elog(ERROR, "%d constraint record(s) missing for rel %s",
4187  ncheck - found, RelationGetRelationName(relation));
4188 
4189  /* Sort the records so that CHECKs are applied in a deterministic order */
4190  if (ncheck > 1)
4191  qsort(check, ncheck, sizeof(ConstrCheck), CheckConstraintCmp);
4192 }
4193 
4194 /*
4195  * qsort comparator to sort ConstrCheck entries by name
4196  */
4197 static int
4198 CheckConstraintCmp(const void *a, const void *b)
4199 {
4200  const ConstrCheck *ca = (const ConstrCheck *) a;
4201  const ConstrCheck *cb = (const ConstrCheck *) b;
4202 
4203  return strcmp(ca->ccname, cb->ccname);
4204 }
4205 
4206 /*
4207  * RelationGetFKeyList -- get a list of foreign key info for the relation
4208  *
4209  * Returns a list of ForeignKeyCacheInfo structs, one per FK constraining
4210  * the given relation. This data is a direct copy of relevant fields from
4211  * pg_constraint. The list items are in no particular order.
4212  *
4213  * CAUTION: the returned list is part of the relcache's data, and could
4214  * vanish in a relcache entry reset. Callers must inspect or copy it
4215  * before doing anything that might trigger a cache flush, such as
4216  * system catalog accesses. copyObject() can be used if desired.
4217  * (We define it this way because current callers want to filter and
4218  * modify the list entries anyway, so copying would be a waste of time.)
4219  */
4220 List *
4222 {
4223  List *result;
4224  Relation conrel;
4225  SysScanDesc conscan;
4226  ScanKeyData skey;
4227  HeapTuple htup;
4228  List *oldlist;
4229  MemoryContext oldcxt;
4230 
4231  /* Quick exit if we already computed the list. */
4232  if (relation->rd_fkeyvalid)
4233  return relation->rd_fkeylist;
4234 
4235  /* Fast path: non-partitioned tables without triggers can't have FKs */
4236  if (!relation->rd_rel->relhastriggers &&
4237  relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
4238  return NIL;
4239 
4240  /*
4241  * We build the list we intend to return (in the caller's context) while
4242  * doing the scan. After successfully completing the scan, we copy that
4243  * list into the relcache entry. This avoids cache-context memory leakage
4244  * if we get some sort of error partway through.
4245  */
4246  result = NIL;
4247 
4248  /* Prepare to scan pg_constraint for entries having conrelid = this rel. */
4249  ScanKeyInit(&skey,
4250  Anum_pg_constraint_conrelid,
4251  BTEqualStrategyNumber, F_OIDEQ,
4252  ObjectIdGetDatum(RelationGetRelid(relation)));
4253 
4254  conrel = table_open(ConstraintRelationId, AccessShareLock);
4255  conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
4256  NULL, 1, &skey);
4257 
4258  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4259  {
4260  Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup);
4261  ForeignKeyCacheInfo *info;
4262 
4263  /* consider only foreign keys */
4264  if (constraint->contype != CONSTRAINT_FOREIGN)
4265  continue;
4266 
4267  info = makeNode(ForeignKeyCacheInfo);
4268  info->conoid = constraint->oid;
4269  info->conrelid = constraint->conrelid;
4270  info->confrelid = constraint->confrelid;
4271 
4272  DeconstructFkConstraintRow(htup, &info->nkeys,
4273  info->conkey,
4274  info->confkey,
4275  info->conpfeqop,
4276  NULL, NULL);
4277 
4278  /* Add FK's node to the result list */
4279  result = lappend(result, info);
4280  }
4281 
4282  systable_endscan(conscan);
4283  table_close(conrel, AccessShareLock);
4284 
4285  /* Now save a copy of the completed list in the relcache entry. */
4287  oldlist = relation->rd_fkeylist;
4288  relation->rd_fkeylist = copyObject(result);
4289  relation->rd_fkeyvalid = true;
4290  MemoryContextSwitchTo(oldcxt);
4291 
4292  /* Don't leak the old list, if there is one */
4293  list_free_deep(oldlist);
4294 
4295  return result;
4296 }
4297 
4298 /*
4299  * RelationGetIndexList -- get a list of OIDs of indexes on this relation
4300  *
4301  * The index list is created only if someone requests it. We scan pg_index
4302  * to find relevant indexes, and add the list to the relcache entry so that
4303  * we won't have to compute it again. Note that shared cache inval of a
4304  * relcache entry will delete the old list and set rd_indexvalid to false,
4305  * so that we must recompute the index list on next request. This handles
4306  * creation or deletion of an index.
4307  *
4308  * Indexes that are marked not indislive are omitted from the returned list.
4309  * Such indexes are expected to be dropped momentarily, and should not be
4310  * touched at all by any caller of this function.
4311  *
4312  * The returned list is guaranteed to be sorted in order by OID. This is
4313  * needed by the executor, since for index types that we obtain exclusive
4314  * locks on when updating the index, all backends must lock the indexes in
4315  * the same order or we will get deadlocks (see ExecOpenIndices()). Any
4316  * consistent ordering would do, but ordering by OID is easy.
4317  *
4318  * Since shared cache inval causes the relcache's copy of the list to go away,
4319  * we return a copy of the list palloc'd in the caller's context. The caller
4320  * may list_free() the returned list after scanning it. This is necessary
4321  * since the caller will typically be doing syscache lookups on the relevant
4322  * indexes, and syscache lookup could cause SI messages to be processed!
4323  *
4324  * In exactly the same way, we update rd_pkindex, which is the OID of the
4325  * relation's primary key index if any, else InvalidOid; and rd_replidindex,
4326  * which is the pg_class OID of an index to be used as the relation's
4327  * replication identity index, or InvalidOid if there is no such index.
4328  */
4329 List *
4331 {
4332  Relation indrel;
4333  SysScanDesc indscan;
4334  ScanKeyData skey;
4335  HeapTuple htup;
4336  List *result;
4337  List *oldlist;
4338  char replident = relation->rd_rel->relreplident;
4339  Oid pkeyIndex = InvalidOid;
4340  Oid candidateIndex = InvalidOid;
4341  MemoryContext oldcxt;
4342 
4343  /* Quick exit if we already computed the list. */
4344  if (relation->rd_indexvalid)
4345  return list_copy(relation->rd_indexlist);
4346 
4347  /*
4348  * We build the list we intend to return (in the caller's context) while
4349  * doing the scan. After successfully completing the scan, we copy that
4350  * list into the relcache entry. This avoids cache-context memory leakage
4351  * if we get some sort of error partway through.
4352  */
4353  result = NIL;
4354 
4355  /* Prepare to scan pg_index for entries having indrelid = this rel. */
4356  ScanKeyInit(&skey,
4357  Anum_pg_index_indrelid,
4358  BTEqualStrategyNumber, F_OIDEQ,
4359  ObjectIdGetDatum(RelationGetRelid(relation)));
4360 
4361  indrel = table_open(IndexRelationId, AccessShareLock);
4362  indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true,
4363  NULL, 1, &skey);
4364 
4365  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4366  {
4368 
4369  /*
4370  * Ignore any indexes that are currently being dropped. This will
4371  * prevent them from being searched, inserted into, or considered in
4372  * HOT-safety decisions. It's unsafe to touch such an index at all
4373  * since its catalog entries could disappear at any instant.
4374  */
4375  if (!index->indislive)
4376  continue;
4377 
4378  /* add index's OID to result list */
4379  result = lappend_oid(result, index->indexrelid);
4380 
4381  /*
4382  * Invalid, non-unique, non-immediate or predicate indexes aren't
4383  * interesting for either oid indexes or replication identity indexes,
4384  * so don't check them.
4385  */
4386  if (!index->indisvalid || !index->indisunique ||
4387  !index->indimmediate ||
4388  !heap_attisnull(htup, Anum_pg_index_indpred, NULL))
4389  continue;
4390 
4391  /* remember primary key index if any */
4392  if (index->indisprimary)
4393  pkeyIndex = index->indexrelid;
4394 
4395  /* remember explicitly chosen replica index */
4396  if (index->indisreplident)
4397  candidateIndex = index->indexrelid;
4398  }
4399 
4400  systable_endscan(indscan);
4401 
4402  table_close(indrel, AccessShareLock);
4403 
4404  /* Sort the result list into OID order, per API spec. */
4405  list_sort(result, list_oid_cmp);
4406 
4407  /* Now save a copy of the completed list in the relcache entry. */
4409  oldlist = relation->rd_indexlist;
4410  relation->rd_indexlist = list_copy(result);
4411  relation->rd_pkindex = pkeyIndex;
4412  if (replident == REPLICA_IDENTITY_DEFAULT && OidIsValid(pkeyIndex))
4413  relation->rd_replidindex = pkeyIndex;
4414  else if (replident == REPLICA_IDENTITY_INDEX && OidIsValid(candidateIndex))
4415  relation->rd_replidindex = candidateIndex;
4416  else
4417  relation->rd_replidindex = InvalidOid;
4418  relation->rd_indexvalid = true;
4419  MemoryContextSwitchTo(oldcxt);
4420 
4421  /* Don't leak the old list, if there is one */
4422  list_free(oldlist);
4423 
4424  return result;
4425 }
4426 
4427 /*
4428  * RelationGetStatExtList
4429  * get a list of OIDs of statistics objects on this relation
4430  *
4431  * The statistics list is created only if someone requests it, in a way
4432  * similar to RelationGetIndexList(). We scan pg_statistic_ext to find
4433  * relevant statistics, and add the list to the relcache entry so that we
4434  * won't have to compute it again. Note that shared cache inval of a
4435  * relcache entry will delete the old list and set rd_statvalid to 0,
4436  * so that we must recompute the statistics list on next request. This
4437  * handles creation or deletion of a statistics object.
4438  *
4439  * The returned list is guaranteed to be sorted in order by OID, although
4440  * this is not currently needed.
4441  *
4442  * Since shared cache inval causes the relcache's copy of the list to go away,
4443  * we return a copy of the list palloc'd in the caller's context. The caller
4444  * may list_free() the returned list after scanning it. This is necessary
4445  * since the caller will typically be doing syscache lookups on the relevant
4446  * statistics, and syscache lookup could cause SI messages to be processed!
4447  */
4448 List *
4450 {
4451  Relation indrel;
4452  SysScanDesc indscan;
4453  ScanKeyData skey;
4454  HeapTuple htup;
4455  List *result;
4456  List *oldlist;
4457  MemoryContext oldcxt;
4458 
4459  /* Quick exit if we already computed the list. */
4460  if (relation->rd_statvalid != 0)
4461  return list_copy(relation->rd_statlist);
4462 
4463  /*
4464  * We build the list we intend to return (in the caller's context) while
4465  * doing the scan. After successfully completing the scan, we copy that
4466  * list into the relcache entry. This avoids cache-context memory leakage
4467  * if we get some sort of error partway through.
4468  */
4469  result = NIL;
4470 
4471  /*
4472  * Prepare to scan pg_statistic_ext for entries having stxrelid = this
4473  * rel.
4474  */
4475  ScanKeyInit(&skey,
4476  Anum_pg_statistic_ext_stxrelid,
4477  BTEqualStrategyNumber, F_OIDEQ,
4478  ObjectIdGetDatum(RelationGetRelid(relation)));
4479 
4480  indrel = table_open(StatisticExtRelationId, AccessShareLock);
4481  indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true,
4482  NULL, 1, &skey);
4483 
4484  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4485  {
4486  Oid oid = ((Form_pg_statistic_ext) GETSTRUCT(htup))->oid;
4487 
4488  result = lappend_oid(result, oid);
4489  }
4490 
4491  systable_endscan(indscan);
4492 
4493  table_close(indrel, AccessShareLock);
4494 
4495  /* Sort the result list into OID order, per API spec. */
4496  list_sort(result, list_oid_cmp);
4497 
4498  /* Now save a copy of the completed list in the relcache entry. */
4500  oldlist = relation->rd_statlist;
4501  relation->rd_statlist = list_copy(result);
4502 
4503  relation->rd_statvalid = true;
4504  MemoryContextSwitchTo(oldcxt);
4505 
4506  /* Don't leak the old list, if there is one */
4507  list_free(oldlist);
4508 
4509  return result;
4510 }
4511 
4512 /*
4513  * RelationGetPrimaryKeyIndex -- get OID of the relation's primary key index
4514  *
4515  * Returns InvalidOid if there is no such index.
4516  */
4517 Oid
4519 {
4520  List *ilist;
4521 
4522  if (!relation->rd_indexvalid)
4523  {
4524  /* RelationGetIndexList does the heavy lifting. */
4525  ilist = RelationGetIndexList(relation);
4526  list_free(ilist);
4527  Assert(relation->rd_indexvalid);
4528  }
4529 
4530  return relation->rd_pkindex;
4531 }
4532 
4533 /*
4534  * RelationGetReplicaIndex -- get OID of the relation's replica identity index
4535  *
4536  * Returns InvalidOid if there is no such index.
4537  */
4538 Oid
4540 {
4541  List *ilist;
4542 
4543  if (!relation->rd_indexvalid)
4544  {
4545  /* RelationGetIndexList does the heavy lifting. */
4546  ilist = RelationGetIndexList(relation);
4547  list_free(ilist);
4548  Assert(relation->rd_indexvalid);
4549  }
4550 
4551  return relation->rd_replidindex;
4552 }
4553 
4554 /*
4555  * RelationGetIndexExpressions -- get the index expressions for an index
4556  *
4557  * We cache the result of transforming pg_index.indexprs into a node tree.
4558  * If the rel is not an index or has no expressional columns, we return NIL.
4559  * Otherwise, the returned tree is copied into the caller's memory context.
4560  * (We don't want to return a pointer to the relcache copy, since it could
4561  * disappear due to relcache invalidation.)
4562  */
4563 List *
4565 {
4566  List *result;
4567  Datum exprsDatum;
4568  bool isnull;
4569  char *exprsString;
4570  MemoryContext oldcxt;
4571 
4572  /* Quick exit if we already computed the result. */
4573  if (relation->rd_indexprs)
4574  return copyObject(relation->rd_indexprs);
4575 
4576  /* Quick exit if there is nothing to do. */
4577  if (relation->rd_indextuple == NULL ||
4578  heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs, NULL))
4579  return NIL;
4580 
4581  /*
4582  * We build the tree we intend to return in the caller's context. After
4583  * successfully completing the work, we copy it into the relcache entry.
4584  * This avoids problems if we get some sort of error partway through.
4585  */
4586  exprsDatum = heap_getattr(relation->rd_indextuple,
4587  Anum_pg_index_indexprs,
4589  &isnull);
4590  Assert(!isnull);
4591  exprsString = TextDatumGetCString(exprsDatum);
4592  result = (List *) stringToNode(exprsString);
4593  pfree(exprsString);
4594 
4595  /*
4596  * Run the expressions through eval_const_expressions. This is not just an
4597  * optimization, but is necessary, because the planner will be comparing
4598  * them to similarly-processed qual clauses, and may fail to detect valid
4599  * matches without this. We must not use canonicalize_qual, however,
4600  * since these aren't qual expressions.
4601  */
4602  result = (List *) eval_const_expressions(NULL, (Node *) result);
4603 
4604  /* May as well fix opfuncids too */
4605  fix_opfuncids((Node *) result);
4606 
4607  /* Now save a copy of the completed tree in the relcache entry. */
4608  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4609  relation->rd_indexprs = copyObject(result);
4610  MemoryContextSwitchTo(oldcxt);
4611 
4612  return result;
4613 }
4614 
4615 /*
4616  * RelationGetDummyIndexExpressions -- get dummy expressions for an index
4617  *
4618  * Return a list of dummy expressions (just Const nodes) with the same
4619  * types/typmods/collations as the index's real expressions. This is
4620  * useful in situations where we don't want to run any user-defined code.
4621  */
4622 List *
4624 {
4625  List *result;
4626  Datum exprsDatum;
4627  bool isnull;
4628  char *exprsString;
4629  List *rawExprs;
4630  ListCell *lc;
4631 
4632  /* Quick exit if there is nothing to do. */
4633  if (relation->rd_indextuple == NULL ||
4634  heap_attisnull(relation->rd_indextuple, Anum_pg_index_indexprs, NULL))
4635  return NIL;
4636 
4637  /* Extract raw node tree(s) from index tuple. */
4638  exprsDatum = heap_getattr(relation->rd_indextuple,
4639  Anum_pg_index_indexprs,
4641  &isnull);
4642  Assert(!isnull);
4643  exprsString = TextDatumGetCString(exprsDatum);
4644  rawExprs = (List *) stringToNode(exprsString);
4645  pfree(exprsString);
4646 
4647  /* Construct null Consts; the typlen and typbyval are arbitrary. */
4648  result = NIL;
4649  foreach(lc, rawExprs)
4650  {
4651  Node *rawExpr = (Node *) lfirst(lc);
4652 
4653  result = lappend(result,
4654  makeConst(exprType(rawExpr),
4655  exprTypmod(rawExpr),
4656  exprCollation(rawExpr),
4657  1,
4658  (Datum) 0,
4659  true,
4660  true));
4661  }
4662 
4663  return result;
4664 }
4665 
4666 /*
4667  * RelationGetIndexPredicate -- get the index predicate for an index
4668  *
4669  * We cache the result of transforming pg_index.indpred into an implicit-AND
4670  * node tree (suitable for use in planning).
4671  * If the rel is not an index or has no predicate, we return NIL.
4672  * Otherwise, the returned tree is copied into the caller's memory context.
4673  * (We don't want to return a pointer to the relcache copy, since it could
4674  * disappear due to relcache invalidation.)
4675  */
4676 List *
4678 {
4679  List *result;
4680  Datum predDatum;
4681  bool isnull;
4682  char *predString;
4683  MemoryContext oldcxt;
4684 
4685  /* Quick exit if we already computed the result. */
4686  if (relation->rd_indpred)
4687  return copyObject(relation->rd_indpred);
4688 
4689  /* Quick exit if there is nothing to do. */
4690  if (relation->rd_indextuple == NULL ||
4691  heap_attisnull(relation->rd_indextuple, Anum_pg_index_indpred, NULL))
4692  return NIL;
4693 
4694  /*
4695  * We build the tree we intend to return in the caller's context. After
4696  * successfully completing the work, we copy it into the relcache entry.
4697  * This avoids problems if we get some sort of error partway through.
4698  */
4699  predDatum = heap_getattr(relation->rd_indextuple,
4700  Anum_pg_index_indpred,
4702  &isnull);
4703  Assert(!isnull);
4704  predString = TextDatumGetCString(predDatum);
4705  result = (List *) stringToNode(predString);
4706  pfree(predString);
4707 
4708  /*
4709  * Run the expression through const-simplification and canonicalization.
4710  * This is not just an optimization, but is necessary, because the planner
4711  * will be comparing it to similarly-processed qual clauses, and may fail
4712  * to detect valid matches without this. This must match the processing
4713  * done to qual clauses in preprocess_expression()! (We can skip the
4714  * stuff involving subqueries, however, since we don't allow any in index
4715  * predicates.)
4716  */
4717  result = (List *) eval_const_expressions(NULL, (Node *) result);
4718 
4719  result = (List *) canonicalize_qual((Expr *) result, false);
4720 
4721  /* Also convert to implicit-AND format */
4722  result = make_ands_implicit((Expr *) result);
4723 
4724  /* May as well fix opfuncids too */
4725  fix_opfuncids((Node *) result);
4726 
4727  /* Now save a copy of the completed tree in the relcache entry. */
4728  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4729  relation->rd_indpred = copyObject(result);
4730  MemoryContextSwitchTo(oldcxt);
4731 
4732  return result;
4733 }
4734 
4735 /*
4736  * RelationGetIndexAttrBitmap -- get a bitmap of index attribute numbers
4737  *
4738  * The result has a bit set for each attribute used anywhere in the index
4739  * definitions of all the indexes on this relation. (This includes not only
4740  * simple index keys, but attributes used in expressions and partial-index
4741  * predicates.)
4742  *
4743  * Depending on attrKind, a bitmap covering the attnums for all index columns,
4744  * for all potential foreign key columns, or for all columns in the configured
4745  * replica identity index is returned.
4746  *
4747  * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
4748  * we can include system attributes (e.g., OID) in the bitmap representation.
4749  *
4750  * Caller had better hold at least RowExclusiveLock on the target relation
4751  * to ensure it is safe (deadlock-free) for us to take locks on the relation's
4752  * indexes. Note that since the introduction of CREATE INDEX CONCURRENTLY,
4753  * that lock level doesn't guarantee a stable set of indexes, so we have to
4754  * be prepared to retry here in case of a change in the set of indexes.
4755  *
4756  * The returned result is palloc'd in the caller's memory context and should
4757  * be bms_free'd when not needed anymore.
4758  */
4759 Bitmapset *
4761 {
4762  Bitmapset *indexattrs; /* indexed columns */
4763  Bitmapset *uindexattrs; /* columns in unique indexes */
4764  Bitmapset *pkindexattrs; /* columns in the primary index */
4765  Bitmapset *idindexattrs; /* columns in the replica identity */
4766  List *indexoidlist;
4767  List *newindexoidlist;
4768  Oid relpkindex;
4769  Oid relreplindex;
4770  ListCell *l;
4771  MemoryContext oldcxt;
4772 
4773  /* Quick exit if we already computed the result. */
4774  if (relation->rd_indexattr != NULL)
4775  {
4776  switch (attrKind)
4777  {
4778  case INDEX_ATTR_BITMAP_ALL:
4779  return bms_copy(relation->rd_indexattr);
4780  case INDEX_ATTR_BITMAP_KEY:
4781  return bms_copy(relation->rd_keyattr);
4783  return bms_copy(relation->rd_pkattr);
4785  return bms_copy(relation->rd_idattr);
4786  default:
4787  elog(ERROR, "unknown attrKind %u", attrKind);
4788  }
4789  }
4790 
4791  /* Fast path if definitely no indexes */
4792  if (!RelationGetForm(relation)->relhasindex)
4793  return NULL;
4794 
4795  /*
4796  * Get cached list of index OIDs. If we have to start over, we do so here.
4797  */
4798 restart:
4799  indexoidlist = RelationGetIndexList(relation);
4800 
4801  /* Fall out if no indexes (but relhasindex was set) */
4802  if (indexoidlist == NIL)
4803  return NULL;
4804 
4805  /*
4806  * Copy the rd_pkindex and rd_replidindex values computed by
4807  * RelationGetIndexList before proceeding. This is needed because a
4808  * relcache flush could occur inside index_open below, resetting the
4809  * fields managed by RelationGetIndexList. We need to do the work with
4810  * stable values of these fields.
4811  */
4812  relpkindex = relation->rd_pkindex;
4813  relreplindex = relation->rd_replidindex;
4814 
4815  /*
4816  * For each index, add referenced attributes to indexattrs.
4817  *
4818  * Note: we consider all indexes returned by RelationGetIndexList, even if
4819  * they are not indisready or indisvalid. This is important because an
4820  * index for which CREATE INDEX CONCURRENTLY has just started must be
4821  * included in HOT-safety decisions (see README.HOT). If a DROP INDEX
4822  * CONCURRENTLY is far enough along that we should ignore the index, it
4823  * won't be returned at all by RelationGetIndexList.
4824  */
4825  indexattrs = NULL;
4826  uindexattrs = NULL;
4827  pkindexattrs = NULL;
4828  idindexattrs = NULL;
4829  foreach(l, indexoidlist)
4830  {
4831  Oid indexOid = lfirst_oid(l);
4832  Relation indexDesc;
4833  Datum datum;
4834  bool isnull;
4835  Node *indexExpressions;
4836  Node *indexPredicate;
4837  int i;
4838  bool isKey; /* candidate key */
4839  bool isPK; /* primary key */
4840  bool isIDKey; /* replica identity index */
4841 
4842  indexDesc = index_open(indexOid, AccessShareLock);
4843 
4844  /*
4845  * Extract index expressions and index predicate. Note: Don't use
4846  * RelationGetIndexExpressions()/RelationGetIndexPredicate(), because
4847  * those might run constant expressions evaluation, which needs a
4848  * snapshot, which we might not have here. (Also, it's probably more
4849  * sound to collect the bitmaps before any transformations that might
4850  * eliminate columns, but the practical impact of this is limited.)
4851  */
4852 
4853  datum = heap_getattr(indexDesc->rd_indextuple, Anum_pg_index_indexprs,
4854  GetPgIndexDescriptor(), &isnull);
4855  if (!isnull)
4856  indexExpressions = stringToNode(TextDatumGetCString(datum));
4857  else
4858  indexExpressions = NULL;
4859 
4860  datum = heap_getattr(indexDesc->rd_indextuple, Anum_pg_index_indpred,
4861  GetPgIndexDescriptor(), &isnull);
4862  if (!isnull)
4863  indexPredicate = stringToNode(TextDatumGetCString(datum));
4864  else
4865  indexPredicate = NULL;
4866 
4867  /* Can this index be referenced by a foreign key? */
4868  isKey = indexDesc->rd_index->indisunique &&
4869  indexExpressions == NULL &&
4870  indexPredicate == NULL;
4871 
4872  /* Is this a primary key? */
4873  isPK = (indexOid == relpkindex);
4874 
4875  /* Is this index the configured (or default) replica identity? */
4876  isIDKey = (indexOid == relreplindex);
4877 
4878  /* Collect simple attribute references */
4879  for (i = 0; i < indexDesc->rd_index->indnatts; i++)
4880  {
4881  int attrnum = indexDesc->rd_index->indkey.values[i];
4882 
4883  /*
4884  * Since we have covering indexes with non-key columns, we must
4885  * handle them accurately here. non-key columns must be added into
4886  * indexattrs, since they are in index, and HOT-update shouldn't
4887  * miss them. Obviously, non-key columns couldn't be referenced by
4888  * foreign key or identity key. Hence we do not include them into
4889  * uindexattrs, pkindexattrs and idindexattrs bitmaps.
4890  */
4891  if (attrnum != 0)
4892  {
4893  indexattrs = bms_add_member(indexattrs,
4895 
4896  if (isKey && i < indexDesc->rd_index->indnkeyatts)
4897  uindexattrs = bms_add_member(uindexattrs,
4899 
4900  if (isPK && i < indexDesc->rd_index->indnkeyatts)
4901  pkindexattrs = bms_add_member(pkindexattrs,
4903 
4904  if (isIDKey && i < indexDesc->rd_index->indnkeyatts)
4905  idindexattrs = bms_add_member(idindexattrs,
4907  }
4908  }
4909 
4910  /* Collect all attributes used in expressions, too */
4911  pull_varattnos(indexExpressions, 1, &indexattrs);
4912 
4913  /* Collect all attributes in the index predicate, too */
4914  pull_varattnos(indexPredicate, 1, &indexattrs);
4915 
4916  index_close(indexDesc, AccessShareLock);
4917  }
4918 
4919  /*
4920  * During one of the index_opens in the above loop, we might have received
4921  * a relcache flush event on this relcache entry, which might have been
4922  * signaling a change in the rel's index list. If so, we'd better start
4923  * over to ensure we deliver up-to-date attribute bitmaps.
4924  */
4925  newindexoidlist = RelationGetIndexList(relation);
4926  if (equal(indexoidlist, newindexoidlist) &&
4927  relpkindex == relation->rd_pkindex &&
4928  relreplindex == relation->rd_replidindex)
4929  {
4930  /* Still the same index set, so proceed */
4931  list_free(newindexoidlist);
4932  list_free(indexoidlist);
4933  }
4934  else
4935  {
4936  /* Gotta do it over ... might as well not leak memory */
4937  list_free(newindexoidlist);
4938  list_free(indexoidlist);
4939  bms_free(uindexattrs);
4940  bms_free(pkindexattrs);
4941  bms_free(idindexattrs);
4942  bms_free(indexattrs);
4943 
4944  goto restart;
4945  }
4946 
4947  /* Don't leak the old values of these bitmaps, if any */
4948  bms_free(relation->rd_indexattr);
4949  relation->rd_indexattr = NULL;
4950  bms_free(relation->rd_keyattr);
4951  relation->rd_keyattr = NULL;
4952  bms_free(relation->rd_pkattr);
4953  relation->rd_pkattr = NULL;
4954  bms_free(relation->rd_idattr);
4955  relation->rd_idattr = NULL;
4956 
4957  /*
4958  * Now save copies of the bitmaps in the relcache entry. We intentionally
4959  * set rd_indexattr last, because that's the one that signals validity of
4960  * the values; if we run out of memory before making that copy, we won't
4961  * leave the relcache entry looking like the other ones are valid but
4962  * empty.
4963  */
4965  relation->rd_keyattr = bms_copy(uindexattrs);
4966  relation->rd_pkattr = bms_copy(pkindexattrs);
4967  relation->rd_idattr = bms_copy(idindexattrs);
4968  relation->rd_indexattr = bms_copy(indexattrs);
4969  MemoryContextSwitchTo(oldcxt);
4970 
4971  /* We return our original working copy for caller to play with */
4972  switch (attrKind)
4973  {
4974  case INDEX_ATTR_BITMAP_ALL:
4975  return indexattrs;
4976  case INDEX_ATTR_BITMAP_KEY:
4977  return uindexattrs;
4979  return pkindexattrs;
4981  return idindexattrs;
4982  default:
4983  elog(ERROR, "unknown attrKind %u", attrKind);
4984  return NULL;
4985  }
4986 }
4987 
4988 /*
4989  * RelationGetExclusionInfo -- get info about index's exclusion constraint
4990  *
4991  * This should be called only for an index that is known to have an
4992  * associated exclusion constraint. It returns arrays (palloc'd in caller's
4993  * context) of the exclusion operator OIDs, their underlying functions'
4994  * OIDs, and their strategy numbers in the index's opclasses. We cache
4995  * all this information since it requires a fair amount of work to get.
4996  */
4997 void
4999  Oid **operators,
5000  Oid **procs,
5001  uint16 **strategies)
5002 {
5003  int indnkeyatts;
5004  Oid *ops;
5005  Oid *funcs;
5006  uint16 *strats;
5007  Relation conrel;
5008  SysScanDesc conscan;
5009  ScanKeyData skey[1];
5010  HeapTuple htup;
5011  bool found;
5012  MemoryContext oldcxt;
5013  int i;
5014 
5015  indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation);
5016 
5017  /* Allocate result space in caller context */
5018  *operators = ops = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5019  *procs = funcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5020  *strategies = strats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
5021 
5022  /* Quick exit if we have the data cached already */
5023  if (indexRelation->rd_exclstrats != NULL)
5024  {
5025  memcpy(ops, indexRelation->rd_exclops, sizeof(Oid) * indnkeyatts);
5026  memcpy(funcs, indexRelation->rd_exclprocs, sizeof(Oid) * indnkeyatts);
5027  memcpy(strats, indexRelation->rd_exclstrats, sizeof(uint16) * indnkeyatts);
5028  return;
5029  }
5030 
5031  /*
5032  * Search pg_constraint for the constraint associated with the index. To
5033  * make this not too painfully slow, we use the index on conrelid; that
5034  * will hold the parent relation's OID not the index's own OID.
5035  *
5036  * Note: if we wanted to rely on the constraint name matching the index's
5037  * name, we could just do a direct lookup using pg_constraint's unique
5038  * index. For the moment it doesn't seem worth requiring that.
5039  */
5040  ScanKeyInit(&skey[0],
5041  Anum_pg_constraint_conrelid,
5042  BTEqualStrategyNumber, F_OIDEQ,
5043  ObjectIdGetDatum(indexRelation->rd_index->indrelid));
5044 
5045  conrel = table_open(ConstraintRelationId, AccessShareLock);
5046  conscan = systable_beginscan(conrel, ConstraintRelidTypidNameIndexId, true,
5047  NULL, 1, skey);
5048  found = false;
5049 
5050  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
5051  {
5053  Datum val;
5054  bool isnull;
5055  ArrayType *arr;
5056  int nelem;
5057 
5058  /* We want the exclusion constraint owning the index */
5059  if (conform->contype != CONSTRAINT_EXCLUSION ||
5060  conform->conindid != RelationGetRelid(indexRelation))
5061  continue;
5062 
5063  /* There should be only one */
5064  if (found)
5065  elog(ERROR, "unexpected exclusion constraint record found for rel %s",
5066  RelationGetRelationName(indexRelation));
5067  found = true;
5068 
5069  /* Extract the operator OIDS from conexclop */
5070  val = fastgetattr(htup,
5071  Anum_pg_constraint_conexclop,
5072  conrel->rd_att, &isnull);
5073  if (isnull)
5074  elog(ERROR, "null conexclop for rel %s",
5075  RelationGetRelationName(indexRelation));
5076 
5077  arr = DatumGetArrayTypeP(val); /* ensure not toasted */
5078  nelem = ARR_DIMS(arr)[0];
5079  if (ARR_NDIM(arr) != 1 ||
5080  nelem != indnkeyatts ||
5081  ARR_HASNULL(arr) ||
5082  ARR_ELEMTYPE(arr) != OIDOID)
5083  elog(ERROR, "conexclop is not a 1-D Oid array");
5084 
5085  memcpy(ops, ARR_DATA_PTR(arr), sizeof(Oid) * indnkeyatts);
5086  }
5087 
5088  systable_endscan(conscan);
5089  table_close(conrel, AccessShareLock);
5090 
5091  if (!found)
5092  elog(ERROR, "exclusion constraint record missing for rel %s",
5093  RelationGetRelationName(indexRelation));
5094 
5095  /* We need the func OIDs and strategy numbers too */
5096  for (i = 0; i < indnkeyatts; i++)
5097  {
5098  funcs[i] = get_opcode(ops[i]);
5099  strats[i] = get_op_opfamily_strategy(ops[i],
5100  indexRelation->rd_opfamily[i]);
5101  /* shouldn't fail, since it was checked at index creation */
5102  if (strats[i] == InvalidStrategy)
5103  elog(ERROR, "could not find strategy for operator %u in family %u",
5104  ops[i], indexRelation->rd_opfamily[i]);
5105  }
5106 
5107  /* Save a copy of the results in the relcache entry. */
5108  oldcxt = MemoryContextSwitchTo(indexRelation->rd_indexcxt);
5109  indexRelation->rd_exclops = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5110  indexRelation->rd_exclprocs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
5111  indexRelation->rd_exclstrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
5112  memcpy(indexRelation->rd_exclops, ops, sizeof(Oid) * indnkeyatts);
5113  memcpy(indexRelation->rd_exclprocs, funcs, sizeof(Oid) * indnkeyatts);
5114  memcpy(indexRelation->rd_exclstrats, strats, sizeof(uint16) * indnkeyatts);
5115  MemoryContextSwitchTo(oldcxt);
5116 }
5117 
5118 /*
5119  * Get publication actions for the given relation.
5120  */
5121 struct PublicationActions *
5123 {
5124  List *puboids;
5125  ListCell *lc;
5126  MemoryContext oldcxt;
5127  PublicationActions *pubactions = palloc0(sizeof(PublicationActions));
5128 
5129  /*
5130  * If not publishable, it publishes no actions. (pgoutput_change() will
5131  * ignore it.)
5132  */
5133  if (!is_publishable_relation(relation))
5134  return pubactions;
5135 
5136  if (relation->rd_pubactions)
5137  return memcpy(pubactions, relation->rd_pubactions,
5138  sizeof(PublicationActions));
5139 
5140  /* Fetch the publication membership info. */
5141  puboids = GetRelationPublications(RelationGetRelid(relation));
5142  puboids = list_concat_unique_oid(puboids, GetAllTablesPublications());
5143 
5144  foreach(lc, puboids)
5145  {
5146  Oid pubid = lfirst_oid(lc);
5147  HeapTuple tup;
5148  Form_pg_publication pubform;
5149 
5151 
5152  if (!HeapTupleIsValid(tup))
5153  elog(ERROR, "cache lookup failed for publication %u", pubid);
5154 
5155  pubform = (Form_pg_publication) GETSTRUCT(tup);
5156 
5157  pubactions->pubinsert |= pubform->pubinsert;
5158  pubactions->pubupdate |= pubform->pubupdate;
5159  pubactions->pubdelete |= pubform->pubdelete;
5160  pubactions->pubtruncate |= pubform->pubtruncate;
5161 
5162  ReleaseSysCache(tup);
5163 
5164  /*
5165  * If we know everything is replicated, there is no point to check for
5166  * other publications.
5167  */
5168  if (pubactions->pubinsert && pubactions->pubupdate &&
5169  pubactions->pubdelete && pubactions->pubtruncate)
5170  break;
5171  }
5172 
5173  if (relation->rd_pubactions)
5174  {
5175  pfree(relation->rd_pubactions);
5176  relation->rd_pubactions = NULL;
5177  }
5178 
5179  /* Now save copy of the actions in the relcache entry. */
5181  relation->rd_pubactions = palloc(sizeof(PublicationActions));
5182  memcpy(relation->rd_pubactions, pubactions, sizeof(PublicationActions));
5183  MemoryContextSwitchTo(oldcxt);
5184 
5185  return pubactions;
5186 }
5187 
/*
 * Routines to support ereport() reports of relation-related errors
 *
 * These could have been put into elog.c, but it seems like a module layering
 * violation to have elog.c calling relcache or syscache stuff --- and we
 * definitely don't want elog.h including rel.h.  So we put them here.
 */
5195 
5196 /*
5197  * errtable --- stores schema_name and table_name of a table
5198  * within the current errordata.
5199  */
5200 int
5202 {
5206 
5207  return 0; /* return value does not matter */
5208 }
5209 
5210 /*
5211  * errtablecol --- stores schema_name, table_name and column_name
5212  * of a table column within the current errordata.
5213  *
5214  * The column is specified by attribute number --- for most callers, this is
5215  * easier and less error-prone than getting the column name for themselves.
5216  */
5217 int
5219 {
5221  const char *colname;
5222 
5223  /* Use reldesc if it's a user attribute, else consult the catalogs */
5224  if (attnum > 0 && attnum <= reldesc->natts)
5225  colname = NameStr(TupleDescAttr(reldesc, attnum - 1)->attname);
5226  else
5227  colname = get_attname(RelationGetRelid(rel), attnum, false);
5228 
5229  return errtablecolname(rel, colname);
5230 }
5231 
5232 /*
5233  * errtablecolname --- stores schema_name, table_name and column_name
5234  * of a table column within the current errordata, where the column name is
5235  * given directly rather than extracted from the relation's catalog data.
5236  *
5237  * Don't use this directly unless errtablecol() is inconvenient for some
5238  * reason. This might possibly be needed during intermediate states in ALTER
5239  * TABLE, for instance.
5240  */
5241 int
5242 errtablecolname(Relation rel, const char *colname)
5243 {
5244  errtable(rel);
5246 
5247  return 0; /* return value does not matter */
5248 }
5249 
5250 /*
5251  * errtableconstraint --- stores schema_name, table_name and constraint_name
5252  * of a table-related constraint within the current errordata.
5253  */
5254 int
5255 errtableconstraint(Relation rel, const char *conname)
5256 {
5257  errtable(rel);
5259 
5260  return 0; /* return value does not matter */
5261 }
5262 
5263 
/*
 *	load_relcache_init_file, write_relcache_init_file
 *
 *		In late 1992, we started regularly having databases with more than
 *		a thousand classes in them.  With this number of classes, it became
 *		critical to do indexed lookups on the system catalogs.
 *
 *		Bootstrapping these lookups is very hard.  We want to be able to
 *		use an index on pg_attribute, for example, but in order to do so,
 *		we must have read pg_attribute for the attributes in the index,
 *		which implies that we need to use the index.
 *
 *		In order to get around the problem, we do the following:
 *
 *		   +  When the database system is initialized (at initdb time), we
 *			  don't use indexes.  We do sequential scans.
 *
 *		   +  When the backend is started up in normal mode, we load an image
 *			  of the appropriate relation descriptors, in internal format,
 *			  from an initialization file in the data/base/... directory.
 *
 *		   +  If the initialization file isn't there, then we create the
 *			  relation descriptors using sequential scans and write 'em to
 *			  the initialization file for use by subsequent backends.
 *
 *		As of Postgres 9.0, there is one local initialization file in each
 *		database, plus one shared initialization file for shared catalogs.
 *
 *		We could dispense with the initialization files and just build the
 *		critical reldescs the hard way on every backend startup, but that
 *		slows down backend startup noticeably.
 *
 *		We can in fact go further, and save more relcache entries than
 *		just the ones that are absolutely critical; this allows us to speed
 *		up backend startup by not having to build such entries the hard way.
 *		Presently, all the catalog and index entries that are referred to
 *		by catcaches are stored in the initialization files.
 *
 *		The same mechanism that detects when catcache and relcache entries
 *		need to be invalidated (due to catalog updates) also arranges to
 *		unlink the initialization files when the contents may be out of date.
 *		The files will then be rebuilt during the next backend startup.
 */
5307 
5308 /*
5309  * load_relcache_init_file -- attempt to load cache from the shared
5310  * or local cache init file
5311  *
5312  * If successful, return true and set criticalRelcachesBuilt or
5313  * criticalSharedRelcachesBuilt to true.
5314  * If not successful, return false.
5315  *
5316  * NOTE: we assume we are already switched into CacheMemoryContext.
5317  */
5318 static bool
5320 {
5321  FILE *fp;
5322  char initfilename[MAXPGPATH];
5323  Relation *rels;
5324  int relno,
5325  num_rels,
5326  max_rels,
5327  nailed_rels,
5328  nailed_indexes,
5329  magic;
5330  int i;
5331 
5332  if (shared)
5333  snprintf(initfilename, sizeof(initfilename), "global/%s",
5335  else
5336  snprintf(initfilename, sizeof(initfilename), "%s/%s",
5338 
5339  fp = AllocateFile(initfilename, PG_BINARY_R);
5340  if (fp == NULL)
5341  return false;
5342 
5343  /*
5344  * Read the index relcache entries from the file. Note we will not enter
5345  * any of them into the cache if the read fails partway through; this
5346  * helps to guard against broken init files.
5347  */
5348  max_rels = 100;
5349  rels = (Relation *) palloc(max_rels * sizeof(Relation));
5350  num_rels = 0;
5351  nailed_rels = nailed_indexes = 0;
5352 
5353  /* check for correct magic number (compatible version) */
5354  if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
5355  goto read_failed;
5356  if (magic != RELCACHE_INIT_FILEMAGIC)
5357  goto read_failed;
5358 
5359  for (relno = 0;; relno++)
5360  {
5361  Size len;
5362  size_t nread;
5363  Relation rel;
5364  Form_pg_class relform;
5365  bool has_not_null;
5366 
5367  /* first read the relation descriptor length */
5368  nread = fread(&len, 1, sizeof(len), fp);
5369  if (nread != sizeof(len))
5370  {
5371  if (nread == 0)
5372  break; /* end of file */
5373  goto read_failed;
5374  }
5375 
5376  /* safety check for incompatible relcache layout */
5377  if (len != sizeof(RelationData))
5378  goto read_failed;
5379 
5380  /* allocate another relcache header */
5381  if (num_rels >= max_rels)
5382  {
5383  max_rels *= 2;
5384  rels = (Relation *) repalloc(rels, max_rels * sizeof(Relation));
5385  }
5386 
5387  rel = rels[num_rels++] = (Relation) palloc(len);
5388 
5389  /* then, read the Relation structure */
5390  if (fread(rel, 1, len, fp) != len)
5391  goto read_failed;
5392 
5393  /* next read the relation tuple form */
5394  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5395  goto read_failed;
5396 
5397  relform = (Form_pg_class) palloc(len);
5398  if (fread(relform, 1, len, fp) != len)
5399  goto read_failed;
5400 
5401  rel->rd_rel = relform;
5402 
5403  /* initialize attribute tuple forms */
5404  rel->rd_att = CreateTemplateTupleDesc(relform->relnatts);
5405  rel->rd_att->tdrefcount = 1; /* mark as refcounted */
5406 
5407  rel->rd_att->tdtypeid = relform->reltype;
5408  rel->rd_att->tdtypmod = -1; /* unnecessary, but... */
5409 
5410  /* next read all the attribute tuple form data entries */
5411  has_not_null = false;
5412  for (i = 0; i < relform->relnatts; i++)
5413  {
5414  Form_pg_attribute attr = TupleDescAttr(rel->rd_att, i);
5415 
5416  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5417  goto read_failed;
5418  if (len != ATTRIBUTE_FIXED_PART_SIZE)
5419  goto read_failed;
5420  if (fread(attr, 1, len, fp) != len)
5421  goto read_failed;
5422 
5423  has_not_null |= attr->attnotnull;
5424  }
5425 
5426  /* next read the access method specific field */
5427  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5428  goto read_failed;
5429  if (len > 0)
5430  {
5431  rel->rd_options = palloc(len);
5432  if (fread(rel->rd_options, 1, len, fp) != len)
5433  goto read_failed;
5434  if (len != VARSIZE(rel->rd_options))
5435  goto read_failed; /* sanity check */
5436  }
5437  else
5438  {
5439  rel->rd_options = NULL;
5440  }
5441 
5442  /* mark not-null status */
5443  if (has_not_null)
5444  {
5445  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
5446 
5447  constr->has_not_null = true;
5448  rel->rd_att->constr = constr;
5449  }
5450 
5451  /*
5452  * If it's an index, there's more to do. Note we explicitly ignore
5453  * partitioned indexes here.
5454  */
5455  if (rel->rd_rel->relkind == RELKIND_INDEX)
5456  {
5457  MemoryContext indexcxt;
5458  Oid *opfamily;
5459  Oid *opcintype;
5460  RegProcedure *support;
5461  int nsupport;
5462  int16 *indoption;
5463  Oid *indcollation;
5464 
5465  /* Count nailed indexes to ensure we have 'em all */
5466  if (rel->rd_isnailed)
5467  nailed_indexes++;
5468 
5469  /* next, read the pg_index tuple */
5470  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5471  goto read_failed;
5472 
5473  rel->rd_indextuple = (HeapTuple) palloc(len);
5474  if (fread(rel->rd_indextuple, 1, len, fp) != len)
5475  goto read_failed;
5476 
5477  /* Fix up internal pointers in the tuple -- see heap_copytuple */
5478  rel->rd_indextuple->t_data = (HeapTupleHeader) ((char *) rel->rd_indextuple + HEAPTUPLESIZE);
5480 
5481  /*
5482  * prepare index info context --- parameters should match
5483  * RelationInitIndexAccessInfo
5484  */
5486  "index info",
5488  rel->rd_indexcxt = indexcxt;
5491 
5492  /*
5493  * Now we can fetch the index AM's API struct. (We can't store
5494  * that in the init file, since it contains function pointers that
5495  * might vary across server executions. Fortunately, it should be
5496  * safe to call the amhandler even while bootstrapping indexes.)
5497  */
5498  InitIndexAmRoutine(rel);
5499 
5500  /* next, read the vector of opfamily OIDs */
5501  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5502  goto read_failed;
5503 
5504  opfamily = (Oid *) MemoryContextAlloc(indexcxt, len);
5505  if (fread(opfamily, 1, len, fp) != len)
5506  goto read_failed;
5507 
5508  rel->rd_opfamily = opfamily;
5509 
5510  /* next, read the vector of opcintype OIDs */
5511  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5512  goto read_failed;
5513 
5514  opcintype = (Oid *) MemoryContextAlloc(indexcxt, len);
5515  if (fread(opcintype, 1, len, fp) != len)
5516  goto read_failed;
5517 
5518  rel->rd_opcintype = opcintype;
5519 
5520  /* next, read the vector of support procedure OIDs */
5521  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5522  goto read_failed;
5523  support = (RegProcedure *) MemoryContextAlloc(indexcxt, len);
5524  if (fread(support, 1, len, fp) != len)
5525  goto read_failed;
5526 
5527  rel->rd_support = support;
5528 
5529  /* next, read the vector of collation OIDs */
5530  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5531  goto read_failed;
5532 
5533  indcollation = (Oid *) MemoryContextAlloc(indexcxt, len);
5534  if (fread(indcollation, 1, len, fp) != len)
5535  goto read_failed;
5536 
5537  rel->rd_indcollation = indcollation;
5538 
5539  /* finally, read the vector of indoption values */
5540  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5541  goto read_failed;
5542 
5543  indoption = (int16 *) MemoryContextAlloc(indexcxt, len);
5544  if (fread(indoption, 1, len, fp) != len)
5545  goto read_failed;
5546 
5547  rel->rd_indoption = indoption;
5548 
5549  /* set up zeroed fmgr-info vector */
5550  nsupport = relform->relnatts * rel->rd_indam->amsupport;
5551  rel->rd_supportinfo = (FmgrInfo *)
5552  MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
5553  }
5554  else
5555  {
5556  /* Count nailed rels to ensure we have 'em all */
5557  if (rel->rd_isnailed)
5558  nailed_rels++;
5559 
5560  /* Load table AM data */
5561  if (rel->rd_rel->relkind == RELKIND_RELATION ||
5562  rel->rd_rel->relkind == RELKIND_SEQUENCE ||
5563  rel->rd_rel->relkind == RELKIND_TOASTVALUE ||
5564  rel->rd_rel->relkind == RELKIND_MATVIEW)
5566 
5567  Assert(rel->rd_index == NULL);
5568  Assert(rel->rd_indextuple == NULL);
5569  Assert(rel->rd_indexcxt == NULL);
5570  Assert(rel->rd_indam == NULL);
5571  Assert(rel->rd_opfamily == NULL);
5572  Assert(rel->rd_opcintype == NULL);
5573  Assert(rel->rd_support == NULL);
5574  Assert(rel->rd_supportinfo == NULL);
5575  Assert(rel->rd_indoption == NULL);
5576  Assert(rel->rd_indcollation == NULL);
5577  }
5578 
5579  /*
5580  * Rules and triggers are not saved (mainly because the internal
5581  * format is complex and subject to change). They must be rebuilt if
5582  * needed by RelationCacheInitializePhase3. This is not expected to
5583  * be a big performance hit since few system catalogs have such. Ditto
5584  * for RLS policy data, partition info, index expressions, predicates,
5585  * exclusion info, and FDW info.
5586  */
5587  rel->rd_rules = NULL;
5588  rel->rd_rulescxt = NULL;
5589  rel->trigdesc = NULL;
5590  rel->rd_rsdesc = NULL;
5591  rel->rd_partkey = NULL;
5592  rel->rd_partkeycxt = NULL;
5593  rel->rd_partdesc = NULL;
5594  rel->rd_pdcxt = NULL;
5595  rel->rd_partcheck = NIL;
5596  rel->rd_partcheckvalid = false;
5597  rel->rd_partcheckcxt = NULL;
5598  rel->rd_indexprs = NIL;
5599  rel->rd_indpred = NIL;
5600  rel->rd_exclops = NULL;
5601  rel->rd_exclprocs = NULL;
5602  rel->rd_exclstrats = NULL;
5603  rel->rd_fdwroutine = NULL;
5604 
5605  /*
5606  * Reset transient-state fields in the relcache entry
5607  */
5608  rel->rd_smgr = NULL;
5609  if (rel->rd_isnailed)
5610  rel->rd_refcnt = 1;
5611  else
5612  rel->rd_refcnt = 0;
5613  rel->rd_indexvalid = false;
5614  rel->rd_indexlist = NIL;
5615  rel->rd_pkindex = InvalidOid;
5616  rel->rd_replidindex = InvalidOid;
5617  rel->rd_indexattr = NULL;
5618  rel->rd_keyattr = NULL;
5619  rel->rd_pkattr = NULL;
5620  rel->rd_idattr = NULL;
5621  rel->rd_pubactions = NULL;
5622  rel->rd_statvalid = false;
5623  rel->rd_statlist = NIL;
5624  rel->rd_fkeyvalid = false;
5625  rel->rd_fkeylist = NIL;
5628  rel->rd_amcache = NULL;
5629  MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
5630 
5631  /*
5632  * Recompute lock and physical addressing info. This is needed in
5633  * case the pg_internal.init file was copied from some other database
5634  * by CREATE DATABASE.
5635  */
5636  RelationInitLockInfo(rel);
5638  }
5639 
5640  /*
5641  * We reached the end of the init file without apparent problem. Did we
5642  * get the right number of nailed items? This is a useful crosscheck in
5643  * case the set of critical rels or indexes changes. However, that should
5644  * not happen in a normally-running system, so let's bleat if it does.
5645  *
5646  * For the shared init file, we're called before client authentication is
5647  * done, which means that elog(WARNING) will go only to the postmaster
5648  * log, where it's easily missed. To ensure that developers notice bad
5649  * values of NUM_CRITICAL_SHARED_RELS/NUM_CRITICAL_SHARED_INDEXES, we put
5650  * an Assert(false) there.
5651  */
5652  if (shared)
5653  {
5654  if (nailed_rels != NUM_CRITICAL_SHARED_RELS ||
5655  nailed_indexes != NUM_CRITICAL_SHARED_INDEXES)
5656  {
5657  elog(WARNING, "found %d nailed shared rels and %d nailed shared indexes in init file, but expected %d and %d respectively",
5658  nailed_rels, nailed_indexes,
5660  /* Make sure we get developers' attention about this */
5661  Assert(false);
5662  /* In production builds, recover by bootstrapping the relcache */
5663  goto read_failed;
5664  }
5665  }
5666  else
5667  {
5668  if (nailed_rels != NUM_CRITICAL_LOCAL_RELS ||
5669  nailed_indexes != NUM_CRITICAL_LOCAL_INDEXES)
5670  {
5671  elog(WARNING, "found %d nailed rels and %d nailed indexes in init file, but expected %d and %d respectively",
5672  nailed_rels, nailed_indexes,
5674  /* We don't need an Assert() in this case */
5675  goto read_failed;
5676  }
5677  }
5678 
5679  /*
5680  * OK, all appears well.
5681  *
5682  * Now insert all the new relcache entries into the cache.
5683  */
5684  for (relno = 0; relno < num_rels; relno++)
5685  {
5686  RelationCacheInsert(rels[relno], false);
5687  }
5688 
5689  pfree(rels);
5690  FreeFile(fp);
5691 
5692  if (shared)
5694  else
5695  criticalRelcachesBuilt = true;
5696  return true;
5697 
5698  /*
5699  * init file is broken, so do it the hard way. We don't bother trying to
5700  * free the clutter we just allocated; it's not in the relcache so it
5701  * won't hurt.
5702  */
5703 read_failed:
5704  pfree(rels);
5705  FreeFile(fp);
5706 
5707  return false;
5708 }
5709 
5710 /*
5711  * Write out a new initialization file with the current contents
5712  * of the relcache (either shared rels or local rels, as indicated).
 *
 * The shared flag tested in the body selects the variant.  When it is true
 * the temp and final file names are built under global/ and every cache
 * entry whose relisshared is true is written.  When it is false the names
 * are built from a directory-prefixed template and only non-shared entries
 * accepted by RelationIdIsInInitFile are written.
 *
 * Layout produced, as far as this listing shows: the raw int magic number
 * RELCACHE_INIT_FILEMAGIC, then for every selected relation a run of
 * write_item chunks: the RelationData struct, CLASS_TUPLE_SIZE bytes of the
 * pg_class form, one chunk per attribute, rd_options (a zero-length chunk
 * when rd_options is NULL), and for RELKIND_INDEX relations the
 * index-specific arrays.
 *
 * Failure handling: being unable to create the temp file only raises a
 * WARNING and returns; a short write or a nonzero FreeFile result is
 * reported with elog(FATAL); a failed rename just removes the temp file.
 *
 * NOTE(review): several lines of this function are absent from the listing
 * (the name and parameter line after static void, the declaration of
 * status, the trailing arguments of the snprintf calls, part of the ereport
 * call, pieces of three write_item calls, and the statement after the SI
 * message comment).  Each spot is marked below; restore them from the
 * canonical file before building.
5713  */
5714 static void
/*
 * NOTE(review): the line carrying the function name and parameter list is
 * missing here.  A later comment in this file refers to this routine as
 * write_relcache_init_file, and the body reads a flag named shared -- confirm.
 */
5716 {
5717  FILE *fp;
5718  char tempfilename[MAXPGPATH];
5719  char finalfilename[MAXPGPATH];
5720  int magic;
 /*
  * NOTE(review): status, which is passed to hash_seq_init and
  * hash_seq_search below, has no visible declaration; a line seems to be
  * missing at this point.
  */
5722  RelIdCacheEnt *idhentry;
5723  int i;
5724 
5725  /*
5726  * If we have already received any relcache inval events, there's no
5727  * chance of succeeding so we may as well skip the whole thing.
5728  */
5729  if (relcacheInvalsReceived != 0L)
5730  return;
5731 
5732  /*
5733  * We must write a temporary file and rename it into place. Otherwise,
5734  * another backend starting at about the same time might crash trying to
5735  * read the partially-complete file.
5736  */
 /*
  * NOTE(review): each of the four snprintf calls below is cut off after its
  * format string; the value arguments and the closing parenthesis are
  * missing from the listing.
  */
5737  if (shared)
5738  {
5739  snprintf(tempfilename, sizeof(tempfilename), "global/%s.%d",
5741  snprintf(finalfilename, sizeof(finalfilename), "global/%s",
5743  }
5744  else
5745  {
5746  snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d",
5748  snprintf(finalfilename, sizeof(finalfilename), "%s/%s",
5750  }
5751 
5752  unlink(tempfilename); /* in case it exists w/wrong permissions */
5753 
5754  fp = AllocateFile(tempfilename, PG_BINARY_W);
5755  if (fp == NULL)
5756  {
5757  /*
5758  * We used to consider this a fatal error, but we might as well
5759  * continue with backend startup ...
5760  */
 /*
  * NOTE(review): a line between the ereport opener and errmsg appears to
  * be missing; the parentheses below do not balance as listed.
  */
5761  ereport(WARNING,
5763  errmsg("could not create relation-cache initialization file \"%s\": %m",
5764  tempfilename),
5765  errdetail("Continuing anyway, but there's something wrong.")));
5766  return;
5767  }
5768 
5769  /*
5770  * Write a magic number to serve as a file version identifier. We can
5771  * change the magic number whenever the relcache layout changes.
5772  */
5773  magic = RELCACHE_INIT_FILEMAGIC;
5774  if (fwrite(&magic, 1, sizeof(magic), fp) != sizeof(magic))
5775  elog(FATAL, "could not write init file");
5776 
5777  /*
5778  * Write all the appropriate reldescs (in no particular order).
5779  */
5780  hash_seq_init(&status, RelationIdCache);
5781 
5782  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
5783  {
5784  Relation rel = idhentry->reldesc;
5785  Form_pg_class relform = rel->rd_rel;
5786 
5787  /* ignore if not correct group */
5788  if (relform->relisshared != shared)
5789  continue;
5790 
5791  /*
5792  * Ignore if not supposed to be in init file. We can allow any shared
5793  * relation that's been loaded so far to be in the shared init file,
5794  * but unshared relations must be ones that should be in the local
5795  * file per RelationIdIsInInitFile. (Note: if you want to change the
5796  * criterion for rels to be kept in the init file, see also inval.c.
5797  * The reason for filtering here is to be sure that we don't put
5798  * anything into the local init file for which a relcache inval would
5799  * not cause invalidation of that init file.)
5800  */
5801  if (!shared && !RelationIdIsInInitFile(RelationGetRelid(rel)))
5802  {
5803  /* Nailed rels had better get stored. */
5804  Assert(!rel->rd_isnailed);
5805  continue;
5806  }
5807 
5808  /* first write the relcache entry proper */
5809  write_item(rel, sizeof(RelationData), fp);
5810 
5811  /* next write the relation tuple form */
5812  write_item(relform, CLASS_TUPLE_SIZE, fp);
5813 
5814  /* next, do all the attribute tuple form data entries */
5815  for (i = 0; i < relform->relnatts; i++)
5816  {
 /*
  * NOTE(review): this write_item call is cut off after its first
  * argument; the length and stream arguments are missing.
  */
5817  write_item(TupleDescAttr(rel->rd_att, i),
5819  }
5820 
5821  /* next, do the access method specific field */
 /* A NULL rd_options is written as a zero-length item. */
5822  write_item(rel->rd_options,
5823  (rel->rd_options ? VARSIZE(rel->rd_options) : 0),
5824  fp);
5825 
5826  /*
5827  * If it's an index, there's more to do. Note we explicitly ignore
5828  * partitioned indexes here.
5829  */
5830  if (rel->rd_rel->relkind == RELKIND_INDEX)
5831  {
5832  /* write the pg_index tuple */
5833  /* we assume this was created by heap_copytuple! */
 /*
  * NOTE(review): the opening lines of this write_item call (pointer and
  * length arguments) are missing; only the final stream argument survives.
  */
5836  fp);
5837 
5838  /* next, write the vector of opfamily OIDs */
5839  write_item(rel->rd_opfamily,
5840  relform->relnatts * sizeof(Oid),
5841  fp);
5842 
5843  /* next, write the vector of opcintype OIDs */
5844  write_item(rel->rd_opcintype,
5845  relform->relnatts * sizeof(Oid),
5846  fp);
5847 
5848  /* next, write the vector of support procedure OIDs */
5849  write_item(rel->rd_support,
5850  relform->relnatts * (rel->rd_indam->amsupport * sizeof(RegProcedure)),
5851  fp);
5852 
5853  /* next, write the vector of collation OIDs */
 /*
  * NOTE(review): the first line of this write_item call, naming the
  * array that is written, is missing.
  */
5855  relform->relnatts * sizeof(Oid),
5856  fp);
5857 
5858  /* finally, write the vector of indoption values */
5859  write_item(rel->rd_indoption,
5860  relform->relnatts * sizeof(int16),
5861  fp);
5862  }
5863  }
5864 
 /* A nonzero result from FreeFile is treated as a failed write. */
5865  if (FreeFile(fp))
5866  elog(FATAL, "could not write init file");
5867 
5868  /*
5869  * Now we have to check whether the data we've so painstakingly
5870  * accumulated is already obsolete due to someone else's just-committed
5871  * catalog changes. If so, we just delete the temp file and leave it to
5872  * the next backend to try again. (Our own relcache entries will be
5873  * updated by SI message processing, but we can't be sure whether what we
5874  * wrote out was up-to-date.)
5875  *
5876  * This mustn't run concurrently with the code that unlinks an init file
5877  * and sends SI messages, so grab a serialization lock for the duration.
5878  */
5879  LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
5880 
5881  /* Make sure we have seen all incoming SI messages */
 /*
  * NOTE(review): no statement follows the comment above in this listing;
  * the call that absorbs pending SI messages appears to be missing.
  */
5883 
5884  /*
5885  * If we have received any SI relcache invals since backend start, assume
5886  * we may have written out-of-date data.
5887  */
5888  if (relcacheInvalsReceived == 0L)
5889  {
5890  /*
5891  * OK, rename the temp file to its final name, deleting any
5892  * previously-existing init file.
5893  *
5894  * Note: a failure here is possible under Cygwin, if some other
5895  * backend is holding open an unlinked-but-not-yet-gone init file. So
5896  * treat this as a noncritical failure; just remove the useless temp
5897  * file on failure.
5898  */
5899  if (rename(tempfilename, finalfilename) < 0)
5900  unlink(tempfilename);
5901  }
5902  else
5903  {
5904  /* Delete the already-obsolete temp file */
5905  unlink(tempfilename);
5906  }
5907 
5908  LWLockRelease(RelCacheInitLock);
5909 }
5910 
5911 /* write a chunk of data preceded by its length */
5912 static void
5913 write_item(const void *data, Size len, FILE *fp)
5914 {
5915  if (fwrite(&len, 1, sizeof(len), fp) != sizeof(len))
5916  elog(FATAL, "could not write init file");
5917  if (fwrite(data, 1, len, fp) != len)
5918  elog(FATAL, "could not write init file");
5919 }
5920 
5921 /*
5922  * Determine whether a given relation (identified by OID) is one of the ones
5923  * we should store in a relcache init file.
5924  *
5925  * We must cache all nailed rels, and for efficiency we should cache every rel
5926  * that supports a syscache. The former set is almost but not quite a subset
5927  * of the latter. The special cases are relations where
5928  * RelationCacheInitializePhase2/3 chooses to nail for efficiency reasons, but
5929  * which do not support any syscache.
5930  */
5931 bool
5933 {
5934  if (relationId == SharedSecLabelRelationId ||
5935  relationId == TriggerRelidNameIndexId ||
5936  relationId == DatabaseNameIndexId ||
5937  relationId == SharedSecLabelObjectIndexId)
5938  {
5939  /*
5940  * If this Assert fails, we don't need the applicable special case
5941  * anymore.
5942  */
5943  Assert(!RelationSupportsSysCache(relationId));
5944  return true;
5945  }
5946  return RelationSupportsSysCache(relationId);
5947 }
5948 
5949 /*
5950  * Invalidate (remove) the init file during commit of a transaction that
5951  * changed one or more of the relation cache entries that are kept in the
5952  * local init file.
5953  *
5954  * To be safe against concurrent inspection or rewriting of the init file,
5955  * we must take RelCacheInitLock, then remove the old init file, then send
5956  * the SI messages that include relcache inval for such relations, and then
5957  * release RelCacheInitLock. This serializes the whole affair against
5958  * write_relcache_init_file, so that we can be sure that any other process
5959  * that's concurrently trying to create a new init file won't move an
5960  *