PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
relcache.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * relcache.c
4  * POSTGRES relation descriptor cache code
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/cache/relcache.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  * RelationCacheInitialize - initialize relcache (to empty)
18  * RelationCacheInitializePhase2 - initialize shared-catalog entries
19  * RelationCacheInitializePhase3 - finish initializing relcache
20  * RelationIdGetRelation - get a reldesc by relation id
21  * RelationClose - close an open relation
22  *
23  * NOTES
24  * The following code contains many undocumented hacks. Please be
25  * careful....
26  */
27 #include "postgres.h"
28 
29 #include <sys/file.h>
30 #include <fcntl.h>
31 #include <unistd.h>
32 
33 #include "access/htup_details.h"
34 #include "access/multixact.h"
35 #include "access/nbtree.h"
36 #include "access/reloptions.h"
37 #include "access/sysattr.h"
38 #include "access/xact.h"
39 #include "access/xlog.h"
40 #include "catalog/catalog.h"
41 #include "catalog/index.h"
42 #include "catalog/indexing.h"
43 #include "catalog/namespace.h"
44 #include "catalog/partition.h"
45 #include "catalog/pg_am.h"
46 #include "catalog/pg_amproc.h"
47 #include "catalog/pg_attrdef.h"
48 #include "catalog/pg_authid.h"
50 #include "catalog/pg_constraint.h"
51 #include "catalog/pg_database.h"
52 #include "catalog/pg_namespace.h"
53 #include "catalog/pg_opclass.h"
55 #include "catalog/pg_proc.h"
56 #include "catalog/pg_publication.h"
57 #include "catalog/pg_rewrite.h"
58 #include "catalog/pg_shseclabel.h"
61 #include "catalog/pg_tablespace.h"
62 #include "catalog/pg_trigger.h"
63 #include "catalog/pg_type.h"
64 #include "catalog/schemapg.h"
65 #include "catalog/storage.h"
66 #include "commands/policy.h"
67 #include "commands/trigger.h"
68 #include "miscadmin.h"
69 #include "nodes/nodeFuncs.h"
70 #include "optimizer/clauses.h"
71 #include "optimizer/prep.h"
72 #include "optimizer/var.h"
73 #include "rewrite/rewriteDefine.h"
74 #include "rewrite/rowsecurity.h"
75 #include "storage/lmgr.h"
76 #include "storage/smgr.h"
77 #include "utils/array.h"
78 #include "utils/builtins.h"
79 #include "utils/fmgroids.h"
80 #include "utils/inval.h"
81 #include "utils/lsyscache.h"
82 #include "utils/memutils.h"
83 #include "utils/relmapper.h"
84 #include "utils/resowner_private.h"
85 #include "utils/snapmgr.h"
86 #include "utils/syscache.h"
87 #include "utils/tqual.h"
88 
89 
90 /*
91  * name of relcache init file(s), used to speed up backend startup
92  */
93 #define RELCACHE_INIT_FILENAME "pg_internal.init"
94 
95 #define RELCACHE_INIT_FILEMAGIC 0x573266 /* version ID value */
96 
97 /*
98  * hardcoded tuple descriptors, contents generated by genbki.pl
    *
    * Each array below holds Natts_<catalog> FormData_pg_attribute entries and
    * is initialized from the matching Schema_<catalog> macro.
99  */
100 static const FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
101 static const FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
102 static const FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
103 static const FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
104 static const FormData_pg_attribute Desc_pg_database[Natts_pg_database] = {Schema_pg_database};
105 static const FormData_pg_attribute Desc_pg_authid[Natts_pg_authid] = {Schema_pg_authid};
106 static const FormData_pg_attribute Desc_pg_auth_members[Natts_pg_auth_members] = {Schema_pg_auth_members};
107 static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
108 static const FormData_pg_attribute Desc_pg_shseclabel[Natts_pg_shseclabel] = {Schema_pg_shseclabel};
109 static const FormData_pg_attribute Desc_pg_subscription[Natts_pg_subscription] = {Schema_pg_subscription};
110 
111 /*
112  * Hash tables that index the relation cache
113  *
114  * We used to index the cache by both name and OID, but now there
115  * is only an index by OID.
     *
     * NOTE(review): the member declarations of this struct are not visible in
     * this listing. The hashtable macros further down read and write a
     * "reldesc" member of each entry and use a relation's rd_id as the
     * lookup key.
116  */
117 typedef struct relidcacheent
118 {
121 } RelIdCacheEnt;
122 
124 
125 /*
126  * This flag is false until we have prepared the critical relcache entries
127  * that are needed to do indexscans on the tables read by relcache building.
128  */
130 
131 /*
132  * This flag is false until we have prepared the critical relcache entries
133  * for shared catalogs (which are the tables needed for login).
134  */
136 
137 /*
138  * This counter counts relcache inval events received since backend startup
139  * (but only for rels that are actually in cache). Presently, we use it only
140  * to detect whether data about to be written by write_relcache_init_file()
141  * might already be obsolete.
     *
     * It starts at zero; the code that increments it is not part of this
     * section of the file.
142  */
143 static long relcacheInvalsReceived = 0L;
144 
145 /*
146  * eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
147  * cleanup work. This list intentionally has limited size; if it overflows,
148  * we fall back to scanning the whole hashtable. There is no value in a very
149  * large list because (1) at some point, a hash_seq_search scan is faster than
150  * retail lookups, and (2) the value of this is to reduce EOXact work for
151  * short transactions, which can't have dirtied all that many tables anyway.
152  * EOXactListAdd() does not bother to prevent duplicate list entries, so the
153  * cleanup processing must be idempotent.
     *
     * NOTE(review): the declaration of the eoxact_list[] array itself is not
     * visible in this listing; only its length counter and overflow flag are.
154  */
155 #define MAX_EOXACT_LIST 32
157 static int eoxact_list_len = 0;
158 static bool eoxact_list_overflowed = false;
159 
    /*
     * EOXactListAdd(rel): append rel->rd_id to eoxact_list[] while fewer than
     * MAX_EOXACT_LIST entries are stored; once the list is full, only set
     * eoxact_list_overflowed instead of storing the OID.
     */
160 #define EOXactListAdd(rel) \
161  do { \
162  if (eoxact_list_len < MAX_EOXACT_LIST) \
163  eoxact_list[eoxact_list_len++] = (rel)->rd_id; \
164  else \
165  eoxact_list_overflowed = true; \
166  } while (0)
167 
168 /*
169  * EOXactTupleDescArray stores TupleDescs that (might) need AtEOXact
170  * cleanup work. The array expands as needed; there is no hashtable because
171  * we don't need to access individual items except at EOXact.
     *
     * NOTE(review): the declaration of the array pointer is not visible in
     * this listing. Judging by the names, the two counters below are
     * presumably the next free slot and the allocated length -- confirm
     * against the code that uses them.
172  */
174 static int NextEOXactTupleDescNum = 0;
175 static int EOXactTupleDescArrayLen = 0;
176 
177 /*
178  * macros to manipulate the lookup hashtable
179  */
    /*
     * RelationCacheInsert(RELATION, replace_allowed)
     *
     * Enter RELATION into RelationIdCache under its rd_id (HASH_ENTER).
     * If an entry for that OID already existed, replace_allowed is asserted,
     * the new descriptor is installed first, and only then is the old one
     * dealt with: it is destroyed when its reference count is zero,
     * otherwise a WARNING about the leaked entry is emitted (suppressed in
     * bootstrap processing mode).
     *
     * RELATION appears several times in the expansion, so pass an expression
     * without side effects.
     */
180 #define RelationCacheInsert(RELATION, replace_allowed) \
181 do { \
182  RelIdCacheEnt *hentry; bool found; \
183  hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
184  (void *) &((RELATION)->rd_id), \
185  HASH_ENTER, &found); \
186  if (found) \
187  { \
188  /* see comments in RelationBuildDesc and RelationBuildLocalRelation */ \
189  Relation _old_rel = hentry->reldesc; \
190  Assert(replace_allowed); \
191  hentry->reldesc = (RELATION); \
192  if (RelationHasReferenceCountZero(_old_rel)) \
193  RelationDestroyRelation(_old_rel, false); \
194  else if (!IsBootstrapProcessingMode()) \
195  elog(WARNING, "leaking still-referenced relcache entry for \"%s\"", \
196  RelationGetRelationName(_old_rel)); \
197  } \
198  else \
199  hentry->reldesc = (RELATION); \
200 } while(0)
201 
/*
 * RelationIdCacheLookup(ID, RELATION)
 *
 * Probe RelationIdCache for ID (HASH_FIND, so nothing is inserted) and
 * assign the result to RELATION: the entry's reldesc when found, NULL
 * otherwise. ID must be an lvalue because its address is passed as the
 * hash key; RELATION must be assignable.
 */
202 #define RelationIdCacheLookup(ID, RELATION) \
203 do { \
204  RelIdCacheEnt *hentry; \
205  hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
206  (void *) &(ID), \
207  HASH_FIND, NULL); \
208  if (hentry) \
209  RELATION = hentry->reldesc; \
210  else \
211  RELATION = NULL; \
212 } while(0)
213 
/*
 * RelationCacheDelete(RELATION)
 *
 * Remove the RelationIdCache entry keyed by RELATION's rd_id (HASH_REMOVE).
 * A missing entry is reported with a WARNING rather than an error. The
 * descriptor itself is not touched by this macro.
 */
214 #define RelationCacheDelete(RELATION) \
215 do { \
216  RelIdCacheEnt *hentry; \
217  hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
218  (void *) &((RELATION)->rd_id), \
219  HASH_REMOVE, NULL); \
220  if (hentry == NULL) \
221  elog(WARNING, "failed to delete relcache entry for OID %u", \
222  (RELATION)->rd_id); \
223 } while(0)
224 
225 
226 /*
227  * Special cache for opclass-related information
228  *
229  * Note: only default support procs get cached, ie, those with
230  * lefttype = righttype = opcintype.
     *
     * NOTE(review): the closing line of this typedef is not visible in this
     * listing; the prototypes further down refer to the type as
     * OpClassCacheEnt.
231  */
232 typedef struct opclasscacheent
233 {
234  Oid opclassoid; /* lookup key: OID of opclass */
235  bool valid; /* set TRUE after successful fill-in */
236  StrategyNumber numSupport; /* max # of support procs (from pg_am) */
237  Oid opcfamily; /* OID of opclass's family */
238  Oid opcintype; /* OID of opclass's declared input type */
239  RegProcedure *supportProcs; /* OIDs of support procedures */
241 
243 
244 
245 /* non-export function prototypes */
    /*
     * Everything declared here is static, i.e. private to this file.
     * NOTE(review): some prototype lines are absent from this listing (the
     * listing numbers skip), so this is not the complete set.
     */
246 
247 static void RelationDestroyRelation(Relation relation, bool remember_tupdesc);
248 static void RelationClearRelation(Relation relation, bool rebuild);
249 
250 static void RelationReloadIndexInfo(Relation relation);
251 static void RelationFlushRelation(Relation relation);
253 static void AtEOXact_cleanup(Relation relation, bool isCommit);
254 static void AtEOSubXact_cleanup(Relation relation, bool isCommit,
255  SubTransactionId mySubid, SubTransactionId parentSubid);
256 static bool load_relcache_init_file(bool shared);
257 static void write_relcache_init_file(bool shared);
258 static void write_item(const void *data, Size len, FILE *fp);
259 
260 static void formrdesc(const char *relationName, Oid relationReltype,
261  bool isshared, bool hasoids,
262  int natts, const FormData_pg_attribute *attrs);
263 
264 static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic);
266 static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
267 static void RelationBuildTupleDesc(Relation relation);
268 static void RelationBuildPartitionKey(Relation relation);
270 static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
271 static void RelationInitPhysicalAddr(Relation relation);
272 static void load_critical_index(Oid indexoid, Oid heapoid);
273 static TupleDesc GetPgClassDescriptor(void);
274 static TupleDesc GetPgIndexDescriptor(void);
275 static void AttrDefaultFetch(Relation relation);
276 static void CheckConstraintFetch(Relation relation);
277 static int CheckConstraintCmp(const void *a, const void *b);
278 static List *insert_ordered_oid(List *list, Oid datum);
279 static void InitIndexAmRoutine(Relation relation);
280 static void IndexSupportInitialize(oidvector *indclass,
281  RegProcedure *indexSupport,
282  Oid *opFamily,
283  Oid *opcInType,
284  StrategyNumber maxSupportNumber,
285  AttrNumber maxAttributeNumber);
286 static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
287  StrategyNumber numSupport);
288 static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
289 static void unlink_initfile(const char *initfilename);
290 static bool equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1,
291  PartitionDesc partdesc2);
292 
293 
294 /*
295  * ScanPgRelation
296  *
297  * This is used by RelationBuildDesc to find a pg_class
298  * tuple matching targetRelId. The caller must hold at least
299  * AccessShareLock on the target relid to prevent concurrent-update
300  * scenarios; it isn't guaranteed that all scans used to build the
301  * relcache entry will use the same snapshot. If, for example,
302  * an attribute were to be added after scanning pg_class and before
303  * scanning pg_attribute, relnatts wouldn't match.
304  *
     * targetRelId: OID the single scan key compares against (F_OIDEQ).
     * indexOK: pass false to force a heap scan; a heap scan is also forced
     * while criticalRelcachesBuilt is still false.
     * force_non_historic: chooses which snapshot the scan uses (see the
     * comment inside the function).
     *
     * Only the first tuple returned by the scan is considered. When it is
     * valid, a copy is returned; otherwise the invalid result is returned
     * as-is. pg_class is opened and closed with AccessShareLock here.
     *
305  * NB: the returned tuple has been copied into palloc'd storage
306  * and must eventually be freed with heap_freetuple.
307  */
308 static HeapTuple
309 ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic)
310 {
311  HeapTuple pg_class_tuple;
312  Relation pg_class_desc;
313  SysScanDesc pg_class_scan;
314  ScanKeyData key[1];
315  Snapshot snapshot;
316 
317  /*
318  * If something goes wrong during backend startup, we might find ourselves
319  * trying to read pg_class before we've selected a database. That ain't
320  * gonna work, so bail out with a useful error message. If this happens,
321  * it probably means a relcache entry that needs to be nailed isn't.
322  */
323  if (!OidIsValid(MyDatabaseId))
324  elog(FATAL, "cannot read pg_class without having selected a database");
325 
326  /*
327  * form a scan key
     *
     * NOTE(review): the argument that should sit between &key[0] and the
     * strategy number (the attribute being compared) is absent from this
     * listing.
328  */
329  ScanKeyInit(&key[0],
331  BTEqualStrategyNumber, F_OIDEQ,
332  ObjectIdGetDatum(targetRelId));
333 
334  /*
335  * Open pg_class and fetch a tuple. Force heap scan if we haven't yet
336  * built the critical relcache entries (this includes initdb and startup
337  * without a pg_internal.init file). The caller can also force a heap
338  * scan by setting indexOK == false.
339  */
340  pg_class_desc = heap_open(RelationRelationId, AccessShareLock);
341 
342  /*
343  * The caller might need a tuple that's newer than the one the historic
344  * snapshot would see; currently the only case that requires this is
345  * looking up the relfilenode of non-mapped system relations during decoding.
     *
     * NOTE(review): the statements that assign "snapshot" in each branch
     * of the if/else below are absent from this listing.
346  */
347  if (force_non_historic)
349  else
351 
352  pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
353  indexOK && criticalRelcachesBuilt,
354  snapshot,
355  1, key);
356 
357  pg_class_tuple = systable_getnext(pg_class_scan);
358 
359  /*
360  * Must copy tuple before releasing buffer.
361  */
362  if (HeapTupleIsValid(pg_class_tuple))
363  pg_class_tuple = heap_copytuple(pg_class_tuple);
364 
365  /* all done */
366  systable_endscan(pg_class_scan);
367  heap_close(pg_class_desc, AccessShareLock);
368 
369  return pg_class_tuple;
370 }
371 
372 /*
373  * AllocateRelationDesc
374  *
375  * This is used to allocate memory for a new relation descriptor
376  * and initialize the rd_rel field from the given pg_class tuple.
     *
     * The pg_class data is read through "relp"; CLASS_TUPLE_SIZE bytes of it
     * are copied into a fresh allocation that becomes rd_rel. rd_att is
     * created with CreateTemplateTupleDesc(relnatts, relhasoids) and its
     * tdrefcount is set to 1. All other fields are left zeroed by palloc0.
     * Returns the new descriptor.
     *
     * NOTE(review): the line carrying the function name and parameter list,
     * and the statement that sets oldcxt, are absent from this listing.
377  */
378 static Relation
380 {
381  Relation relation;
382  MemoryContext oldcxt;
383  Form_pg_class relationForm;
384 
385  /* Relcache entries must live in CacheMemoryContext */
387 
388  /*
389  * allocate and zero space for new relation descriptor
390  */
391  relation = (Relation) palloc0(sizeof(RelationData));
392 
393  /* make sure relation is marked as having no open file yet */
     /* (already zero after palloc0; the assignment just makes it explicit) */
394  relation->rd_smgr = NULL;
395 
396  /*
397  * Copy the relation tuple form
398  *
399  * We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The
400  * variable-length fields (relacl, reloptions) are NOT stored in the
401  * relcache --- there'd be little point in it, since we don't copy the
402  * tuple's nulls bitmap and hence wouldn't know if the values are valid.
403  * Bottom line is that relacl *cannot* be retrieved from the relcache. Get
404  * it from the syscache if you need it. The same goes for the original
405  * form of reloptions (however, we do store the parsed form of reloptions
406  * in rd_options).
407  */
408  relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
409 
410  memcpy(relationForm, relp, CLASS_TUPLE_SIZE);
411 
412  /* initialize relation tuple form */
413  relation->rd_rel = relationForm;
414 
415  /* and allocate attribute tuple form storage */
416  relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts,
417  relationForm->relhasoids);
418  /* which we mark as a reference-counted tupdesc */
419  relation->rd_att->tdrefcount = 1;
420 
421  MemoryContextSwitchTo(oldcxt);
422 
423  return relation;
424 }
425 
426 /*
427  * RelationParseRelOptions
428  * Convert pg_class.reloptions into pre-parsed rd_options
429  *
430  * tuple is the real pg_class tuple (not rd_rel!) for relation
431  *
432  * Note: rd_rel and (if an index) rd_amroutine must be valid already
     *
     * rd_options is reset to NULL first and stays NULL when the relkind is
     * not one of the cases in the switch below, or when extractRelOptions
     * returns NULL. Otherwise it receives a copy of the parsed options and
     * the original parse result is pfree'd.
     *
     * NOTE(review): several lines are absent from this listing: the line
     * with the function name and parameters, one line in the switch just
     * before the break, the middle argument of extractRelOptions, and the
     * start of the statement that allocates rd_options.
433  */
434 static void
436 {
437  bytea *options;
438 
439  relation->rd_options = NULL;
440 
441  /* Fall out if relkind should not have options */
442  switch (relation->rd_rel->relkind)
443  {
444  case RELKIND_RELATION:
445  case RELKIND_TOASTVALUE:
446  case RELKIND_INDEX:
447  case RELKIND_VIEW:
448  case RELKIND_MATVIEW:
450  break;
451  default:
452  return;
453  }
454 
455  /*
456  * Fetch reloptions from tuple; have to use a hardwired descriptor because
457  * we might not have any other for pg_class yet (consider executing this
458  * code for pg_class itself)
     *
     * The last argument is the index AM's amoptions callback for indexes
     * and NULL for every other relkind.
459  */
460  options = extractRelOptions(tuple,
462  relation->rd_rel->relkind == RELKIND_INDEX ?
463  relation->rd_amroutine->amoptions : NULL);
464 
465  /*
466  * Copy parsed data into CacheMemoryContext. To guard against the
467  * possibility of leaks in the reloptions code, we want to do the actual
468  * parsing in the caller's memory context and copy the results into
469  * CacheMemoryContext after the fact.
470  */
471  if (options)
472  {
474  VARSIZE(options));
475  memcpy(relation->rd_options, options, VARSIZE(options));
476  pfree(options);
477  }
478 }
479 
480 /*
481  * RelationBuildTupleDesc
482  *
483  * Form the relation's tuple descriptor from information in
484  * the pg_attribute, pg_attrdef & pg_constraint system catalogs.
     *
     * Outline of the body:
     *  - copy reltype/relhasoids from rd_rel into rd_att;
     *  - scan pg_attribute and copy each row into rd_att->attrs[attnum - 1],
     *    noting NOT NULL columns and columns that have defaults;
     *  - raise ERROR if an attribute number is out of range or if fewer than
     *    relnatts rows were found;
     *  - attach a TupleConstr (defaults via AttrDefaultFetch, checks via
     *    CheckConstraintFetch) only when there is something to record,
     *    otherwise free it and leave rd_att->constr NULL.
     *
     * NOTE(review): the line with the function name and parameters and a
     * number of call arguments / allocation calls are absent from this
     * listing (the listing numbers skip at those places).
485  */
486 static void
488 {
489  HeapTuple pg_attribute_tuple;
490  Relation pg_attribute_desc;
491  SysScanDesc pg_attribute_scan;
492  ScanKeyData skey[2];
493  int need;
494  TupleConstr *constr;
495  AttrDefault *attrdef = NULL;
496  int ndef = 0;
497 
498  /* copy some fields from pg_class row to rd_att */
499  relation->rd_att->tdtypeid = relation->rd_rel->reltype;
500  relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
501  relation->rd_att->tdhasoid = relation->rd_rel->relhasoids;
502 
504  sizeof(TupleConstr));
505  constr->has_not_null = false;
506 
507  /*
508  * Form a scan key that selects only user attributes (attnum > 0).
509  * (Eliminating system attribute rows at the index level is lots faster
510  * than fetching them.)
511  */
512  ScanKeyInit(&skey[0],
514  BTEqualStrategyNumber, F_OIDEQ,
516  ScanKeyInit(&skey[1],
518  BTGreaterStrategyNumber, F_INT2GT,
519  Int16GetDatum(0));
520 
521  /*
522  * Open pg_attribute and begin a scan. Force heap scan if we haven't yet
523  * built the critical relcache entries (this includes initdb and startup
524  * without a pg_internal.init file).
525  */
526  pg_attribute_desc = heap_open(AttributeRelationId, AccessShareLock);
527  pg_attribute_scan = systable_beginscan(pg_attribute_desc,
530  NULL,
531  2, skey);
532 
533  /*
534  * add attribute data to relation->rd_att
     *
     * "need" counts down from relnatts; the loop stops as soon as it
     * reaches zero, and a nonzero value afterwards means rows are missing.
535  */
536  need = relation->rd_rel->relnatts;
537 
538  while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
539  {
540  Form_pg_attribute attp;
541 
542  attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
543 
544  if (attp->attnum <= 0 ||
545  attp->attnum > relation->rd_rel->relnatts)
546  elog(ERROR, "invalid attribute number %d for %s",
547  attp->attnum, RelationGetRelationName(relation));
548 
549  memcpy(relation->rd_att->attrs[attp->attnum - 1],
550  attp,
552 
553  /* Update constraint/default info */
554  if (attp->attnotnull)
555  constr->has_not_null = true;
556 
557  if (attp->atthasdef)
558  {
     /*
      * The array is created on first use with room for relnatts entries;
      * it is trimmed to ndef entries further down. adbin is left NULL
      * here and filled in later.
      */
559  if (attrdef == NULL)
560  attrdef = (AttrDefault *)
562  relation->rd_rel->relnatts *
563  sizeof(AttrDefault));
564  attrdef[ndef].adnum = attp->attnum;
565  attrdef[ndef].adbin = NULL;
566  ndef++;
567  }
568  need--;
569  if (need == 0)
570  break;
571  }
572 
573  /*
574  * end the scan and close the attribute relation
575  */
576  systable_endscan(pg_attribute_scan);
577  heap_close(pg_attribute_desc, AccessShareLock);
578 
579  if (need != 0)
580  elog(ERROR, "catalog is missing %d attribute(s) for relid %u",
581  need, RelationGetRelid(relation));
582 
583  /*
584  * The attcacheoff values we read from pg_attribute should all be -1
585  * ("unknown"). Verify this if assert checking is on. They will be
586  * computed when and if needed during tuple access.
587  */
588 #ifdef USE_ASSERT_CHECKING
589  {
590  int i;
591 
592  for (i = 0; i < relation->rd_rel->relnatts; i++)
593  Assert(relation->rd_att->attrs[i]->attcacheoff == -1);
594  }
595 #endif
596 
597  /*
598  * However, we can easily set the attcacheoff value for the first
599  * attribute: it must be zero. This eliminates the need for special cases
600  * for attnum=1 that used to exist in fastgetattr() and index_getattr().
601  */
602  if (relation->rd_rel->relnatts > 0)
603  relation->rd_att->attrs[0]->attcacheoff = 0;
604 
605  /*
606  * Set up constraint/default info
607  */
608  if (constr->has_not_null || ndef > 0 || relation->rd_rel->relchecks)
609  {
610  relation->rd_att->constr = constr;
611 
612  if (ndef > 0) /* DEFAULTs */
613  {
     /* shrink the array when fewer than relnatts columns have defaults */
614  if (ndef < relation->rd_rel->relnatts)
615  constr->defval = (AttrDefault *)
616  repalloc(attrdef, ndef * sizeof(AttrDefault));
617  else
618  constr->defval = attrdef;
619  constr->num_defval = ndef;
620  AttrDefaultFetch(relation);
621  }
622  else
623  constr->num_defval = 0;
624 
625  if (relation->rd_rel->relchecks > 0) /* CHECKs */
626  {
627  constr->num_check = relation->rd_rel->relchecks;
628  constr->check = (ConstrCheck *)
630  constr->num_check * sizeof(ConstrCheck));
631  CheckConstraintFetch(relation);
632  }
633  else
634  constr->num_check = 0;
635  }
636  else
637  {
     /* nothing to record: drop the TupleConstr allocated above */
638  pfree(constr);
639  relation->rd_att->constr = NULL;
640  }
641 }
642 
643 /*
644  * RelationBuildRuleLock
645  *
646  * Form the relation's rewrite rules from information in
647  * the pg_rewrite system catalog.
648  *
649  * Note: The rule parsetrees are potentially very complex node structures.
650  * To allow these trees to be freed when the relcache entry is flushed,
651  * we make a private memory context to hold the RuleLock information for
652  * each relcache entry that has associated rules. The context is used
653  * just for rule info, not for any other subsidiary data of the relcache
654  * entry, because that keeps the update logic in RelationClearRelation()
655  * manageable. The other subsidiary data structures are simple enough
656  * to be easy to free explicitly, anyway.
     *
     * On return, rd_rules points at a RuleLock holding every rule found and
     * rd_rulescxt at the context that owns it; if the scan finds no rules,
     * both are set to NULL and the context is deleted again.
     *
     * NOTE(review): the line with the function name and parameters, the
     * context-creation call, the pg_rewrite open call and several call
     * arguments are absent from this listing.
657  */
658 static void
660 {
661  MemoryContext rulescxt;
662  MemoryContext oldcxt;
663  HeapTuple rewrite_tuple;
664  Relation rewrite_desc;
665  TupleDesc rewrite_tupdesc;
666  SysScanDesc rewrite_scan;
667  ScanKeyData key;
668  RuleLock *rulelock;
669  int numlocks;
670  RewriteRule **rules;
671  int maxlocks;
672 
673  /*
674  * Make the private context. Assume it'll not contain much data.
675  */
677  RelationGetRelationName(relation),
679  relation->rd_rulescxt = rulescxt;
680 
681  /*
682  * allocate an array to hold the rewrite rules (the array is extended if
683  * necessary)
684  */
685  maxlocks = 4;
686  rules = (RewriteRule **)
687  MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks);
688  numlocks = 0;
689 
690  /*
691  * form a scan key
692  */
693  ScanKeyInit(&key,
695  BTEqualStrategyNumber, F_OIDEQ,
697 
698  /*
699  * open pg_rewrite and begin a scan
700  *
701  * Note: since we scan the rules using RewriteRelRulenameIndexId, we will
702  * be reading the rules in name order, except possibly during
703  * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn
704  * ensures that rules will be fired in name order.
705  */
707  rewrite_tupdesc = RelationGetDescr(rewrite_desc);
708  rewrite_scan = systable_beginscan(rewrite_desc,
710  true, NULL,
711  1, &key);
712 
713  while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
714  {
715  Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
716  bool isnull;
717  Datum rule_datum;
718  char *rule_str;
719  RewriteRule *rule;
720 
721  rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
722  sizeof(RewriteRule));
723 
724  rule->ruleId = HeapTupleGetOid(rewrite_tuple);
725 
     /* ev_type is a character; subtracting '0' yields its offset from '0' */
726  rule->event = rewrite_form->ev_type - '0';
727  rule->enabled = rewrite_form->ev_enabled;
728  rule->isInstead = rewrite_form->is_instead;
729 
730  /*
731  * Must use heap_getattr to fetch ev_action and ev_qual. Also, the
732  * rule strings are often large enough to be toasted. To avoid
733  * leaking memory in the caller's context, do the detoasting here so
734  * we can free the detoasted version.
     *
     * Only the stringToNode() calls run inside rulescxt, so the parsed
     * trees live there while the temporary C strings are pfree'd from
     * the caller's context.
735  */
736  rule_datum = heap_getattr(rewrite_tuple,
738  rewrite_tupdesc,
739  &isnull);
740  Assert(!isnull);
741  rule_str = TextDatumGetCString(rule_datum);
742  oldcxt = MemoryContextSwitchTo(rulescxt);
743  rule->actions = (List *) stringToNode(rule_str);
744  MemoryContextSwitchTo(oldcxt);
745  pfree(rule_str);
746 
747  rule_datum = heap_getattr(rewrite_tuple,
749  rewrite_tupdesc,
750  &isnull);
751  Assert(!isnull);
752  rule_str = TextDatumGetCString(rule_datum);
753  oldcxt = MemoryContextSwitchTo(rulescxt);
754  rule->qual = (Node *) stringToNode(rule_str);
755  MemoryContextSwitchTo(oldcxt);
756  pfree(rule_str);
757 
758  /*
759  * We want the rule's table references to be checked as though by the
760  * table owner, not the user referencing the rule. Therefore, scan
761  * through the rule's actions and set the checkAsUser field on all
762  * rtable entries. We have to look at the qual as well, in case it
763  * contains sublinks.
764  *
765  * The reason for doing this when the rule is loaded, rather than when
766  * it is stored, is that otherwise ALTER TABLE OWNER would have to
767  * grovel through stored rules to update checkAsUser fields. Scanning
768  * the rule tree during load is relatively cheap (compared to
769  * constructing it in the first place), so we do it here.
770  */
771  setRuleCheckAsUser((Node *) rule->actions, relation->rd_rel->relowner);
772  setRuleCheckAsUser(rule->qual, relation->rd_rel->relowner);
773 
     /* array full: double its capacity before appending */
774  if (numlocks >= maxlocks)
775  {
776  maxlocks *= 2;
777  rules = (RewriteRule **)
778  repalloc(rules, sizeof(RewriteRule *) * maxlocks);
779  }
780  rules[numlocks++] = rule;
781  }
782 
783  /*
784  * end the scan and close pg_rewrite
785  */
786  systable_endscan(rewrite_scan);
787  heap_close(rewrite_desc, AccessShareLock);
788 
789  /*
790  * there might not be any rules (if relhasrules is out-of-date)
791  */
792  if (numlocks == 0)
793  {
794  relation->rd_rules = NULL;
795  relation->rd_rulescxt = NULL;
796  MemoryContextDelete(rulescxt);
797  return;
798  }
799 
800  /*
801  * form a RuleLock and insert into relation
802  */
803  rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock));
804  rulelock->numLocks = numlocks;
805  rulelock->rules = rules;
806 
807  relation->rd_rules = rulelock;
808 }
809 
810 /*
811  * RelationBuildPartitionKey
812  * Build and attach to relcache partition key data of relation
813  *
814  * Partitioning key data is stored in CacheMemoryContext to ensure it survives
815  * as long as the relcache. To avoid leaking memory in that context in case
816  * of an error partway through this function, we build the structure in the
817  * working context (which must be short-lived) and copy the completed
818  * structure into the cache memory.
819  *
820  * Also, since the structure being created here is sufficiently complex, we
821  * make a private child context of CacheMemoryContext for each relation that
822  * has associated partition key information. That means no complicated logic
823  * to free individual elements whenever the relcache entry is flushed - just
824  * delete the context.
     *
     * Returns without touching the relation when the PARTRELID syscache
     * lookup finds no tuple. Otherwise rd_partkeycxt and rd_partkey are set
     * on success; a failed opclass lookup raises ERROR.
     *
     * NOTE(review): the line with the function name and parameters, the
     * declaration of "form", the trailing arguments of the syscache calls,
     * the start of the type-property lookup at the end of the loop and the
     * context-creation call are absent from this listing.
825  */
826 static void
828 {
830  HeapTuple tuple;
831  bool isnull;
832  int i;
833  PartitionKey key;
834  AttrNumber *attrs;
835  oidvector *opclass;
836  oidvector *collation;
837  ListCell *partexprs_item;
838  Datum datum;
839  MemoryContext partkeycxt,
840  oldcxt;
841 
842  tuple = SearchSysCache1(PARTRELID,
844 
845  /*
846  * The following happens when we have created our pg_class entry but not
847  * the pg_partitioned_table entry yet.
848  */
849  if (!HeapTupleIsValid(tuple))
850  return;
851 
852  key = (PartitionKey) palloc0(sizeof(PartitionKeyData));
853 
854  /* Fixed-length attributes */
855  form = (Form_pg_partitioned_table) GETSTRUCT(tuple);
856  key->strategy = form->partstrat;
857  key->partnatts = form->partnatts;
858 
859  /*
860  * We can rely on the first variable-length attribute being mapped to the
861  * relevant field of the catalog's C struct, because all previous
862  * attributes are non-nullable and fixed-length.
863  */
864  attrs = form->partattrs.values;
865 
866  /* But use the hard way to retrieve further variable-length attributes */
867  /* Operator class */
868  datum = SysCacheGetAttr(PARTRELID, tuple,
870  Assert(!isnull);
871  opclass = (oidvector *) DatumGetPointer(datum);
872 
873  /* Collation */
874  datum = SysCacheGetAttr(PARTRELID, tuple,
876  Assert(!isnull);
877  collation = (oidvector *) DatumGetPointer(datum);
878 
879  /* Expressions */
880  datum = SysCacheGetAttr(PARTRELID, tuple,
882  if (!isnull)
883  {
884  char *exprString;
885  Node *expr;
886 
887  exprString = TextDatumGetCString(datum);
888  expr = stringToNode(exprString);
889  pfree(exprString);
890 
891  /*
892  * Run the expressions through const-simplification since the planner
893  * will be comparing them to similarly-processed qual clause operands,
894  * and may fail to detect valid matches without this step. We don't
895  * need to bother with canonicalize_qual() though, because partition
896  * expressions are not full-fledged qualification clauses.
897  */
898  expr = eval_const_expressions(NULL, (Node *) expr);
899 
900  /* May as well fix opfuncids too */
901  fix_opfuncids((Node *) expr);
902  key->partexprs = (List *) expr;
903  }
904 
     /* One zero-filled array of partnatts entries per per-column property */
905  key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber));
906  key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid));
907  key->partopcintype = (Oid *) palloc0(key->partnatts * sizeof(Oid));
908  key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo));
909 
910  key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid));
911 
912  /* Gather type and collation info as well */
913  key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid));
914  key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32));
915  key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16));
916  key->parttypbyval = (bool *) palloc0(key->partnatts * sizeof(bool));
917  key->parttypalign = (char *) palloc0(key->partnatts * sizeof(char));
918  key->parttypcoll = (Oid *) palloc0(key->partnatts * sizeof(Oid));
919 
920  /* Copy partattrs and fill other per-attribute info */
     /*
      * NOTE(review): partattrs was allocated with sizeof(AttrNumber) but
      * the copy below is sized with sizeof(int16). The two should use the
      * same element type (sizeof(AttrNumber)) so they cannot drift apart.
      */
921  memcpy(key->partattrs, attrs, key->partnatts * sizeof(int16));
922  partexprs_item = list_head(key->partexprs);
923  for (i = 0; i < key->partnatts; i++)
924  {
925  AttrNumber attno = key->partattrs[i];
926  HeapTuple opclasstup;
927  Form_pg_opclass opclassform;
928  Oid funcid;
929 
930  /* Collect opfamily information */
931  opclasstup = SearchSysCache1(CLAOID,
932  ObjectIdGetDatum(opclass->values[i]));
933  if (!HeapTupleIsValid(opclasstup))
934  elog(ERROR, "cache lookup failed for opclass %u", opclass->values[i]);
935 
936  opclassform = (Form_pg_opclass) GETSTRUCT(opclasstup);
937  key->partopfamily[i] = opclassform->opcfamily;
938  key->partopcintype[i] = opclassform->opcintype;
939 
940  /*
941  * A btree support function covers the cases of list and range methods
942  * currently supported.
943  */
944  funcid = get_opfamily_proc(opclassform->opcfamily,
945  opclassform->opcintype,
946  opclassform->opcintype,
947  BTORDER_PROC);
948 
949  fmgr_info(funcid, &key->partsupfunc[i]);
950 
951  /* Collation */
952  key->partcollation[i] = collation->values[i];
953 
954  /* Collect type information */
     /*
      * attno != 0 refers to a table column, whose type data comes from
      * rd_att; attno == 0 takes it from the partition expression list.
      *
      * NOTE(review): partexprs_item is never advanced inside this loop,
      * so every expression column reads the first list cell, and there
      * is no check that it is non-NULL before lfirst(). It looks like a
      * step to the next cell is missing after the three assignments in
      * the else branch -- confirm and fix.
      */
955  if (attno != 0)
956  {
957  key->parttypid[i] = relation->rd_att->attrs[attno - 1]->atttypid;
958  key->parttypmod[i] = relation->rd_att->attrs[attno - 1]->atttypmod;
959  key->parttypcoll[i] = relation->rd_att->attrs[attno - 1]->attcollation;
960  }
961  else
962  {
963  key->parttypid[i] = exprType(lfirst(partexprs_item));
964  key->parttypmod[i] = exprTypmod(lfirst(partexprs_item));
965  key->parttypcoll[i] = exprCollation(lfirst(partexprs_item));
966  }
968  &key->parttyplen[i],
969  &key->parttypbyval[i],
970  &key->parttypalign[i]);
971 
972  ReleaseSysCache(opclasstup);
973  }
974 
975  ReleaseSysCache(tuple);
976 
977  /* Success --- now copy to the cache memory */
979  RelationGetRelationName(relation),
981  relation->rd_partkeycxt = partkeycxt;
982  oldcxt = MemoryContextSwitchTo(relation->rd_partkeycxt);
983  relation->rd_partkey = copy_partition_key(key);
984  MemoryContextSwitchTo(oldcxt);
985 }
986 
987 /*
988  * copy_partition_key
989  *
990  * The copy is allocated in the current memory context.
     *
     * Builds a new PartitionKeyData from fromkey and returns it.  The scalar
     * fields (strategy, partnatts) are assigned directly, partexprs is
     * duplicated with copyObject(), and each per-attribute array is freshly
     * palloc'd with partnatts elements and filled with memcpy().
     *
     * NOTE(review): partsupfunc entries are copied bytewise, so any pointers
     * stored inside an FmgrInfo remain shared with fromkey -- confirm that is
     * acceptable for the lifetime of the copy.
     *
     * NOTE(review): the declarator line (listing line 993) was absent from
     * this extract; it is restored here from the function name in this header
     * and from the way the body uses fromkey.
991  */
992 static PartitionKey
993 copy_partition_key(PartitionKey fromkey)
994 {
995  PartitionKey newkey;
996  int n;
997 
998  newkey = (PartitionKey) palloc(sizeof(PartitionKeyData));
999 
1000  newkey->strategy = fromkey->strategy;
1001  newkey->partnatts = n = fromkey->partnatts;
1002 
1003  newkey->partattrs = (AttrNumber *) palloc(n * sizeof(AttrNumber));
1004  memcpy(newkey->partattrs, fromkey->partattrs, n * sizeof(AttrNumber));
1005 
1006  newkey->partexprs = copyObject(fromkey->partexprs);
1007 
1008  newkey->partopfamily = (Oid *) palloc(n * sizeof(Oid));
1009  memcpy(newkey->partopfamily, fromkey->partopfamily, n * sizeof(Oid));
1010 
1011  newkey->partopcintype = (Oid *) palloc(n * sizeof(Oid));
1012  memcpy(newkey->partopcintype, fromkey->partopcintype, n * sizeof(Oid));
1013 
1014  newkey->partsupfunc = (FmgrInfo *) palloc(n * sizeof(FmgrInfo));
1015  memcpy(newkey->partsupfunc, fromkey->partsupfunc, n * sizeof(FmgrInfo));
1016 
1017  newkey->partcollation = (Oid *) palloc(n * sizeof(Oid));
1018  memcpy(newkey->partcollation, fromkey->partcollation, n * sizeof(Oid));
1019 
1020  newkey->parttypid = (Oid *) palloc(n * sizeof(Oid));
1021  memcpy(newkey->parttypid, fromkey->parttypid, n * sizeof(Oid));
1022 
1023  newkey->parttypmod = (int32 *) palloc(n * sizeof(int32));
1024  memcpy(newkey->parttypmod, fromkey->parttypmod, n * sizeof(int32));
1025 
1026  newkey->parttyplen = (int16 *) palloc(n * sizeof(int16));
1027  memcpy(newkey->parttyplen, fromkey->parttyplen, n * sizeof(int16));
1028 
1029  newkey->parttypbyval = (bool *) palloc(n * sizeof(bool));
1030  memcpy(newkey->parttypbyval, fromkey->parttypbyval, n * sizeof(bool));
1031 
     /* parttypalign is a char array: size it with sizeof(char), matching the memcpy below */
1032  newkey->parttypalign = (char *) palloc(n * sizeof(char));
1033  memcpy(newkey->parttypalign, fromkey->parttypalign, n * sizeof(char));
1034 
1035  newkey->parttypcoll = (Oid *) palloc(n * sizeof(Oid));
1036  memcpy(newkey->parttypcoll, fromkey->parttypcoll, n * sizeof(Oid));
1037 
1038  return newkey;
1039 }
1040 
1041 /*
1042  * equalRuleLocks
1043  *
1044  * Determine whether two RuleLocks are equivalent
1045  *
1046  * Probably this should be in the rules code someplace...
      *
      * Two NULL pointers compare equal; a NULL and a non-NULL pointer do not.
      * Otherwise the locks must hold the same number of rules, and each pair
      * of rules in corresponding slots must agree on ruleId, event, enabled,
      * isInstead, qual and actions (the last two compared with equal()).
      *
      * NOTE(review): the declarator line (listing line 1049) was absent from
      * this extract; it is restored here from the name and "RuleLocks" wording
      * in this header and from the pointer usage of rlock1/rlock2 below.
1047  */
1048 static bool
1049 equalRuleLocks(RuleLock *rlock1, RuleLock *rlock2)
1050 {
1051  int i;
1052 
1053  /*
1054  * As of 7.3 we assume the rule ordering is repeatable, because
1055  * RelationBuildRuleLock should read 'em in a consistent order. So just
1056  * compare corresponding slots.
1057  */
1058  if (rlock1 != NULL)
1059  {
1060  if (rlock2 == NULL)
1061  return false;
1062  if (rlock1->numLocks != rlock2->numLocks)
1063  return false;
1064  for (i = 0; i < rlock1->numLocks; i++)
1065  {
1066  RewriteRule *rule1 = rlock1->rules[i];
1067  RewriteRule *rule2 = rlock2->rules[i];
1068 
1069  if (rule1->ruleId != rule2->ruleId)
1070  return false;
1071  if (rule1->event != rule2->event)
1072  return false;
1073  if (rule1->enabled != rule2->enabled)
1074  return false;
1075  if (rule1->isInstead != rule2->isInstead)
1076  return false;
1077  if (!equal(rule1->qual, rule2->qual))
1078  return false;
1079  if (!equal(rule1->actions, rule2->actions))
1080  return false;
1081  }
1082  }
1083  else if (rlock2 != NULL)
1084  return false;
1085  return true;
1086 }
1087 
1088 /*
1089  * equalPolicy
1090  *
1091  * Determine whether two policies are equivalent
      *
      * Two NULL pointers compare equal; a NULL and a non-NULL pointer do not.
      * Otherwise the policies must agree on polcmd, hassublinks, policy_name,
      * the length and contents of the roles array, qual and with_check_qual.
      *
      * NOTE(review): the declarator line (listing line 1094) is absent from
      * this extract, so the parameter types of policy1/policy2 are not visible
      * here.
1092  */
1093 static bool
1095 {
1096  int i;
1097  Oid *r1,
1098  *r2;
1099 
1100  if (policy1 != NULL)
1101  {
1102  if (policy2 == NULL)
1103  return false;
1104 
1105  if (policy1->polcmd != policy2->polcmd)
1106  return false;
1107  if (policy1->hassublinks != policy2->hassublinks)
1108  return false;
1109  if (strcmp(policy1->policy_name, policy2->policy_name) != 0)
1110  return false;
      /* only the first dimension of each roles array is compared */
1111  if (ARR_DIMS(policy1->roles)[0] != ARR_DIMS(policy2->roles)[0])
1112  return false;
1113 
      /* the roles array payload is read as a flat array of Oid */
1114  r1 = (Oid *) ARR_DATA_PTR(policy1->roles);
1115  r2 = (Oid *) ARR_DATA_PTR(policy2->roles);
1116 
      /* element-by-element comparison, so role order matters */
1117  for (i = 0; i < ARR_DIMS(policy1->roles)[0]; i++)
1118  {
1119  if (r1[i] != r2[i])
1120  return false;
1121  }
1122 
1123  if (!equal(policy1->qual, policy2->qual))
1124  return false;
1125  if (!equal(policy1->with_check_qual, policy2->with_check_qual))
1126  return false;
1127  }
1128  else if (policy2 != NULL)
1129  return false;
1130 
1131  return true;
1132 }
1133 
1134 /*
1135  * equalRSDesc
1136  *
1137  * Determine whether two RowSecurityDesc's are equivalent
      *
      * Two NULL descriptors are equal; a NULL and a non-NULL one are not.
      * Otherwise the policy lists must have the same length and each pair of
      * policies at the same position must satisfy equalPolicy().
      *
      * NOTE(review): the declarator line (listing line 1140) is absent from
      * this extract.
1138  */
1139 static bool
1141 {
1142  ListCell *lc,
1143  *rc;
1144 
1145  if (rsdesc1 == NULL && rsdesc2 == NULL)
1146  return true;
1147 
1148  if ((rsdesc1 != NULL && rsdesc2 == NULL) ||
1149  (rsdesc1 == NULL && rsdesc2 != NULL))
1150  return false;
1151 
1152  if (list_length(rsdesc1->policies) != list_length(rsdesc2->policies))
1153  return false;
1154 
1155  /* RelationBuildRowSecurity should build policies in order */
1156  forboth(lc, rsdesc1->policies, rc, rsdesc2->policies)
1157  {
      /*
       * NOTE(review): listing lines 1158-1159 are absent from this extract;
       * presumably they declare l and r from the current cells lc and rc --
       * confirm against the full file.
       */
1160 
1161  if (!equalPolicy(l, r))
1162  return false;
1163  }
1164 
1165  return true;
1166 }
1167 
1168 /*
1169  * equalPartitionDescs
1170  * Compare two partition descriptors for logical equality
      *
      * Two NULL descriptors are equal; a NULL and a non-NULL one are not.
      * Otherwise the descriptors must have the same nparts, the same oids in
      * the same positions, and either both lack boundinfo or both have
      * boundinfo that partition_bounds_equal() accepts for the given key.
      *
      * key may be NULL only when partdesc1 has no partitions (asserted below).
      *
      * NOTE(review): the first declarator line (listing line 1173) was absent
      * from this extract; it is restored here from the name in this header,
      * the visible second declarator line, and the body's use of key and
      * partdesc1.
1171  */
1172 static bool
1173 equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1,
1174  PartitionDesc partdesc2)
1175 {
1176  int i;
1177 
1178  if (partdesc1 != NULL)
1179  {
1180  if (partdesc2 == NULL)
1181  return false;
1182  if (partdesc1->nparts != partdesc2->nparts)
1183  return false;
1184 
1185  Assert(key != NULL || partdesc1->nparts == 0);
1186 
1187  /*
1188  * Same oids? If the partitioning structure did not change, that is,
1189  * no partitions were added or removed to the relation, the oids array
1190  * should still match element-by-element.
1191  */
1192  for (i = 0; i < partdesc1->nparts; i++)
1193  {
1194  if (partdesc1->oids[i] != partdesc2->oids[i])
1195  return false;
1196  }
1197 
1198  /*
1199  * Now compare partition bound collections. The logic to iterate over
1200  * the collections is private to partition.c.
1201  */
1202  if (partdesc1->boundinfo != NULL)
1203  {
1204  if (partdesc2->boundinfo == NULL)
1205  return false;
1206 
1207  if (!partition_bounds_equal(key, partdesc1->boundinfo,
1208  partdesc2->boundinfo))
1209  return false;
1210  }
1211  else if (partdesc2->boundinfo != NULL)
1212  return false;
1213  }
1214  else if (partdesc2 != NULL)
1215  return false;
1216 
1217  return true;
1218 }
1219 
1220 /*
1221  * RelationBuildDesc
1222  *
1223  * Build a relation descriptor. The caller must hold at least
1224  * AccessShareLock on the target relid.
1225  *
1226  * The new descriptor is inserted into the hash table if insertIt is true.
1227  *
1228  * Returns NULL if no pg_class row could be found for the given relid
1229  * (suggesting we are trying to access a just-deleted relation).
1230  * Any other error is reported via elog.
      *
      * The descriptor is marked valid (rd_isvalid) only as the very last step,
      * after the optional hash-table insertion.
1231  */
1232 static Relation
1233 RelationBuildDesc(Oid targetRelId, bool insertIt)
1234 {
1235  Relation relation;
1236  Oid relid;
1237  HeapTuple pg_class_tuple;
1238  Form_pg_class relp;
1239 
1240  /*
1241  * find the tuple in pg_class corresponding to the given relation id
1242  */
1243  pg_class_tuple = ScanPgRelation(targetRelId, true, false);
1244 
1245  /*
1246  * if no such tuple exists, return NULL
1247  */
1248  if (!HeapTupleIsValid(pg_class_tuple))
1249  return NULL;
1250 
1251  /*
1252  * get information from the pg_class_tuple
1253  */
1254  relid = HeapTupleGetOid(pg_class_tuple);
1255  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1256  Assert(relid == targetRelId);
1257 
1258  /*
1259  * allocate storage for the relation descriptor, and copy pg_class_tuple
1260  * to relation->rd_rel.
1261  */
1262  relation = AllocateRelationDesc(relp);
1263 
1264  /*
1265  * initialize the relation's relation id (relation->rd_id)
1266  */
1267  RelationGetRelid(relation) = relid;
1268 
1269  /*
1270  * normal relations are not nailed into the cache; nor can a pre-existing
1271  * relation be new. It could be temp though. (Actually, it could be new
1272  * too, but it's okay to forget that fact if forced to flush the entry.)
1273  */
1274  relation->rd_refcnt = 0;
1275  relation->rd_isnailed = false;
      /*
       * NOTE(review): listing lines 1276-1277 are absent from this extract;
       * whatever statements they hold are not visible here.
       */
1278  switch (relation->rd_rel->relpersistence)
1279  {
      /*
       * NOTE(review): listing lines 1280-1281 are absent from this extract.
       * They presumably hold the case label(s) for the non-temp arm below,
       * which sets rd_backend to InvalidBackendId -- confirm against the
       * full file.
       */
1282  relation->rd_backend = InvalidBackendId;
1283  relation->rd_islocaltemp = false;
1284  break;
1285  case RELPERSISTENCE_TEMP:
1286  if (isTempOrTempToastNamespace(relation->rd_rel->relnamespace))
1287  {
1288  relation->rd_backend = BackendIdForTempRelations();
1289  relation->rd_islocaltemp = true;
1290  }
1291  else
1292  {
1293  /*
1294  * If it's a temp table, but not one of ours, we have to use
1295  * the slow, grotty method to figure out the owning backend.
1296  *
1297  * Note: it's possible that rd_backend gets set to MyBackendId
1298  * here, in case we are looking at a pg_class entry left over
1299  * from a crashed backend that coincidentally had the same
1300  * BackendId we're using. We should *not* consider such a
1301  * table to be "ours"; this is why we need the separate
1302  * rd_islocaltemp flag. The pg_class entry will get flushed
1303  * if/when we clean out the corresponding temp table namespace
1304  * in preparation for using it.
1305  */
1306  relation->rd_backend =
1307  GetTempNamespaceBackendId(relation->rd_rel->relnamespace);
1308  Assert(relation->rd_backend != InvalidBackendId);
1309  relation->rd_islocaltemp = false;
1310  }
1311  break;
1312  default:
1313  elog(ERROR, "invalid relpersistence: %c",
1314  relation->rd_rel->relpersistence);
1315  break;
1316  }
1317 
1318  /*
1319  * initialize the tuple descriptor (relation->rd_att).
1320  */
1321  RelationBuildTupleDesc(relation);
1322 
1323  /*
1324  * Fetch rules and triggers that affect this relation
1325  */
1326  if (relation->rd_rel->relhasrules)
1327  RelationBuildRuleLock(relation);
1328  else
1329  {
1330  relation->rd_rules = NULL;
1331  relation->rd_rulescxt = NULL;
1332  }
1333 
1334  if (relation->rd_rel->relhastriggers)
1335  RelationBuildTriggers(relation);
1336  else
1337  relation->trigdesc = NULL;
1338 
      /* row-security descriptor is built only when relrowsecurity is set */
1339  if (relation->rd_rel->relrowsecurity)
1340  RelationBuildRowSecurity(relation);
1341  else
1342  relation->rd_rsdesc = NULL;
1343 
1344  /* foreign key data is not loaded till asked for */
1345  relation->rd_fkeylist = NIL;
1346  relation->rd_fkeyvalid = false;
1347 
1348  /* if a partitioned table, initialize key and partition descriptor info */
1349  if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1350  {
1351  RelationBuildPartitionKey(relation);
1352  RelationBuildPartitionDesc(relation);
1353  }
1354  else
1355  {
1356  relation->rd_partkeycxt = NULL;
1357  relation->rd_partkey = NULL;
1358  relation->rd_partdesc = NULL;
1359  relation->rd_pdcxt = NULL;
1360  }
1361 
1362  /*
1363  * if it's an index, initialize index-related information
1364  */
      /* a valid relam is what this code treats as "is an index" */
1365  if (OidIsValid(relation->rd_rel->relam))
1366  RelationInitIndexAccessInfo(relation);
1367 
1368  /* extract reloptions if any */
1369  RelationParseRelOptions(relation, pg_class_tuple);
1370 
1371  /*
1372  * initialize the relation lock manager information
1373  */
1374  RelationInitLockInfo(relation); /* see lmgr.c */
1375 
1376  /*
1377  * initialize physical addressing information for the relation
1378  */
1379  RelationInitPhysicalAddr(relation);
1380 
1381  /* make sure relation is marked as having no open file yet */
1382  relation->rd_smgr = NULL;
1383 
1384  /*
1385  * now we can free the memory allocated for pg_class_tuple
1386  */
1387  heap_freetuple(pg_class_tuple);
1388 
1389  /*
1390  * Insert newly created relation into relcache hash table, if requested.
1391  *
1392  * There is one scenario in which we might find a hashtable entry already
1393  * present, even though our caller failed to find it: if the relation is a
1394  * system catalog or index that's used during relcache load, we might have
1395  * recursively created the same relcache entry during the preceding steps.
1396  * So allow RelationCacheInsert to delete any already-present relcache
1397  * entry for the same OID. The already-present entry should have refcount
1398  * zero (else somebody forgot to close it); in the event that it doesn't,
1399  * we'll elog a WARNING and leak the already-present entry.
1400  */
1401  if (insertIt)
1402  RelationCacheInsert(relation, true);
1403 
1404  /* It's fully valid */
1405  relation->rd_isvalid = true;
1406 
1407  return relation;
1408 }
1409 
1410 /*
1411  * Initialize the physical addressing info (RelFileNode) for a relcache entry
1412  *
1413  * Note: at the physical level, relations in the pg_global tablespace must
1414  * be treated as shared, even if relisshared isn't set. Hence we do not
1415  * look at relisshared here.
      *
      * Fills relation->rd_node: spcNode from reltablespace (falling back to
      * MyDatabaseTableSpace when that is zero), dbNode from the tablespace
      * choice, and relNode either from relfilenode or, when relfilenode is
      * zero, from the relation mapper.
      *
      * NOTE(review): the declarator line (listing line 1418) is absent from
      * this extract.
1416  */
1417 static void
1419 {
1420  if (relation->rd_rel->reltablespace)
1421  relation->rd_node.spcNode = relation->rd_rel->reltablespace;
1422  else
1423  relation->rd_node.spcNode = MyDatabaseTableSpace;
1424  if (relation->rd_node.spcNode == GLOBALTABLESPACE_OID)
1425  relation->rd_node.dbNode = InvalidOid;
1426  else
1427  relation->rd_node.dbNode = MyDatabaseId;
1428 
1429  if (relation->rd_rel->relfilenode)
1430  {
1431  /*
1432  * Even if we are using a decoding snapshot that doesn't represent the
1433  * current state of the catalog we need to make sure the filenode
1434  * points to the current file since the older file will be gone (or
1435  * truncated). The new file will still contain older rows so lookups
1436  * in them will work correctly. This wouldn't work correctly if
1437  * rewrites were allowed to change the schema in an incompatible way,
1438  * but those are prevented both on catalog tables and on user tables
1439  * declared as additional catalog tables.
1440  */
      /*
       * NOTE(review): listing lines 1441-1442 are absent from this extract;
       * they hold the start of the condition whose final clause is the
       * IsTransactionState() test on the next line.
       */
1443  && IsTransactionState())
1444  {
1445  HeapTuple phys_tuple;
1446  Form_pg_class physrel;
1447 
1448  phys_tuple = ScanPgRelation(RelationGetRelid(relation),
1449  RelationGetRelid(relation) != ClassOidIndexId,
1450  true);
1451  if (!HeapTupleIsValid(phys_tuple))
1452  elog(ERROR, "could not find pg_class entry for %u",
1453  RelationGetRelid(relation));
1454  physrel = (Form_pg_class) GETSTRUCT(phys_tuple);
1455 
      /* overwrite the cached tablespace/filenode with the freshly read values */
1456  relation->rd_rel->reltablespace = physrel->reltablespace;
1457  relation->rd_rel->relfilenode = physrel->relfilenode;
1458  heap_freetuple(phys_tuple);
1459  }
1460 
1461  relation->rd_node.relNode = relation->rd_rel->relfilenode;
1462  }
1463  else
1464  {
1465  /* Consult the relation mapper */
1466  relation->rd_node.relNode =
1467  RelationMapOidToFilenode(relation->rd_id,
1468  relation->rd_rel->relisshared);
1469  if (!OidIsValid(relation->rd_node.relNode))
1470  elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1471  RelationGetRelationName(relation), relation->rd_id);
1472  }
1473 }
1474 
1475 /*
1476  * Fill in the IndexAmRoutine for an index relation.
1477  *
1478  * relation's rd_amhandler and rd_indexcxt must be valid already.
      *
      * The routine struct returned by GetIndexAmRoutine() is copied into memory
      * allocated from rd_indexcxt, the copy is stored in rd_amroutine, and the
      * temporary original is pfree'd.
      *
      * NOTE(review): the declarator line (listing line 1481) was absent from
      * this extract; it is restored here from the call site
      * InitIndexAmRoutine(relation) and the body's use of relation.
1479  */
1480 static void
1481 InitIndexAmRoutine(Relation relation)
1482 {
1483  IndexAmRoutine *cached,
1484  *tmp;
1485 
1486  /*
1487  * Call the amhandler in current, short-lived memory context, just in case
1488  * it leaks anything (it probably won't, but let's be paranoid).
1489  */
1490  tmp = GetIndexAmRoutine(relation->rd_amhandler);
1491 
1492  /* OK, now transfer the data into relation's rd_indexcxt. */
1493  cached = (IndexAmRoutine *) MemoryContextAlloc(relation->rd_indexcxt,
1494  sizeof(IndexAmRoutine));
1495  memcpy(cached, tmp, sizeof(IndexAmRoutine));
1496  relation->rd_amroutine = cached;
1497 
1498  pfree(tmp);
1499 }
1500 
1501 /*
1502  * Initialize index-access-method support data for an index relation
      *
      * Copies the index's pg_index tuple into the relcache entry, records the
      * access method's handler OID, creates rd_indexcxt, fetches the AM routine
      * struct, and fills the per-column arrays (opfamily, opcintype, support
      * procs, collations, options).  Expression, predicate and exclusion
      * caches are reset to empty and left to be filled later.
      *
      * NOTE(review): the declarator line (listing line 1505) is absent from
      * this extract.
1503  */
1504 void
1506 {
1507  HeapTuple tuple;
1508  Form_pg_am aform;
1509  Datum indcollDatum;
1510  Datum indclassDatum;
1511  Datum indoptionDatum;
1512  bool isnull;
1513  oidvector *indcoll;
1514  oidvector *indclass;
1515  int2vector *indoption;
1516  MemoryContext indexcxt;
1517  MemoryContext oldcontext;
1518  int natts;
1519  uint16 amsupport;
1520 
1521  /*
1522  * Make a copy of the pg_index entry for the index. Since pg_index
1523  * contains variable-length and possibly-null fields, we have to do this
1524  * honestly rather than just treating it as a Form_pg_index struct.
1525  */
1526  tuple = SearchSysCache1(INDEXRELID,
1527  ObjectIdGetDatum(RelationGetRelid(relation)));
1528  if (!HeapTupleIsValid(tuple))
1529  elog(ERROR, "cache lookup failed for index %u",
1530  RelationGetRelid(relation));
      /*
       * NOTE(review): listing line 1531 is absent from this extract.
       * oldcontext is restored at line 1534 but never assigned in the visible
       * lines, so the missing line presumably performs the matching context
       * switch -- confirm against the full file.
       */
1532  relation->rd_indextuple = heap_copytuple(tuple);
1533  relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
1534  MemoryContextSwitchTo(oldcontext);
1535  ReleaseSysCache(tuple);
1536 
1537  /*
1538  * Look up the index's access method, save the OID of its handler function
1539  */
1540  tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(relation->rd_rel->relam));
1541  if (!HeapTupleIsValid(tuple))
1542  elog(ERROR, "cache lookup failed for access method %u",
1543  relation->rd_rel->relam);
1544  aform = (Form_pg_am) GETSTRUCT(tuple);
1545  relation->rd_amhandler = aform->amhandler;
1546  ReleaseSysCache(tuple);
1547 
      /* sanity check: pg_class and pg_index must agree on the column count */
1548  natts = relation->rd_rel->relnatts;
1549  if (natts != relation->rd_index->indnatts)
1550  elog(ERROR, "relnatts disagrees with indnatts for index %u",
1551  RelationGetRelid(relation));
1552 
1553  /*
1554  * Make the private context to hold index access info. The reason we need
1555  * a context, and not just a couple of pallocs, is so that we won't leak
1556  * any subsidiary info attached to fmgr lookup records.
1557  */
      /*
       * NOTE(review): listing lines 1558 and 1560 are absent from this
       * extract; only the middle argument line of the call that produces
       * indexcxt is visible.
       */
1559  RelationGetRelationName(relation),
1561  relation->rd_indexcxt = indexcxt;
1562 
1563  /*
1564  * Now we can fetch the index AM's API struct
1565  */
1566  InitIndexAmRoutine(relation);
1567 
1568  /*
1569  * Allocate arrays to hold data
1570  */
1571  relation->rd_opfamily = (Oid *)
1572  MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1573  relation->rd_opcintype = (Oid *)
1574  MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1575 
1576  amsupport = relation->rd_amroutine->amsupport;
1577  if (amsupport > 0)
1578  {
      /* one slot per (column, support-proc number) pair */
1579  int nsupport = natts * amsupport;
1580 
1581  relation->rd_support = (RegProcedure *)
1582  MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
1583  relation->rd_supportinfo = (FmgrInfo *)
1584  MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
1585  }
1586  else
1587  {
1588  relation->rd_support = NULL;
1589  relation->rd_supportinfo = NULL;
1590  }
1591 
1592  relation->rd_indcollation = (Oid *)
1593  MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1594 
1595  relation->rd_indoption = (int16 *)
1596  MemoryContextAllocZero(indexcxt, natts * sizeof(int16));
1597 
1598  /*
1599  * indcollation cannot be referenced directly through the C struct,
1600  * because it comes after the variable-width indkey field. Must extract
1601  * the datum the hard way...
1602  */
1603  indcollDatum = fastgetattr(relation->rd_indextuple,
      /* NOTE(review): listing lines 1604-1605 (middle arguments) are absent from this extract */
1606  &isnull);
1607  Assert(!isnull);
1608  indcoll = (oidvector *) DatumGetPointer(indcollDatum);
1609  memcpy(relation->rd_indcollation, indcoll->values, natts * sizeof(Oid));
1610 
1611  /*
1612  * indclass cannot be referenced directly through the C struct, because it
1613  * comes after the variable-width indkey field. Must extract the datum
1614  * the hard way...
1615  */
1616  indclassDatum = fastgetattr(relation->rd_indextuple,
      /* NOTE(review): listing lines 1617-1618 (middle arguments) are absent from this extract */
1619  &isnull);
1620  Assert(!isnull);
1621  indclass = (oidvector *) DatumGetPointer(indclassDatum);
1622 
1623  /*
1624  * Fill the support procedure OID array, as well as the info about
1625  * opfamilies and opclass input types. (aminfo and supportinfo are left
1626  * as zeroes, and are filled on-the-fly when used)
1627  */
1628  IndexSupportInitialize(indclass, relation->rd_support,
1629  relation->rd_opfamily, relation->rd_opcintype,
1630  amsupport, natts);
1631 
1632  /*
1633  * Similarly extract indoption and copy it to the cache entry
1634  */
1635  indoptionDatum = fastgetattr(relation->rd_indextuple,
      /* NOTE(review): listing lines 1636-1637 (middle arguments) are absent from this extract */
1638  &isnull);
1639  Assert(!isnull);
1640  indoption = (int2vector *) DatumGetPointer(indoptionDatum);
1641  memcpy(relation->rd_indoption, indoption->values, natts * sizeof(int16));
1642 
1643  /*
1644  * expressions, predicate, exclusion caches will be filled later
1645  */
1646  relation->rd_indexprs = NIL;
1647  relation->rd_indpred = NIL;
1648  relation->rd_exclops = NULL;
1649  relation->rd_exclprocs = NULL;
1650  relation->rd_exclstrats = NULL;
1651  relation->rd_amcache = NULL;
1652 }
1653 
1654 /*
1655  * IndexSupportInitialize
1656  * Initializes an index's cached opclass information,
1657  * given the index's pg_index.indclass entry.
1658  *
1659  * Data is returned into *indexSupport, *opFamily, and *opcInType,
1660  * which are arrays allocated by the caller.
1661  *
1662  * The caller also passes maxSupportNumber and maxAttributeNumber, since these
1663  * indicate the size of the arrays it has allocated --- but in practice these
1664  * numbers must always match those obtainable from the system catalog entries
1665  * for the index and access method.
      *
      * indexSupport is laid out as maxAttributeNumber consecutive groups of
      * maxSupportNumber entries; it is not touched when maxSupportNumber is 0.
      * An invalid opclass OID in indclass raises an error.
      *
      * NOTE(review): the first declarator line (listing line 1668) was absent
      * from this extract; it is restored here from the call site, which passes
      * an oidvector pointer as the first argument, and from the body's use of
      * indclass->values.
1666  */
1667 static void
1668 IndexSupportInitialize(oidvector *indclass,
1669  RegProcedure *indexSupport,
1670  Oid *opFamily,
1671  Oid *opcInType,
1672  StrategyNumber maxSupportNumber,
1673  AttrNumber maxAttributeNumber)
1674 {
1675  int attIndex;
1676 
1677  for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++)
1678  {
1679  OpClassCacheEnt *opcentry;
1680 
1681  if (!OidIsValid(indclass->values[attIndex]))
1682  elog(ERROR, "bogus pg_index tuple");
1683 
1684  /* look up the info for this opclass, using a cache */
1685  opcentry = LookupOpclassInfo(indclass->values[attIndex],
1686  maxSupportNumber);
1687 
1688  /* copy cached data into relcache entry */
1689  opFamily[attIndex] = opcentry->opcfamily;
1690  opcInType[attIndex] = opcentry->opcintype;
1691  if (maxSupportNumber > 0)
1692  memcpy(&indexSupport[attIndex * maxSupportNumber],
1693  opcentry->supportProcs,
1694  maxSupportNumber * sizeof(RegProcedure));
1695  }
1696 }
1697 
1698 /*
1699  * LookupOpclassInfo
1700  *
1701  * This routine maintains a per-opclass cache of the information needed
1702  * by IndexSupportInitialize(). This is more efficient than relying on
1703  * the catalog cache, because we can load all the info about a particular
1704  * opclass in a single indexscan of pg_amproc.
1705  *
1706  * The information from pg_am about expected range of support function
1707  * numbers is passed in, rather than being looked up, mainly because the
1708  * caller will have it already.
1709  *
1710  * Note there is no provision for flushing the cache. This is OK at the
1711  * moment because there is no way to ALTER any interesting properties of an
1712  * existing opclass --- all you can do is drop it, which will result in
1713  * a useless but harmless dead entry in the cache. To support altering
1714  * opclass membership (not the same as opfamily membership!), we'd need to
1715  * be able to flush this cache as well as the contents of relcache entries
1716  * for indexes.
      *
      * Returns the cache entry for operatorClassOid, creating and filling it
      * on first use.  An entry is returned without rescanning only when its
      * valid flag is set.
1717  */
1718 static OpClassCacheEnt *
1719 LookupOpclassInfo(Oid operatorClassOid,
1720  StrategyNumber numSupport)
1721 {
1722  OpClassCacheEnt *opcentry;
1723  bool found;
1724  Relation rel;
1725  SysScanDesc scan;
1726  ScanKeyData skey[3];
1727  HeapTuple htup;
1728  bool indexOK;
1729 
1730  if (OpClassCache == NULL)
1731  {
1732  /* First time through: initialize the opclass cache */
1733  HASHCTL ctl;
1734 
1735  MemSet(&ctl, 0, sizeof(ctl));
1736  ctl.keysize = sizeof(Oid);
1737  ctl.entrysize = sizeof(OpClassCacheEnt);
1738  OpClassCache = hash_create("Operator class cache", 64,
1739  &ctl, HASH_ELEM | HASH_BLOBS);
1740 
1741  /* Also make sure CacheMemoryContext exists */
1742  if (!CacheMemoryContext)
      /* NOTE(review): listing line 1743 (the body of this if) is absent from this extract */
1744  }
1745 
      /* HASH_ENTER: an entry is created if none exists; found says which case applied */
1746  opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
1747  (void *) &operatorClassOid,
1748  HASH_ENTER, &found);
1749 
1750  if (!found)
1751  {
1752  /* Need to allocate memory for new entry */
1753  opcentry->valid = false; /* until known OK */
1754  opcentry->numSupport = numSupport;
1755 
1756  if (numSupport > 0)
1757  opcentry->supportProcs = (RegProcedure *)
      /*
       * NOTE(review): listing line 1758 is absent from this extract; it holds
       * the start of the allocation call whose size argument is on the next
       * line.
       */
1759  numSupport * sizeof(RegProcedure));
1760  else
1761  opcentry->supportProcs = NULL;
1762  }
1763  else
1764  {
1765  Assert(numSupport == opcentry->numSupport);
1766  }
1767 
1768  /*
1769  * When testing for cache-flush hazards, we intentionally disable the
1770  * operator class cache and force reloading of the info on each call. This
1771  * is helpful because we want to test the case where a cache flush occurs
1772  * while we are loading the info, and it's very hard to provoke that if
1773  * this happens only once per opclass per backend.
1774  */
1775 #if defined(CLOBBER_CACHE_ALWAYS)
1776  opcentry->valid = false;
1777 #endif
1778 
1779  if (opcentry->valid)
1780  return opcentry;
1781 
1782  /*
1783  * Need to fill in new entry.
1784  *
1785  * To avoid infinite recursion during startup, force heap scans if we're
1786  * looking up info for the opclasses used by the indexes we would like to
1787  * reference here.
1788  */
1789  indexOK = criticalRelcachesBuilt ||
1790  (operatorClassOid != OID_BTREE_OPS_OID &&
1791  operatorClassOid != INT2_BTREE_OPS_OID);
1792 
1793  /*
1794  * We have to fetch the pg_opclass row to determine its opfamily and
1795  * opcintype, which are needed to look up related operators and functions.
1796  * It'd be convenient to use the syscache here, but that probably doesn't
1797  * work while bootstrapping.
1798  */
1799  ScanKeyInit(&skey[0],
      /* NOTE(review): listing line 1800 (the attribute-number argument) is absent from this extract */
1801  BTEqualStrategyNumber, F_OIDEQ,
1802  ObjectIdGetDatum(operatorClassOid));
      /*
       * NOTE(review): listing line 1803 is absent from this extract; rel is
       * used by the scan below but is not assigned in the visible lines.
       */
1804  scan = systable_beginscan(rel, OpclassOidIndexId, indexOK,
1805  NULL, 1, skey);
1806 
1807  if (HeapTupleIsValid(htup = systable_getnext(scan)))
1808  {
1809  Form_pg_opclass opclassform = (Form_pg_opclass) GETSTRUCT(htup);
1810 
1811  opcentry->opcfamily = opclassform->opcfamily;
1812  opcentry->opcintype = opclassform->opcintype;
1813  }
1814  else
1815  elog(ERROR, "could not find tuple for opclass %u", operatorClassOid);
1816 
1817  systable_endscan(scan);
      /* NOTE(review): listing line 1818 is absent from this extract */
1819 
1820  /*
1821  * Scan pg_amproc to obtain support procs for the opclass. We only fetch
1822  * the default ones (those with lefttype = righttype = opcintype).
1823  */
1824  if (numSupport > 0)
1825  {
      /*
       * NOTE(review): listing lines 1827, 1831 and 1835 (the attribute-number
       * argument of each ScanKeyInit call below) are absent from this extract.
       */
1826  ScanKeyInit(&skey[0],
1828  BTEqualStrategyNumber, F_OIDEQ,
1829  ObjectIdGetDatum(opcentry->opcfamily));
1830  ScanKeyInit(&skey[1],
1832  BTEqualStrategyNumber, F_OIDEQ,
1833  ObjectIdGetDatum(opcentry->opcintype));
1834  ScanKeyInit(&skey[2],
1836  BTEqualStrategyNumber, F_OIDEQ,
1837  ObjectIdGetDatum(opcentry->opcintype));
      /*
       * NOTE(review): listing line 1838 is absent from this extract; rel is
       * used by the scan below but is not reassigned in the visible lines.
       */
1839  scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK,
1840  NULL, 3, skey);
1841 
1842  while (HeapTupleIsValid(htup = systable_getnext(scan)))
1843  {
1844  Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);
1845 
      /* amprocnum is 1-based and must not exceed numSupport */
1846  if (amprocform->amprocnum <= 0 ||
1847  (StrategyNumber) amprocform->amprocnum > numSupport)
1848  elog(ERROR, "invalid amproc number %d for opclass %u",
1849  amprocform->amprocnum, operatorClassOid);
1850 
1851  opcentry->supportProcs[amprocform->amprocnum - 1] =
1852  amprocform->amproc;
1853  }
1854 
1855  systable_endscan(scan);
      /* NOTE(review): listing line 1856 is absent from this extract */
1857  }
1858 
1859  opcentry->valid = true;
1860  return opcentry;
1861 }
1862 
1863 
1864 /*
1865  * formrdesc
1866  *
1867  * This is a special cut-down version of RelationBuildDesc(),
1868  * used while initializing the relcache.
1869  * The relation descriptor is built just from the supplied parameters,
1870  * without actually looking at any system table entries. We cheat
1871  * quite a lot since we only need to work for a few basic system
1872  * catalogs.
1873  *
1874  * formrdesc is currently used for: pg_database, pg_authid, pg_auth_members,
1875  * pg_shseclabel, pg_class, pg_attribute, pg_proc, and pg_type
1876  * (see RelationCacheInitializePhase2/3).
1877  *
1878  * Note that these catalogs can't have constraints (except attnotnull),
1879  * default values, rules, or triggers, since we don't cope with any of that.
1880  * (Well, actually, this only matters for properties that need to be valid
1881  * during bootstrap or before RelationCacheInitializePhase3 runs, and none of
1882  * these properties matter then...)
1883  *
1884  * NOTE: we assume we are already switched into CacheMemoryContext.
      *
      * The relation's OID is not passed in; it is taken from attrs[0].attrelid
      * after the attribute array has been copied.  The finished descriptor is
      * inserted into the relcache and nothing is returned.
1885  */
1886 static void
1887 formrdesc(const char *relationName, Oid relationReltype,
1888  bool isshared, bool hasoids,
1889  int natts, const FormData_pg_attribute *attrs)
1890 {
1891  Relation relation;
1892  int i;
1893  bool has_not_null;
1894 
1895  /*
1896  * allocate new relation desc, clear all fields of reldesc
1897  */
1898  relation = (Relation) palloc0(sizeof(RelationData));
1899 
1900  /* make sure relation is marked as having no open file yet */
1901  relation->rd_smgr = NULL;
1902 
1903  /*
1904  * initialize reference count: 1 because it is nailed in cache
1905  */
1906  relation->rd_refcnt = 1;
1907 
1908  /*
1909  * all entries built with this routine are nailed-in-cache; none are for
1910  * new or temp relations.
1911  */
1912  relation->rd_isnailed = true;
      /* NOTE(review): listing lines 1913-1914 are absent from this extract */
1915  relation->rd_backend = InvalidBackendId;
1916  relation->rd_islocaltemp = false;
1917 
1918  /*
1919  * initialize relation tuple form
1920  *
1921  * The data we insert here is pretty incomplete/bogus, but it'll serve to
1922  * get us launched. RelationCacheInitializePhase3() will read the real
1923  * data from pg_class and replace what we've done here. Note in
1924  * particular that relowner is left as zero; this cues
1925  * RelationCacheInitializePhase3 that the real data isn't there yet.
1926  */
      /*
       * NOTE(review): listing line 1927 is absent from this extract.  rd_rel
       * is dereferenced from here on but is not set in the visible lines, so
       * the missing line presumably allocates it -- confirm against the full
       * file.
       */
1928 
1929  namestrcpy(&relation->rd_rel->relname, relationName);
1930  relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1931  relation->rd_rel->reltype = relationReltype;
1932 
1933  /*
1934  * It's important to distinguish between shared and non-shared relations,
1935  * even at bootstrap time, to make sure we know where they are stored.
1936  */
1937  relation->rd_rel->relisshared = isshared;
1938  if (isshared)
1939  relation->rd_rel->reltablespace = GLOBALTABLESPACE_OID;
1940 
1941  /* formrdesc is used only for permanent relations */
1942  relation->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
1943 
1944  /* ... and they're always populated, too */
1945  relation->rd_rel->relispopulated = true;
1946 
1947  relation->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
1948  relation->rd_rel->relpages = 0;
1949  relation->rd_rel->reltuples = 0;
1950  relation->rd_rel->relallvisible = 0;
1951  relation->rd_rel->relkind = RELKIND_RELATION;
1952  relation->rd_rel->relhasoids = hasoids;
1953  relation->rd_rel->relnatts = (int16) natts;
1954 
1955  /*
1956  * initialize attribute tuple form
1957  *
1958  * Unlike the case with the relation tuple, this data had better be right
1959  * because it will never be replaced. The data comes from
1960  * src/include/catalog/ headers via genbki.pl.
1961  */
1962  relation->rd_att = CreateTemplateTupleDesc(natts, hasoids);
1963  relation->rd_att->tdrefcount = 1; /* mark as refcounted */
1964 
1965  relation->rd_att->tdtypeid = relationReltype;
1966  relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
1967 
1968  /*
1969  * initialize tuple desc info
1970  */
1971  has_not_null = false;
1972  for (i = 0; i < natts; i++)
1973  {
1974  memcpy(relation->rd_att->attrs[i],
1975  &attrs[i],
      /* NOTE(review): listing line 1976 (the size argument of this memcpy) is absent from this extract */
1977  has_not_null |= attrs[i].attnotnull;
1978  /* make sure attcacheoff is valid */
1979  relation->rd_att->attrs[i]->attcacheoff = -1;
1980  }
1981 
1982  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
1983  relation->rd_att->attrs[0]->attcacheoff = 0;
1984 
1985  /* mark not-null status */
1986  if (has_not_null)
1987  {
1988  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
1989 
1990  constr->has_not_null = true;
1991  relation->rd_att->constr = constr;
1992  }
1993 
1994  /*
1995  * initialize relation id from info in att array (my, this is ugly)
1996  */
1997  RelationGetRelid(relation) = relation->rd_att->attrs[0]->attrelid;
1998 
1999  /*
2000  * All relations made with formrdesc are mapped. This is necessarily so
2001  * because there is no other way to know what filenode they currently
2002  * have. In bootstrap mode, add them to the initial relation mapper data,
2003  * specifying that the initial filenode is the same as the OID.
2004  */
2005  relation->rd_rel->relfilenode = InvalidOid;
      /*
       * NOTE(review): listing lines 2006-2007 are absent from this extract;
       * the two lines below are the trailing arguments of a call whose start
       * is not visible.
       */
2008  RelationGetRelid(relation),
2009  isshared, true);
2010 
2011  /*
2012  * initialize the relation lock manager information
2013  */
2014  RelationInitLockInfo(relation); /* see lmgr.c */
2015 
2016  /*
2017  * initialize physical addressing information for the relation
2018  */
2019  RelationInitPhysicalAddr(relation);
2020 
2021  /*
2022  * initialize the rel-has-index flag, using hardwired knowledge
2023  */
      /*
       * NOTE(review): listing line 2024 (the condition that opens this
       * if/else) is absent from this extract.
       */
2025  {
2026  /* In bootstrap mode, we have no indexes */
2027  relation->rd_rel->relhasindex = false;
2028  }
2029  else
2030  {
2031  /* Otherwise, all the rels formrdesc is used for have indexes */
2032  relation->rd_rel->relhasindex = true;
2033  }
2034 
2035  /*
2036  * add new reldesc to relcache
2037  */
2038  RelationCacheInsert(relation, false);
2039 
2040  /* It's fully valid */
2041  relation->rd_isvalid = true;
2042 }
2043 
2044 
2045 /* ----------------------------------------------------------------
2046  * Relation Descriptor Lookup Interface
2047  * ----------------------------------------------------------------
2048  */
2049 
2050 /*
2051  * RelationIdGetRelation
2052  *
2053  * Lookup a reldesc by OID; make one if not already in cache.
2054  *
2055  * Returns NULL if no pg_class row could be found for the given relid
2056  * (suggesting we are trying to access a just-deleted relation).
2057  * Any other error is reported via elog.
2058  *
2059  * NB: caller should already have at least AccessShareLock on the
2060  * relation ID, else there are nasty race conditions.
2061  *
2062  * NB: relation ref count is incremented, or set to 1 if new entry.
2063  * Caller should eventually decrement count. (Usually,
2064  * that happens by calling RelationClose().)
2065  */
2066 Relation
2068 {
2069  Relation rd;
2070 
2071  /* Make sure we're in an xact, even if this ends up being a cache hit */
2073 
2074  /*
2075  * first try to find reldesc in the cache
2076  */
2077  RelationIdCacheLookup(relationId, rd);
2078 
2079  if (RelationIsValid(rd))
2080  {
2082  /* revalidate cache entry if necessary */
2083  if (!rd->rd_isvalid)
2084  {
2085  /*
2086  * Indexes only have a limited number of possible schema changes,
2087  * and we don't want to use the full-blown procedure because it's
2088  * a headache for indexes that reload itself depends on.
2089  */
2090  if (rd->rd_rel->relkind == RELKIND_INDEX)
2092  else
2093  RelationClearRelation(rd, true);
2094  Assert(rd->rd_isvalid);
2095  }
2096  return rd;
2097  }
2098 
2099  /*
2100  * no reldesc in the cache, so have RelationBuildDesc() build one and add
2101  * it.
2102  */
2103  rd = RelationBuildDesc(relationId, true);
2104  if (RelationIsValid(rd))
2106  return rd;
2107 }
2108 
2109 /* ----------------------------------------------------------------
2110  * cache invalidation support routines
2111  * ----------------------------------------------------------------
2112  */
2113 
2114 /*
2115  * RelationIncrementReferenceCount
2116  * Increments relation reference count.
2117  *
2118  * Note: bootstrap mode has its own weird ideas about relation refcount
2119  * behavior; we ought to fix it someday, but for now, just disable
2120  * reference count ownership tracking in bootstrap mode.
2121  */
2122 void
2124 {
2126  rel->rd_refcnt += 1;
2129 }
2130 
2131 /*
2132  * RelationDecrementReferenceCount
2133  * Decrements relation reference count.
2134  */
2135 void
2137 {
2138  Assert(rel->rd_refcnt > 0);
2139  rel->rd_refcnt -= 1;
2142 }
2143 
2144 /*
2145  * RelationClose - close an open relation
2146  *
2147  * Actually, we just decrement the refcount.
2148  *
2149  * NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
2150  * will be freed as soon as their refcount goes to zero. In combination
2151  * with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
2152  * to catch references to already-released relcache entries. It slows
2153  * things down quite a bit, however.
2154  */
2155 void
2157 {
2158  /* Note: no locking manipulations needed */
2160 
2161 #ifdef RELCACHE_FORCE_RELEASE
2162  if (RelationHasReferenceCountZero(relation) &&
2163  relation->rd_createSubid == InvalidSubTransactionId &&
2165  RelationClearRelation(relation, false);
2166 #endif
2167 }
2168 
2169 /*
2170  * RelationReloadIndexInfo - reload minimal information for an open index
2171  *
2172  * This function is used only for indexes. A relcache inval on an index
2173  * can mean that its pg_class or pg_index row changed. There are only
2174  * very limited changes that are allowed to an existing index's schema,
2175  * so we can update the relcache entry without a complete rebuild; which
2176  * is fortunate because we can't rebuild an index entry that is "nailed"
2177  * and/or in active use. We support full replacement of the pg_class row,
2178  * as well as updates of a few simple fields of the pg_index row.
2179  *
2180  * We can't necessarily reread the catalog rows right away; we might be
2181  * in a failed transaction when we receive the SI notification. If so,
2182  * RelationClearRelation just marks the entry as invalid by setting
2183  * rd_isvalid to false. This routine is called to fix the entry when it
2184  * is next needed.
2185  *
2186  * We assume that at the time we are called, we have at least AccessShareLock
2187  * on the target index. (Note: in the calls from RelationClearRelation,
2188  * this is legitimate because we know the rel has positive refcount.)
2189  *
2190  * If the target index is an index on pg_class or pg_index, we'd better have
2191  * previously gotten at least AccessShareLock on its underlying catalog,
2192  * else we are at risk of deadlock against someone trying to exclusive-lock
2193  * the heap and index in that order. This is ensured in current usage by
2194  * only applying this to indexes being opened or having positive refcount.
2195  */
2196 static void
2198 {
2199  bool indexOK;
2200  HeapTuple pg_class_tuple;
2201  Form_pg_class relp;
2202 
2203  /* Should be called only for invalidated indexes */
2204  Assert(relation->rd_rel->relkind == RELKIND_INDEX &&
2205  !relation->rd_isvalid);
2206 
2207  /* Ensure it's closed at smgr level */
2208  RelationCloseSmgr(relation);
2209 
2210  /* Must free any AM cached data upon relcache flush */
2211  if (relation->rd_amcache)
2212  pfree(relation->rd_amcache);
2213  relation->rd_amcache = NULL;
2214 
2215  /*
2216  * If it's a shared index, we might be called before backend startup has
2217  * finished selecting a database, in which case we have no way to read
2218  * pg_class yet. However, a shared index can never have any significant
2219  * schema updates, so it's okay to ignore the invalidation signal. Just
2220  * mark it valid and return without doing anything more.
2221  */
2222  if (relation->rd_rel->relisshared && !criticalRelcachesBuilt)
2223  {
2224  relation->rd_isvalid = true;
2225  return;
2226  }
2227 
2228  /*
2229  * Read the pg_class row
2230  *
2231  * Don't try to use an indexscan of pg_class_oid_index to reload the info
2232  * for pg_class_oid_index ...
2233  */
2234  indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
2235  pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK, false);
2236  if (!HeapTupleIsValid(pg_class_tuple))
2237  elog(ERROR, "could not find pg_class tuple for index %u",
2238  RelationGetRelid(relation));
2239  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2240  memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2241  /* Reload reloptions in case they changed */
2242  if (relation->rd_options)
2243  pfree(relation->rd_options);
2244  RelationParseRelOptions(relation, pg_class_tuple);
2245  /* done with pg_class tuple */
2246  heap_freetuple(pg_class_tuple);
2247  /* We must recalculate physical address in case it changed */
2248  RelationInitPhysicalAddr(relation);
2249 
2250  /*
2251  * For a non-system index, there are fields of the pg_index row that are
2252  * allowed to change, so re-read that row and update the relcache entry.
2253  * Most of the info derived from pg_index (such as support function lookup
2254  * info) cannot change, and indeed the whole point of this routine is to
2255  * update the relcache entry without clobbering that data; so wholesale
2256  * replacement is not appropriate.
2257  */
2258  if (!IsSystemRelation(relation))
2259  {
2260  HeapTuple tuple;
2262 
2263  tuple = SearchSysCache1(INDEXRELID,
2264  ObjectIdGetDatum(RelationGetRelid(relation)));
2265  if (!HeapTupleIsValid(tuple))
2266  elog(ERROR, "cache lookup failed for index %u",
2267  RelationGetRelid(relation));
2268  index = (Form_pg_index) GETSTRUCT(tuple);
2269 
2270  /*
2271  * Basically, let's just copy all the bool fields. There are one or
2272  * two of these that can't actually change in the current code, but
2273  * it's not worth it to track exactly which ones they are. None of
2274  * the array fields are allowed to change, though.
2275  */
2276  relation->rd_index->indisunique = index->indisunique;
2277  relation->rd_index->indisprimary = index->indisprimary;
2278  relation->rd_index->indisexclusion = index->indisexclusion;
2279  relation->rd_index->indimmediate = index->indimmediate;
2280  relation->rd_index->indisclustered = index->indisclustered;
2281  relation->rd_index->indisvalid = index->indisvalid;
2282  relation->rd_index->indcheckxmin = index->indcheckxmin;
2283  relation->rd_index->indisready = index->indisready;
2284  relation->rd_index->indislive = index->indislive;
2285 
2286  /* Copy xmin too, as that is needed to make sense of indcheckxmin */
2288  HeapTupleHeaderGetXmin(tuple->t_data));
2289 
2290  ReleaseSysCache(tuple);
2291  }
2292 
2293  /* Okay, now it's valid again */
2294  relation->rd_isvalid = true;
2295 }
2296 
2297 /*
2298  * RelationDestroyRelation
2299  *
2300  * Physically delete a relation cache entry and all subsidiary data.
2301  * Caller must already have unhooked the entry from the hash table.
2302  */
2303 static void
2304 RelationDestroyRelation(Relation relation, bool remember_tupdesc)
2305 {
2307 
2308  /*
2309  * Make sure smgr and lower levels close the relation's files, if they
2310  * weren't closed already. (This was probably done by caller, but let's
2311  * just be real sure.)
2312  */
2313  RelationCloseSmgr(relation);
2314 
2315  /*
2316  * Free all the subsidiary data structures of the relcache entry, then the
2317  * entry itself.
2318  */
2319  if (relation->rd_rel)
2320  pfree(relation->rd_rel);
2321  /* can't use DecrTupleDescRefCount here */
2322  Assert(relation->rd_att->tdrefcount > 0);
2323  if (--relation->rd_att->tdrefcount == 0)
2324  {
2325  /*
2326  * If we Rebuilt a relcache entry during a transaction then its
2327  * possible we did that because the TupDesc changed as the result of
2328  * an ALTER TABLE that ran at less than AccessExclusiveLock. It's
2329  * possible someone copied that TupDesc, in which case the copy would
2330  * point to free'd memory. So if we rebuild an entry we keep the
2331  * TupDesc around until end of transaction, to be safe.
2332  */
2333  if (remember_tupdesc)
2335  else
2336  FreeTupleDesc(relation->rd_att);
2337  }
2338  FreeTriggerDesc(relation->trigdesc);
2339  list_free_deep(relation->rd_fkeylist);
2340  list_free(relation->rd_indexlist);
2341  bms_free(relation->rd_indexattr);
2342  bms_free(relation->rd_keyattr);
2343  bms_free(relation->rd_pkattr);
2344  bms_free(relation->rd_idattr);
2345  if (relation->rd_pubactions)
2346  pfree(relation->rd_pubactions);
2347  if (relation->rd_options)
2348  pfree(relation->rd_options);
2349  if (relation->rd_indextuple)
2350  pfree(relation->rd_indextuple);
2351  if (relation->rd_indexcxt)
2352  MemoryContextDelete(relation->rd_indexcxt);
2353  if (relation->rd_rulescxt)
2354  MemoryContextDelete(relation->rd_rulescxt);
2355  if (relation->rd_rsdesc)
2356  MemoryContextDelete(relation->rd_rsdesc->rscxt);
2357  if (relation->rd_partkeycxt)
2359  if (relation->rd_pdcxt)
2360  MemoryContextDelete(relation->rd_pdcxt);
2361  if (relation->rd_partcheck)
2362  pfree(relation->rd_partcheck);
2363  if (relation->rd_fdwroutine)
2364  pfree(relation->rd_fdwroutine);
2365  pfree(relation);
2366 }
2367 
2368 /*
2369  * RelationClearRelation
2370  *
2371  * Physically blow away a relation cache entry, or reset it and rebuild
2372  * it from scratch (that is, from catalog entries). The latter path is
2373  * used when we are notified of a change to an open relation (one with
2374  * refcount > 0).
2375  *
2376  * NB: when rebuilding, we'd better hold some lock on the relation,
2377  * else the catalog data we need to read could be changing under us.
2378  * Also, a rel to be rebuilt had better have refcnt > 0. This is because
2379  * an sinval reset could happen while we're accessing the catalogs, and
2380  * the rel would get blown away underneath us by RelationCacheInvalidate
2381  * if it has zero refcnt.
2382  *
2383  * The "rebuild" parameter is redundant in current usage because it has
2384  * to match the relation's refcnt status, but we keep it as a crosscheck
2385  * that we're doing what the caller expects.
2386  */
2387 static void
2388 RelationClearRelation(Relation relation, bool rebuild)
2389 {
2390  /*
2391  * As per notes above, a rel to be rebuilt MUST have refcnt > 0; while of
2392  * course it would be an equally bad idea to blow away one with nonzero
2393  * refcnt, since that would leave someone somewhere with a dangling
2394  * pointer. All callers are expected to have verified that this holds.
2395  */
2396  Assert(rebuild ?
2397  !RelationHasReferenceCountZero(relation) :
2398  RelationHasReferenceCountZero(relation));
2399 
2400  /*
2401  * Make sure smgr and lower levels close the relation's files, if they
2402  * weren't closed already. If the relation is not getting deleted, the
2403  * next smgr access should reopen the files automatically. This ensures
2404  * that the low-level file access state is updated after, say, a vacuum
2405  * truncation.
2406  */
2407  RelationCloseSmgr(relation);
2408 
2409  /*
2410  * Never, never ever blow away a nailed-in system relation, because we'd
2411  * be unable to recover. However, we must redo RelationInitPhysicalAddr
2412  * in case it is a mapped relation whose mapping changed.
2413  *
2414  * If it's a nailed-but-not-mapped index, then we need to re-read the
2415  * pg_class row to see if its relfilenode changed. We do that immediately
2416  * if we're inside a valid transaction and the relation is open (not
2417  * counting the nailed refcount). Otherwise just mark the entry as
2418  * possibly invalid, and it'll be fixed when next opened.
2419  */
2420  if (relation->rd_isnailed)
2421  {
2422  RelationInitPhysicalAddr(relation);
2423 
2424  if (relation->rd_rel->relkind == RELKIND_INDEX)
2425  {
2426  relation->rd_isvalid = false; /* needs to be revalidated */
2427  if (relation->rd_refcnt > 1 && IsTransactionState())
2428  RelationReloadIndexInfo(relation);
2429  }
2430  return;
2431  }
2432 
2433  /*
2434  * Even non-system indexes should not be blown away if they are open and
2435  * have valid index support information. This avoids problems with active
2436  * use of the index support information. As with nailed indexes, we
2437  * re-read the pg_class row to handle possible physical relocation of the
2438  * index, and we check for pg_index updates too.
2439  */
2440  if (relation->rd_rel->relkind == RELKIND_INDEX &&
2441  relation->rd_refcnt > 0 &&
2442  relation->rd_indexcxt != NULL)
2443  {
2444  relation->rd_isvalid = false; /* needs to be revalidated */
2445  if (IsTransactionState())
2446  RelationReloadIndexInfo(relation);
2447  return;
2448  }
2449 
2450  /* Mark it invalid until we've finished rebuild */
2451  relation->rd_isvalid = false;
2452 
2453  /*
2454  * If we're really done with the relcache entry, blow it away. But if
2455  * someone is still using it, reconstruct the whole deal without moving
2456  * the physical RelationData record (so that the someone's pointer is
2457  * still valid).
2458  */
2459  if (!rebuild)
2460  {
2461  /* Remove it from the hash table */
2462  RelationCacheDelete(relation);
2463 
2464  /* And release storage */
2465  RelationDestroyRelation(relation, false);
2466  }
2467  else if (!IsTransactionState())
2468  {
2469  /*
2470  * If we're not inside a valid transaction, we can't do any catalog
2471  * access so it's not possible to rebuild yet. Just exit, leaving
2472  * rd_isvalid = false so that the rebuild will occur when the entry is
2473  * next opened.
2474  *
2475  * Note: it's possible that we come here during subtransaction abort,
2476  * and the reason for wanting to rebuild is that the rel is open in
2477  * the outer transaction. In that case it might seem unsafe to not
2478  * rebuild immediately, since whatever code has the rel already open
2479  * will keep on using the relcache entry as-is. However, in such a
2480  * case the outer transaction should be holding a lock that's
2481  * sufficient to prevent any significant change in the rel's schema,
2482  * so the existing entry contents should be good enough for its
2483  * purposes; at worst we might be behind on statistics updates or the
2484  * like. (See also CheckTableNotInUse() and its callers.) These same
2485  * remarks also apply to the cases above where we exit without having
2486  * done RelationReloadIndexInfo() yet.
2487  */
2488  return;
2489  }
2490  else
2491  {
2492  /*
2493  * Our strategy for rebuilding an open relcache entry is to build a
2494  * new entry from scratch, swap its contents with the old entry, and
2495  * finally delete the new entry (along with any infrastructure swapped
2496  * over from the old entry). This is to avoid trouble in case an
2497  * error causes us to lose control partway through. The old entry
2498  * will still be marked !rd_isvalid, so we'll try to rebuild it again
2499  * on next access. Meanwhile it's not any less valid than it was
2500  * before, so any code that might expect to continue accessing it
2501  * isn't hurt by the rebuild failure. (Consider for example a
2502  * subtransaction that ALTERs a table and then gets canceled partway
2503  * through the cache entry rebuild. The outer transaction should
2504  * still see the not-modified cache entry as valid.) The worst
2505  * consequence of an error is leaking the necessarily-unreferenced new
2506  * entry, and this shouldn't happen often enough for that to be a big
2507  * problem.
2508  *
2509  * When rebuilding an open relcache entry, we must preserve ref count,
2510  * rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state. Also
2511  * attempt to preserve the pg_class entry (rd_rel), tupledesc,
2512  * rewrite-rule, partition key, and partition descriptor substructures
2513  * in place, because various places assume that these structures won't
2514  * move while they are working with an open relcache entry. (Note:
2515  * the refcount mechanism for tupledescs might someday allow us to
2516  * remove this hack for the tupledesc.)
2517  *
2518  * Note that this process does not touch CurrentResourceOwner; which
2519  * is good because whatever ref counts the entry may have do not
2520  * necessarily belong to that resource owner.
2521  */
2522  Relation newrel;
2523  Oid save_relid = RelationGetRelid(relation);
2524  bool keep_tupdesc;
2525  bool keep_rules;
2526  bool keep_policies;
2527  bool keep_partkey;
2528  bool keep_partdesc;
2529 
2530  /* Build temporary entry, but don't link it into hashtable */
2531  newrel = RelationBuildDesc(save_relid, false);
2532  if (newrel == NULL)
2533  {
2534  /*
2535  * We can validly get here, if we're using a historic snapshot in
2536  * which a relation, accessed from outside logical decoding, is
2537  * still invisible. In that case it's fine to just mark the
2538  * relation as invalid and return - it'll fully get reloaded by
2539  * the cache reset at the end of logical decoding (or at the next
2540  * access). During normal processing we don't want to ignore this
2541  * case as it shouldn't happen there, as explained below.
2542  */
2543  if (HistoricSnapshotActive())
2544  return;
2545 
2546  /*
2547  * This shouldn't happen as dropping a relation is intended to be
2548  * impossible if still referenced (c.f. CheckTableNotInUse()). But
2549  * if we get here anyway, we can't just delete the relcache entry,
2550  * as it possibly could get accessed later (as e.g. the error
2551  * might get trapped and handled via a subtransaction rollback).
2552  */
2553  elog(ERROR, "relation %u deleted while still in use", save_relid);
2554  }
2555 
2556  keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
2557  keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
2558  keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc);
2559  keep_partkey = (relation->rd_partkey != NULL);
2560  keep_partdesc = equalPartitionDescs(relation->rd_partkey,
2561  relation->rd_partdesc,
2562  newrel->rd_partdesc);
2563 
2564  /*
2565  * Perform swapping of the relcache entry contents. Within this
2566  * process the old entry is momentarily invalid, so there *must* be no
2567  * possibility of CHECK_FOR_INTERRUPTS within this sequence. Do it in
2568  * all-in-line code for safety.
2569  *
2570  * Since the vast majority of fields should be swapped, our method is
2571  * to swap the whole structures and then re-swap those few fields we
2572  * didn't want swapped.
2573  */
2574 #define SWAPFIELD(fldtype, fldname) \
2575  do { \
2576  fldtype _tmp = newrel->fldname; \
2577  newrel->fldname = relation->fldname; \
2578  relation->fldname = _tmp; \
2579  } while (0)
2580 
2581  /* swap all Relation struct fields */
2582  {
2583  RelationData tmpstruct;
2584 
2585  memcpy(&tmpstruct, newrel, sizeof(RelationData));
2586  memcpy(newrel, relation, sizeof(RelationData));
2587  memcpy(relation, &tmpstruct, sizeof(RelationData));
2588  }
2589 
2590  /* rd_smgr must not be swapped, due to back-links from smgr level */
2591  SWAPFIELD(SMgrRelation, rd_smgr);
2592  /* rd_refcnt must be preserved */
2593  SWAPFIELD(int, rd_refcnt);
2594  /* isnailed shouldn't change */
2595  Assert(newrel->rd_isnailed == relation->rd_isnailed);
2596  /* creation sub-XIDs must be preserved */
2597  SWAPFIELD(SubTransactionId, rd_createSubid);
2598  SWAPFIELD(SubTransactionId, rd_newRelfilenodeSubid);
2599  /* un-swap rd_rel pointers, swap contents instead */
2600  SWAPFIELD(Form_pg_class, rd_rel);
2601  /* ... but actually, we don't have to update newrel->rd_rel */
2602  memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
2603  /* preserve old tupledesc and rules if no logical change */
2604  if (keep_tupdesc)
2605  SWAPFIELD(TupleDesc, rd_att);
2606  if (keep_rules)
2607  {
2608  SWAPFIELD(RuleLock *, rd_rules);
2609  SWAPFIELD(MemoryContext, rd_rulescxt);
2610  }
2611  if (keep_policies)
2612  SWAPFIELD(RowSecurityDesc *, rd_rsdesc);
2613  /* toast OID override must be preserved */
2614  SWAPFIELD(Oid, rd_toastoid);
2615  /* pgstat_info must be preserved */
2616  SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
2617  /* partition key must be preserved, if we have one */
2618  if (keep_partkey)
2619  {
2620  SWAPFIELD(PartitionKey, rd_partkey);
2621  SWAPFIELD(MemoryContext, rd_partkeycxt);
2622  }
2623  /* preserve old partdesc if no logical change */
2624  if (keep_partdesc)
2625  {
2626  SWAPFIELD(PartitionDesc, rd_partdesc);
2627  SWAPFIELD(MemoryContext, rd_pdcxt);
2628  }
2629 
2630 #undef SWAPFIELD
2631 
2632  /* And now we can throw away the temporary entry */
2633  RelationDestroyRelation(newrel, !keep_tupdesc);
2634  }
2635 }
2636 
2637 /*
2638  * RelationFlushRelation
2639  *
2640  * Rebuild the relation if it is open (refcount > 0), else blow it away.
2641  * This is used when we receive a cache invalidation event for the rel.
2642  */
2643 static void
2645 {
2646  if (relation->rd_createSubid != InvalidSubTransactionId ||
2648  {
2649  /*
2650  * New relcache entries are always rebuilt, not flushed; else we'd
2651  * forget the "new" status of the relation, which is a useful
2652  * optimization to have. Ditto for the new-relfilenode status.
2653  *
2654  * The rel could have zero refcnt here, so temporarily increment the
2655  * refcnt to ensure it's safe to rebuild it. We can assume that the
2656  * current transaction has some lock on the rel already.
2657  */
2659  RelationClearRelation(relation, true);
2661  }
2662  else
2663  {
2664  /*
2665  * Pre-existing rels can be dropped from the relcache if not open.
2666  */
2667  bool rebuild = !RelationHasReferenceCountZero(relation);
2668 
2669  RelationClearRelation(relation, rebuild);
2670  }
2671 }
2672 
2673 /*
2674  * RelationForgetRelation - unconditionally remove a relcache entry
2675  *
2676  * External interface for destroying a relcache entry when we
2677  * drop the relation.
2678  */
2679 void
2681 {
2682  Relation relation;
2683 
2684  RelationIdCacheLookup(rid, relation);
2685 
2686  if (!PointerIsValid(relation))
2687  return; /* not in cache, nothing to do */
2688 
2689  if (!RelationHasReferenceCountZero(relation))
2690  elog(ERROR, "relation %u is still open", rid);
2691 
2692  /* Unconditionally destroy the relcache entry */
2693  RelationClearRelation(relation, false);
2694 }
2695 
2696 /*
2697  * RelationCacheInvalidateEntry
2698  *
2699  * This routine is invoked for SI cache flush messages.
2700  *
2701  * Any relcache entry matching the relid must be flushed. (Note: caller has
2702  * already determined that the relid belongs to our database or is a shared
2703  * relation.)
2704  *
2705  * We used to skip local relations, on the grounds that they could
2706  * not be targets of cross-backend SI update messages; but it seems
2707  * safer to process them, so that our *own* SI update messages will
2708  * have the same effects during CommandCounterIncrement for both
2709  * local and nonlocal relations.
2710  */
2711 void
2713 {
2714  Relation relation;
2715 
2716  RelationIdCacheLookup(relationId, relation);
2717 
2718  if (PointerIsValid(relation))
2719  {
2721  RelationFlushRelation(relation);
2722  }
2723 }
2724 
2725 /*
2726  * RelationCacheInvalidate
2727  * Blow away cached relation descriptors that have zero reference counts,
2728  * and rebuild those with positive reference counts. Also reset the smgr
2729  * relation cache and re-read relation mapping data.
2730  *
2731  * This is currently used only to recover from SI message buffer overflow,
2732  * so we do not touch new-in-transaction relations; they cannot be targets
2733  * of cross-backend SI updates (and our own updates now go through a
2734  * separate linked list that isn't limited by the SI message buffer size).
2735  * Likewise, we need not discard new-relfilenode-in-transaction hints,
2736  * since any invalidation of those would be a local event.
2737  *
2738  * We do this in two phases: the first pass deletes deletable items, and
2739  * the second one rebuilds the rebuildable items. This is essential for
2740  * safety, because hash_seq_search only copes with concurrent deletion of
2741  * the element it is currently visiting. If a second SI overflow were to
2742  * occur while we are walking the table, resulting in recursive entry to
2743  * this routine, we could crash because the inner invocation blows away
2744  * the entry next to be visited by the outer scan. But this way is OK,
2745  * because (a) during the first pass we won't process any more SI messages,
2746  * so hash_seq_search will complete safely; (b) during the second pass we
2747  * only hold onto pointers to nondeletable entries.
2748  *
2749  * The two-phase approach also makes it easy to update relfilenodes for
2750  * mapped relations before we do anything else, and to ensure that the
2751  * second pass processes nailed-in-cache items before other nondeletable
2752  * items. This should ensure that system catalogs are up to date before
2753  * we attempt to use them to reload information about other open relations.
2754  */
2755 void
2757 {
2759  RelIdCacheEnt *idhentry;
2760  Relation relation;
2761  List *rebuildFirstList = NIL;
2762  List *rebuildList = NIL;
2763  ListCell *l;
2764 
2765  /*
2766  * Reload relation mapping data before starting to reconstruct cache.
2767  */
2769 
2770  /* Phase 1 */
2771  hash_seq_init(&status, RelationIdCache);
2772 
2773  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2774  {
2775  relation = idhentry->reldesc;
2776 
2777  /* Must close all smgr references to avoid leaving dangling ptrs */
2778  RelationCloseSmgr(relation);
2779 
2780  /*
2781  * Ignore new relations; no other backend will manipulate them before
2782  * we commit. Likewise, before replacing a relation's relfilenode, we
2783  * shall have acquired AccessExclusiveLock and drained any applicable
2784  * pending invalidations.
2785  */
2786  if (relation->rd_createSubid != InvalidSubTransactionId ||
2788  continue;
2789 
2791 
2792  if (RelationHasReferenceCountZero(relation))
2793  {
2794  /* Delete this entry immediately */
2795  Assert(!relation->rd_isnailed);
2796  RelationClearRelation(relation, false);
2797  }
2798  else
2799  {
2800  /*
2801  * If it's a mapped relation, immediately update its rd_node in
2802  * case its relfilenode changed. We must do this during phase 1
2803  * in case the relation is consulted during rebuild of other
2804  * relcache entries in phase 2. It's safe since consulting the
2805  * map doesn't involve any access to relcache entries.
2806  */
2807  if (RelationIsMapped(relation))
2808  RelationInitPhysicalAddr(relation);
2809 
2810  /*
2811  * Add this entry to list of stuff to rebuild in second pass.
2812  * pg_class goes to the front of rebuildFirstList while
2813  * pg_class_oid_index goes to the back of rebuildFirstList, so
2814  * they are done first and second respectively. Other nailed
2815  * relations go to the front of rebuildList, so they'll be done
2816  * next in no particular order; and everything else goes to the
2817  * back of rebuildList.
2818  */
2819  if (RelationGetRelid(relation) == RelationRelationId)
2820  rebuildFirstList = lcons(relation, rebuildFirstList);
2821  else if (RelationGetRelid(relation) == ClassOidIndexId)
2822  rebuildFirstList = lappend(rebuildFirstList, relation);
2823  else if (relation->rd_isnailed)
2824  rebuildList = lcons(relation, rebuildList);
2825  else
2826  rebuildList = lappend(rebuildList, relation);
2827  }
2828  }
2829 
2830  /*
2831  * Now zap any remaining smgr cache entries. This must happen before we
2832  * start to rebuild entries, since that may involve catalog fetches which
2833  * will re-open catalog files.
2834  */
2835  smgrcloseall();
2836 
2837  /* Phase 2: rebuild the items found to need rebuild in phase 1 */
2838  foreach(l, rebuildFirstList)
2839  {
2840  relation = (Relation) lfirst(l);
2841  RelationClearRelation(relation, true);
2842  }
2843  list_free(rebuildFirstList);
2844  foreach(l, rebuildList)
2845  {
2846  relation = (Relation) lfirst(l);
2847  RelationClearRelation(relation, true);
2848  }
2849  list_free(rebuildList);
2850 }
2851 
2852 /*
2853  * RelationCloseSmgrByOid - close a relcache entry's smgr link
2854  *
2855  * Needed in some cases where we are changing a relation's physical mapping.
2856  * The link will be automatically reopened on next use.
2857  */
/*
 * NOTE(review): listing line 2859, which should carry the function name and
 * parameter list, is absent from this extract.  The body reads a value named
 * relationId, so that is presumably the sole parameter -- confirm upstream.
 */
2858 void
2860 {
2861  Relation relation;
2862 
 /*
  * Look the OID up in the relcache.  relation is passed uninitialized and
  * tested right afterwards, so the lookup presumably stores its result
  * (valid or not) into it.
  */
2863  RelationIdCacheLookup(relationId, relation);
2864 
2865  if (!PointerIsValid(relation))
2866  return; /* not in cache, nothing to do */
2867 
 /* Entry exists: close just its smgr link. */
2868  RelationCloseSmgr(relation);
2869 }
2870 
/*
 * Append the tuple descriptor td to EOXactTupleDescArray, creating or
 * enlarging that array as needed.
 *
 * NOTE(review): several listing lines are absent from this extract: 2872
 * (function name and parameters), 2878, 2881-2882, 2885 and 2889.  The
 * notes below mark where they fall.
 */
2871 static void
2873 {
 /* First use: create the array with room for 16 descriptors. */
2874  if (EOXactTupleDescArray == NULL)
2875  {
2876  MemoryContext oldcxt;
2877 
 /*
  * NOTE(review): line 2878 is missing; oldcxt must be assigned there,
  * presumably by switching memory contexts before the allocation.
  */
2879 
2880  EOXactTupleDescArray = (TupleDesc *) palloc(16 * sizeof(TupleDesc));
 /*
  * NOTE(review): lines 2881-2882 are missing; presumably they initialize
  * EOXactTupleDescArrayLen and NextEOXactTupleDescNum -- confirm.
  */
2883  MemoryContextSwitchTo(oldcxt);
2884  }
 /*
  * NOTE(review): line 2885 is missing; it presumably holds the "else if"
  * test that detects a full array.  The block below doubles the capacity.
  */
2886  {
2887  int32 newlen = EOXactTupleDescArrayLen * 2;
2888 
 /* NOTE(review): line 2889 is missing from this extract. */
2890 
2891  EOXactTupleDescArray = (TupleDesc *) repalloc(EOXactTupleDescArray,
2892  newlen * sizeof(TupleDesc));
2893  EOXactTupleDescArrayLen = newlen;
2894  }
2895 
 /* Store td in the next free slot and advance the fill counter. */
2896  EOXactTupleDescArray[NextEOXactTupleDescNum++] = td;
2897 }
2898 
2899 /*
2900  * AtEOXact_RelationCache
2901  *
2902  * Clean up the relcache at main-transaction commit or abort.
2903  *
2904  * Note: this must be called *before* processing invalidation messages.
2905  * In the case of abort, we don't want to try to rebuild any invalidated
2906  * cache entries (since we can't safely do database accesses). Therefore
2907  * we must reset refcnts before handling pending invalidations.
2908  *
2909  * As of PostgreSQL 8.1, relcache refcnts should get released by the
2910  * ResourceOwner mechanism. This routine just does a debugging
2911  * cross-check that no pins remain. However, we also need to do special
2912  * cleanup when the current transaction created any relations or made use
2913  * of forced index lists.
2914  */
/*
 * NOTE(review): listing lines 2916 (function name and parameters), 2918,
 * 2933 and 2966-2967 are absent from this extract.  The body uses a flag
 * named isCommit, which is passed through to AtEOXact_cleanup.
 */
2915 void
2917 {
 /*
  * NOTE(review): line 2918 is missing; `status`, used by hash_seq_init
  * below, is presumably declared there.
  */
2919  RelIdCacheEnt *idhentry;
2920  int i;
2921 
2922  /*
2923  * Unless the eoxact_list[] overflowed, we only need to examine the rels
2924  * listed in it. Otherwise fall back on a hash_seq_search scan.
2925  *
2926  * For simplicity, eoxact_list[] entries are not deleted till end of
2927  * top-level transaction, even though we could remove them at
2928  * subtransaction end in some cases, or remove relations from the list if
2929  * they are cleared for other reasons. Therefore we should expect the
2930  * case that list entries are not found in the hashtable; if not, there's
2931  * nothing to do for them.
2932  */
 /*
  * NOTE(review): line 2933 is missing; it presumably holds the `if` test
  * (on the overflow flag) that selects the full-scan branch below.
  */
2934  {
 /* Full scan: visit every entry in RelationIdCache. */
2935  hash_seq_init(&status, RelationIdCache);
2936  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2937  {
2938  AtEOXact_cleanup(idhentry->reldesc, isCommit);
2939  }
2940  }
2941  else
2942  {
 /* Short path: look up only the entries recorded in eoxact_list[]. */
2943  for (i = 0; i < eoxact_list_len; i++)
2944  {
2945  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
2946  (void *) &eoxact_list[i],
2947  HASH_FIND,
2948  NULL);
 /* A listed entry may no longer be in the hashtable; skip it then. */
2949  if (idhentry != NULL)
2950  AtEOXact_cleanup(idhentry->reldesc, isCommit);
2951  }
2952  }
2953 
 /*
  * Free every tuple descriptor queued in EOXactTupleDescArray, then the
  * array itself.
  */
2954  if (EOXactTupleDescArrayLen > 0)
2955  {
2956  Assert(EOXactTupleDescArray != NULL);
2957  for (i = 0; i < NextEOXactTupleDescNum; i++)
2958  FreeTupleDesc(EOXactTupleDescArray[i]);
2959  pfree(EOXactTupleDescArray);
2960  EOXactTupleDescArray = NULL;
2961  }
2962 
2963  /* Now we're out of the transaction and can clear the lists */
2964  eoxact_list_len = 0;
2965  eoxact_list_overflowed = false;
 /*
  * NOTE(review): lines 2966-2967 are missing; presumably they reset
  * NextEOXactTupleDescNum and EOXactTupleDescArrayLen -- confirm.
  */
2968 }
2969 
2970 /*
2971  * AtEOXact_cleanup
2972  *
2973  * Clean up a single rel at main-transaction commit or abort
2974  *
2975  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
2976  * bother to prevent duplicate entries in eoxact_list[].
2977  */
/*
 * relation: the relcache entry to clean up.
 * isCommit: true at transaction commit, false at abort.
 *
 * NOTE(review): listing lines 2996, 3018, 3034 and 3043 are absent from
 * this extract; the notes below mark where they fall.
 */
2978 static void
2979 AtEOXact_cleanup(Relation relation, bool isCommit)
2980 {
2981  /*
2982  * The relcache entry's ref count should be back to its normal
2983  * not-in-a-transaction state: 0 unless it's nailed in cache.
2984  *
2985  * In bootstrap mode, this is NOT true, so don't check it --- the
2986  * bootstrap code expects relations to stay open across start/commit
2987  * transaction calls. (That seems bogus, but it's not worth fixing.)
2988  *
2989  * Note: ideally this check would be applied to every relcache entry, not
2990  * just those that have eoxact work to do. But it's not worth forcing a
2991  * scan of the whole relcache just for this. (Moreover, doing so would
2992  * mean that assert-enabled testing never tests the hash_search code path
2993  * above, which seems a bad idea.)
2994  */
2995 #ifdef USE_ASSERT_CHECKING
 /*
  * NOTE(review): line 2996 is missing; per the comment above it is
  * presumably the "not in bootstrap mode" test guarding this block.
  */
2997  {
2998  int expected_refcnt;
2999 
3000  expected_refcnt = relation->rd_isnailed ? 1 : 0;
3001  Assert(relation->rd_refcnt == expected_refcnt);
3002  }
3003 #endif
3004 
3005  /*
3006  * Is it a relation created in the current transaction?
3007  *
3008  * During commit, reset the flag to zero, since we are now out of the
3009  * creating transaction. During abort, simply delete the relcache entry
3010  * --- it isn't interesting any longer. (NOTE: if we have forgotten the
3011  * new-ness of a new relation due to a forced cache flush, the entry will
3012  * get deleted anyway by shared-cache-inval processing of the aborted
3013  * pg_class insertion.)
3014  */
3015  if (relation->rd_createSubid != InvalidSubTransactionId)
3016  {
3017  if (isCommit)
 /*
  * NOTE(review): line 3018 is missing; it is the statement executed on
  * commit, presumably the reset of rd_createSubid described above.
  */
3019  else if (RelationHasReferenceCountZero(relation))
3020  {
 /* Abort with no remaining references: discard the entry and stop. */
3021  RelationClearRelation(relation, false);
3022  return;
3023  }
3024  else
3025  {
3026  /*
3027  * Hmm, somewhere there's a (leaked?) reference to the relation.
3028  * We daren't remove the entry for fear of dereferencing a
3029  * dangling pointer later. Bleat, and mark it as not belonging to
3030  * the current transaction. Hopefully it'll get cleaned up
3031  * eventually. This must be just a WARNING to avoid
3032  * error-during-error-recovery loops.
3033  */
 /*
  * NOTE(review): line 3034 is missing; presumably it performs the
  * "mark it as not belonging to the current transaction" step.
  */
3035  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3036  RelationGetRelationName(relation));
3037  }
3038  }
3039 
3040  /*
3041  * Likewise, reset the hint about the relfilenode being new.
3042  */
 /*
  * NOTE(review): line 3043 is missing; it should hold the reset that the
  * comment above describes.
  */
3044 
3045  /*
3046  * Flush any temporary index list.
3047  */
3048  if (relation->rd_indexvalid == 2)
3049  {
3050  list_free(relation->rd_indexlist);
3051  relation->rd_indexlist = NIL;
3052  relation->rd_oidindex = InvalidOid;
3053  relation->rd_pkindex = InvalidOid;
3054  relation->rd_replidindex = InvalidOid;
3055  relation->rd_indexvalid = 0;
3056  }
3057 }
3058 
3059 /*
3060  * AtEOSubXact_RelationCache
3061  *
3062  * Clean up the relcache at sub-transaction commit or abort.
3063  *
3064  * Note: this must be called *before* processing invalidation messages.
3065  */
/*
 * NOTE(review): listing lines 3067 (function name and leading parameters),
 * 3070 and 3079 are absent from this extract.  The body uses isCommit,
 * mySubid and parentSubid, all handed on to AtEOSubXact_cleanup.
 */
3066 void
3068  SubTransactionId parentSubid)
3069 {
 /*
  * NOTE(review): line 3070 is missing; `status`, used by hash_seq_init
  * below, is presumably declared there.
  */
3071  RelIdCacheEnt *idhentry;
3072  int i;
3073 
3074  /*
3075  * Unless the eoxact_list[] overflowed, we only need to examine the rels
3076  * listed in it. Otherwise fall back on a hash_seq_search scan. Same
3077  * logic as in AtEOXact_RelationCache.
3078  */
 /*
  * NOTE(review): line 3079 is missing; it presumably holds the `if` test
  * (on the overflow flag) that selects the full-scan branch below.
  */
3080  {
 /* Full scan: visit every entry in RelationIdCache. */
3081  hash_seq_init(&status, RelationIdCache);
3082  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3083  {
3084  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3085  mySubid, parentSubid);
3086  }
3087  }
3088  else
3089  {
 /* Short path: look up only the entries recorded in eoxact_list[]. */
3090  for (i = 0; i < eoxact_list_len; i++)
3091  {
3092  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
3093  (void *) &eoxact_list[i],
3094  HASH_FIND,
3095  NULL);
 /* Entries no longer present in the hashtable need no work. */
3096  if (idhentry != NULL)
3097  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3098  mySubid, parentSubid);
3099  }
3100  }
3101 
3102  /* Don't reset the list; we still need more cleanup later */
3103 }
3104 
3105 /*
3106  * AtEOSubXact_cleanup
3107  *
3108  * Clean up a single rel at subtransaction commit or abort
3109  *
3110  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
3111  * bother to prevent duplicate entries in eoxact_list[].
3112  */
/*
 * relation:    the relcache entry to clean up.
 * isCommit:    true at subtransaction commit, false at abort.
 * mySubid:     id of the ending subtransaction; only state tagged with this
 *              id is touched (apart from the temporary index list).
 * parentSubid: id that surviving state is re-tagged with.
 *
 * NOTE(review): listing line 3155 is absent from this extract.
 */
3113 static void
3114 AtEOSubXact_cleanup(Relation relation, bool isCommit,
3115  SubTransactionId mySubid, SubTransactionId parentSubid)
3116 {
3117  /*
3118  * Is it a relation created in the current subtransaction?
3119  *
3120  * During subcommit, mark it as belonging to the parent, instead. During
3121  * subabort, simply delete the relcache entry.
3122  */
3123  if (relation->rd_createSubid == mySubid)
3124  {
3125  if (isCommit)
3126  relation->rd_createSubid = parentSubid;
3127  else if (RelationHasReferenceCountZero(relation))
3128  {
 /* Abort with no remaining references: discard the entry and stop. */
3129  RelationClearRelation(relation, false);
3130  return;
3131  }
3132  else
3133  {
3134  /*
3135  * Hmm, somewhere there's a (leaked?) reference to the relation.
3136  * We daren't remove the entry for fear of dereferencing a
3137  * dangling pointer later. Bleat, and transfer it to the parent
3138  * subtransaction so we can try again later. This must be just a
3139  * WARNING to avoid error-during-error-recovery loops.
3140  */
3141  relation->rd_createSubid = parentSubid;
3142  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3143  RelationGetRelationName(relation));
3144  }
3145  }
3146 
3147  /*
3148  * Likewise, update or drop any new-relfilenode-in-subtransaction hint.
3149  */
3150  if (relation->rd_newRelfilenodeSubid == mySubid)
3151  {
3152  if (isCommit)
3153  relation->rd_newRelfilenodeSubid = parentSubid;
3154  else
 /*
  * NOTE(review): line 3155 is missing; it is the abort-path statement,
  * presumably the "drop the hint" step named in the comment above.
  */
3156  }
3157 
3158  /*
3159  * Flush any temporary index list.
3160  */
3161  if (relation->rd_indexvalid == 2)
3162  {
3163  list_free(relation->rd_indexlist);
3164  relation->rd_indexlist = NIL;
3165  relation->rd_oidindex = InvalidOid;
3166  relation->rd_pkindex = InvalidOid;
3167  relation->rd_replidindex = InvalidOid;
3168  relation->rd_indexvalid = 0;
3169  }
3170 }
3171 
3172 
3173 /*
3174  * RelationBuildLocalRelation
3175  * Build a relcache entry for an about-to-be-created relation,
3176  * and enter it into the relcache.
 *
 * relname:         copied into rd_rel->relname.
 * relnamespace:    stored in rd_rel->relnamespace; also consulted when
 *                  choosing relreplident.
 * tupDesc:         copied with CreateTupleDescCopy; only the attnotnull
 *                  flags are carried over on top of the copy.
 * relid:           becomes the entry's OID and every attribute's attrelid;
 *                  also decides whether the entry is nailed.
 * relfilenode:     stored in rd_rel->relfilenode, or handed to
 *                  RelationMapUpdateMap when mapped_relation is true.
 * reltablespace:   stored in rd_rel->reltablespace.
 * shared_relation: must agree with IsSharedRelation(relid), else ERROR.
 * mapped_relation: must be true whenever shared_relation is (asserted).
 * relpersistence:  stored in rd_rel->relpersistence; unrecognized values
 *                  raise ERROR.
 * relkind:         stored in rd_rel->relkind; materialized views start
 *                  out marked as not populated.
 *
 * Returns the new entry, marked valid.
 *
 * NOTE(review): listing lines 3238, 3240, 3256-3257, 3286, 3302-3304,
 * 3309, 3357 and 3387 are absent from this extract; notes in the body mark
 * where they fall.
3177  */
3178 Relation
3179 RelationBuildLocalRelation(const char *relname,
3180  Oid relnamespace,
3181  TupleDesc tupDesc,
3182  Oid relid,
3183  Oid relfilenode,
3184  Oid reltablespace,
3185  bool shared_relation,
3186  bool mapped_relation,
3187  char relpersistence,
3188  char relkind)
3189 {
3190  Relation rel;
3191  MemoryContext oldcxt;
3192  int natts = tupDesc->natts;
3193  int i;
3194  bool has_not_null;
3195  bool nailit;
3196 
3197  AssertArg(natts >= 0);
3198 
3199  /*
3200  * check for creation of a rel that must be nailed in cache.
3201  *
3202  * XXX this list had better match the relations specially handled in
3203  * RelationCacheInitializePhase2/3.
3204  */
3205  switch (relid)
3206  {
3207  case DatabaseRelationId:
3208  case AuthIdRelationId:
3209  case AuthMemRelationId:
3210  case RelationRelationId:
3211  case AttributeRelationId:
3212  case ProcedureRelationId:
3213  case TypeRelationId:
3214  nailit = true;
3215  break;
3216  default:
3217  nailit = false;
3218  break;
3219  }
3220 
3221  /*
3222  * check that hardwired list of shared rels matches what's in the
3223  * bootstrap .bki file. If you get a failure here during initdb, you
3224  * probably need to fix IsSharedRelation() to match whatever you've done
3225  * to the set of shared relations.
3226  */
3227  if (shared_relation != IsSharedRelation(relid))
3228  elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)",
3229  relname, relid);
3230 
3231  /* Shared relations had better be mapped, too */
3232  Assert(mapped_relation || !shared_relation);
3233 
3234  /*
3235  * switch to the cache context to create the relcache entry.
3236  */
3237  if (!CacheMemoryContext)
 /*
  * NOTE(review): lines 3238 and 3240 are missing; presumably they create
  * CacheMemoryContext when absent and then switch to it, saving the
  * previous context in oldcxt (restored near the end of the function).
  */
3239 
3241 
3242  /*
3243  * allocate a new relation descriptor and fill in basic state fields.
3244  */
3245  rel = (Relation) palloc0(sizeof(RelationData));
3246 
3247  /* make sure relation is marked as having no open file yet */
3248  rel->rd_smgr = NULL;
3249 
3250  /* mark it nailed if appropriate */
3251  rel->rd_isnailed = nailit;
3252 
 /* a nailed entry starts out with one reference, others with none */
3253  rel->rd_refcnt = nailit ? 1 : 0;
3254 
3255  /* it's being created in this transaction */
 /*
  * NOTE(review): lines 3256-3257 are missing; they should hold the
  * assignments the comment above refers to.
  */
3258 
3259  /*
3260  * create a new tuple descriptor from the one passed in. We do this
3261  * partly to copy it into the cache context, and partly because the new
3262  * relation can't have any defaults or constraints yet; they have to be
3263  * added in later steps, because they require additions to multiple system
3264  * catalogs. We can copy attnotnull constraints here, however.
3265  */
3266  rel->rd_att = CreateTupleDescCopy(tupDesc);
3267  rel->rd_att->tdrefcount = 1; /* mark as refcounted */
3268  has_not_null = false;
3269  for (i = 0; i < natts; i++)
3270  {
3271  rel->rd_att->attrs[i]->attnotnull = tupDesc->attrs[i]->attnotnull;
3272  has_not_null |= tupDesc->attrs[i]->attnotnull;
3273  }
3274 
 /* attach a constraint struct only if some column is NOT NULL */
3275  if (has_not_null)
3276  {
3277  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
3278 
3279  constr->has_not_null = true;
3280  rel->rd_att->constr = constr;
3281  }
3282 
3283  /*
3284  * initialize relation tuple form (caller may add/override data later)
3285  */
 /*
  * NOTE(review): line 3286 is missing; rd_rel is dereferenced below, so it
  * is presumably allocated there.
  */
3287 
3288  namestrcpy(&rel->rd_rel->relname, relname);
3289  rel->rd_rel->relnamespace = relnamespace;
3290 
3291  rel->rd_rel->relkind = relkind;
3292  rel->rd_rel->relhasoids = rel->rd_att->tdhasoid;
3293  rel->rd_rel->relnatts = natts;
3294  rel->rd_rel->reltype = InvalidOid;
3295  /* needed when bootstrapping: */
3296  rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;
3297 
3298  /* set up persistence and relcache fields dependent on it */
3299  rel->rd_rel->relpersistence = relpersistence;
3300  switch (relpersistence)
3301  {
 /*
  * NOTE(review): lines 3302-3304 are missing; they presumably hold the
  * case labels for the non-temporary persistence values and the first
  * statement of that branch.
  */
3305  rel->rd_islocaltemp = false;
3306  break;
3307  case RELPERSISTENCE_TEMP:
3308  Assert(isTempOrTempToastNamespace(relnamespace));
 /* NOTE(review): line 3309 is missing from this extract. */
3310  rel->rd_islocaltemp = true;
3311  break;
3312  default:
3313  elog(ERROR, "invalid relpersistence: %c", relpersistence);
3314  break;
3315  }
3316 
3317  /* if it's a materialized view, it's not populated initially */
3318  if (relkind == RELKIND_MATVIEW)
3319  rel->rd_rel->relispopulated = false;
3320  else
3321  rel->rd_rel->relispopulated = true;
3322 
3323  /* system relations and non-table objects don't have one */
3324  if (!IsSystemNamespace(relnamespace) &&
3325  (relkind == RELKIND_RELATION ||
3326  relkind == RELKIND_MATVIEW ||
3327  relkind == RELKIND_PARTITIONED_TABLE))
3328  rel->rd_rel->relreplident = REPLICA_IDENTITY_DEFAULT;
3329  else
3330  rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
3331 
3332  /*
3333  * Insert relation physical and logical identifiers (OIDs) into the right
3334  * places. For a mapped relation, we set relfilenode to zero and rely on
3335  * RelationInitPhysicalAddr to consult the map.
3336  */
3337  rel->rd_rel->relisshared = shared_relation;
3338 
3339  RelationGetRelid(rel) = relid;
3340 
3341  for (i = 0; i < natts; i++)
3342  rel->rd_att->attrs[i]->attrelid = relid;
3343 
3344  rel->rd_rel->reltablespace = reltablespace;
3345 
3346  if (mapped_relation)
3347  {
3348  rel->rd_rel->relfilenode = InvalidOid;
3349  /* Add it to the active mapping information */
3350  RelationMapUpdateMap(relid, relfilenode, shared_relation, true);
3351  }
3352  else
3353  rel->rd_rel->relfilenode = relfilenode;
3354 
3355  RelationInitLockInfo(rel); /* see lmgr.c */
3356 
 /*
  * NOTE(review): line 3357 is missing; presumably the physical-address
  * setup mentioned in the comment above happens there -- confirm.
  */
3358 
3359  /*
3360  * Okay to insert into the relcache hash table.
3361  *
3362  * Ordinarily, there should certainly not be an existing hash entry for
3363  * the same OID; but during bootstrap, when we create a "real" relcache
3364  * entry for one of the bootstrap relations, we'll be overwriting the
3365  * phony one created with formrdesc. So allow that to happen for nailed
3366  * rels.
3367  */
3368  RelationCacheInsert(rel, nailit);
3369 
3370  /*
3371  * Flag relation as needing eoxact cleanup (to clear rd_createSubid). We
3372  * can't do this before storing relid in it.
3373  */
3374  EOXactListAdd(rel);
3375 
3376  /*
3377  * done building relcache entry.
3378  */
3379  MemoryContextSwitchTo(oldcxt);
3380 
3381  /* It's fully valid */
3382  rel->rd_isvalid = true;
3383 
3384  /*
3385  * Caller expects us to pin the returned entry.
3386  */
 /*
  * NOTE(review): line 3387 is missing; it should hold the pinning call the
  * comment above describes.
  */
3388 
3389  return rel;
3390 }
3391 
3392 
3393 /*
3394  * RelationSetNewRelfilenode
3395  *
3396  * Assign a new relfilenode (physical file name) to the relation.
3397  *
3398  * This allows a full rewrite of the relation to be done with transactional
3399  * safety (since the filenode assignment can be rolled back). Note however
3400  * that there is no simple way to access the relation's old data for the
3401  * remainder of the current transaction. This limits the usefulness to cases
3402  * such as TRUNCATE or rebuilding an index from scratch.
3403  *
3404  * Caller must already hold exclusive lock on the relation.
3405  *
3406  * The relation is marked with relfrozenxid = freezeXid (InvalidTransactionId
3407  * must be passed for indexes and sequences). This should be a lower bound on
3408  * the XIDs that will be put into the new relation contents.
3409  *
3410  * The new filenode's persistence is set to the given value. This is useful
3411  * for the cases that are changing the relation's persistence; other callers
3412  * need to pass the original relpersistence value.
3413  */
/*
 * NOTE(review): listing lines 3438, 3470, 3498 and 3505 are absent from
 * this extract; notes in the body mark where they fall.
 */
3414 void
3415 RelationSetNewRelfilenode(Relation relation, char persistence,
3416  TransactionId freezeXid, MultiXactId minmulti)
3417 {
3418  Oid newrelfilenode;
3419  RelFileNodeBackend newrnode;
3420  Relation pg_class;
3421  HeapTuple tuple;
3422  Form_pg_class classform;
3423 
3424  /* Indexes, sequences must have Invalid frozenxid; other rels must not */
3425  Assert((relation->rd_rel->relkind == RELKIND_INDEX ||
3426  relation->rd_rel->relkind == RELKIND_SEQUENCE) ?
3427  freezeXid == InvalidTransactionId :
3428  TransactionIdIsNormal(freezeXid));
 /* freezeXid and minmulti must be both valid or both invalid */
3429  Assert(TransactionIdIsNormal(freezeXid) == MultiXactIdIsValid(minmulti));
3430 
3431  /* Allocate a new relfilenode */
3432  newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL,
3433  persistence);
3434 
3435  /*
3436  * Get a writable copy of the pg_class tuple for the given relation.
3437  */
 /*
  * NOTE(review): line 3438 is missing; the pg_class variable is used and
  * closed with RowExclusiveLock further down, so it is presumably opened
  * there.
  */
3439 
3440  tuple = SearchSysCacheCopy1(RELOID,
3441  ObjectIdGetDatum(RelationGetRelid(relation)));
3442  if (!HeapTupleIsValid(tuple))
3443  elog(ERROR, "could not find tuple for relation %u",
3444  RelationGetRelid(relation));
3445  classform = (Form_pg_class) GETSTRUCT(tuple);
3446 
3447  /*
3448  * Create storage for the main fork of the new relfilenode.
3449  *
3450  * NOTE: any conflict in relfilenode value will be caught here, if
3451  * GetNewRelFileNode messes up for any reason.
3452  */
 /* same node as the current storage, except for the new relNode value */
3453  newrnode.node = relation->rd_node;
3454  newrnode.node.relNode = newrelfilenode;
3455  newrnode.backend = relation->rd_backend;
3456  RelationCreateStorage(newrnode.node, persistence);
3457  smgrclosenode(newrnode);
3458 
3459  /*
3460  * Schedule unlinking of the old storage at transaction commit.
3461  */
3462  RelationDropStorage(relation);
3463 
3464  /*
3465  * Now update the pg_class row. However, if we're dealing with a mapped
3466  * index, pg_class.relfilenode doesn't change; instead we have to send the
3467  * update to the relation mapper.
3468  */
3469  if (RelationIsMapped(relation))
 /*
  * NOTE(review): line 3470 is missing; it is the start of the call whose
  * remaining arguments follow, presumably the relation-mapper update
  * described above.
  */
3471  newrelfilenode,
3472  relation->rd_rel->relisshared,
3473  false);
3474  else
3475  classform->relfilenode = newrelfilenode;
3476 
3477  /* These changes are safe even for a mapped relation */
3478  if (relation->rd_rel->relkind != RELKIND_SEQUENCE)
3479  {
3480  classform->relpages = 0; /* it's empty until further notice */
3481  classform->reltuples = 0;
3482  classform->relallvisible = 0;
3483  }
3484  classform->relfrozenxid = freezeXid;
3485  classform->relminmxid = minmulti;
3486  classform->relpersistence = persistence;
3487 
3488  CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
3489 
3490  heap_freetuple(tuple);
3491 
3492  heap_close(pg_class, RowExclusiveLock);
3493 
3494  /*
3495  * Make the pg_class row change visible, as well as the relation map
3496  * change if any. This will cause the relcache entry to get updated, too.
3497  */
 /*
  * NOTE(review): line 3498 is missing; it should hold the call that makes
  * the changes visible, as the comment above describes.
  */
3499 
3500  /*
3501  * Mark the rel as having been given a new relfilenode in the current
3502  * (sub) transaction. This is a hint that can be used to optimize later
3503  * operations on the rel in the same transaction.
3504  */
 /*
  * NOTE(review): line 3505 is missing; it should hold the assignment that
  * records the hint described above.
  */
3506 
3507  /* Flag relation as needing eoxact cleanup (to remove the hint) */
3508  EOXactListAdd(relation);
3509 }
3510 
3511 
3512 /*
3513  * RelationCacheInitialize
3514  *
3515  * This initializes the relation descriptor cache. At the time
3516  * that this is invoked, we can't do database access yet (mainly
3517  * because the transaction subsystem is not up); all we are doing
3518  * is making an empty cache hashtable. This must be done before
3519  * starting the initialization transaction, because otherwise
3520  * AtEOXact_RelationCache would crash if that transaction aborts
3521  * before we can get the relcache set up.
3522  */
3523 
/* Initial size hint passed to hash_create for the relcache hashtable. */
3524 #define INITRELCACHESIZE 400
3525 
/*
 * NOTE(review): listing lines 3527 (function name and parameters), 3535 and
 * 3549 are absent from this extract.
 */
3526 void
3528 {
3529  HASHCTL ctl;
3530 
3531  /*
3532  * make sure cache memory context exists
3533  */
3534  if (!CacheMemoryContext)
 /*
  * NOTE(review): line 3535 is missing; it is the statement run when
  * CacheMemoryContext is not yet set, presumably creating it.
  */
3536 
3537  /*
3538  * create hashtable that indexes the relcache
3539  */
 /* keyed by relation OID, one RelIdCacheEnt per entry */
3540  MemSet(&ctl, 0, sizeof(ctl));
3541  ctl.keysize = sizeof(Oid);
3542  ctl.entrysize = sizeof(RelIdCacheEnt);
3543  RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
3544  &ctl, HASH_ELEM | HASH_BLOBS);
3545 
3546  /*
3547  * relation mapper needs to be initialized too
3548  */
 /*
  * NOTE(review): line 3549 is missing; it should hold the relation mapper
  * initialization call the comment above refers to.
  */
3550 }
3551 
3552 /*
3553  * RelationCacheInitializePhase2
3554  *
3555  * This is called to prepare for access to shared catalogs during startup.
3556  * We must at least set up nailed reldescs for pg_database, pg_authid,
3557  * pg_auth_members, pg_shseclabel, and pg_subscription. Ideally we'd like
3558  * to have reldescs for their indexes, too. We attempt to load this
3559  * information from the shared relcache init file. If that's missing or
3560  * broken, just make phony entries for the catalogs themselves.
3561  * RelationCacheInitializePhase3 will clean up as needed.
 *
 * NOTE(review): listing lines 3564 (function name and parameters), 3571,
 * 3577, 3583 and the continuation lines of each formrdesc call (3592, 3594,
 * 3596, 3598, 3600) are absent from this extract.
3562  */
3563 void
3565 {
3566  MemoryContext oldcxt;
3567 
3568  /*
3569  * relation mapper needs to be initialized too
3570  */
 /*
  * NOTE(review): line 3571 is missing; it should hold the relation mapper
  * initialization call the comment above refers to.
  */
3572 
3573  /*
3574  * In bootstrap mode, the shared catalogs aren't there yet anyway, so do
3575  * nothing.
3576  */
 /*
  * NOTE(review): line 3577 is missing; it presumably holds the
  * bootstrap-mode test that guards the return below.
  */
3578  return;
3579 
3580  /*
3581  * switch to cache memory context
3582  */
 /*
  * NOTE(review): line 3583 is missing; oldcxt, restored at the end of the
  * function, is presumably assigned there.
  */
3584 
3585  /*
3586  * Try to load the shared relcache cache file. If unsuccessful, bootstrap
3587  * the cache with pre-made descriptors for the critical shared catalogs.
3588  */
3589  if (!load_relcache_init_file(true))
3590  {
 /*
  * NOTE(review): the second line of each formrdesc call below is missing
  * from this extract, so the trailing arguments are not visible here.
  */
3591  formrdesc("pg_database", DatabaseRelation_Rowtype_Id, true,
3593  formrdesc("pg_authid", AuthIdRelation_Rowtype_Id, true,
3595  formrdesc("pg_auth_members", AuthMemRelation_Rowtype_Id, true,
3597  formrdesc("pg_shseclabel", SharedSecLabelRelation_Rowtype_Id, true,
3599  formrdesc("pg_subscription", SubscriptionRelation_Rowtype_Id, true,
3601 
3602 #define NUM_CRITICAL_SHARED_RELS 5 /* fix if you change list above */
3603  }
3604 
3605  MemoryContextSwitchTo(oldcxt);
3606 }
3607 
3608 /*
3609  * RelationCacheInitializePhase3
3610  *
3611  * This is called as soon as the catcache and transaction system
3612  * are functional and we have determined MyDatabaseId. At this point
3613  * we can actually read data from the database's system catalogs.
3614  * We first try to read pre-computed relcache entries from the local
3615  * relcache init file. If that's missing or broken, make phony entries
3616  * for the minimum set of nailed-in-cache relations. Then (unless
3617  * bootstrapping) make sure we have entries for the critical system
3618  * indexes. Once we've done all this, we have enough infrastructure to
3619  * open any system catalog or use any catcache. The last step is to
3620  * rewrite the cache files if needed.
3621  */
/*
 * NOTE(review): many listing lines are absent from this extract: 3623
 * (function name and parameters), 3625, 3633, 3638, 3653, 3665, 3693,
 * 3695-3699, 3701-3708, 3727, 3729-3740, 3744, 3773, 3874, 3896 and 3899.
 * Notes in the body mark where they fall.
 */
3622 void
3624 {
 /*
  * NOTE(review): line 3625 is missing; `status`, used by the hash_seq_*
  * calls below, is presumably declared there.
  */
3626  RelIdCacheEnt *idhentry;
3627  MemoryContext oldcxt;
 /* start out needing new init files if the shared entries were not loaded */
3628  bool needNewCacheFile = !criticalSharedRelcachesBuilt;
3629 
3630  /*
3631  * relation mapper needs to be initialized too
3632  */
 /*
  * NOTE(review): line 3633 is missing; it should hold the relation mapper
  * initialization call the comment above refers to.
  */
3634 
3635  /*
3636  * switch to cache memory context
3637  */
 /*
  * NOTE(review): line 3638 is missing; oldcxt, restored after the
  * formrdesc block, is presumably assigned there.
  */
3639 
3640  /*
3641  * Try to load the local relcache cache file. If unsuccessful, bootstrap
3642  * the cache with pre-made descriptors for the critical "nailed-in" system
3643  * catalogs.
3644  */
3645  if (IsBootstrapProcessingMode() ||
3646  !load_relcache_init_file(false))
3647  {
3648  needNewCacheFile = true;
3649 
3650  formrdesc("pg_class", RelationRelation_Rowtype_Id, false,
3651  true, Natts_pg_class, Desc_pg_class);
3652  formrdesc("pg_attribute", AttributeRelation_Rowtype_Id, false,
 /*
  * NOTE(review): line 3653, the remaining arguments of the pg_attribute
  * call, is missing from this extract.
  */
3654  formrdesc("pg_proc", ProcedureRelation_Rowtype_Id, false,
3655  true, Natts_pg_proc, Desc_pg_proc);
3656  formrdesc("pg_type", TypeRelation_Rowtype_Id, false,
3657  true, Natts_pg_type, Desc_pg_type);
3658 
3659 #define NUM_CRITICAL_LOCAL_RELS 4 /* fix if you change list above */
3660  }
3661 
3662  MemoryContextSwitchTo(oldcxt);
3663 
3664  /* In bootstrap mode, the faked-up formrdesc info is all we'll have */
 /*
  * NOTE(review): line 3665 is missing; it presumably holds the
  * bootstrap-mode test that guards the return below.
  */
3666  return;
3667 
3668  /*
3669  * If we didn't get the critical system indexes loaded into relcache, do
3670  * so now. These are critical because the catcache and/or opclass cache
3671  * depend on them for fetches done during relcache load. Thus, we have an
3672  * infinite-recursion problem. We can break the recursion by doing
3673  * heapscans instead of indexscans at certain key spots. To avoid hobbling
3674  * performance, we only want to do that until we have the critical indexes
3675  * loaded into relcache. Thus, the flag criticalRelcachesBuilt is used to
3676  * decide whether to do heapscan or indexscan at the key spots, and we set
3677  * it true after we've loaded the critical indexes.
3678  *
3679  * The critical indexes are marked as "nailed in cache", partly to make it
3680  * easy for load_relcache_init_file to count them, but mainly because we
3681  * cannot flush and rebuild them once we've set criticalRelcachesBuilt to
3682  * true. (NOTE: perhaps it would be possible to reload them by
3683  * temporarily setting criticalRelcachesBuilt to false again. For now,
3684  * though, we just nail 'em in.)
3685  *
3686  * RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical
3687  * in the same way as the others, because the critical catalogs don't
3688  * (currently) have any rules or triggers, and so these indexes can be
3689  * rebuilt without inducing recursion. However they are used during
3690  * relcache load when a rel does have rules or triggers, so we choose to
3691  * nail them for performance reasons.
3692  */
 /*
  * NOTE(review): line 3693 (presumably the test of criticalRelcachesBuilt)
  * and lines 3695-3699 / 3701-3708 (presumably the index-loading calls
  * counted by NUM_CRITICAL_LOCAL_INDEXES) are missing; only the tail of
  * one call survives below.
  */
3694  {
3700  IndexRelationId);
3709 
3710 #define NUM_CRITICAL_LOCAL_INDEXES 7 /* fix if you change list above */
3711 
3712  criticalRelcachesBuilt = true;
3713  }
3714 
3715  /*
3716  * Process critical shared indexes too.
3717  *
3718  * DatabaseNameIndexId isn't critical for relcache loading, but rather for
3719  * initial lookup of MyDatabaseId, without which we'll never find any
3720  * non-shared catalogs at all. Autovacuum calls InitPostgres with a
3721  * database OID, so it instead depends on DatabaseOidIndexId. We also
3722  * need to nail up some indexes on pg_authid and pg_auth_members for use
3723  * during client authentication. SharedSecLabelObjectIndexId isn't
3724  * critical for the core system, but authentication hooks might be
3725  * interested in it.
3726  */
 /*
  * NOTE(review): line 3727 (presumably the guarding test), lines 3729-3740
  * (presumably the index-loading calls counted by
  * NUM_CRITICAL_SHARED_INDEXES) and line 3744 are missing from this
  * extract.
  */
3728  {
3741 
3742 #define NUM_CRITICAL_SHARED_INDEXES 6 /* fix if you change list above */
3743 
3745  }
3746 
3747  /*
3748  * Now, scan all the relcache entries and update anything that might be
3749  * wrong in the results from formrdesc or the relcache cache file. If we
3750  * faked up relcache entries using formrdesc, then read the real pg_class
3751  * rows and replace the fake entries with them. Also, if any of the
3752  * relcache entries have rules, triggers, or security policies, load that
3753  * info the hard way since it isn't recorded in the cache file.
3754  *
3755  * Whenever we access the catalogs to read data, there is a possibility of
3756  * a shared-inval cache flush causing relcache entries to be removed.
3757  * Since hash_seq_search only guarantees to still work after the *current*
3758  * entry is removed, it's unsafe to continue the hashtable scan afterward.
3759  * We handle this by restarting the scan from scratch after each access.
3760  * This is theoretically O(N^2), but the number of entries that actually
3761  * need to be fixed is small enough that it doesn't matter.
3762  */
3763  hash_seq_init(&status, RelationIdCache);
3764 
3765  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3766  {
3767  Relation relation = idhentry->reldesc;
3768  bool restart = false;
3769 
3770  /*
3771  * Make sure *this* entry doesn't get flushed while we work with it.
3772  */
 /*
  * NOTE(review): line 3773 is missing; it should hold the call that pins
  * the entry (released again at line 3874, also missing).
  */
3774 
3775  /*
3776  * If it's a faked-up entry, read the real pg_class tuple.
3777  */
 /* an invalid relowner is the marker used here for a faked-up entry */
3778  if (relation->rd_rel->relowner == InvalidOid)
3779  {
3780  HeapTuple htup;
3781  Form_pg_class relp;
3782 
3783  htup = SearchSysCache1(RELOID,
3784  ObjectIdGetDatum(RelationGetRelid(relation)));
3785  if (!HeapTupleIsValid(htup))
3786  elog(FATAL, "cache lookup failed for relation %u",
3787  RelationGetRelid(relation));
3788  relp = (Form_pg_class) GETSTRUCT(htup);
3789 
3790  /*
3791  * Copy tuple to relation->rd_rel. (See notes in
3792  * AllocateRelationDesc())
3793  */
3794  memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
3795 
3796  /* Update rd_options while we have the tuple */
3797  if (relation->rd_options)
3798  pfree(relation->rd_options);
3799  RelationParseRelOptions(relation, htup);
3800 
3801  /*
3802  * Check the values in rd_att were set up correctly. (We cannot
3803  * just copy them over now: formrdesc must have set up the rd_att
3804  * data correctly to start with, because it may already have been
3805  * copied into one or more catcache entries.)
3806  */
3807  Assert(relation->rd_att->tdtypeid == relp->reltype);
3808  Assert(relation->rd_att->tdtypmod == -1);
3809  Assert(relation->rd_att->tdhasoid == relp->relhasoids);
3810 
3811  ReleaseSysCache(htup);
3812 
3813  /* relowner had better be OK now, else we'll loop forever */
3814  if (relation->rd_rel->relowner == InvalidOid)
3815  elog(ERROR, "invalid relowner in pg_class entry for \"%s\"",
3816  RelationGetRelationName(relation));
3817 
3818  restart = true;
3819  }
3820 
3821  /*
3822  * Fix data that isn't saved in relcache cache file.
3823  *
3824  * relhasrules or relhastriggers could possibly be wrong or out of
3825  * date. If we don't actually find any rules or triggers, clear the
3826  * local copy of the flag so that we don't get into an infinite loop
3827  * here. We don't make any attempt to fix the pg_class entry, though.
3828  */
3829  if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
3830  {
3831  RelationBuildRuleLock(relation);
3832  if (relation->rd_rules == NULL)
3833  relation->rd_rel->relhasrules = false;
3834  restart = true;
3835  }
3836  if (relation->rd_rel->relhastriggers && relation->trigdesc == NULL)
3837  {
3838  RelationBuildTriggers(relation);
3839  if (relation->trigdesc == NULL)
3840  relation->rd_rel->relhastriggers = false;
3841  restart = true;
3842  }
3843 
3844  /*
3845  * Re-load the row security policies if the relation has them, since
3846  * they are not preserved in the cache. Note that we can never NOT
3847  * have a policy while relrowsecurity is true, because
3848  * RelationBuildRowSecurity will create a single default-deny policy
3849  * if there is no policy defined in pg_policy.
3850  */
3851  if (relation->rd_rel->relrowsecurity && relation->rd_rsdesc == NULL)
3852  {
3853  RelationBuildRowSecurity(relation);
3854 
3855  Assert(relation->rd_rsdesc != NULL);
3856  restart = true;
3857  }
3858 
3859  /*
3860  * Reload partition key and descriptor for a partitioned table.
3861  */
 /*
  * Unlike the blocks above, this test does not check whether the data is
  * already loaded, so as written it rebuilds and requests a restart on
  * every visit to a partitioned table.
  * NOTE(review): confirm this cannot keep the scan from terminating.
  */
3862  if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
3863  {
3864  RelationBuildPartitionKey(relation);
3865  Assert(relation->rd_partkey != NULL);
3866 
3867  RelationBuildPartitionDesc(relation);
3868  Assert(relation->rd_partdesc != NULL);
3869 
3870  restart = true;
3871  }
3872 
3873  /* Release hold on the relation */
 /*
  * NOTE(review): line 3874 is missing; it should hold the call that
  * releases the pin taken at the top of the loop.
  */
3875 
3876  /* Now, restart the hashtable scan if needed */
3877  if (restart)
3878  {
3879  hash_seq_term(&status);
3880  hash_seq_init(&status, RelationIdCache);
3881  }
3882  }
3883 
3884  /*
3885  * Lastly, write out new relcache cache files if needed. We don't bother
3886  * to distinguish cases where only one of the two needs an update.
3887  */
3888  if (needNewCacheFile)
3889  {
3890  /*
3891  * Force all the catcaches to finish initializing and thereby open the
3892  * catalogs and indexes they use. This will preload the relcache with
3893  * entries for all the most important system catalogs and indexes, so
3894  * that the init files will be most useful for future backends.
3895  */
 /*
  * NOTE(review): line 3896 is missing; it should hold the call that
  * forces catcache initialization, as described above.
  */
3897 
3898  /* now write the files */
 /*
  * NOTE(review): line 3899 is missing; given "files" (plural) above it is
  * presumably the matching write for the shared init file.
  */
3900  write_relcache_init_file(false);
3901  }
3902 }
3903 
3904 /*
3905  * Load one critical system index into the relcache
3906  *
3907  * indexoid is the OID of the target index, heapoid is the OID of the catalog
3908  * it belongs to.
      *
      * The descriptor is built with RelationBuildDesc() and is then marked as
      * nailed with a reference count of 1.  If no descriptor can be built we
      * report PANIC rather than returning.
      *
      * NOTE(review): this listing jumps from line 3927 to line 3930, so whatever
      * followed the rd_refcnt assignment is not shown.  In particular, no
      * release of the two locks taken below is visible here -- confirm against
      * the upstream file.
3909  */
3910 static void
3911 load_critical_index(Oid indexoid, Oid heapoid)
3912 {
3913  Relation ird;
3914 
3915  /*
3916  * We must lock the underlying catalog before locking the index to avoid
3917  * deadlock, since RelationBuildDesc might well need to read the catalog,
3918  * and if anyone else is exclusive-locking this catalog and index they'll
3919  * be doing it in that order.
3920  */
3921  LockRelationOid(heapoid, AccessShareLock);
3922  LockRelationOid(indexoid, AccessShareLock);
3923  ird = RelationBuildDesc(indexoid, true);
3924  if (ird == NULL)
3925  elog(PANIC, "could not open critical system index %u", indexoid);
3926  ird->rd_isnailed = true;
3927  ird->rd_refcnt = 1;
3930 }
3931 
3932 /*
3933  * GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
3934  * GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
3935  *
3936  * We need this kluge because we have to be able to access non-fixed-width
3937  * fields of pg_class and pg_index before we have the standard catalog caches
3938  * available. We use predefined data that's set up in just the same way as
3939  * the bootstrapped reldescs used by formrdesc(). The resulting tupdesc is
3940  * not 100% kosher: it does not have the correct rowtype OID in tdtypeid, nor
3941  * does it have a TupleConstr field. But it's good enough for the purpose of
3942  * extracting fields.
3943  */
     /*
      * Common builder used for the descriptors described above.
      *
      * Creates a template tuple descriptor with natts columns, copies the
      * fixed-size part (ATTRIBUTE_FIXED_PART_SIZE bytes) of each attrs[] entry
      * into it, and returns it.  tdtypeid is set to RECORDOID and tdtypmod to
      * -1; every attcacheoff is reset to -1 except the first attribute's, which
      * is set to 0.
      *
      * NOTE(review): lines 3945 and 3952 are absent from this listing.  The
      * first presumably carries the function name and the natts/attrs
      * parameters used below; the second presumably assigns oldcxt, which is
      * otherwise only read (line 3970).  Confirm against the upstream file.
      */
3944 static TupleDesc
3946  bool hasoids)
3947 {
3948  TupleDesc result;
3949  MemoryContext oldcxt;
3950  int i;
3951 
3953 
3954  result = CreateTemplateTupleDesc(natts, hasoids);
3955  result->tdtypeid = RECORDOID; /* not right, but we don't care */
3956  result->tdtypmod = -1;
3957 
3958  for (i = 0; i < natts; i++)
3959  {
3960  memcpy(result->attrs[i], &attrs[i], ATTRIBUTE_FIXED_PART_SIZE);
3961  /* make sure attcacheoff is valid */
3962  result->attrs[i]->attcacheoff = -1;
3963  }
3964 
3965  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
3966  result->attrs[0]->attcacheoff = 0;
3967 
3968  /* Note: we don't bother to set up a TupleConstr entry */
3969 
      /* restore the memory context that was current on entry */
3970  MemoryContextSwitchTo(oldcxt);
3971 
3972  return result;
3973 }
3974 
/*
 * Return the predefined tuple descriptor for pg_class.
 *
 * The descriptor is kept in the function-local static pgclassdesc, so it is
 * set up at most once per process and the same pointer is returned on every
 * later call.  When it is still NULL it is presumably built from
 * Desc_pg_class with hasoids = true.
 *
 * NOTE(review): lines 3976 (function name) and 3982 (start of the statement
 * that fills pgclassdesc) are absent from this listing -- confirm upstream.
 */
3975 static TupleDesc
3977 {
3978  static TupleDesc pgclassdesc = NULL;
3979 
3980  /* Already done? */
3981  if (pgclassdesc == NULL)
3983  Desc_pg_class,
3984  true);
3985 
3986  return pgclassdesc;
3987 }
3988 
/*
 * Return the predefined tuple descriptor for pg_index.
 *
 * The descriptor is kept in the function-local static pgindexdesc, so it is
 * set up at most once per process and the same pointer is returned on every
 * later call.  When it is still NULL it is presumably built from
 * Desc_pg_index with hasoids = false.
 *
 * NOTE(review): lines 3990 (function name) and 3996 (start of the statement
 * that fills pgindexdesc) are absent from this listing -- confirm upstream.
 */
3989 static TupleDesc
3991 {
3992  static TupleDesc pgindexdesc = NULL;
3993 
3994  /* Already done? */
3995  if (pgindexdesc == NULL)
3997  Desc_pg_index,
3998  false);
3999 
4000  return pgindexdesc;
4001 }
4002 
4003 /*
4004  * Load any default attribute value definitions for the relation.
      *
      * Scans the attribute-default catalog through AttrDefaultIndexId for rows
      * whose key column equals this relation's OID.  Each row is matched by
      * adnum against the ndef entries already present in
      * relation->rd_att->constr->defval[].
      *
      * Inconsistencies (duplicate, unexpected, NULL-valued or missing records)
      * are reported as WARNINGs only; none of the checks in this function
      * raises an error.
      *
      * NOTE(review): this listing omits source lines 4007, 4021, 4025, 4046
      * and 4057.  Among them are the function-name line, the statement that
      * opens adrel, and whatever uses the string "s" before it is freed --
      * confirm against the upstream file.
4005  */
4006 static void
4008 {
4009  AttrDefault *attrdef = relation->rd_att->constr->defval;
4010  int ndef = relation->rd_att->constr->num_defval;
4011  Relation adrel;
4012  SysScanDesc adscan;
4013  ScanKeyData skey;
4014  HeapTuple htup;
4015  Datum val;
4016  bool isnull;
4017  int found;
4018  int i;
4019 
4020  ScanKeyInit(&skey,
4022  BTEqualStrategyNumber, F_OIDEQ,
4023  ObjectIdGetDatum(RelationGetRelid(relation)));
4024 
4026  adscan = systable_beginscan(adrel, AttrDefaultIndexId, true,
4027  NULL, 1, &skey);
4028  found = 0;
4029 
4030  while (HeapTupleIsValid(htup = systable_getnext(adscan)))
4031  {
4032  Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
4033 
      /* look for the defval[] slot that belongs to this column */
4034  for (i = 0; i < ndef; i++)
4035  {
4036  if (adform->adnum != attrdef[i].adnum)
4037  continue;
      /* a slot that already has adbin set was filled by an earlier row */
4038  if (attrdef[i].adbin != NULL)
4039  elog(WARNING, "multiple attrdef records found for attr %s of rel %s",
4040  NameStr(relation->rd_att->attrs[adform->adnum - 1]->attname),
4041  RelationGetRelationName(relation));
4042  else
4043  found++;
4044 
4045  val = fastgetattr(htup,
4047  adrel->rd_att, &isnull);
4048  if (isnull)
4049  elog(WARNING, "null adbin for attr %s of rel %s",
4050  NameStr(relation->rd_att->attrs[adform->adnum - 1]->attname),
4051  RelationGetRelationName(relation));
4052  else
4053  {
4054  /* detoast and convert to cstring in caller's context */
4055  char *s = TextDatumGetCString(val);
4056 
4058  pfree(s);
4059  }
4060  break;
4061  }
4062 
      /* the inner loop ran to completion: no slot had a matching adnum */
4063  if (i >= ndef)
4064  elog(WARNING, "unexpected attrdef record found for attr %d of rel %s",
4065  adform->adnum, RelationGetRelationName(relation));
4066  }
4067 
4068  systable_endscan(adscan);
4069  heap_close(adrel, AccessShareLock);
4070 
4071  if (found != ndef)
4072  elog(WARNING, "%d attrdef record(s) missing for rel %s",
4073  ndef - found, RelationGetRelationName(relation));
4074 }
4075 
4076 /*
4077  * Load any check constraints for the relation.
      *
      * Scans the constraint catalog through ConstraintRelidIndexId for rows
      * whose key column equals this relation's OID, keeps only those with
      * contype == CONSTRAINT_CHECK, and fills the ncheck pre-sized slots of
      * relation->rd_att->constr->check[] in scan order.  The expression text is
      * copied into CacheMemoryContext.  Finally the filled array is sorted
      * with CheckConstraintCmp when it has more than one entry.
      *
      * Unlike a merely suspicious state, every inconsistency here (more or
      * fewer check rows than ncheck, or a NULL conbin) is raised as an ERROR.
      *
      * NOTE(review): this listing omits source lines 4080, 4091, 4095, 4101,
      * 4116 and 4121.  Among them are the function-name line, the statement
      * that opens conrel, the declaration of "conform", and the start of the
      * statement that uses NameStr(conform->conname) -- confirm against the
      * upstream file.
4078  */
4079 static void
4081 {
4082  ConstrCheck *check = relation->rd_att->constr->check;
4083  int ncheck = relation->rd_att->constr->num_check;
4084  Relation conrel;
4085  SysScanDesc conscan;
4086  ScanKeyData skey[1];
4087  HeapTuple htup;
4088  int found = 0;
4089 
4090  ScanKeyInit(&skey[0],
4092  BTEqualStrategyNumber, F_OIDEQ,
4093  ObjectIdGetDatum(RelationGetRelid(relation)));
4094 
4096  conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
4097  NULL, 1, skey);
4098 
4099  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4100  {
4102  Datum val;
4103  bool isnull;
4104  char *s;
4105 
4106  /* We want check constraints only */
4107  if (conform->contype != CONSTRAINT_CHECK)
4108  continue;
4109 
      /* guard the write below: check[] only has room for ncheck entries */
4110  if (found >= ncheck)
4111  elog(ERROR, "unexpected constraint record found for rel %s",
4112  RelationGetRelationName(relation));
4113 
4114  check[found].ccvalid = conform->convalidated;
4115  check[found].ccnoinherit = conform->connoinherit;
4117  NameStr(conform->conname));
4118 
4119  /* Grab and test conbin is actually set */
4120  val = fastgetattr(htup,
4122  conrel->rd_att, &isnull);
4123  if (isnull)
4124  elog(ERROR, "null conbin for rel %s",
4125  RelationGetRelationName(relation));
4126 
4127  /* detoast and convert to cstring in caller's context */
4128  s = TextDatumGetCString(val);
4129  check[found].ccbin = MemoryContextStrdup(CacheMemoryContext, s);
4130  pfree(s);
4131 
4132  found++;
4133  }
4134 
4135  systable_endscan(conscan);
4136  heap_close(conrel, AccessShareLock);
4137 
4138  if (found != ncheck)
4139  elog(ERROR, "%d constraint record(s) missing for rel %s",
4140  ncheck - found, RelationGetRelationName(relation));
4141 
4142  /* Sort the records so that CHECKs are applied in a deterministic order */
4143  if (ncheck > 1)
4144  qsort(check, ncheck, sizeof(ConstrCheck), CheckConstraintCmp);
4145 }
4146 
4147 /*
4148  * qsort comparator to sort ConstrCheck entries by name
4149  */
4150 static int
4151 CheckConstraintCmp(const void *a, const void *b)
4152 {
4153  const ConstrCheck *ca = (const ConstrCheck *) a;
4154  const ConstrCheck *cb = (const ConstrCheck *) b;
4155 
4156  return strcmp(ca->ccname, cb->ccname);
4157 }
4158 
4159 /*
4160  * RelationGetFKeyList -- get a list of foreign key info for the relation
4161  *
4162  * Returns a list of ForeignKeyCacheInfo structs, one per FK constraining
4163  * the given relation. This data is a direct copy of relevant fields from
4164  * pg_constraint. The list items are in no particular order.
4165  *
4166  * CAUTION: the returned list is part of the relcache's data, and could
4167  * vanish in a relcache entry reset. Callers must inspect or copy it
4168  * before doing anything that might trigger a cache flush, such as
4169  * system catalog accesses. copyObject() can be used if desired.
4170  * (We define it this way because current callers want to filter and
4171  * modify the list entries anyway, so copying would be a waste of time.)
      *
      * Malformed catalog data (a NULL conkey/confkey/conpfeqop, or an array of
      * the wrong shape, length or element type) is reported with elog(ERROR).
      *
      * NOTE(review): this listing omits source lines 4174, 4202, 4206, 4264
      * and 4288.  Among them are the function-name line, the statement that
      * opens conrel, the start of the conpfeqop fetch, and the assignment of
      * oldcxt (read at line 4292) -- confirm against the upstream file.
4172  */
4173 List *
4175 {
4176  List *result;
4177  Relation conrel;
4178  SysScanDesc conscan;
4179  ScanKeyData skey;
4180  HeapTuple htup;
4181  List *oldlist;
4182  MemoryContext oldcxt;
4183 
4184  /* Quick exit if we already computed the list. */
4185  if (relation->rd_fkeyvalid)
4186  return relation->rd_fkeylist;
4187 
4188  /* Fast path: if it doesn't have any triggers, it can't have FKs */
4189  if (!relation->rd_rel->relhastriggers)
4190  return NIL;
4191 
4192  /*
4193  * We build the list we intend to return (in the caller's context) while
4194  * doing the scan. After successfully completing the scan, we copy that
4195  * list into the relcache entry. This avoids cache-context memory leakage
4196  * if we get some sort of error partway through.
4197  */
4198  result = NIL;
4199 
4200  /* Prepare to scan pg_constraint for entries having conrelid = this rel. */
4201  ScanKeyInit(&skey,
4203  BTEqualStrategyNumber, F_OIDEQ,
4204  ObjectIdGetDatum(RelationGetRelid(relation)));
4205 
4207  conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
4208  NULL, 1, &skey);
4209 
4210  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4211  {
4212  Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup);
4213  ForeignKeyCacheInfo *info;
4214  Datum adatum;
4215  bool isnull;
4216  ArrayType *arr;
4217  int nelem;
4218 
4219  /* consider only foreign keys */
4220  if (constraint->contype != CONSTRAINT_FOREIGN)
4221  continue;
4222 
4223  info = makeNode(ForeignKeyCacheInfo);
4224  info->conrelid = constraint->conrelid;
4225  info->confrelid = constraint->confrelid;
4226 
4227  /* Extract data from conkey field */
4228  adatum = fastgetattr(htup, Anum_pg_constraint_conkey,
4229  conrel->rd_att, &isnull);
4230  if (isnull)
4231  elog(ERROR, "null conkey for rel %s",
4232  RelationGetRelationName(relation));
4233 
4234  arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */
4235  nelem = ARR_DIMS(arr)[0];
      /* the bound on nelem also keeps the memcpy below within INDEX_MAX_KEYS entries */
4236  if (ARR_NDIM(arr) != 1 ||
4237  nelem < 1 ||
4238  nelem > INDEX_MAX_KEYS ||
4239  ARR_HASNULL(arr) ||
4240  ARR_ELEMTYPE(arr) != INT2OID)
4241  elog(ERROR, "conkey is not a 1-D smallint array");
4242 
4243  info->nkeys = nelem;
4244  memcpy(info->conkey, ARR_DATA_PTR(arr), nelem * sizeof(AttrNumber));
4245 
4246  /* Likewise for confkey */
4247  adatum = fastgetattr(htup, Anum_pg_constraint_confkey,
4248  conrel->rd_att, &isnull);
4249  if (isnull)
4250  elog(ERROR, "null confkey for rel %s",
4251  RelationGetRelationName(relation));
4252 
4253  arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */
4254  nelem = ARR_DIMS(arr)[0];
      /* confkey must have exactly as many entries as conkey */
4255  if (ARR_NDIM(arr) != 1 ||
4256  nelem != info->nkeys ||
4257  ARR_HASNULL(arr) ||
4258  ARR_ELEMTYPE(arr) != INT2OID)
4259  elog(ERROR, "confkey is not a 1-D smallint array");
4260 
4261  memcpy(info->confkey, ARR_DATA_PTR(arr), nelem * sizeof(AttrNumber));
4262 
4263  /* Likewise for conpfeqop */
4265  conrel->rd_att, &isnull);
4266  if (isnull)
4267  elog(ERROR, "null conpfeqop for rel %s",
4268  RelationGetRelationName(relation));
4269 
4270  arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */
4271  nelem = ARR_DIMS(arr)[0];
4272  if (ARR_NDIM(arr) != 1 ||
4273  nelem != info->nkeys ||
4274  ARR_HASNULL(arr) ||
4275  ARR_ELEMTYPE(arr) != OIDOID)
4276  elog(ERROR, "conpfeqop is not a 1-D OID array");
4277 
4278  memcpy(info->conpfeqop, ARR_DATA_PTR(arr), nelem * sizeof(Oid));
4279 
4280  /* Add FK's node to the result list */
4281  result = lappend(result, info);
4282  }
4283 
4284  systable_endscan(conscan);
4285  heap_close(conrel, AccessShareLock);
4286 
4287  /* Now save a copy of the completed list in the relcache entry. */
4289  oldlist = relation->rd_fkeylist;
4290  relation->rd_fkeylist = copyObject(result);
4291  relation->rd_fkeyvalid = true;
4292  MemoryContextSwitchTo(oldcxt);
4293 
4294  /* Don't leak the old list, if there is one */
4295  list_free_deep(oldlist);
4296 
      /*
      * On this path the caller gets the locally built list, while the relcache
      * entry keeps the copy made above; the quick-exit path at the top returns
      * the relcache's own list instead.
      */
4297  return result;
4298 }
4299 
4300 /*
4301  * RelationGetIndexList -- get a list of OIDs of indexes on this relation
4302  *
4303  * The index list is created only if someone requests it. We scan pg_index
4304  * to find relevant indexes, and add the list to the relcache entry so that
4305  * we won't have to compute it again. Note that shared cache inval of a
4306  * relcache entry will delete the old list and set rd_indexvalid to 0,
4307  * so that we must recompute the index list on next request. This handles
4308  * creation or deletion of an index.
4309  *
4310  * Indexes that are marked not IndexIsLive are omitted from the returned list.
4311  * Such indexes are expected to be dropped momentarily, and should not be
4312  * touched at all by any caller of this function.
4313  *
4314  * The returned list is guaranteed to be sorted in order by OID. This is
4315  * needed by the executor, since for index types that we obtain exclusive
4316  * locks on when updating the index, all backends must lock the indexes in
4317  * the same order or we will get deadlocks (see ExecOpenIndices()). Any
4318  * consistent ordering would do, but ordering by OID is easy.
4319  *
4320  * Since shared cache inval causes the relcache's copy of the list to go away,
4321  * we return a copy of the list palloc'd in the caller's context. The caller
4322  * may list_free() the returned list after scanning it. This is necessary
4323  * since the caller will typically be doing syscache lookups on the relevant
4324  * indexes, and syscache lookup could cause SI messages to be processed!
4325  *
4326  * We also update rd_oidindex, which this module treats as effectively part
4327  * of the index list. rd_oidindex is valid when rd_indexvalid isn't zero;
4328  * it is the pg_class OID of a unique index on OID when the relation has one,
4329  * and InvalidOid if there is no such index.
4330  *
4331  * In exactly the same way, we update rd_pkindex, which is the OID of the
4332  * relation's primary key index if any, else InvalidOid; and rd_replidindex,
4333  * which is the pg_class OID of an index to be used as the relation's
4334  * replication identity index, or InvalidOid if there is no such index.
      *
      * NOTE(review): this listing omits source lines 4337, 4366, 4370, 4376,
      * 4399-4400 and 4412.  Among them are the function-name line, the
      * statement that opens indrel, the declaration of "index", two arguments
      * of the heap_getattr() call, and the last term of the "not interesting"
      * test -- confirm against the upstream file.
4335  */
4336 List *
4338 {
4339  Relation indrel;
4340  SysScanDesc indscan;
4341  ScanKeyData skey;
4342  HeapTuple htup;
4343  List *result;
4344  List *oldlist;
4345  char replident = relation->rd_rel->relreplident;
4346  Oid oidIndex = InvalidOid;
4347  Oid pkeyIndex = InvalidOid;
4348  Oid candidateIndex = InvalidOid;
4349  MemoryContext oldcxt;
4350 
4351  /* Quick exit if we already computed the list. */
4352  if (relation->rd_indexvalid != 0)
4353  return list_copy(relation->rd_indexlist);
4354 
4355  /*
4356  * We build the list we intend to return (in the caller's context) while
4357  * doing the scan. After successfully completing the scan, we copy that
4358  * list into the relcache entry. This avoids cache-context memory leakage
4359  * if we get some sort of error partway through.
4360  */
4361  result = NIL;
4362  oidIndex = InvalidOid;
4363 
4364  /* Prepare to scan pg_index for entries having indrelid = this rel. */
4365  ScanKeyInit(&skey,
4367  BTEqualStrategyNumber, F_OIDEQ,
4368  ObjectIdGetDatum(RelationGetRelid(relation)));
4369 
4371  indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true,
4372  NULL, 1, &skey);
4373 
4374  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4375  {
4377  Datum indclassDatum;
4378  oidvector *indclass;
4379  bool isnull;
4380 
4381  /*
4382  * Ignore any indexes that are currently being dropped. This will
4383  * prevent them from being searched, inserted into, or considered in
4384  * HOT-safety decisions. It's unsafe to touch such an index at all
4385  * since its catalog entries could disappear at any instant.
4386  */
4387  if (!IndexIsLive(index))
4388  continue;
4389 
4390  /* Add index's OID to result list in the proper order */
4391  result = insert_ordered_oid(result, index->indexrelid);
4392 
4393  /*
4394  * indclass cannot be referenced directly through the C struct,
4395  * because it comes after the variable-width indkey field. Must
4396  * extract the datum the hard way...
4397  */
4398  indclassDatum = heap_getattr(htup,
4401  &isnull);
4402  Assert(!isnull);
4403  indclass = (oidvector *) DatumGetPointer(indclassDatum);
4404 
4405  /*
4406  * Invalid, non-unique, non-immediate or predicate indexes aren't
4407  * interesting for either oid indexes or replication identity indexes,
4408  * so don't check them.
4409  */
4410  if (!IndexIsValid(index) || !index->indisunique ||
4411  !index->indimmediate ||
4413  continue;
4414 
4415  /* Check to see if is a usable btree index on OID */
4416  if (index->indnatts == 1 &&
4417  index->indkey.values[0] == ObjectIdAttributeNumber &&
4418  indclass->values[0] == OID_BTREE_OPS_OID)
4419  oidIndex = index->indexrelid;
4420 
4421  /* remember primary key index if any */
4422  if (index->indisprimary)
4423  pkeyIndex = index->indexrelid;
4424 
4425  /* remember explicitly chosen replica index */
4426  if (index->indisreplident)
4427  candidateIndex = index->indexrelid;
4428  }
4429 
4430  systable_endscan(indscan);
4431 
4432  heap_close(indrel, AccessShareLock);
4433 
4434  /* Now save a copy of the completed list in the relcache entry. */
4436  oldlist = relation->rd_indexlist;
4437  relation->rd_indexlist = list_copy(result);
4438  relation->rd_oidindex = oidIndex;
4439  relation->rd_pkindex = pkeyIndex;
      /*
      * Pick the replica identity index according to relreplident: the primary
      * key for REPLICA_IDENTITY_DEFAULT, the index flagged indisreplident for
      * REPLICA_IDENTITY_INDEX, and none in every other case.
      */
4440  if (replident == REPLICA_IDENTITY_DEFAULT && OidIsValid(pkeyIndex))
4441  relation->rd_replidindex = pkeyIndex;
4442  else if (replident == REPLICA_IDENTITY_INDEX && OidIsValid(candidateIndex))
4443  relation->rd_replidindex = candidateIndex;
4444  else
4445  relation->rd_replidindex = InvalidOid;
4446  relation->rd_indexvalid = 1;
4447  MemoryContextSwitchTo(oldcxt);
4448 
4449  /* Don't leak the old list, if there is one */
4450  list_free(oldlist);
4451 
4452  return result;
4453 }
4454 
4455 /*
4456  * RelationGetStatExtList
4457  * get a list of OIDs of extended statistics on this relation
4458  *
4459  * The statistics list is created only if someone requests it, in a way
4460  * similar to RelationGetIndexList(). We scan pg_statistic_ext to find
4461  * relevant statistics, and add the list to the relcache entry so that we
4462  * won't have to compute it again. Note that shared cache inval of a
4463  * relcache entry will delete the old list and set rd_statvalid to 0,
4464  * so that we must recompute the statistics list on next request. This
4465  * handles creation or deletion of a statistic.
4466  *
4467  * The returned list is guaranteed to be sorted in order by OID, although
4468  * this is not currently needed.
4469  *
4470  * Since shared cache inval causes the relcache's copy of the list to go away,
4471  * we return a copy of the list palloc'd in the caller's context. The caller
4472  * may list_free() the returned list after scanning it. This is necessary
4473  * since the caller will typically be doing syscache lookups on the relevant
4474  * statistics, and syscache lookup could cause SI messages to be processed!
      *
      * NOTE(review): this listing omits source lines 4477, 4501, 4505 and 4518.
      * Among them are the function-name line, the statement that opens indrel,
      * and the assignment of oldcxt (read at line 4523) -- confirm against the
      * upstream file.
4475  */
4476 List *
4478 {
4479  Relation indrel;
4480  SysScanDesc indscan;
4481  ScanKeyData skey;
4482  HeapTuple htup;
4483  List *result;
4484  List *oldlist;
4485  MemoryContext oldcxt;
4486 
4487  /* Quick exit if we already computed the list. */
4488  if (relation->rd_statvalid != 0)
4489  return list_copy(relation->rd_statlist);
4490 
4491  /*
4492  * We build the list we intend to return (in the caller's context) while
4493  * doing the scan. After successfully completing the scan, we copy that
4494  * list into the relcache entry. This avoids cache-context memory leakage
4495  * if we get some sort of error partway through.
4496  */
4497  result = NIL;
4498 
4499  /* Prepare to scan pg_statistic_ext for entries having starelid = this rel. */
4500  ScanKeyInit(&skey,
4502  BTEqualStrategyNumber, F_OIDEQ,
4503  ObjectIdGetDatum(RelationGetRelid(relation)));
4504 
4506  indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true,
4507  NULL, 1, &skey);
4508 
      /* collect the OID of every matching row, keeping the list sorted */
4509  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4510  /* TODO maybe include only already built statistics? */
4511  result = insert_ordered_oid(result, HeapTupleGetOid(htup));
4512 
4513  systable_endscan(indscan);
4514 
4515  heap_close(indrel, AccessShareLock);
4516 
4517  /* Now save a copy of the completed list in the relcache entry. */
4519  oldlist = relation->rd_statlist;
4520  relation->rd_statlist = list_copy(result);
4521 
4522  relation->rd_statvalid = true;
4523  MemoryContextSwitchTo(oldcxt);
4524 
4525  /* Don't leak the old list, if there is one */
4526  list_free(oldlist);
4527 
4528  return result;
4529 }
4530 
4531 /*
4532  * insert_ordered_oid
4533  * Insert a new Oid into a sorted list of Oids, preserving ordering
4534  *
4535  * Building the ordered list this way is O(N^2), but with a pretty small
4536  * constant, so for the number of entries we expect it will probably be
4537  * faster than trying to apply qsort(). Most tables don't have very many
4538  * indexes...
      *
      * Returns the list to use afterwards.  Callers must use the return value:
      * when the new Oid goes in front (or the list was empty) the result comes
      * from lcons_oid(), otherwise the input list is modified in place and
      * returned.  A value equal to an existing entry is placed after it.
      *
      * NOTE(review): line 4541, which presumably carries the function name and
      * the "list" and "datum" parameters used below, is absent from this
      * listing -- confirm against the upstream file.
4539  */
4540 static List *
4542 {
4543  ListCell *prev;
4544 
4545  /* Does the datum belong at the front? */
4546  if (list == NIL || datum < linitial_oid(list))
4547  return lcons_oid(datum, list);
4548  /* No, so find the entry it belongs after */
4549  prev = list_head(list);
4550  for (;;)
4551  {
4552  ListCell *curr = lnext(prev);
4553 
4554  if (curr == NULL || datum < lfirst_oid(curr))
4555  break; /* it belongs after 'prev', before 'curr' */
4556 
4557  prev = curr;
4558  }
4559  /* Insert datum into list after 'prev' */
4560  lappend_cell_oid(list, prev, datum);
4561  return list;
4562 }
4563 
4564 /*
4565  * RelationSetIndexList -- externally force the index list contents
4566  *
4567  * This is used to temporarily override what we think the set of valid
4568  * indexes is (including the presence or absence of an OID index).
4569  * The forcing will be valid only until transaction commit or abort.
4570  *
4571  * This should only be applied to nailed relations, because in a non-nailed
4572  * relation the hacked index list could be lost at any time due to SI
4573  * messages. In practice it is only used on pg_class (see REINDEX).
4574  *
4575  * It is up to the caller to make sure the given list is correctly ordered.
4576  *
4577  * We deliberately do not change rd_indexattr here: even when operating
4578  * with a temporary partial index list, HOT-update decisions must be made
4579  * correctly with respect to the full index set. It is up to the caller
4580  * to ensure that a correct rd_indexattr set has been cached before first
4581  * calling RelationSetIndexList; else a subsequent inquiry might cause a
4582  * wrong rd_indexattr set to get computed and cached. Likewise, we do not
4583  * touch rd_keyattr, rd_pkattr or rd_idattr.
      *
      * The caller's indexIds list is copied, so the caller keeps ownership of
      * the list it passed in.  rd_indexvalid is set to 2 to mark the list as
      * forced rather than computed.
      *
      * NOTE(review): line 4592 is absent from this listing; it presumably
      * assigns oldcxt, which is otherwise only read (line 4594) -- confirm
      * against the upstream file.
4584  */
4585 void
4586 RelationSetIndexList(Relation relation, List *indexIds, Oid oidIndex)
4587 {
4588  MemoryContext oldcxt;
4589 
4590  Assert(relation->rd_isnailed);
4591  /* Copy the list into the cache context (could fail for lack of mem) */
4593  indexIds = list_copy(indexIds);
4594  MemoryContextSwitchTo(oldcxt);
4595  /* Okay to replace old list */
4596  list_free(relation->rd_indexlist);
4597  relation->rd_indexlist = indexIds;
4598  relation->rd_oidindex = oidIndex;
4599  /*
4600  * For the moment, assume the target rel hasn't got a pk or replica
4601  * index. We'll load them on demand in the API that wraps access to them.
4602  */
4603  relation->rd_pkindex = InvalidOid;
4604  relation->rd_replidindex = InvalidOid;
4605  relation->rd_indexvalid = 2; /* mark list as forced */
4606  /* Flag relation as needing eoxact cleanup (to reset the list) */
4607  EOXactListAdd(relation);
4608 }
4609 
4610 /*
4611  * RelationGetOidIndex -- get the pg_class OID of the relation's OID index
4612  *
4613  * Returns InvalidOid if there is no such index.
      *
      * The value is read from relation->rd_oidindex.  When rd_indexvalid is 0
      * the index list is computed first, purely for its side effect of filling
      * in that field; the list itself is freed again right away.
      *
      * NOTE(review): line 4616, which presumably carries the function name and
      * the "relation" parameter, is absent from this listing.
4614  */
4615 Oid
4617 {
4618  List *ilist;
4619 
4620  /*
4621  * If relation doesn't have OIDs at all, caller is probably confused. (We
4622  * could just silently return InvalidOid, but it seems better to throw an
4623  * assertion.)
4624  */
4625  Assert(relation->rd_rel->relhasoids);
4626 
4627  if (relation->rd_indexvalid == 0)
4628  {
4629  /* RelationGetIndexList does the heavy lifting. */
4630  ilist = RelationGetIndexList(relation);
4631  list_free(ilist);
4632  Assert(relation->rd_indexvalid != 0);
4633  }
4634 
4635  return relation->rd_oidindex;
4636 }
4637 
4638 /*
4639  * RelationGetPrimaryKeyIndex -- get OID of the relation's primary key index
4640  *
4641  * Returns InvalidOid if there is no such index.
      *
      * The value is read from relation->rd_pkindex.  When rd_indexvalid is 0
      * the index list is computed first, purely for its side effect of filling
      * in that field; the list itself is freed again right away.
      *
      * NOTE(review): line 4644, which presumably carries the function name and
      * the "relation" parameter, is absent from this listing.
4642  */
4643 Oid
4645 {
4646  List *ilist;
4647 
4648  if (relation->rd_indexvalid == 0)
4649  {
4650  /* RelationGetIndexList does the heavy lifting. */
4651  ilist = RelationGetIndexList(relation);
4652  list_free(ilist);
4653  Assert(relation->rd_indexvalid != 0);
4654  }
4655 
4656  return relation->rd_pkindex;
4657 }
4658 
4659 /*
4660  * RelationGetReplicaIndex -- get OID of the relation's replica identity index
4661  *
4662  * Returns InvalidOid if there is no such index.
      *
      * The value is read from relation->rd_replidindex.  When rd_indexvalid is
      * 0 the index list is computed first, purely for its side effect of
      * filling in that field; the list itself is freed again right away.
      *
      * NOTE(review): line 4665, which presumably carries the function name and
      * the "relation" parameter, is absent from this listing.
4663  */
4664 Oid
4666 {
4667  List *ilist;
4668 
4669  if (relation->rd_indexvalid == 0)
4670  {
4671  /* RelationGetIndexList does the heavy lifting. */
4672  ilist = RelationGetIndexList(relation);
4673  list_free(ilist);
4674  Assert(relation->rd_indexvalid != 0);
4675  }
4676 
4677  return relation->rd_replidindex;
4678 }
4679 
4680 /*
4681  * RelationGetIndexExpressions -- get the index expressions for an index
4682  *
4683  * We cache the result of transforming pg_index.indexprs into a node tree.
4684  * If the rel is not an index or has no expressional columns, we return NIL.
4685  * Otherwise, the returned tree is copied into the caller's memory context.
4686  * (We don't want to return a pointer to the relcache copy, since it could
4687  * disappear due to relcache invalidation.)
      *
      * The expressions are const-simplified and have their opfuncids filled in.
      * They are deliberately NOT passed through canonicalize_qual(): that step
      * is for boolean qualifications (see RelationGetIndexPredicate), whereas
      * this is a list of arbitrary index column expressions.
      *
      * NOTE(review): this listing omits source lines 4690, 4704, 4713 and 4714.
      * Among them are the function-name line, the second half of the
      * "nothing to do" test, and two arguments of the heap_getattr() call --
      * confirm against the upstream file.
4688  */
4689 List *
4691 {
4692  List *result;
4693  Datum exprsDatum;
4694  bool isnull;
4695  char *exprsString;
4696  MemoryContext oldcxt;
4697 
4698  /* Quick exit if we already computed the result. */
4699  if (relation->rd_indexprs)
4700  return (List *) copyObject(relation->rd_indexprs);
4701 
4702  /* Quick exit if there is nothing to do. */
4703  if (relation->rd_indextuple == NULL ||
4705  return NIL;
4706 
4707  /*
4708  * We build the tree we intend to return in the caller's context. After
4709  * successfully completing the work, we copy it into the relcache entry.
4710  * This avoids problems if we get some sort of error partway through.
4711  */
4712  exprsDatum = heap_getattr(relation->rd_indextuple,
4715  &isnull);
4716  Assert(!isnull);
4717  exprsString = TextDatumGetCString(exprsDatum);
4718  result = (List *) stringToNode(exprsString);
4719  pfree(exprsString);
4720 
4721  /*
4722  * Run the expressions through eval_const_expressions. This is not just an
4723  * optimization, but is necessary, because the planner will be comparing
4724  * them to similarly-processed qual clauses, and may fail to detect valid
4725  * matches without this. We don't bother with canonicalize_qual, however.
4726  */
4727  result = (List *) eval_const_expressions(NULL, (Node *) result);
4728 
4731  /* May as well fix opfuncids too */
4732  fix_opfuncids((Node *) result);
4733 
4734  /* Now save a copy of the completed tree in the relcache entry. */
4735  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4736  relation->rd_indexprs = (List *) copyObject(result);
4737  MemoryContextSwitchTo(oldcxt);
4738 
4739  return result;
4740 }
4741 
4742 /*
4743  * RelationGetIndexPredicate -- get the index predicate for an index
4744  *
4745  * We cache the result of transforming pg_index.indpred into an implicit-AND
4746  * node tree (suitable for use in planning).
4747  * If the rel is not an index or has no predicate, we return NIL.
4748  * Otherwise, the returned tree is copied into the caller's memory context.
4749  * (We don't want to return a pointer to the relcache copy, since it could
4750  * disappear due to relcache invalidation.)
      *
      * Processing order: parse the stored text, const-simplify, canonicalize,
      * convert to implicit-AND list form, then fix opfuncids.  The cached copy
      * is stored in relation->rd_indexcxt.
      *
      * NOTE(review): this listing omits source lines 4753, 4767, 4776 and 4777.
      * Among them are the function-name line, the second half of the
      * "nothing to do" test, and two arguments of the heap_getattr() call --
      * confirm against the upstream file.
4751  */
4752 List *
4754 {
4755  List *result;
4756  Datum predDatum;
4757  bool isnull;
4758  char *predString;
4759  MemoryContext oldcxt;
4760 
4761  /* Quick exit if we already computed the result. */
4762  if (relation->rd_indpred)
4763  return (List *) copyObject(relation->rd_indpred);
4764 
4765  /* Quick exit if there is nothing to do. */
4766  if (relation->rd_indextuple == NULL ||
4768  return NIL;
4769 
4770  /*
4771  * We build the tree we intend to return in the caller's context. After
4772  * successfully completing the work, we copy it into the relcache entry.
4773  * This avoids problems if we get some sort of error partway through.
4774  */
4775  predDatum = heap_getattr(relation->rd_indextuple,
4778  &isnull);
4779  Assert(!isnull);
4780  predString = TextDatumGetCString(predDatum);
4781  result = (List *) stringToNode(predString);
4782  pfree(predString);
4783 
4784  /*
4785  * Run the expression through const-simplification and canonicalization.
4786  * This is not just an optimization, but is necessary, because the planner
4787  * will be comparing it to similarly-processed qual clauses, and may fail
4788  * to detect valid matches without this. This must match the processing
4789  * done to qual clauses in preprocess_expression()! (We can skip the
4790  * stuff involving subqueries, however, since we don't allow any in index
4791  * predicates.)
4792  */
4793  result = (List *) eval_const_expressions(NULL, (Node *) result);
4794 
4795  result = (List *) canonicalize_qual((Expr *) result);
4796 
4797  /* Also convert to implicit-AND format */
4798  result = make_ands_implicit((Expr *) result);
4799 
4800  /* May as well fix opfuncids too */
4801  fix_opfuncids((Node *) result);
4802 
4803  /* Now save a copy of the completed tree in the relcache entry. */
4804  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4805  relation->rd_indpred = (List *) copyObject(result);
4806  MemoryContextSwitchTo(oldcxt);
4807 
4808  return result;
4809 }
4810 
4811 /*
4812  * RelationGetIndexAttrBitmap -- get a bitmap of index attribute numbers
4813  *
4814  * The result has a bit set for each attribute used anywhere in the index
4815  * definitions of all the indexes on this relation. (This includes not only
4816  * simple index keys, but attributes used in expressions and partial-index
4817  * predicates.)
4818  *
4819  * Depending on attrKind, a bitmap covering the attnums for all index columns,
4820  * for all potential foreign key columns, for all primary key columns, or for
4821  * all columns in the configured replica identity index is returned.
4822  *
4823  * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
4824  * we can include system attributes (e.g., OID) in the bitmap representation.
4825  *
4826  * Caller had better hold at least RowExclusiveLock on the target relation
4827  * to ensure it is safe (deadlock-free) for us to take locks on the relation's
4828  * indexes. Note that since the introduction of CREATE INDEX CONCURRENTLY,
4829  * that lock level doesn't guarantee a stable set of indexes, so we have to
4830  * be prepared to retry here in case of a change in the set of indexes.
4831  *
4832  * The returned result is palloc'd in the caller's memory context and should
4833  * be bms_free'd when not needed anymore.
      *
      * Returns NULL when the relation has no indexes. All four bitmaps are
      * computed and cached in the relcache entry on the first call; later
      * calls hand out copies of the cached sets.
      *
      * NOTE(review): this listing omits some source lines (for example 4836,
      * 4858, 4860, 4939, 5009 and 5023), so the function name line, two case
      * labels in each switch and a few continuation lines are not shown here.
4834  */
4835 Bitmapset *
4837 {
4838  Bitmapset *indexattrs; /* indexed columns */
4839  Bitmapset *uindexattrs; /* columns in unique indexes */
4840  Bitmapset *pkindexattrs; /* columns in the primary index */
4841  Bitmapset *idindexattrs; /* columns in the replica identity */
4842  List *indexoidlist;
4843  List *newindexoidlist;
4844  Oid relpkindex;
4845  Oid relreplindex;
4846  ListCell *l;
4847  MemoryContext oldcxt;
4848 
4849  /* Quick exit if we already computed the result. */
4850  if (relation->rd_indexattr != NULL)
4851  {
4852  switch (attrKind)
4853  {
4854  case INDEX_ATTR_BITMAP_ALL:
4855  return bms_copy(relation->rd_indexattr);
4856  case INDEX_ATTR_BITMAP_KEY:
4857  return bms_copy(relation->rd_keyattr);
4859  return bms_copy(relation->rd_pkattr);
4861  return bms_copy(relation->rd_idattr);
4862  default:
4863  elog(ERROR, "unknown attrKind %u", attrKind);
4864  }
4865  }
4866 
4867  /* Fast path if definitely no indexes */
4868  if (!RelationGetForm(relation)->relhasindex)
4869  return NULL;
4870 
4871  /*
4872  * Get cached list of index OIDs. If we have to start over, we do so here.
4873  */
4874 restart:
4875  indexoidlist = RelationGetIndexList(relation);
4876 
4877  /* Fall out if no indexes (but relhasindex was set) */
4878  if (indexoidlist == NIL)
4879  return NULL;
4880 
4881  /*
4882  * Copy the rd_pkindex and rd_replidindex values computed by
4883  * RelationGetIndexList before proceeding. This is needed because a
4884  * relcache flush could occur inside index_open below, resetting the
4885  * fields managed by RelationGetIndexList. We need to do the work with
4886  * stable values of these fields.
4887  */
4888  relpkindex = relation->rd_pkindex;
4889  relreplindex = relation->rd_replidindex;
4890 
4891  /*
4892  * For each index, add referenced attributes to indexattrs.
4893  *
4894  * Note: we consider all indexes returned by RelationGetIndexList, even if
4895  * they are not indisready or indisvalid. This is important because an
4896  * index for which CREATE INDEX CONCURRENTLY has just started must be
4897  * included in HOT-safety decisions (see README.HOT). If a DROP INDEX
4898  * CONCURRENTLY is far enough along that we should ignore the index, it
4899  * won't be returned at all by RelationGetIndexList.
4900  */
4901  indexattrs = NULL;
4902  uindexattrs = NULL;
4903  pkindexattrs = NULL;
4904  idindexattrs = NULL;
4905  foreach(l, indexoidlist)
4906  {
4907  Oid indexOid = lfirst_oid(l);
4908  Relation indexDesc;
4909  IndexInfo *indexInfo;
4910  int i;
4911  bool isKey; /* candidate key */
4912  bool isPK; /* primary key */
4913  bool isIDKey; /* replica identity index */
4914 
4915  indexDesc = index_open(indexOid, AccessShareLock);
4916 
4917  /* Extract index key information from the index's pg_index row */
4918  indexInfo = BuildIndexInfo(indexDesc);
4919 
4920  /* Can this index be referenced by a foreign key? */
4921  isKey = indexInfo->ii_Unique &&
4922  indexInfo->ii_Expressions == NIL &&
4923  indexInfo->ii_Predicate == NIL;
4924 
4925  /* Is this a primary key? */
4926  isPK = (indexOid == relpkindex);
4927 
4928  /* Is this index the configured (or default) replica identity? */
4929  isIDKey = (indexOid == relreplindex);
4930 
4931  /* Collect simple attribute references */
4932  for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
4933  {
4934  int attrnum = indexInfo->ii_KeyAttrNumbers[i];
4935 
      /* A zero entry is skipped here; expression columns are picked up below */
4936  if (attrnum != 0)
4937  {
4938  indexattrs = bms_add_member(indexattrs,
4940 
4941  if (isKey)
4942  uindexattrs = bms_add_member(uindexattrs,
4944 
4945  if (isPK)
4946  pkindexattrs = bms_add_member(pkindexattrs,
4948 
4949  if (isIDKey)
4950  idindexattrs = bms_add_member(idindexattrs,
4952  }
4953  }
4954 
4955  /* Collect all attributes used in expressions, too */
4956  pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs);
4957 
4958  /* Collect all attributes in the index predicate, too */
4959  pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs);
4960 
4961  index_close(indexDesc, AccessShareLock);
4962  }
4963 
4964  /*
4965  * During one of the index_opens in the above loop, we might have received
4966  * a relcache flush event on this relcache entry, which might have been
4967  * signaling a change in the rel's index list. If so, we'd better start
4968  * over to ensure we deliver up-to-date attribute bitmaps.
4969  */
4970  newindexoidlist = RelationGetIndexList(relation);
4971  if (equal(indexoidlist, newindexoidlist) &&
4972  relpkindex == relation->rd_pkindex &&
4973  relreplindex == relation->rd_replidindex)
4974  {
4975  /* Still the same index set, so proceed */
4976  list_free(newindexoidlist);
4977  list_free(indexoidlist);
4978  }
4979  else
4980  {
4981  /* Gotta do it over ... might as well not leak memory */
4982  list_free(newindexoidlist);
4983  list_free(indexoidlist);
4984  bms_free(uindexattrs);
4985  bms_free(pkindexattrs);
4986  bms_free(idindexattrs);
4987  bms_free(indexattrs);
4988 
4989  goto restart;
4990  }
4991 
4992  /* Don't leak the old values of these bitmaps, if any */
4993  bms_free(relation->rd_indexattr);
4994  relation->rd_indexattr = NULL;
4995  bms_free(relation->rd_keyattr);
4996  relation->rd_keyattr = NULL;
4997  bms_free(relation->rd_pkattr);
4998  relation->rd_pkattr = NULL;
4999  bms_free(relation->rd_idattr);
5000  relation->rd_idattr = NULL;
5001 
5002  /*
5003  * Now save copies of the bitmaps in the relcache entry. We intentionally
5004  * set rd_indexattr last, because that's the one that signals validity of
5005  * the values; if we run out of memory before making that copy, we won't
5006  * leave the relcache entry looking like the other ones are valid but
5007  * empty.
5008  */
5010  relation->rd_keyattr = bms_copy(uindexattrs);
5011  relation->rd_pkattr = bms_copy(pkindexattrs);
5012  relation->rd_idattr = bms_copy(idindexattrs);
5013  relation->rd_indexattr = bms_copy(indexattrs);
5014  MemoryContextSwitchTo(oldcxt);
5015 
5016  /* We return our original working copy for caller to play with */
5017  switch (attrKind)
5018  {
5019  case INDEX_ATTR_BITMAP_ALL:
5020  return indexattrs;
5021  case INDEX_ATTR_BITMAP_KEY:
5022  return uindexattrs;
      /*
      * Hand back the working set itself, as the other cases do. The relcache
      * entry already holds its own copy in rd_pkattr (made just above), so
      * copying that again would only abandon pkindexattrs in the caller
      * context and cost an extra allocation.
      */
5024  return pkindexattrs;
5026  return idindexattrs;
5027  default:
5028  elog(ERROR, "unknown attrKind %u", attrKind);
5029  return NULL;
5030  }
5031 }
5032 
5033 /*
5034  * RelationGetExclusionInfo -- get info about index's exclusion constraint
5035  *
5036  * This should be called only for an index that is known to have an
5037  * associated exclusion constraint. It returns arrays (palloc'd in caller's
5038  * context) of the exclusion operator OIDs, their underlying functions'
5039  * OIDs, and their strategy numbers in the index's opclasses. We cache
5040  * all this information since it requires a fair amount of work to get.
      *
      * Outputs: *operators, *procs and *strategies each receive a newly
      * allocated array with one entry per index column (rd_rel->relnatts).
      *
      * Errors (all via elog(ERROR)): more than one or no matching constraint
      * row, a null conexclop, a conexclop that is not a 1-D Oid array of the
      * expected length without nulls, or an operator with no strategy in the
      * corresponding opfamily.
      *
      * NOTE(review): listing lines 5043, 5080, 5084, 5091 and 5110 are absent
      * from this extract. The function name line, one ScanKeyInit argument,
      * the assignment of conrel, the declaration of conform and one
      * fastgetattr argument are therefore not shown.
5041  */
5042 void
5044  Oid **operators,
5045  Oid **procs,
5046  uint16 **strategies)
5047 {
5048  int ncols = indexRelation->rd_rel->relnatts;
5049  Oid *ops;
5050  Oid *funcs;
5051  uint16 *strats;
5052  Relation conrel;
5053  SysScanDesc conscan;
5054  ScanKeyData skey[1];
5055  HeapTuple htup;
5056  bool found;
5057  MemoryContext oldcxt;
5058  int i;
5059 
5060  /* Allocate result space in caller context */
5061  *operators = ops = (Oid *) palloc(sizeof(Oid) * ncols);
5062  *procs = funcs = (Oid *) palloc(sizeof(Oid) * ncols);
5063  *strategies = strats = (uint16 *) palloc(sizeof(uint16) * ncols);
5064 
5065  /* Quick exit if we have the data cached already */
      /* rd_exclstrats is the last of the three cached arrays to be allocated below */
5066  if (indexRelation->rd_exclstrats != NULL)
5067  {
5068  memcpy(ops, indexRelation->rd_exclops, sizeof(Oid) * ncols);
5069  memcpy(funcs, indexRelation->rd_exclprocs, sizeof(Oid) * ncols);
5070  memcpy(strats, indexRelation->rd_exclstrats, sizeof(uint16) * ncols);
5071  return;
5072  }
5073 
5074  /*
5075  * Search pg_constraint for the constraint associated with the index. To
5076  * make this not too painfully slow, we use the index on conrelid; that
5077  * will hold the parent relation's OID not the index's own OID.
5078  */
5079  ScanKeyInit(&skey[0],
5081  BTEqualStrategyNumber, F_OIDEQ,
5082  ObjectIdGetDatum(indexRelation->rd_index->indrelid));
5083 
5085  conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
5086  NULL, 1, skey);
5087  found = false;
5088 
      /* Walk every constraint row of the parent table; filter inside the loop */
5089  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
5090  {
5092  Datum val;
5093  bool isnull;
5094  ArrayType *arr;
5095  int nelem;
5096 
5097  /* We want the exclusion constraint owning the index */
5098  if (conform->contype != CONSTRAINT_EXCLUSION ||
5099  conform->conindid != RelationGetRelid(indexRelation))
5100  continue;
5101 
5102  /* There should be only one */
5103  if (found)
5104  elog(ERROR, "unexpected exclusion constraint record found for rel %s",
5105  RelationGetRelationName(indexRelation));
5106  found = true;
5107 
5108  /* Extract the operator OIDS from conexclop */
5109  val = fastgetattr(htup,
5111  conrel->rd_att, &isnull);
5112  if (isnull)
5113  elog(ERROR, "null conexclop for rel %s",
5114  RelationGetRelationName(indexRelation));
5115 
5116  arr = DatumGetArrayTypeP(val); /* ensure not toasted */
      /*
      * nelem is fetched before the dimension count is verified; it is only
      * compared when the ARR_NDIM test in the condition below has passed.
      */
5117  nelem = ARR_DIMS(arr)[0];
5118  if (ARR_NDIM(arr) != 1 ||
5119  nelem != ncols ||
5120  ARR_HASNULL(arr) ||
5121  ARR_ELEMTYPE(arr) != OIDOID)
5122  elog(ERROR, "conexclop is not a 1-D Oid array");
5123 
5124  memcpy(ops, ARR_DATA_PTR(arr), sizeof(Oid) * ncols);
5125  }
5126 
5127  systable_endscan(conscan);
5128  heap_close(conrel, AccessShareLock);
5129 
5130  if (!found)
5131  elog(ERROR, "exclusion constraint record missing for rel %s",
5132  RelationGetRelationName(indexRelation));
5133 
5134  /* We need the func OIDs and strategy numbers too */
5135  for (i = 0; i < ncols; i++)
5136  {
5137  funcs[i] = get_opcode(ops[i]);
5138  strats[i] = get_op_opfamily_strategy(ops[i],
5139  indexRelation->rd_opfamily[i]);
5140  /* shouldn't fail, since it was checked at index creation */
5141  if (strats[i] == InvalidStrategy)
5142  elog(ERROR, "could not find strategy for operator %u in family %u",
5143  ops[i], indexRelation->rd_opfamily[i]);
5144  }
5145 
5146  /* Save a copy of the results in the relcache entry. */
      /* The cached arrays are allocated in rd_indexcxt, separate from the caller's copies */
5147  oldcxt = MemoryContextSwitchTo(indexRelation->rd_indexcxt);
5148  indexRelation->rd_exclops = (Oid *) palloc(sizeof(Oid) * ncols);
5149  indexRelation->rd_exclprocs = (Oid *) palloc(sizeof(Oid) * ncols);
5150  indexRelation->rd_exclstrats = (uint16 *) palloc(sizeof(uint16) * ncols);
5151  memcpy(indexRelation->rd_exclops, ops, sizeof(Oid) * ncols);
5152  memcpy(indexRelation->rd_exclprocs, funcs, sizeof(Oid) * ncols);
5153  memcpy(indexRelation->rd_exclstrats, strats, sizeof(uint16) * ncols);
5154  MemoryContextSwitchTo(oldcxt);
5155 }
5156 
5157 /*
5158  * Get publication actions for the given relation.
      *
      * Returns a newly palloc0'd PublicationActions struct. If the relcache
      * entry already has rd_pubactions set, the result is a copy of that.
      * Otherwise the pubinsert, pubupdate and pubdelete flags are OR'ed
      * together over every publication returned by GetRelationPublications()
      * and GetAllTablesPublications(), and a copy of the result is stored in
      * relation->rd_pubactions for later calls.
      *
      * Raises an error via elog(ERROR) if the lookup of a publication fails.
      *
      * NOTE(review): listing lines 5161, 5182 and 5211 are absent from this
      * extract, so the function name line, the assignment of tup and the
      * assignment of oldcxt are not shown. Which memory context the cached
      * copy is allocated in cannot be confirmed from here.
5159  */
5160 struct PublicationActions *
5162 {
5163  List *puboids;
5164  ListCell *lc;
5165  MemoryContext oldcxt;
5166  PublicationActions *pubactions = palloc0(sizeof(PublicationActions));
5167 
      /* Cached already: memcpy returns its destination, i.e. the fresh copy */
5168  if (relation->rd_pubactions)
5169  return memcpy(pubactions, relation->rd_pubactions,
5170  sizeof(PublicationActions));
5171 
5172  /* Fetch the publication membership info. */
5173  puboids = GetRelationPublications(RelationGetRelid(relation));
5174  puboids = list_concat_unique_oid(puboids, GetAllTablesPublications());
5175 
5176  foreach(lc, puboids)
5177  {
5178  Oid pubid = lfirst_oid(lc);
5179  HeapTuple tup;
5180  Form_pg_publication pubform;
5181 
5183 
5184  if (!HeapTupleIsValid(tup))
5185  elog(ERROR, "cache lookup failed for publication %u", pubid);
5186 
5187  pubform = (Form_pg_publication) GETSTRUCT(tup);
5188 
      /* Accumulate: an action is published if any publication publishes it */
5189  pubactions->pubinsert |= pubform->pubinsert;
5190  pubactions->pubupdate |= pubform->pubupdate;
5191  pubactions->pubdelete |= pubform->pubdelete;
5192 
5193  ReleaseSysCache(tup);
5194 
5195  /*
5196  * If we know everything is replicated, there is no point to check
5197  * for other publications.
5198  */
5199  if (pubactions->pubinsert && pubactions->pubupdate &&
5200  pubactions->pubdelete)
5201  break;
5202  }
5203 
      /*
      * rd_pubactions was NULL at the check near the top. This guards against
      * it having been filled in since then -- presumably during the catalog
      * lookups above (TODO confirm) -- so the old value is not leaked.
      */
5204  if (relation->rd_pubactions)
5205  {
5206  pfree(relation->rd_pubactions);
5207  relation->rd_pubactions = NULL;
5208  }
5209 
5210  /* Now save copy of the actions in the relcache entry. */
5212  relation->rd_pubactions = palloc(sizeof(PublicationActions));
5213  memcpy(relation->rd_pubactions, pubactions, sizeof(PublicationActions));
5214  MemoryContextSwitchTo(oldcxt);
5215 
5216  return pubactions;
5217 }
5218 
5219 /*
5220  * Routines to support ereport() reports of relation-related errors
5221  *
5222  * These could have been put into elog.c, but it seems like a module layering
5223  * violation to have elog.c calling relcache or syscache stuff --- and we
5224  * definitely don't want elog.h including rel.h. So we put them here.
5225  */
5226 
5227 /*
5228  * errtable --- stores schema_name and table_name of a table
5229  * within the current errordata.
      *
      * Always returns 0; the value carries no information.
      *
      * NOTE(review): listing lines 5232 and 5234-5236 are absent from this
      * extract, so the function name line and the statements that record
      * the two names are not shown here.
5230  */
5231 int
5233 {
5237 
5238  return 0; /* return value does not matter */
5239 }
5240 
5241 /*
5242  * errtablecol --- stores schema_name, table_name and column_name
5243  * of a table column within the current errordata.
5244  *
5245  * The column is specified by attribute number --- for most callers, this is
5246  * easier and less error-prone than getting the column name for themselves.
      *
      * rel: the relation that owns the column.
      * attnum: attribute number of the column. Values from 1 up to the number
      * of attributes in the tuple descriptor are resolved from the descriptor;
      * any other value is resolved through get_relid_attribute_name().
      *
      * Returns whatever errtablecolname() returns.
5247  */
5248 int
5249 errtablecol(Relation rel, int attnum)
5250 {
5251  TupleDesc reldesc = RelationGetDescr(rel);
5252  const char *colname;
5253 
5254  /* Use reldesc if it's a user attribute, else consult the catalogs */
5255  if (attnum > 0 && attnum <= reldesc->natts)
      /* attrs[] is indexed from zero while attnum counts from one */
5256  colname = NameStr(reldesc->attrs[attnum - 1]->attname);
5257  else
5258  colname = get_relid_attribute_name(RelationGetRelid(rel), attnum);
5259 
5260  return errtablecolname(rel, colname);
5261 }
5262 
5263 /*
5264  * errtablecolname --- stores schema_name, table_name and column_name
5265  * of a table column within the current errordata, where the column name is
5266  * given directly rather than extracted from the relation's catalog data.
5267  *
5268  * Don't use this directly unless errtablecol() is inconvenient for some
5269  * reason. This might possibly be needed during intermediate states in ALTER
5270  * TABLE, for instance.
      *
      * Calls errtable(rel) first. Always returns 0.
      *
      * NOTE(review): listing line 5276 is absent from this extract, so the
      * statement that records colname is not shown here.
5271  */
5272 int
5273 errtablecolname(Relation rel, const char *colname)
5274 {
5275  errtable(rel);
5277 
5278  return 0; /* return value does not matter */
5279 }
5280 
5281 /*
5282  * errtableconstraint --- stores schema_name, table_name and constraint_name
5283  * of a table-related constraint within the current errordata.
      *
      * Calls errtable(rel) first. Always returns 0.
      *
      * NOTE(review): listing line 5289 is absent from this extract, so the
      * statement that records conname is not shown here.
5284  */
5285 int
5286 errtableconstraint(Relation rel, const char *conname)
5287 {
5288  errtable(rel);
5290 
5291  return 0; /* return value does not matter */
5292 }
5293 
5294 
5295 /*
5296  * load_relcache_init_file, write_relcache_init_file
5297  *
5298  * In late 1992, we started regularly having databases with more than
5299  * a thousand classes in them. With this number of classes, it became
5300  * critical to do indexed lookups on the system catalogs.
5301  *
5302  * Bootstrapping these lookups is very hard. We want to be able to
5303  * use an index on pg_attribute, for example, but in order to do so,
5304  * we must have read pg_attribute for the attributes in the index,
5305  * which implies that we need to use the index.
5306  *
5307  * In order to get around the problem, we do the following:
5308  *
5309  * + When the database system is initialized (at initdb time), we
5310  * don't use indexes. We do sequential scans.
5311  *
5312  * + When the backend is started up in normal mode, we load an image
5313  * of the appropriate relation descriptors, in internal format,
5314  * from an initialization file in the data/base/... directory.
5315  *
5316  * + If the initialization file isn't there, then we create the
5317  * relation descriptors using sequential scans and write 'em to
5318  * the initialization file for use by subsequent backends.
5319  *
5320  * As of Postgres 9.0, there is one local initialization file in each
5321  * database, plus one shared initialization file for shared catalogs.
5322  *
5323  * We could dispense with the initialization files and just build the
5324  * critical reldescs the hard way on every backend startup, but that
5325  * slows down backend startup noticeably.
5326  *
5327  * We can in fact go further, and save more relcache entries than
5328  * just the ones that are absolutely critical; this allows us to speed
5329  * up backend startup by not having to build such entries the hard way.
5330  * Presently, all the catalog and index entries that are referred to
5331  * by catcaches are stored in the initialization files.
5332  *
5333  * The same mechanism that detects when catcache and relcache entries
5334  * need to be invalidated (due to catalog updates) also arranges to
5335  * unlink the initialization files when the contents may be out of date.
5336  * The files will then be rebuilt during the next backend startup.
5337  */
5338 
5339 /*
5340  * load_relcache_init_file -- attempt to load cache from the shared
5341  * or local cache init file
5342  *
5343  * If successful, return TRUE and set criticalRelcachesBuilt or
5344  * criticalSharedRelcachesBuilt to true.
5345  * If not successful, return FALSE.
5346  *
5347  * NOTE: we assume we are already switched into CacheMemoryContext.
5348  */
5349 static bool
5351 {
5352  FILE *fp;
5353  char initfilename[MAXPGPATH];
5354  Relation *rels;
5355  int relno,
5356  num_rels,
5357  max_rels,
5358  nailed_rels,
5359  nailed_indexes,
5360  magic;
5361  int i;
5362 
5363  if (shared)
5364  snprintf(initfilename, sizeof(initfilename), "global/%s",
5366  else
5367  snprintf(initfilename, sizeof(initfilename), "%s/%s",
5369 
5370  fp = AllocateFile(initfilename, PG_BINARY_R);
5371  if (fp == NULL)
5372  return false;
5373 
5374  /*
5375  * Read the index relcache entries from the file. Note we will not enter
5376  * any of them into the cache if the read fails partway through; this
5377  * helps to guard against broken init files.
5378  */
5379  max_rels = 100;
5380  rels = (Relation *) palloc(max_rels * sizeof(Relation));
5381  num_rels = 0;
5382  nailed_rels = nailed_indexes = 0;
5383 
5384  /* check for correct magic number (compatible version) */
5385  if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
5386  goto read_failed;
5387  if (magic != RELCACHE_INIT_FILEMAGIC)
5388  goto read_failed;
5389 
5390  for (relno = 0;; relno++)
5391  {
5392  Size len;
5393  size_t nread;
5394  Relation rel;
5395  Form_pg_class relform;
5396  bool has_not_null;
5397 
5398  /* first read the relation descriptor length */
5399  nread = fread(&len, 1, sizeof(len), fp);
5400  if (nread != sizeof(len))
5401  {
5402  if (nread == 0)
5403  break; /* end of file */
5404  goto read_failed;
5405  }
5406 
5407  /* safety check for incompatible relcache layout */
5408  if (len != sizeof(RelationData))
5409  goto read_failed;
5410 
5411  /* allocate another relcache header */
5412  if (num_rels >= max_rels)
5413  {
5414  max_rels *= 2;
5415  rels = (Relation *) repalloc(rels, max_rels * sizeof(Relation));
5416  }
5417 
5418  rel = rels[num_rels++] = (Relation) palloc(len);
5419 
5420  /* then, read the Relation structure */
5421  if (fread(rel, 1, len, fp) != len)
5422  goto read_failed;
5423 
5424  /* next read the relation tuple form */
5425  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5426  goto read_failed;
5427 
5428  relform = (Form_pg_class) palloc(len);
5429  if (fread(relform, 1, len, fp) != len)
5430  goto read_failed;
5431 
5432  rel->rd_rel = relform;
5433 
5434  /* initialize attribute tuple forms */
5435  rel->rd_att = CreateTemplateTupleDesc(relform->relnatts,
5436  relform->relhasoids);
5437  rel->rd_att->tdrefcount = 1; /* mark as refcounted */
5438 
5439  rel->rd_att->tdtypeid = relform->reltype;
5440  rel->rd_att->tdtypmod = -1; /* unnecessary, but... */
5441 
5442  /* next read all the attribute tuple form data entries */
5443  has_not_null = false;
5444  for (i = 0; i < relform->relnatts; i++)
5445  {
5446  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5447  goto read_failed;
5448  if (len != ATTRIBUTE_FIXED_PART_SIZE)
5449  goto read_failed;
5450  if (fread(rel->rd_att->attrs[i], 1, len, fp) != len)
5451  goto read_failed;
5452 
5453  has_not_null |= rel->rd_att->attrs[i]->attnotnull;
5454  }
5455 
5456  /* next read the access method specific field */
5457  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5458  goto read_failed;
5459  if (len > 0)
5460  {
5461  rel->rd_options = palloc(len);
5462  if (fread(rel->rd_options, 1, len, fp) != len)
5463  goto read_failed;
5464  if (len != VARSIZE(rel->rd_options))
5465  goto read_failed; /* sanity check */
5466  }
5467  else
5468  {
5469  rel->rd_options = NULL;
5470  }
5471 
5472  /* mark not-null status */
5473  if (has_not_null)
5474  {
5475  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
5476 
5477  constr->has_not_null = true;
5478  rel->rd_att->constr = constr;
5479  }
5480 
5481  /* If it's an index, there's more to do */
5482  if (rel->rd_rel->relkind == RELKIND_INDEX)
5483  {
5484  MemoryContext indexcxt;
5485  Oid *opfamily;
5486  Oid *opcintype;
5487  RegProcedure *support;
5488  int nsupport;
5489  int16 *indoption;
5490  Oid *indcollation;
5491 
5492  /* Count nailed indexes to ensure we have 'em all */
5493  if (rel->rd_isnailed)
5494  nailed_indexes++;
5495 
5496  /* next, read the pg_index tuple */
5497  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5498  goto read_failed;
5499 
5500  rel->rd_indextuple = (HeapTuple) palloc(len);
5501  if (fread(rel->rd_indextuple, 1, len, fp) != len)
5502  goto read_failed;
5503 
5504  /* Fix up internal pointers in the tuple -- see heap_copytuple */
5505  rel->rd_indextuple->t_data = (HeapTupleHeader) ((char *) rel->rd_indextuple + HEAPTUPLESIZE);
5507 
5508  /*
5509  * prepare index info context --- parameters should match
5510  * RelationInitIndexAccessInfo
5511  */
5515  rel->rd_indexcxt = indexcxt;
5516 
5517  /*
5518  * Now we can fetch the index AM's API struct. (We can't store
5519  * that in the init file, since it contains function pointers that
5520  * might vary across server executions. Fortunately, it should be
5521  * safe to call the amhandler even while bootstrapping indexes.)
5522  */
5523  InitIndexAmRoutine(rel);
5524 
5525  /* next, read the vector of opfamily OIDs */
5526  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5527  goto read_failed;
5528 
5529  opfamily = (Oid *) MemoryContextAlloc(indexcxt, len);
5530  if (fread(opfamily, 1, len, fp) != len)
5531  goto read_failed;
5532 
5533  rel->rd_opfamily = opfamily;
5534 
5535  /* next, read the vector of opcintype OIDs */
5536  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5537  goto read_failed;
5538 
5539  opcintype = (Oid *) MemoryContextAlloc(indexcxt, len);
5540  if (fread(opcintype, 1, len, fp) != len)
5541  goto read_failed;
5542 
5543  rel->rd_opcintype = opcintype;
5544 
5545  /* next, read the vector of support procedure OIDs */
5546  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5547  goto read_failed;
5548  support = (RegProcedure *) MemoryContextAlloc(indexcxt, len);
5549  if (fread(support, 1, len, fp) != len)
5550  goto read_failed;
5551 
5552  rel->rd_support = support;
5553 
5554  /* next, read the vector of collation OIDs */
5555  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5556  goto read_failed;
5557 
5558  indcollation = (Oid *) MemoryContextAlloc(indexcxt, len);
5559  if (fread(indcollation, 1, len, fp) != len)
5560  goto read_failed;
5561 
5562  rel->rd_indcollation = indcollation;
5563 
5564  /* finally, read the vector of indoption values */
5565  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5566  goto read_failed;
5567 
5568  indoption = (int16 *) MemoryContextAlloc(indexcxt, len);
5569  if (fread(indoption, 1, len, fp) != len)
5570  goto read_failed;
5571 
5572  rel->rd_indoption = indoption;
5573 
5574  /* set up zeroed fmgr-info vector */
5575  nsupport = relform->relnatts * rel->rd_amroutine->amsupport;
5576  rel->rd_supportinfo = (FmgrInfo *)
5577  MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
5578  }
5579  else
5580  {
5581  /* Count nailed rels to ensure we have 'em all */
5582  if (rel->rd_isnailed)
5583  nailed_rels++;
5584 
5585  Assert(rel->rd_index == NULL);
5586  Assert(rel->rd_indextuple == NULL);
5587  Assert(rel->rd_indexcxt == NULL);
5588  Assert(rel->rd_amroutine == NULL);
5589  Assert(rel->rd_opfamily == NULL);
5590  Assert(rel->rd_opcintype == NULL);
5591  Assert(rel->rd_support == NULL);
5592  Assert(rel->rd_supportinfo == NULL);
5593  Assert(rel->rd_indoption == NULL);
5594  Assert(rel->rd_indcollation == NULL);
5595  }
5596 
5597  /*
5598  * Rules and triggers are not saved (mainly because the internal
5599  * format is complex and subject to change). They must be rebuilt if
5600  * needed by RelationCacheInitializePhase3. This is not expected to
5601  * be a big performance hit since few system catalogs have such. Ditto
5602  * for RLS policy data, index expressions, predicates, exclusion info,
5603  * and FDW info.
5604  */
5605  rel->rd_rules = NULL;
5606  rel->rd_rulescxt = NULL;
5607  rel->trigdesc = NULL;
5608  rel->rd_rsdesc = NULL;
5609  rel->rd_partkeycxt = NULL;
5610  rel->rd_partkey = NULL;
5611  rel->rd_partdesc = NULL;
5612  rel->rd_partcheck = NIL;
5613  rel->rd_indexprs = NIL;
5614  rel->rd_indpred = NIL;
5615  rel->rd_exclops = NULL;
5616  rel->rd_exclprocs = NULL;
5617  rel->rd_exclstrats = NULL;
5618  rel->rd_fdwroutine = NULL;
5619 
5620  /*
5621  * Reset transient-state fields in the relcache entry
5622  */
5623  rel->rd_smgr = NULL;
5624  if (rel->rd_isnailed)
5625  rel->rd_refcnt = 1;
5626  else
5627  rel->rd_refcnt = 0;
5628  rel->rd_indexvalid = 0;
5629  rel->rd_fkeylist = NIL;
5630  rel->rd_fkeyvalid = false;
5631  rel->rd_indexlist = NIL;
5632  rel->rd_oidindex = InvalidOid;
5633  rel->rd_pkindex = InvalidOid;
5634  rel->rd_replidindex = InvalidOid;
5635  rel->rd_indexattr = NULL;
5636  rel->rd_keyattr = NULL;
5637  rel->rd_pkattr = NULL;
5638  rel->rd_idattr = NULL;
5639  rel->rd_pubactions = NULL;
5640  rel->rd_statvalid = false;
5641  rel->rd_statlist = NIL;
5644  rel->rd_amcache = NULL;
5645  MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
5646 
5647  /*
5648  * Recompute lock and physical addressing info. This is needed in
5649  * case the pg_internal.init file was copied from some other database
5650  * by CREATE DATABASE.
5651  */
5652  RelationInitLockInfo(rel);
5654  }
5655 
5656  /*
5657  * We reached the end of the init file without apparent problem. Did we
5658  * get the right number of nailed items? This is a useful crosscheck in
5659  * case the set of critical rels or indexes changes. However, that should
5660  * not happen in a normally-running system, so let's bleat if it does.
5661  *
5662  * For the shared init file, we're called before client authentication is