PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
relcache.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * relcache.c
4  * POSTGRES relation descriptor cache code
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/cache/relcache.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  * RelationCacheInitialize - initialize relcache (to empty)
18  * RelationCacheInitializePhase2 - initialize shared-catalog entries
19  * RelationCacheInitializePhase3 - finish initializing relcache
20  * RelationIdGetRelation - get a reldesc by relation id
21  * RelationClose - close an open relation
22  *
23  * NOTES
24  * The following code contains many undocumented hacks. Please be
25  * careful....
26  */
27 #include "postgres.h"
28 
29 #include <sys/file.h>
30 #include <fcntl.h>
31 #include <unistd.h>
32 
33 #include "access/htup_details.h"
34 #include "access/multixact.h"
35 #include "access/nbtree.h"
36 #include "access/reloptions.h"
37 #include "access/sysattr.h"
38 #include "access/xact.h"
39 #include "access/xlog.h"
40 #include "catalog/catalog.h"
41 #include "catalog/index.h"
42 #include "catalog/indexing.h"
43 #include "catalog/namespace.h"
44 #include "catalog/partition.h"
45 #include "catalog/pg_am.h"
46 #include "catalog/pg_amproc.h"
47 #include "catalog/pg_attrdef.h"
48 #include "catalog/pg_authid.h"
50 #include "catalog/pg_constraint.h"
51 #include "catalog/pg_database.h"
52 #include "catalog/pg_namespace.h"
53 #include "catalog/pg_opclass.h"
55 #include "catalog/pg_proc.h"
56 #include "catalog/pg_publication.h"
57 #include "catalog/pg_rewrite.h"
58 #include "catalog/pg_shseclabel.h"
61 #include "catalog/pg_tablespace.h"
62 #include "catalog/pg_trigger.h"
63 #include "catalog/pg_type.h"
64 #include "catalog/schemapg.h"
65 #include "catalog/storage.h"
66 #include "commands/policy.h"
67 #include "commands/trigger.h"
68 #include "miscadmin.h"
69 #include "nodes/nodeFuncs.h"
70 #include "optimizer/clauses.h"
71 #include "optimizer/prep.h"
72 #include "optimizer/var.h"
73 #include "rewrite/rewriteDefine.h"
74 #include "rewrite/rowsecurity.h"
75 #include "storage/lmgr.h"
76 #include "storage/smgr.h"
77 #include "utils/array.h"
78 #include "utils/builtins.h"
79 #include "utils/fmgroids.h"
80 #include "utils/inval.h"
81 #include "utils/lsyscache.h"
82 #include "utils/memutils.h"
83 #include "utils/relmapper.h"
84 #include "utils/resowner_private.h"
85 #include "utils/snapmgr.h"
86 #include "utils/syscache.h"
87 #include "utils/tqual.h"
88 
89 
90 /*
91  * name of relcache init file(s), used to speed up backend startup
92  */
93 #define RELCACHE_INIT_FILENAME "pg_internal.init"
94 
95 #define RELCACHE_INIT_FILEMAGIC 0x573266 /* version ID value */
96 
97 /*
98  * hardcoded tuple descriptors, contents generated by genbki.pl
99  */
100 static const FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
101 static const FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
102 static const FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
103 static const FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
104 static const FormData_pg_attribute Desc_pg_database[Natts_pg_database] = {Schema_pg_database};
105 static const FormData_pg_attribute Desc_pg_authid[Natts_pg_authid] = {Schema_pg_authid};
106 static const FormData_pg_attribute Desc_pg_auth_members[Natts_pg_auth_members] = {Schema_pg_auth_members};
107 static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
108 static const FormData_pg_attribute Desc_pg_shseclabel[Natts_pg_shseclabel] = {Schema_pg_shseclabel};
109 static const FormData_pg_attribute Desc_pg_subscription[Natts_pg_subscription] = {Schema_pg_subscription};
110 
111 /*
112  * Hash tables that index the relation cache
113  *
114  * We used to index the cache by both name and OID, but now there
115  * is only an index by OID.
116  */
117 typedef struct relidcacheent
118 {
121 } RelIdCacheEnt;
122 
124 
125 /*
126  * This flag is false until we have prepared the critical relcache entries
127  * that are needed to do indexscans on the tables read by relcache building.
128  */
130 
131 /*
132  * This flag is false until we have prepared the critical relcache entries
133  * for shared catalogs (which are the tables needed for login).
134  */
136 
137 /*
138  * This counter counts relcache inval events received since backend startup
139  * (but only for rels that are actually in cache). Presently, we use it only
140  * to detect whether data about to be written by write_relcache_init_file()
141  * might already be obsolete.
142  */
143 static long relcacheInvalsReceived = 0L;
144 
145 /*
146  * eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
147  * cleanup work. This list intentionally has limited size; if it overflows,
148  * we fall back to scanning the whole hashtable. There is no value in a very
149  * large list because (1) at some point, a hash_seq_search scan is faster than
150  * retail lookups, and (2) the value of this is to reduce EOXact work for
151  * short transactions, which can't have dirtied all that many tables anyway.
152  * EOXactListAdd() does not bother to prevent duplicate list entries, so the
153  * cleanup processing must be idempotent.
154  */
155 #define MAX_EOXACT_LIST 32
157 static int eoxact_list_len = 0;
158 static bool eoxact_list_overflowed = false;
159 
160 #define EOXactListAdd(rel) \
161  do { \
162  if (eoxact_list_len < MAX_EOXACT_LIST) /* room left: append the OID */ \
163  eoxact_list[eoxact_list_len++] = (rel)->rd_id; \
164  else /* list is full: only record that an overflow happened */ \
165  eoxact_list_overflowed = true; \
166  } while (0)
167 
168 /*
169  * EOXactTupleDescArray stores TupleDescs that (might) need AtEOXact
170  * cleanup work. The array expands as needed; there is no hashtable because
171  * we don't need to access individual items except at EOXact.
172  */
174 static int NextEOXactTupleDescNum = 0;
175 static int EOXactTupleDescArrayLen = 0;
176 
177 /*
178  * macros to manipulate the lookup hashtable
179  */
180 #define RelationCacheInsert(RELATION, replace_allowed) \
181 do { \
182  RelIdCacheEnt *hentry; bool found; \
183  hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
184  (void *) &((RELATION)->rd_id), \
185  HASH_ENTER, &found); \
186  if (found) /* an entry with this OID already exists: replace it */ \
187  { \
188  /* see comments in RelationBuildDesc and RelationBuildLocalRelation */ \
189  Relation _old_rel = hentry->reldesc; \
190  Assert(replace_allowed); \
191  hentry->reldesc = (RELATION); \
192  if (RelationHasReferenceCountZero(_old_rel)) /* unreferenced: destroy the old entry */ \
193  RelationDestroyRelation(_old_rel, false); \
194  else if (!IsBootstrapProcessingMode()) /* still referenced: leave it and warn, except in bootstrap mode */ \
195  elog(WARNING, "leaking still-referenced relcache entry for \"%s\"", \
196  RelationGetRelationName(_old_rel)); \
197  } \
198  else /* fresh hash entry: just store the descriptor pointer */ \
199  hentry->reldesc = (RELATION); \
200 } while(0)
201 
202 #define RelationIdCacheLookup(ID, RELATION) \
203 do { \
204  RelIdCacheEnt *hentry; \
205  hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
206  (void *) &(ID), /* ID must be an lvalue: its address is the hash key */ \
207  HASH_FIND, NULL); \
208  if (hentry) \
209  RELATION = hentry->reldesc; \
210  else /* no entry for this OID: report NULL to the caller */ \
211  RELATION = NULL; \
212 } while(0)
213 
214 #define RelationCacheDelete(RELATION) \
215 do { \
216  RelIdCacheEnt *hentry; \
217  hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
218  (void *) &((RELATION)->rd_id), \
219  HASH_REMOVE, NULL); \
220  if (hentry == NULL) /* removal returned no entry: warn rather than raise an error */ \
221  elog(WARNING, "failed to delete relcache entry for OID %u", \
222  (RELATION)->rd_id); \
223 } while(0)
224 
225 
226 /*
227  * Special cache for opclass-related information
228  *
229  * Note: only default support procs get cached, ie, those with
230  * lefttype = righttype = opcintype.
231  */
232 typedef struct opclasscacheent
233 {
234  Oid opclassoid; /* lookup key: OID of opclass */
235  bool valid; /* set TRUE after successful fill-in */
236  StrategyNumber numSupport; /* max # of support procs (from pg_am) */
237  Oid opcfamily; /* OID of opclass's family */
238  Oid opcintype; /* OID of opclass's declared input type */
239  RegProcedure *supportProcs; /* OIDs of support procedures */
241 
242 static HTAB *OpClassCache = NULL;
243 
244 
245 /* non-export function prototypes */
246 
247 static void RelationDestroyRelation(Relation relation, bool remember_tupdesc);
248 static void RelationClearRelation(Relation relation, bool rebuild);
249 
250 static void RelationReloadIndexInfo(Relation relation);
251 static void RelationFlushRelation(Relation relation);
253 static void AtEOXact_cleanup(Relation relation, bool isCommit);
254 static void AtEOSubXact_cleanup(Relation relation, bool isCommit,
255  SubTransactionId mySubid, SubTransactionId parentSubid);
256 static bool load_relcache_init_file(bool shared);
257 static void write_relcache_init_file(bool shared);
258 static void write_item(const void *data, Size len, FILE *fp);
259 
260 static void formrdesc(const char *relationName, Oid relationReltype,
261  bool isshared, bool hasoids,
262  int natts, const FormData_pg_attribute *attrs);
263 
264 static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic);
266 static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
267 static void RelationBuildTupleDesc(Relation relation);
268 static void RelationBuildPartitionKey(Relation relation);
270 static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
271 static void RelationInitPhysicalAddr(Relation relation);
272 static void load_critical_index(Oid indexoid, Oid heapoid);
273 static TupleDesc GetPgClassDescriptor(void);
274 static TupleDesc GetPgIndexDescriptor(void);
275 static void AttrDefaultFetch(Relation relation);
276 static void CheckConstraintFetch(Relation relation);
277 static int CheckConstraintCmp(const void *a, const void *b);
278 static List *insert_ordered_oid(List *list, Oid datum);
279 static void InitIndexAmRoutine(Relation relation);
280 static void IndexSupportInitialize(oidvector *indclass,
281  RegProcedure *indexSupport,
282  Oid *opFamily,
283  Oid *opcInType,
284  StrategyNumber maxSupportNumber,
285  AttrNumber maxAttributeNumber);
286 static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
287  StrategyNumber numSupport);
288 static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
289 static void unlink_initfile(const char *initfilename);
290 static bool equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1,
291  PartitionDesc partdesc2);
292 
293 
294 /*
295  * ScanPgRelation
296  *
297  * This is used by RelationBuildDesc to find a pg_class
298  * tuple matching targetRelId. The caller must hold at least
299  * AccessShareLock on the target relid to prevent concurrent-update
300  * scenarios; it isn't guaranteed that all scans used to build the
301  * relcache entry will use the same snapshot. If, for example,
302  * an attribute were to be added after scanning pg_class and before
303  * scanning pg_attribute, relnatts wouldn't match.
304  *
305  * NB: the returned tuple has been copied into palloc'd storage
306  * and must eventually be freed with heap_freetuple.
307  */
308 static HeapTuple
309 ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic)
310 {
311  HeapTuple pg_class_tuple;
312  Relation pg_class_desc;
313  SysScanDesc pg_class_scan;
314  ScanKeyData key[1];
315  Snapshot snapshot;
316 
317  /*
318  * If something goes wrong during backend startup, we might find ourselves
319  * trying to read pg_class before we've selected a database. That ain't
320  * gonna work, so bail out with a useful error message. If this happens,
321  * it probably means a relcache entry that needs to be nailed isn't.
322  */
323  if (!OidIsValid(MyDatabaseId))
324  elog(FATAL, "cannot read pg_class without having selected a database");
325 
326  /*
327  * form a scan key
328  */
329  ScanKeyInit(&key[0],
331  BTEqualStrategyNumber, F_OIDEQ,
332  ObjectIdGetDatum(targetRelId));
333 
334  /*
335  * Open pg_class and fetch a tuple. Force heap scan if we haven't yet
336  * built the critical relcache entries (this includes initdb and startup
337  * without a pg_internal.init file). The caller can also force a heap
338  * scan by setting indexOK == false.
339  */
340  pg_class_desc = heap_open(RelationRelationId, AccessShareLock);
341 
342  /*
343  * The caller might need a tuple that's newer than the one visible to the
344  * historic snapshot; currently the only case that requires this is looking
345  * up the relfilenode of non-mapped system relations during decoding.
346  */
347  if (force_non_historic)
349  else
351 
352  pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
353  indexOK && criticalRelcachesBuilt,
354  snapshot,
355  1, key);
356 
357  pg_class_tuple = systable_getnext(pg_class_scan);
358 
359  /*
360  * Must copy tuple before releasing buffer.
361  */
362  if (HeapTupleIsValid(pg_class_tuple))
363  pg_class_tuple = heap_copytuple(pg_class_tuple);
364 
365  /* all done */
366  systable_endscan(pg_class_scan);
367  heap_close(pg_class_desc, AccessShareLock);
368 
369  return pg_class_tuple; /* the copied tuple, or the invalid result when no row matched */
370 }
371 
372 /*
373  * AllocateRelationDesc
374  *
375  * This is used to allocate memory for a new relation descriptor
376  * and initialize the rd_rel field from the given pg_class tuple.
377  */
378 static Relation
380 {
381  Relation relation;
382  MemoryContext oldcxt;
383  Form_pg_class relationForm;
384 
385  /* Relcache entries must live in CacheMemoryContext */
387 
388  /*
389  * allocate and zero space for new relation descriptor
390  */
391  relation = (Relation) palloc0(sizeof(RelationData));
392 
393  /*
    * make sure relation is marked as having no open file yet (stated
    * explicitly, although palloc0 has already zeroed the whole struct)
    */
394  relation->rd_smgr = NULL;
395 
396  /*
397  * Copy the relation tuple form
398  *
399  * We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The
400  * variable-length fields (relacl, reloptions) are NOT stored in the
401  * relcache --- there'd be little point in it, since we don't copy the
402  * tuple's nulls bitmap and hence wouldn't know if the values are valid.
403  * Bottom line is that relacl *cannot* be retrieved from the relcache. Get
404  * it from the syscache if you need it. The same goes for the original
405  * form of reloptions (however, we do store the parsed form of reloptions
406  * in rd_options).
407  */
408  relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
409 
410  memcpy(relationForm, relp, CLASS_TUPLE_SIZE); /* fixed-size prefix only, per the note above */
411 
412  /* initialize relation tuple form */
413  relation->rd_rel = relationForm;
414 
415  /* and allocate attribute tuple form storage */
416  relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts,
417  relationForm->relhasoids);
418  /* which we mark as a reference-counted tupdesc */
419  relation->rd_att->tdrefcount = 1;
420 
421  MemoryContextSwitchTo(oldcxt); /* switch back to the context saved in oldcxt */
422 
423  return relation;
424 }
425 
426 /*
427  * RelationParseRelOptions
428  * Convert pg_class.reloptions into pre-parsed rd_options
429  *
430  * tuple is the real pg_class tuple (not rd_rel!) for relation
431  *
432  * Note: rd_rel and (if an index) rd_amroutine must be valid already
433  */
434 static void
436 {
437  bytea *options;
438 
439  relation->rd_options = NULL; /* default result: no parsed options */
440 
441  /* Fall out if relkind should not have options */
442  switch (relation->rd_rel->relkind)
443  {
444  case RELKIND_RELATION:
445  case RELKIND_TOASTVALUE:
446  case RELKIND_INDEX:
447  case RELKIND_VIEW:
448  case RELKIND_MATVIEW:
450  break;
451  default:
452  return; /* rd_options stays NULL for every other relkind */
453  }
454 
455  /*
456  * Fetch reloptions from tuple; have to use a hardwired descriptor because
457  * we might not have any other for pg_class yet (consider executing this
458  * code for pg_class itself)
459  */
460  options = extractRelOptions(tuple,
462  relation->rd_rel->relkind == RELKIND_INDEX ?
463  relation->rd_amroutine->amoptions : NULL); /* only indexes pass an access-method options routine */
464 
465  /*
466  * Copy parsed data into CacheMemoryContext. To guard against the
467  * possibility of leaks in the reloptions code, we want to do the actual
468  * parsing in the caller's memory context and copy the results into
469  * CacheMemoryContext after the fact.
470  */
471  if (options)
472  {
474  VARSIZE(options));
475  memcpy(relation->rd_options, options, VARSIZE(options));
476  pfree(options); /* release the working copy; rd_options now holds the data */
477  }
478 }
479 
480 /*
481  * RelationBuildTupleDesc
482  *
483  * Form the relation's tuple descriptor from information in
484  * the pg_attribute, pg_attrdef & pg_constraint system catalogs.
485  */
486 static void
488 {
489  HeapTuple pg_attribute_tuple;
490  Relation pg_attribute_desc;
491  SysScanDesc pg_attribute_scan;
492  ScanKeyData skey[2];
493  int need;
494  TupleConstr *constr;
495  AttrDefault *attrdef = NULL;
496  int ndef = 0;
497 
498  /* copy some fields from pg_class row to rd_att */
499  relation->rd_att->tdtypeid = relation->rd_rel->reltype;
500  relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
501  relation->rd_att->tdhasoid = relation->rd_rel->relhasoids;
502 
504  sizeof(TupleConstr));
505  constr->has_not_null = false;
506 
507  /*
508  * Form a scan key that selects only user attributes (attnum > 0).
509  * (Eliminating system attribute rows at the index level is lots faster
510  * than fetching them.)
511  */
512  ScanKeyInit(&skey[0],
514  BTEqualStrategyNumber, F_OIDEQ,
516  ScanKeyInit(&skey[1],
518  BTGreaterStrategyNumber, F_INT2GT,
519  Int16GetDatum(0));
520 
521  /*
522  * Open pg_attribute and begin a scan. Force heap scan if we haven't yet
523  * built the critical relcache entries (this includes initdb and startup
524  * without a pg_internal.init file).
525  */
526  pg_attribute_desc = heap_open(AttributeRelationId, AccessShareLock);
527  pg_attribute_scan = systable_beginscan(pg_attribute_desc,
530  NULL,
531  2, skey);
532 
533  /*
534  * add attribute data to relation->rd_att
535  */
536  need = relation->rd_rel->relnatts; /* number of attributes still to be found */
537 
538  while (HeapTupleIsValid(pg_attribute_tuple = systable_getnext(pg_attribute_scan)))
539  {
540  Form_pg_attribute attp;
541 
542  attp = (Form_pg_attribute) GETSTRUCT(pg_attribute_tuple);
543 
544  if (attp->attnum <= 0 ||
545  attp->attnum > relation->rd_rel->relnatts)
546  elog(ERROR, "invalid attribute number %d for %s",
547  attp->attnum, RelationGetRelationName(relation));
548 
549  memcpy(TupleDescAttr(relation->rd_att, attp->attnum - 1),
550  attp,
552 
553  /* Update constraint/default info */
554  if (attp->attnotnull)
555  constr->has_not_null = true;
556 
557  if (attp->atthasdef)
558  {
559  if (attrdef == NULL) /* first default seen: allocate the array lazily */
560  attrdef = (AttrDefault *)
562  relation->rd_rel->relnatts *
563  sizeof(AttrDefault));
564  attrdef[ndef].adnum = attp->attnum;
565  attrdef[ndef].adbin = NULL; /* presumably filled in by AttrDefaultFetch below -- confirm */
566  ndef++;
567  }
568  need--;
569  if (need == 0) /* every expected attribute has been seen: stop scanning */
570  break;
571  }
572 
573  /*
574  * end the scan and close the attribute relation
575  */
576  systable_endscan(pg_attribute_scan);
577  heap_close(pg_attribute_desc, AccessShareLock);
578 
579  if (need != 0)
580  elog(ERROR, "catalog is missing %d attribute(s) for relid %u",
581  need, RelationGetRelid(relation));
582 
583  /*
584  * The attcacheoff values we read from pg_attribute should all be -1
585  * ("unknown"). Verify this if assert checking is on. They will be
586  * computed when and if needed during tuple access.
587  */
588 #ifdef USE_ASSERT_CHECKING
589  {
590  int i;
591 
592  for (i = 0; i < relation->rd_rel->relnatts; i++)
593  Assert(TupleDescAttr(relation->rd_att, i)->attcacheoff == -1);
594  }
595 #endif
596 
597  /*
598  * However, we can easily set the attcacheoff value for the first
599  * attribute: it must be zero. This eliminates the need for special cases
600  * for attnum=1 that used to exist in fastgetattr() and index_getattr().
601  */
602  if (relation->rd_rel->relnatts > 0)
603  TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
604 
605  /*
606  * Set up constraint/default info
607  */
608  if (constr->has_not_null || ndef > 0 || relation->rd_rel->relchecks)
609  {
610  relation->rd_att->constr = constr;
611 
612  if (ndef > 0) /* DEFAULTs */
613  {
614  if (ndef < relation->rd_rel->relnatts) /* array was sized for relnatts entries: trim it to ndef */
615  constr->defval = (AttrDefault *)
616  repalloc(attrdef, ndef * sizeof(AttrDefault));
617  else
618  constr->defval = attrdef;
619  constr->num_defval = ndef;
620  AttrDefaultFetch(relation);
621  }
622  else
623  constr->num_defval = 0;
624 
625  if (relation->rd_rel->relchecks > 0) /* CHECKs */
626  {
627  constr->num_check = relation->rd_rel->relchecks;
628  constr->check = (ConstrCheck *)
630  constr->num_check * sizeof(ConstrCheck));
631  CheckConstraintFetch(relation);
632  }
633  else
634  constr->num_check = 0;
635  }
636  else
637  {
638  pfree(constr); /* nothing to record: discard the unused struct */
639  relation->rd_att->constr = NULL;
640  }
641 }
642 
643 /*
644  * RelationBuildRuleLock
645  *
646  * Form the relation's rewrite rules from information in
647  * the pg_rewrite system catalog.
648  *
649  * Note: The rule parsetrees are potentially very complex node structures.
650  * To allow these trees to be freed when the relcache entry is flushed,
651  * we make a private memory context to hold the RuleLock information for
652  * each relcache entry that has associated rules. The context is used
653  * just for rule info, not for any other subsidiary data of the relcache
654  * entry, because that keeps the update logic in RelationClearRelation()
655  * manageable. The other subsidiary data structures are simple enough
656  * to be easy to free explicitly, anyway.
657  */
658 static void
660 {
661  MemoryContext rulescxt;
662  MemoryContext oldcxt;
663  HeapTuple rewrite_tuple;
664  Relation rewrite_desc;
665  TupleDesc rewrite_tupdesc;
666  SysScanDesc rewrite_scan;
667  ScanKeyData key;
668  RuleLock *rulelock;
669  int numlocks;
670  RewriteRule **rules;
671  int maxlocks;
672 
673  /*
674  * Make the private context. Assume it'll not contain much data.
675  */
677  RelationGetRelationName(relation),
679  relation->rd_rulescxt = rulescxt;
680 
681  /*
682  * allocate an array to hold the rewrite rules (the array is extended if
683  * necessary)
684  */
685  maxlocks = 4;
686  rules = (RewriteRule **)
687  MemoryContextAlloc(rulescxt, sizeof(RewriteRule *) * maxlocks);
688  numlocks = 0;
689 
690  /*
691  * form a scan key
692  */
693  ScanKeyInit(&key,
695  BTEqualStrategyNumber, F_OIDEQ,
697 
698  /*
699  * open pg_rewrite and begin a scan
700  *
701  * Note: since we scan the rules using RewriteRelRulenameIndexId, we will
702  * be reading the rules in name order, except possibly during
703  * emergency-recovery operations (ie, IgnoreSystemIndexes). This in turn
704  * ensures that rules will be fired in name order.
705  */
707  rewrite_tupdesc = RelationGetDescr(rewrite_desc);
708  rewrite_scan = systable_beginscan(rewrite_desc,
710  true, NULL,
711  1, &key);
712 
713  while (HeapTupleIsValid(rewrite_tuple = systable_getnext(rewrite_scan)))
714  {
715  Form_pg_rewrite rewrite_form = (Form_pg_rewrite) GETSTRUCT(rewrite_tuple);
716  bool isnull;
717  Datum rule_datum;
718  char *rule_str;
719  RewriteRule *rule;
720 
721  rule = (RewriteRule *) MemoryContextAlloc(rulescxt,
722  sizeof(RewriteRule));
723 
724  rule->ruleId = HeapTupleGetOid(rewrite_tuple);
725 
726  rule->event = rewrite_form->ev_type - '0'; /* digit character converted to its integer value */
727  rule->enabled = rewrite_form->ev_enabled;
728  rule->isInstead = rewrite_form->is_instead;
729 
730  /*
731  * Must use heap_getattr to fetch ev_action and ev_qual. Also, the
732  * rule strings are often large enough to be toasted. To avoid
733  * leaking memory in the caller's context, do the detoasting here so
734  * we can free the detoasted version.
735  */
736  rule_datum = heap_getattr(rewrite_tuple,
738  rewrite_tupdesc,
739  &isnull);
740  Assert(!isnull);
741  rule_str = TextDatumGetCString(rule_datum);
742  oldcxt = MemoryContextSwitchTo(rulescxt); /* build the node tree inside the rules context */
743  rule->actions = (List *) stringToNode(rule_str);
744  MemoryContextSwitchTo(oldcxt);
745  pfree(rule_str);
746 
747  rule_datum = heap_getattr(rewrite_tuple,
749  rewrite_tupdesc,
750  &isnull);
751  Assert(!isnull);
752  rule_str = TextDatumGetCString(rule_datum);
753  oldcxt = MemoryContextSwitchTo(rulescxt);
754  rule->qual = (Node *) stringToNode(rule_str);
755  MemoryContextSwitchTo(oldcxt);
756  pfree(rule_str);
757 
758  /*
759  * We want the rule's table references to be checked as though by the
760  * table owner, not the user referencing the rule. Therefore, scan
761  * through the rule's actions and set the checkAsUser field on all
762  * rtable entries. We have to look at the qual as well, in case it
763  * contains sublinks.
764  *
765  * The reason for doing this when the rule is loaded, rather than when
766  * it is stored, is that otherwise ALTER TABLE OWNER would have to
767  * grovel through stored rules to update checkAsUser fields. Scanning
768  * the rule tree during load is relatively cheap (compared to
769  * constructing it in the first place), so we do it here.
770  */
771  setRuleCheckAsUser((Node *) rule->actions, relation->rd_rel->relowner);
772  setRuleCheckAsUser(rule->qual, relation->rd_rel->relowner);
773 
774  if (numlocks >= maxlocks) /* array is full: double its capacity */
775  {
776  maxlocks *= 2;
777  rules = (RewriteRule **)
778  repalloc(rules, sizeof(RewriteRule *) * maxlocks);
779  }
780  rules[numlocks++] = rule;
781  }
782 
783  /*
784  * end the scan and close pg_rewrite
785  */
786  systable_endscan(rewrite_scan);
787  heap_close(rewrite_desc, AccessShareLock);
788 
789  /*
790  * there might not be any rules (if relhasrules is out-of-date)
791  */
792  if (numlocks == 0)
793  {
794  relation->rd_rules = NULL;
795  relation->rd_rulescxt = NULL;
796  MemoryContextDelete(rulescxt); /* also releases the rules array allocated in it */
797  return;
798  }
799 
800  /*
801  * form a RuleLock and insert into relation
802  */
803  rulelock = (RuleLock *) MemoryContextAlloc(rulescxt, sizeof(RuleLock));
804  rulelock->numLocks = numlocks;
805  rulelock->rules = rules;
806 
807  relation->rd_rules = rulelock;
808 }
809 
810 /*
811  * RelationBuildPartitionKey
812  * Build and attach to relcache partition key data of relation
813  *
814  * Partitioning key data is stored in CacheMemoryContext to ensure it survives
815  * as long as the relcache. To avoid leaking memory in that context in case
816  * of an error partway through this function, we build the structure in the
817  * working context (which must be short-lived) and copy the completed
818  * structure into the cache memory.
819  *
820  * Also, since the structure being created here is sufficiently complex, we
821  * make a private child context of CacheMemoryContext for each relation that
822  * has associated partition key information. That means no complicated logic
823  * to free individual elements whenever the relcache entry is flushed - just
824  * delete the context.
825  */
826 static void
828 {
830  HeapTuple tuple;
831  bool isnull;
832  int i;
833  PartitionKey key;
834  AttrNumber *attrs;
835  oidvector *opclass;
836  oidvector *collation;
837  ListCell *partexprs_item;
838  Datum datum;
839  MemoryContext partkeycxt,
840  oldcxt;
841 
842  tuple = SearchSysCache1(PARTRELID,
844 
845  /*
846  * The following happens when we have created our pg_class entry but not
847  * the pg_partitioned_table entry yet.
848  */
849  if (!HeapTupleIsValid(tuple))
850  return;
851 
852  key = (PartitionKey) palloc0(sizeof(PartitionKeyData));
853 
854  /* Fixed-length attributes */
855  form = (Form_pg_partitioned_table) GETSTRUCT(tuple);
856  key->strategy = form->partstrat;
857  key->partnatts = form->partnatts;
858 
859  /*
860  * We can rely on the first variable-length attribute being mapped to the
861  * relevant field of the catalog's C struct, because all previous
862  * attributes are non-nullable and fixed-length.
863  */
864  attrs = form->partattrs.values;
865 
866  /* But use the hard way to retrieve further variable-length attributes */
867  /* Operator class */
868  datum = SysCacheGetAttr(PARTRELID, tuple,
870  Assert(!isnull);
871  opclass = (oidvector *) DatumGetPointer(datum);
872 
873  /* Collation */
874  datum = SysCacheGetAttr(PARTRELID, tuple,
876  Assert(!isnull);
877  collation = (oidvector *) DatumGetPointer(datum);
878 
879  /* Expressions */
880  datum = SysCacheGetAttr(PARTRELID, tuple,
882  if (!isnull)
883  {
884  char *exprString;
885  Node *expr;
886 
887  exprString = TextDatumGetCString(datum);
888  expr = stringToNode(exprString);
889  pfree(exprString);
890 
891  /*
892  * Run the expressions through const-simplification since the planner
893  * will be comparing them to similarly-processed qual clause operands,
894  * and may fail to detect valid matches without this step. We don't
895  * need to bother with canonicalize_qual() though, because partition
896  * expressions are not full-fledged qualification clauses.
897  */
898  expr = eval_const_expressions(NULL, (Node *) expr);
899 
900  /* May as well fix opfuncids too */
901  fix_opfuncids((Node *) expr);
902  key->partexprs = (List *) expr;
903  }
904 
905  key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber));
906  key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid));
907  key->partopcintype = (Oid *) palloc0(key->partnatts * sizeof(Oid));
908  key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo));
909 
910  key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid));
911 
912  /* Gather type and collation info as well */
913  key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid));
914  key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32));
915  key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16));
916  key->parttypbyval = (bool *) palloc0(key->partnatts * sizeof(bool));
917  key->parttypalign = (char *) palloc0(key->partnatts * sizeof(char));
918  key->parttypcoll = (Oid *) palloc0(key->partnatts * sizeof(Oid));
919 
920  /*
     * Copy partattrs and fill other per-attribute info. The copy size uses
     * the element type of the destination array (AttrNumber), matching the
     * allocation of key->partattrs above.
     */
921  memcpy(key->partattrs, attrs, key->partnatts * sizeof(AttrNumber));
922  partexprs_item = list_head(key->partexprs);
923  for (i = 0; i < key->partnatts; i++)
924  {
925  AttrNumber attno = key->partattrs[i];
926  HeapTuple opclasstup;
927  Form_pg_opclass opclassform;
928  Oid funcid;
929 
930  /* Collect opfamily information */
931  opclasstup = SearchSysCache1(CLAOID,
932  ObjectIdGetDatum(opclass->values[i]));
933  if (!HeapTupleIsValid(opclasstup))
934  elog(ERROR, "cache lookup failed for opclass %u", opclass->values[i]);
935 
936  opclassform = (Form_pg_opclass) GETSTRUCT(opclasstup);
937  key->partopfamily[i] = opclassform->opcfamily;
938  key->partopcintype[i] = opclassform->opcintype;
939 
940  /*
941  * A btree support function covers the cases of list and range methods
942  * currently supported.
943  */
944  funcid = get_opfamily_proc(opclassform->opcfamily,
945  opclassform->opcintype,
946  opclassform->opcintype,
947  BTORDER_PROC);
948  if (!OidIsValid(funcid)) /* should not happen */
949  elog(ERROR, "missing support function %d(%u,%u) in opfamily %u",
950  BTORDER_PROC, opclassform->opcintype, opclassform->opcintype,
951  opclassform->opcfamily);
952 
953  fmgr_info(funcid, &key->partsupfunc[i]);
954 
955  /* Collation */
956  key->partcollation[i] = collation->values[i];
957 
958  /* Collect type information */
959  if (attno != 0)
960  {
961  Form_pg_attribute att = TupleDescAttr(relation->rd_att, attno - 1);
962 
963  key->parttypid[i] = att->atttypid;
964  key->parttypmod[i] = att->atttypmod;
965  key->parttypcoll[i] = att->attcollation;
966  }
967  else
968  {
     /*
      * attno == 0 marks an expression column. Each such column must
      * consume its own entry of key->partexprs; fail cleanly instead of
      * dereferencing a NULL cell when the list has run out.
      */
     if (partexprs_item == NULL)
     elog(ERROR, "wrong number of partition key expressions");

969  key->parttypid[i] = exprType(lfirst(partexprs_item));
970  key->parttypmod[i] = exprTypmod(lfirst(partexprs_item));
971  key->parttypcoll[i] = exprCollation(lfirst(partexprs_item));

     /*
      * Step to the next expression so that a later expression column
      * does not reuse the type info of this one.
      */
     partexprs_item = lnext(partexprs_item);
972  }
974  &key->parttyplen[i],
975  &key->parttypbyval[i],
976  &key->parttypalign[i]);
977 
978  ReleaseSysCache(opclasstup);
979  }
980 
981  ReleaseSysCache(tuple);
982 
983  /* Success --- now copy to the cache memory */
985  RelationGetRelationName(relation),
987  relation->rd_partkeycxt = partkeycxt;
988  oldcxt = MemoryContextSwitchTo(relation->rd_partkeycxt);
989  relation->rd_partkey = copy_partition_key(key);
990  MemoryContextSwitchTo(oldcxt);
991 }
992 
993 /*
994  * copy_partition_key
995  *
996  * The copy is allocated in the current memory context.
997  */
998 static PartitionKey
1000 {
1001  PartitionKey newkey;
1002  int n;
1003 
1004  newkey = (PartitionKey) palloc(sizeof(PartitionKeyData));
1005 
1006  newkey->strategy = fromkey->strategy;
1007  newkey->partnatts = n = fromkey->partnatts;
1008 
1009  newkey->partattrs = (AttrNumber *) palloc(n * sizeof(AttrNumber));
1010  memcpy(newkey->partattrs, fromkey->partattrs, n * sizeof(AttrNumber));
1011 
1012  newkey->partexprs = copyObject(fromkey->partexprs);
1013 
1014  newkey->partopfamily = (Oid *) palloc(n * sizeof(Oid));
1015  memcpy(newkey->partopfamily, fromkey->partopfamily, n * sizeof(Oid));
1016 
1017  newkey->partopcintype = (Oid *) palloc(n * sizeof(Oid));
1018  memcpy(newkey->partopcintype, fromkey->partopcintype, n * sizeof(Oid));
1019 
1020  newkey->partsupfunc = (FmgrInfo *) palloc(n * sizeof(FmgrInfo));
1021  memcpy(newkey->partsupfunc, fromkey->partsupfunc, n * sizeof(FmgrInfo));
1022 
1023  newkey->partcollation = (Oid *) palloc(n * sizeof(Oid));
1024  memcpy(newkey->partcollation, fromkey->partcollation, n * sizeof(Oid));
1025 
1026  newkey->parttypid = (Oid *) palloc(n * sizeof(Oid));
1027  memcpy(newkey->parttypid, fromkey->parttypid, n * sizeof(Oid));
1028 
1029  newkey->parttypmod = (int32 *) palloc(n * sizeof(int32));
1030  memcpy(newkey->parttypmod, fromkey->parttypmod, n * sizeof(int32));
1031 
1032  newkey->parttyplen = (int16 *) palloc(n * sizeof(int16));
1033  memcpy(newkey->parttyplen, fromkey->parttyplen, n * sizeof(int16));
1034 
1035  newkey->parttypbyval = (bool *) palloc(n * sizeof(bool));
1036  memcpy(newkey->parttypbyval, fromkey->parttypbyval, n * sizeof(bool));
1037 
1038  newkey->parttypalign = (char *) palloc(n * sizeof(bool));
1039  memcpy(newkey->parttypalign, fromkey->parttypalign, n * sizeof(char));
1040 
1041  newkey->parttypcoll = (Oid *) palloc(n * sizeof(Oid));
1042  memcpy(newkey->parttypcoll, fromkey->parttypcoll, n * sizeof(Oid));
1043 
1044  return newkey;
1045 }
1046 
1047 /*
1048  * equalRuleLocks
1049  *
1050  * Determine whether two RuleLocks are equivalent
1051  *
1052  * Probably this should be in the rules code someplace...
1053  */
1054 static bool
1056 {
1057  int i;
1058 
1059  /*
1060  * As of 7.3 we assume the rule ordering is repeatable, because
1061  * RelationBuildRuleLock should read 'em in a consistent order. So just
1062  * compare corresponding slots.
1063  */
1064  if (rlock1 != NULL)
1065  {
1066  if (rlock2 == NULL)
1067  return false;
1068  if (rlock1->numLocks != rlock2->numLocks)
1069  return false;
1070  for (i = 0; i < rlock1->numLocks; i++)
1071  {
1072  RewriteRule *rule1 = rlock1->rules[i];
1073  RewriteRule *rule2 = rlock2->rules[i];
1074 
1075  if (rule1->ruleId != rule2->ruleId)
1076  return false;
1077  if (rule1->event != rule2->event)
1078  return false;
1079  if (rule1->enabled != rule2->enabled)
1080  return false;
1081  if (rule1->isInstead != rule2->isInstead)
1082  return false;
1083  if (!equal(rule1->qual, rule2->qual))
1084  return false;
1085  if (!equal(rule1->actions, rule2->actions))
1086  return false;
1087  }
1088  }
1089  else if (rlock2 != NULL)
1090  return false;
1091  return true;
1092 }
1093 
1094 /*
1095  * equalPolicy
1096  *
1097  * Determine whether two policies are equivalent
1098  */
1099 static bool
1101 {
1102  int i;
1103  Oid *r1,
1104  *r2;
1105 
1106  if (policy1 != NULL)
1107  {
1108  if (policy2 == NULL)
1109  return false;
1110 
1111  if (policy1->polcmd != policy2->polcmd)
1112  return false;
1113  if (policy1->hassublinks != policy2->hassublinks)
1114  return false;
1115  if (strcmp(policy1->policy_name, policy2->policy_name) != 0)
1116  return false;
1117  if (ARR_DIMS(policy1->roles)[0] != ARR_DIMS(policy2->roles)[0])
1118  return false;
1119 
1120  r1 = (Oid *) ARR_DATA_PTR(policy1->roles);
1121  r2 = (Oid *) ARR_DATA_PTR(policy2->roles);
1122 
1123  for (i = 0; i < ARR_DIMS(policy1->roles)[0]; i++)
1124  {
1125  if (r1[i] != r2[i])
1126  return false;
1127  }
1128 
1129  if (!equal(policy1->qual, policy2->qual))
1130  return false;
1131  if (!equal(policy1->with_check_qual, policy2->with_check_qual))
1132  return false;
1133  }
1134  else if (policy2 != NULL)
1135  return false;
1136 
1137  return true;
1138 }
1139 
1140 /*
1141  * equalRSDesc
1142  *
1143  * Determine whether two RowSecurityDesc's are equivalent
1144  */
1145 static bool
1147 {
1148  ListCell *lc,
1149  *rc;
1150 
1151  if (rsdesc1 == NULL && rsdesc2 == NULL)
1152  return true;
1153 
1154  if ((rsdesc1 != NULL && rsdesc2 == NULL) ||
1155  (rsdesc1 == NULL && rsdesc2 != NULL))
1156  return false;
1157 
1158  if (list_length(rsdesc1->policies) != list_length(rsdesc2->policies))
1159  return false;
1160 
1161  /* RelationBuildRowSecurity should build policies in order */
1162  forboth(lc, rsdesc1->policies, rc, rsdesc2->policies)
1163  {
1166 
1167  if (!equalPolicy(l, r))
1168  return false;
1169  }
1170 
1171  return true;
1172 }
1173 
1174 /*
1175  * equalPartitionDescs
1176  * Compare two partition descriptors for logical equality
1177  */
1178 static bool
1180  PartitionDesc partdesc2)
1181 {
1182  int i;
1183 
1184  if (partdesc1 != NULL)
1185  {
1186  if (partdesc2 == NULL)
1187  return false;
1188  if (partdesc1->nparts != partdesc2->nparts)
1189  return false;
1190 
1191  Assert(key != NULL || partdesc1->nparts == 0);
1192 
1193  /*
1194  * Same oids? If the partitioning structure did not change, that is,
1195  * no partitions were added or removed to the relation, the oids array
1196  * should still match element-by-element.
1197  */
1198  for (i = 0; i < partdesc1->nparts; i++)
1199  {
1200  if (partdesc1->oids[i] != partdesc2->oids[i])
1201  return false;
1202  }
1203 
1204  /*
1205  * Now compare partition bound collections. The logic to iterate over
1206  * the collections is private to partition.c.
1207  */
1208  if (partdesc1->boundinfo != NULL)
1209  {
1210  if (partdesc2->boundinfo == NULL)
1211  return false;
1212 
1214  key->parttypbyval,
1215  partdesc1->boundinfo,
1216  partdesc2->boundinfo))
1217  return false;
1218  }
1219  else if (partdesc2->boundinfo != NULL)
1220  return false;
1221  }
1222  else if (partdesc2 != NULL)
1223  return false;
1224 
1225  return true;
1226 }
1227 
1228 /*
1229  * RelationBuildDesc
1230  *
1231  * Build a relation descriptor. The caller must hold at least
1232  * AccessShareLock on the target relid.
1233  *
1234  * The new descriptor is inserted into the hash table if insertIt is true.
1235  *
1236  * Returns NULL if no pg_class row could be found for the given relid
1237  * (suggesting we are trying to access a just-deleted relation).
1238  * Any other error is reported via elog.
1239  */
1240 static Relation
1241 RelationBuildDesc(Oid targetRelId, bool insertIt)
1242 {
1243  Relation relation;
1244  Oid relid;
1245  HeapTuple pg_class_tuple;
1246  Form_pg_class relp;
1247 
1248  /*
1249  * find the tuple in pg_class corresponding to the given relation id
1250  */
1251  pg_class_tuple = ScanPgRelation(targetRelId, true, false);
1252 
1253  /*
1254  * if no such tuple exists, return NULL
1255  */
1256  if (!HeapTupleIsValid(pg_class_tuple))
1257  return NULL;
1258 
1259  /*
1260  * get information from the pg_class_tuple
1261  */
1262  relid = HeapTupleGetOid(pg_class_tuple);
1263  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
1264  Assert(relid == targetRelId);
1265 
1266  /*
1267  * allocate storage for the relation descriptor, and copy pg_class_tuple
1268  * to relation->rd_rel.
1269  */
1270  relation = AllocateRelationDesc(relp);
1271 
1272  /*
1273  * initialize the relation's relation id (relation->rd_id)
1274  */
1275  RelationGetRelid(relation) = relid;
1276 
1277  /*
1278  * normal relations are not nailed into the cache; nor can a pre-existing
1279  * relation be new. It could be temp though. (Actually, it could be new
1280  * too, but it's okay to forget that fact if forced to flush the entry.)
1281  */
1282  relation->rd_refcnt = 0;
1283  relation->rd_isnailed = false;
1286  switch (relation->rd_rel->relpersistence)
1287  {
1290  relation->rd_backend = InvalidBackendId;
1291  relation->rd_islocaltemp = false;
1292  break;
1293  case RELPERSISTENCE_TEMP:
1294  if (isTempOrTempToastNamespace(relation->rd_rel->relnamespace))
1295  {
1296  relation->rd_backend = BackendIdForTempRelations();
1297  relation->rd_islocaltemp = true;
1298  }
1299  else
1300  {
1301  /*
1302  * If it's a temp table, but not one of ours, we have to use
1303  * the slow, grotty method to figure out the owning backend.
1304  *
1305  * Note: it's possible that rd_backend gets set to MyBackendId
1306  * here, in case we are looking at a pg_class entry left over
1307  * from a crashed backend that coincidentally had the same
1308  * BackendId we're using. We should *not* consider such a
1309  * table to be "ours"; this is why we need the separate
1310  * rd_islocaltemp flag. The pg_class entry will get flushed
1311  * if/when we clean out the corresponding temp table namespace
1312  * in preparation for using it.
1313  */
1314  relation->rd_backend =
1315  GetTempNamespaceBackendId(relation->rd_rel->relnamespace);
1316  Assert(relation->rd_backend != InvalidBackendId);
1317  relation->rd_islocaltemp = false;
1318  }
1319  break;
1320  default:
1321  elog(ERROR, "invalid relpersistence: %c",
1322  relation->rd_rel->relpersistence);
1323  break;
1324  }
1325 
1326  /*
1327  * initialize the tuple descriptor (relation->rd_att).
1328  */
1329  RelationBuildTupleDesc(relation);
1330 
1331  /*
1332  * Fetch rules and triggers that affect this relation
1333  */
1334  if (relation->rd_rel->relhasrules)
1335  RelationBuildRuleLock(relation);
1336  else
1337  {
1338  relation->rd_rules = NULL;
1339  relation->rd_rulescxt = NULL;
1340  }
1341 
1342  if (relation->rd_rel->relhastriggers)
1343  RelationBuildTriggers(relation);
1344  else
1345  relation->trigdesc = NULL;
1346 
1347  if (relation->rd_rel->relrowsecurity)
1348  RelationBuildRowSecurity(relation);
1349  else
1350  relation->rd_rsdesc = NULL;
1351 
1352  /* foreign key data is not loaded till asked for */
1353  relation->rd_fkeylist = NIL;
1354  relation->rd_fkeyvalid = false;
1355 
1356  /* if a partitioned table, initialize key and partition descriptor info */
1357  if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1358  {
1359  RelationBuildPartitionKey(relation);
1360  RelationBuildPartitionDesc(relation);
1361  }
1362  else
1363  {
1364  relation->rd_partkeycxt = NULL;
1365  relation->rd_partkey = NULL;
1366  relation->rd_partdesc = NULL;
1367  relation->rd_pdcxt = NULL;
1368  }
1369 
1370  /*
1371  * if it's an index, initialize index-related information
1372  */
1373  if (OidIsValid(relation->rd_rel->relam))
1374  RelationInitIndexAccessInfo(relation);
1375 
1376  /* extract reloptions if any */
1377  RelationParseRelOptions(relation, pg_class_tuple);
1378 
1379  /*
1380  * initialize the relation lock manager information
1381  */
1382  RelationInitLockInfo(relation); /* see lmgr.c */
1383 
1384  /*
1385  * initialize physical addressing information for the relation
1386  */
1387  RelationInitPhysicalAddr(relation);
1388 
1389  /* make sure relation is marked as having no open file yet */
1390  relation->rd_smgr = NULL;
1391 
1392  /*
1393  * now we can free the memory allocated for pg_class_tuple
1394  */
1395  heap_freetuple(pg_class_tuple);
1396 
1397  /*
1398  * Insert newly created relation into relcache hash table, if requested.
1399  *
1400  * There is one scenario in which we might find a hashtable entry already
1401  * present, even though our caller failed to find it: if the relation is a
1402  * system catalog or index that's used during relcache load, we might have
1403  * recursively created the same relcache entry during the preceding steps.
1404  * So allow RelationCacheInsert to delete any already-present relcache
1405  * entry for the same OID. The already-present entry should have refcount
1406  * zero (else somebody forgot to close it); in the event that it doesn't,
1407  * we'll elog a WARNING and leak the already-present entry.
1408  */
1409  if (insertIt)
1410  RelationCacheInsert(relation, true);
1411 
1412  /* It's fully valid */
1413  relation->rd_isvalid = true;
1414 
1415  return relation;
1416 }
1417 
1418 /*
1419  * Initialize the physical addressing info (RelFileNode) for a relcache entry
1420  *
1421  * Note: at the physical level, relations in the pg_global tablespace must
1422  * be treated as shared, even if relisshared isn't set. Hence we do not
1423  * look at relisshared here.
1424  */
1425 static void
1427 {
1428  if (relation->rd_rel->reltablespace)
1429  relation->rd_node.spcNode = relation->rd_rel->reltablespace;
1430  else
1431  relation->rd_node.spcNode = MyDatabaseTableSpace;
1432  if (relation->rd_node.spcNode == GLOBALTABLESPACE_OID)
1433  relation->rd_node.dbNode = InvalidOid;
1434  else
1435  relation->rd_node.dbNode = MyDatabaseId;
1436 
1437  if (relation->rd_rel->relfilenode)
1438  {
1439  /*
1440  * Even if we are using a decoding snapshot that doesn't represent the
1441  * current state of the catalog we need to make sure the filenode
1442  * points to the current file since the older file will be gone (or
1443  * truncated). The new file will still contain older rows so lookups
1444  * in them will work correctly. This wouldn't work correctly if
1445  * rewrites were allowed to change the schema in an incompatible way,
1446  * but those are prevented both on catalog tables and on user tables
1447  * declared as additional catalog tables.
1448  */
1451  && IsTransactionState())
1452  {
1453  HeapTuple phys_tuple;
1454  Form_pg_class physrel;
1455 
1456  phys_tuple = ScanPgRelation(RelationGetRelid(relation),
1457  RelationGetRelid(relation) != ClassOidIndexId,
1458  true);
1459  if (!HeapTupleIsValid(phys_tuple))
1460  elog(ERROR, "could not find pg_class entry for %u",
1461  RelationGetRelid(relation));
1462  physrel = (Form_pg_class) GETSTRUCT(phys_tuple);
1463 
1464  relation->rd_rel->reltablespace = physrel->reltablespace;
1465  relation->rd_rel->relfilenode = physrel->relfilenode;
1466  heap_freetuple(phys_tuple);
1467  }
1468 
1469  relation->rd_node.relNode = relation->rd_rel->relfilenode;
1470  }
1471  else
1472  {
1473  /* Consult the relation mapper */
1474  relation->rd_node.relNode =
1475  RelationMapOidToFilenode(relation->rd_id,
1476  relation->rd_rel->relisshared);
1477  if (!OidIsValid(relation->rd_node.relNode))
1478  elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1479  RelationGetRelationName(relation), relation->rd_id);
1480  }
1481 }
1482 
1483 /*
1484  * Fill in the IndexAmRoutine for an index relation.
1485  *
1486  * relation's rd_amhandler and rd_indexcxt must be valid already.
1487  */
1488 static void
1490 {
1491  IndexAmRoutine *cached,
1492  *tmp;
1493 
1494  /*
1495  * Call the amhandler in current, short-lived memory context, just in case
1496  * it leaks anything (it probably won't, but let's be paranoid).
1497  */
1498  tmp = GetIndexAmRoutine(relation->rd_amhandler);
1499 
1500  /* OK, now transfer the data into relation's rd_indexcxt. */
1501  cached = (IndexAmRoutine *) MemoryContextAlloc(relation->rd_indexcxt,
1502  sizeof(IndexAmRoutine));
1503  memcpy(cached, tmp, sizeof(IndexAmRoutine));
1504  relation->rd_amroutine = cached;
1505 
1506  pfree(tmp);
1507 }
1508 
1509 /*
1510  * Initialize index-access-method support data for an index relation
1511  */
1512 void
1514 {
1515  HeapTuple tuple;
1516  Form_pg_am aform;
1517  Datum indcollDatum;
1518  Datum indclassDatum;
1519  Datum indoptionDatum;
1520  bool isnull;
1521  oidvector *indcoll;
1522  oidvector *indclass;
1523  int2vector *indoption;
1524  MemoryContext indexcxt;
1525  MemoryContext oldcontext;
1526  int natts;
1527  uint16 amsupport;
1528 
1529  /*
1530  * Make a copy of the pg_index entry for the index. Since pg_index
1531  * contains variable-length and possibly-null fields, we have to do this
1532  * honestly rather than just treating it as a Form_pg_index struct.
1533  */
1534  tuple = SearchSysCache1(INDEXRELID,
1535  ObjectIdGetDatum(RelationGetRelid(relation)));
1536  if (!HeapTupleIsValid(tuple))
1537  elog(ERROR, "cache lookup failed for index %u",
1538  RelationGetRelid(relation));
1540  relation->rd_indextuple = heap_copytuple(tuple);
1541  relation->rd_index = (Form_pg_index) GETSTRUCT(relation->rd_indextuple);
1542  MemoryContextSwitchTo(oldcontext);
1543  ReleaseSysCache(tuple);
1544 
1545  /*
1546  * Look up the index's access method, save the OID of its handler function
1547  */
1548  tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(relation->rd_rel->relam));
1549  if (!HeapTupleIsValid(tuple))
1550  elog(ERROR, "cache lookup failed for access method %u",
1551  relation->rd_rel->relam);
1552  aform = (Form_pg_am) GETSTRUCT(tuple);
1553  relation->rd_amhandler = aform->amhandler;
1554  ReleaseSysCache(tuple);
1555 
1556  natts = relation->rd_rel->relnatts;
1557  if (natts != relation->rd_index->indnatts)
1558  elog(ERROR, "relnatts disagrees with indnatts for index %u",
1559  RelationGetRelid(relation));
1560 
1561  /*
1562  * Make the private context to hold index access info. The reason we need
1563  * a context, and not just a couple of pallocs, is so that we won't leak
1564  * any subsidiary info attached to fmgr lookup records.
1565  */
1567  RelationGetRelationName(relation),
1569  relation->rd_indexcxt = indexcxt;
1570 
1571  /*
1572  * Now we can fetch the index AM's API struct
1573  */
1574  InitIndexAmRoutine(relation);
1575 
1576  /*
1577  * Allocate arrays to hold data
1578  */
1579  relation->rd_opfamily = (Oid *)
1580  MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1581  relation->rd_opcintype = (Oid *)
1582  MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1583 
1584  amsupport = relation->rd_amroutine->amsupport;
1585  if (amsupport > 0)
1586  {
1587  int nsupport = natts * amsupport;
1588 
1589  relation->rd_support = (RegProcedure *)
1590  MemoryContextAllocZero(indexcxt, nsupport * sizeof(RegProcedure));
1591  relation->rd_supportinfo = (FmgrInfo *)
1592  MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
1593  }
1594  else
1595  {
1596  relation->rd_support = NULL;
1597  relation->rd_supportinfo = NULL;
1598  }
1599 
1600  relation->rd_indcollation = (Oid *)
1601  MemoryContextAllocZero(indexcxt, natts * sizeof(Oid));
1602 
1603  relation->rd_indoption = (int16 *)
1604  MemoryContextAllocZero(indexcxt, natts * sizeof(int16));
1605 
1606  /*
1607  * indcollation cannot be referenced directly through the C struct,
1608  * because it comes after the variable-width indkey field. Must extract
1609  * the datum the hard way...
1610  */
1611  indcollDatum = fastgetattr(relation->rd_indextuple,
1614  &isnull);
1615  Assert(!isnull);
1616  indcoll = (oidvector *) DatumGetPointer(indcollDatum);
1617  memcpy(relation->rd_indcollation, indcoll->values, natts * sizeof(Oid));
1618 
1619  /*
1620  * indclass cannot be referenced directly through the C struct, because it
1621  * comes after the variable-width indkey field. Must extract the datum
1622  * the hard way...
1623  */
1624  indclassDatum = fastgetattr(relation->rd_indextuple,
1627  &isnull);
1628  Assert(!isnull);
1629  indclass = (oidvector *) DatumGetPointer(indclassDatum);
1630 
1631  /*
1632  * Fill the support procedure OID array, as well as the info about
1633  * opfamilies and opclass input types. (aminfo and supportinfo are left
1634  * as zeroes, and are filled on-the-fly when used)
1635  */
1636  IndexSupportInitialize(indclass, relation->rd_support,
1637  relation->rd_opfamily, relation->rd_opcintype,
1638  amsupport, natts);
1639 
1640  /*
1641  * Similarly extract indoption and copy it to the cache entry
1642  */
1643  indoptionDatum = fastgetattr(relation->rd_indextuple,
1646  &isnull);
1647  Assert(!isnull);
1648  indoption = (int2vector *) DatumGetPointer(indoptionDatum);
1649  memcpy(relation->rd_indoption, indoption->values, natts * sizeof(int16));
1650 
1651  /*
1652  * expressions, predicate, exclusion caches will be filled later
1653  */
1654  relation->rd_indexprs = NIL;
1655  relation->rd_indpred = NIL;
1656  relation->rd_exclops = NULL;
1657  relation->rd_exclprocs = NULL;
1658  relation->rd_exclstrats = NULL;
1659  relation->rd_amcache = NULL;
1660 }
1661 
1662 /*
1663  * IndexSupportInitialize
1664  * Initializes an index's cached opclass information,
1665  * given the index's pg_index.indclass entry.
1666  *
1667  * Data is returned into *indexSupport, *opFamily, and *opcInType,
1668  * which are arrays allocated by the caller.
1669  *
1670  * The caller also passes maxSupportNumber and maxAttributeNumber, since these
1671  * indicate the size of the arrays it has allocated --- but in practice these
1672  * numbers must always match those obtainable from the system catalog entries
1673  * for the index and access method.
1674  */
1675 static void
1677  RegProcedure *indexSupport,
1678  Oid *opFamily,
1679  Oid *opcInType,
1680  StrategyNumber maxSupportNumber,
1681  AttrNumber maxAttributeNumber)
1682 {
1683  int attIndex;
1684 
1685  for (attIndex = 0; attIndex < maxAttributeNumber; attIndex++)
1686  {
1687  OpClassCacheEnt *opcentry;
1688 
1689  if (!OidIsValid(indclass->values[attIndex]))
1690  elog(ERROR, "bogus pg_index tuple");
1691 
1692  /* look up the info for this opclass, using a cache */
1693  opcentry = LookupOpclassInfo(indclass->values[attIndex],
1694  maxSupportNumber);
1695 
1696  /* copy cached data into relcache entry */
1697  opFamily[attIndex] = opcentry->opcfamily;
1698  opcInType[attIndex] = opcentry->opcintype;
1699  if (maxSupportNumber > 0)
1700  memcpy(&indexSupport[attIndex * maxSupportNumber],
1701  opcentry->supportProcs,
1702  maxSupportNumber * sizeof(RegProcedure));
1703  }
1704 }
1705 
1706 /*
1707  * LookupOpclassInfo
1708  *
1709  * This routine maintains a per-opclass cache of the information needed
1710  * by IndexSupportInitialize(). This is more efficient than relying on
1711  * the catalog cache, because we can load all the info about a particular
1712  * opclass in a single indexscan of pg_amproc.
1713  *
1714  * The information from pg_am about expected range of support function
1715  * numbers is passed in, rather than being looked up, mainly because the
1716  * caller will have it already.
1717  *
1718  * Note there is no provision for flushing the cache. This is OK at the
1719  * moment because there is no way to ALTER any interesting properties of an
1720  * existing opclass --- all you can do is drop it, which will result in
1721  * a useless but harmless dead entry in the cache. To support altering
1722  * opclass membership (not the same as opfamily membership!), we'd need to
1723  * be able to flush this cache as well as the contents of relcache entries
1724  * for indexes.
1725  */
1726 static OpClassCacheEnt *
1727 LookupOpclassInfo(Oid operatorClassOid,
1728  StrategyNumber numSupport)
1729 {
1730  OpClassCacheEnt *opcentry;
1731  bool found;
1732  Relation rel;
1733  SysScanDesc scan;
1734  ScanKeyData skey[3];
1735  HeapTuple htup;
1736  bool indexOK;
1737 
1738  if (OpClassCache == NULL)
1739  {
1740  /* First time through: initialize the opclass cache */
1741  HASHCTL ctl;
1742 
1743  MemSet(&ctl, 0, sizeof(ctl));
1744  ctl.keysize = sizeof(Oid);
1745  ctl.entrysize = sizeof(OpClassCacheEnt);
1746  OpClassCache = hash_create("Operator class cache", 64,
1747  &ctl, HASH_ELEM | HASH_BLOBS);
1748 
1749  /* Also make sure CacheMemoryContext exists */
1750  if (!CacheMemoryContext)
1752  }
1753 
1754  opcentry = (OpClassCacheEnt *) hash_search(OpClassCache,
1755  (void *) &operatorClassOid,
1756  HASH_ENTER, &found);
1757 
1758  if (!found)
1759  {
1760  /* Need to allocate memory for new entry */
1761  opcentry->valid = false; /* until known OK */
1762  opcentry->numSupport = numSupport;
1763 
1764  if (numSupport > 0)
1765  opcentry->supportProcs = (RegProcedure *)
1767  numSupport * sizeof(RegProcedure));
1768  else
1769  opcentry->supportProcs = NULL;
1770  }
1771  else
1772  {
1773  Assert(numSupport == opcentry->numSupport);
1774  }
1775 
1776  /*
1777  * When testing for cache-flush hazards, we intentionally disable the
1778  * operator class cache and force reloading of the info on each call. This
1779  * is helpful because we want to test the case where a cache flush occurs
1780  * while we are loading the info, and it's very hard to provoke that if
1781  * this happens only once per opclass per backend.
1782  */
1783 #if defined(CLOBBER_CACHE_ALWAYS)
1784  opcentry->valid = false;
1785 #endif
1786 
1787  if (opcentry->valid)
1788  return opcentry;
1789 
1790  /*
1791  * Need to fill in new entry.
1792  *
1793  * To avoid infinite recursion during startup, force heap scans if we're
1794  * looking up info for the opclasses used by the indexes we would like to
1795  * reference here.
1796  */
1797  indexOK = criticalRelcachesBuilt ||
1798  (operatorClassOid != OID_BTREE_OPS_OID &&
1799  operatorClassOid != INT2_BTREE_OPS_OID);
1800 
1801  /*
1802  * We have to fetch the pg_opclass row to determine its opfamily and
1803  * opcintype, which are needed to look up related operators and functions.
1804  * It'd be convenient to use the syscache here, but that probably doesn't
1805  * work while bootstrapping.
1806  */
1807  ScanKeyInit(&skey[0],
1809  BTEqualStrategyNumber, F_OIDEQ,
1810  ObjectIdGetDatum(operatorClassOid));
1812  scan = systable_beginscan(rel, OpclassOidIndexId, indexOK,
1813  NULL, 1, skey);
1814 
1815  if (HeapTupleIsValid(htup = systable_getnext(scan)))
1816  {
1817  Form_pg_opclass opclassform = (Form_pg_opclass) GETSTRUCT(htup);
1818 
1819  opcentry->opcfamily = opclassform->opcfamily;
1820  opcentry->opcintype = opclassform->opcintype;
1821  }
1822  else
1823  elog(ERROR, "could not find tuple for opclass %u", operatorClassOid);
1824 
1825  systable_endscan(scan);
1827 
1828  /*
1829  * Scan pg_amproc to obtain support procs for the opclass. We only fetch
1830  * the default ones (those with lefttype = righttype = opcintype).
1831  */
1832  if (numSupport > 0)
1833  {
1834  ScanKeyInit(&skey[0],
1836  BTEqualStrategyNumber, F_OIDEQ,
1837  ObjectIdGetDatum(opcentry->opcfamily));
1838  ScanKeyInit(&skey[1],
1840  BTEqualStrategyNumber, F_OIDEQ,
1841  ObjectIdGetDatum(opcentry->opcintype));
1842  ScanKeyInit(&skey[2],
1844  BTEqualStrategyNumber, F_OIDEQ,
1845  ObjectIdGetDatum(opcentry->opcintype));
1847  scan = systable_beginscan(rel, AccessMethodProcedureIndexId, indexOK,
1848  NULL, 3, skey);
1849 
1850  while (HeapTupleIsValid(htup = systable_getnext(scan)))
1851  {
1852  Form_pg_amproc amprocform = (Form_pg_amproc) GETSTRUCT(htup);
1853 
1854  if (amprocform->amprocnum <= 0 ||
1855  (StrategyNumber) amprocform->amprocnum > numSupport)
1856  elog(ERROR, "invalid amproc number %d for opclass %u",
1857  amprocform->amprocnum, operatorClassOid);
1858 
1859  opcentry->supportProcs[amprocform->amprocnum - 1] =
1860  amprocform->amproc;
1861  }
1862 
1863  systable_endscan(scan);
1865  }
1866 
1867  opcentry->valid = true;
1868  return opcentry;
1869 }
1870 
1871 
1872 /*
1873  * formrdesc
1874  *
1875  * This is a special cut-down version of RelationBuildDesc(),
1876  * used while initializing the relcache.
1877  * The relation descriptor is built just from the supplied parameters,
1878  * without actually looking at any system table entries. We cheat
1879  * quite a lot since we only need to work for a few basic system
1880  * catalogs.
1881  *
1882  * formrdesc is currently used for: pg_database, pg_authid, pg_auth_members,
1883  * pg_shseclabel, pg_class, pg_attribute, pg_proc, and pg_type
1884  * (see RelationCacheInitializePhase2/3).
1885  *
1886  * Note that these catalogs can't have constraints (except attnotnull),
1887  * default values, rules, or triggers, since we don't cope with any of that.
1888  * (Well, actually, this only matters for properties that need to be valid
1889  * during bootstrap or before RelationCacheInitializePhase3 runs, and none of
1890  * these properties matter then...)
1891  *
1892  * NOTE: we assume we are already switched into CacheMemoryContext.
1893  */
1894 static void
1895 formrdesc(const char *relationName, Oid relationReltype,
1896  bool isshared, bool hasoids,
1897  int natts, const FormData_pg_attribute *attrs)
1898 {
1899  Relation relation;
1900  int i;
1901  bool has_not_null;
1902 
1903  /*
1904  * allocate new relation desc, clear all fields of reldesc
1905  */
1906  relation = (Relation) palloc0(sizeof(RelationData));
1907 
1908  /* make sure relation is marked as having no open file yet */
1909  relation->rd_smgr = NULL;
1910 
1911  /*
1912  * initialize reference count: 1 because it is nailed in cache
1913  */
1914  relation->rd_refcnt = 1;
1915 
1916  /*
1917  * all entries built with this routine are nailed-in-cache; none are for
1918  * new or temp relations.
1919  */
1920  relation->rd_isnailed = true;
1923  relation->rd_backend = InvalidBackendId;
1924  relation->rd_islocaltemp = false;
1925 
1926  /*
1927  * initialize relation tuple form
1928  *
1929  * The data we insert here is pretty incomplete/bogus, but it'll serve to
1930  * get us launched. RelationCacheInitializePhase3() will read the real
1931  * data from pg_class and replace what we've done here. Note in
1932  * particular that relowner is left as zero; this cues
1933  * RelationCacheInitializePhase3 that the real data isn't there yet.
1934  */
1936 
1937  namestrcpy(&relation->rd_rel->relname, relationName);
1938  relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
1939  relation->rd_rel->reltype = relationReltype;
1940 
1941  /*
1942  * It's important to distinguish between shared and non-shared relations,
1943  * even at bootstrap time, to make sure we know where they are stored.
1944  */
1945  relation->rd_rel->relisshared = isshared;
1946  if (isshared)
1947  relation->rd_rel->reltablespace = GLOBALTABLESPACE_OID;
1948 
1949  /* formrdesc is used only for permanent relations */
1950  relation->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
1951 
1952  /* ... and they're always populated, too */
1953  relation->rd_rel->relispopulated = true;
1954 
1955  relation->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
1956  relation->rd_rel->relpages = 0;
1957  relation->rd_rel->reltuples = 0;
1958  relation->rd_rel->relallvisible = 0;
1959  relation->rd_rel->relkind = RELKIND_RELATION;
1960  relation->rd_rel->relhasoids = hasoids;
1961  relation->rd_rel->relnatts = (int16) natts;
1962 
1963  /*
1964  * initialize attribute tuple form
1965  *
1966  * Unlike the case with the relation tuple, this data had better be right
1967  * because it will never be replaced. The data comes from
1968  * src/include/catalog/ headers via genbki.pl.
1969  */
1970  relation->rd_att = CreateTemplateTupleDesc(natts, hasoids);
1971  relation->rd_att->tdrefcount = 1; /* mark as refcounted */
1972 
1973  relation->rd_att->tdtypeid = relationReltype;
1974  relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
1975 
1976  /*
1977  * initialize tuple desc info
1978  */
1979  has_not_null = false;
1980  for (i = 0; i < natts; i++)
1981  {
1982  memcpy(TupleDescAttr(relation->rd_att, i),
1983  &attrs[i],
1985  has_not_null |= attrs[i].attnotnull;
1986  /* make sure attcacheoff is valid */
1987  TupleDescAttr(relation->rd_att, i)->attcacheoff = -1;
1988  }
1989 
1990  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
1991  TupleDescAttr(relation->rd_att, 0)->attcacheoff = 0;
1992 
1993  /* mark not-null status */
1994  if (has_not_null)
1995  {
1996  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
1997 
1998  constr->has_not_null = true;
1999  relation->rd_att->constr = constr;
2000  }
2001 
2002  /*
2003  * initialize relation id from info in att array (my, this is ugly)
2004  */
2005  RelationGetRelid(relation) = TupleDescAttr(relation->rd_att, 0)->attrelid;
2006 
2007  /*
2008  * All relations made with formrdesc are mapped. This is necessarily so
2009  * because there is no other way to know what filenode they currently
2010  * have. In bootstrap mode, add them to the initial relation mapper data,
2011  * specifying that the initial filenode is the same as the OID.
2012  */
2013  relation->rd_rel->relfilenode = InvalidOid;
2016  RelationGetRelid(relation),
2017  isshared, true);
2018 
2019  /*
2020  * initialize the relation lock manager information
2021  */
2022  RelationInitLockInfo(relation); /* see lmgr.c */
2023 
2024  /*
2025  * initialize physical addressing information for the relation
2026  */
2027  RelationInitPhysicalAddr(relation);
2028 
2029  /*
2030  * initialize the rel-has-index flag, using hardwired knowledge
2031  */
2033  {
2034  /* In bootstrap mode, we have no indexes */
2035  relation->rd_rel->relhasindex = false;
2036  }
2037  else
2038  {
2039  /* Otherwise, all the rels formrdesc is used for have indexes */
2040  relation->rd_rel->relhasindex = true;
2041  }
2042 
2043  /*
2044  * add new reldesc to relcache
2045  */
2046  RelationCacheInsert(relation, false);
2047 
2048  /* It's fully valid */
2049  relation->rd_isvalid = true;
2050 }
2051 
2052 
2053 /* ----------------------------------------------------------------
2054  * Relation Descriptor Lookup Interface
2055  * ----------------------------------------------------------------
2056  */
2057 
2058 /*
2059  * RelationIdGetRelation
2060  *
2061  * Lookup a reldesc by OID; make one if not already in cache.
2062  *
2063  * Returns NULL if no pg_class row could be found for the given relid
2064  * (suggesting we are trying to access a just-deleted relation).
2065  * Any other error is reported via elog.
2066  *
2067  * NB: caller should already have at least AccessShareLock on the
2068  * relation ID, else there are nasty race conditions.
2069  *
2070  * NB: relation ref count is incremented, or set to 1 if new entry.
2071  * Caller should eventually decrement count. (Usually,
2072  * that happens by calling RelationClose().)
2073  */
2074 Relation
2076 {
2077  Relation rd;
2078 
2079  /* Make sure we're in an xact, even if this ends up being a cache hit */
2081 
2082  /*
2083  * first try to find reldesc in the cache
2084  */
2085  RelationIdCacheLookup(relationId, rd);
2086 
2087  if (RelationIsValid(rd))
2088  {
2090  /* revalidate cache entry if necessary */
2091  if (!rd->rd_isvalid)
2092  {
2093  /*
2094  * Indexes only have a limited number of possible schema changes,
2095  * and we don't want to use the full-blown procedure because it's
2096  * a headache for indexes that reload itself depends on.
2097  */
2098  if (rd->rd_rel->relkind == RELKIND_INDEX)
2100  else
2101  RelationClearRelation(rd, true);
2102  Assert(rd->rd_isvalid);
2103  }
2104  return rd;
2105  }
2106 
2107  /*
2108  * no reldesc in the cache, so have RelationBuildDesc() build one and add
2109  * it.
2110  */
2111  rd = RelationBuildDesc(relationId, true);
2112  if (RelationIsValid(rd))
2114  return rd;
2115 }
2116 
2117 /* ----------------------------------------------------------------
2118  * cache invalidation support routines
2119  * ----------------------------------------------------------------
2120  */
2121 
2122 /*
2123  * RelationIncrementReferenceCount
2124  * Increments relation reference count.
2125  *
2126  * Note: bootstrap mode has its own weird ideas about relation refcount
2127  * behavior; we ought to fix it someday, but for now, just disable
2128  * reference count ownership tracking in bootstrap mode.
2129  */
2130 void
2132 {
2134  rel->rd_refcnt += 1;
2137 }
2138 
2139 /*
2140  * RelationDecrementReferenceCount
2141  * Decrements relation reference count.
2142  */
2143 void
2145 {
2146  Assert(rel->rd_refcnt > 0);
2147  rel->rd_refcnt -= 1;
2150 }
2151 
2152 /*
2153  * RelationClose - close an open relation
2154  *
2155  * Actually, we just decrement the refcount.
2156  *
2157  * NOTE: if compiled with -DRELCACHE_FORCE_RELEASE then relcache entries
2158  * will be freed as soon as their refcount goes to zero. In combination
2159  * with aset.c's CLOBBER_FREED_MEMORY option, this provides a good test
2160  * to catch references to already-released relcache entries. It slows
2161  * things down quite a bit, however.
2162  */
2163 void
2165 {
2166  /* Note: no locking manipulations needed */
2168 
2169 #ifdef RELCACHE_FORCE_RELEASE
2170  if (RelationHasReferenceCountZero(relation) &&
2171  relation->rd_createSubid == InvalidSubTransactionId &&
2173  RelationClearRelation(relation, false);
2174 #endif
2175 }
2176 
2177 /*
2178  * RelationReloadIndexInfo - reload minimal information for an open index
2179  *
2180  * This function is used only for indexes. A relcache inval on an index
2181  * can mean that its pg_class or pg_index row changed. There are only
2182  * very limited changes that are allowed to an existing index's schema,
2183  * so we can update the relcache entry without a complete rebuild; which
2184  * is fortunate because we can't rebuild an index entry that is "nailed"
2185  * and/or in active use. We support full replacement of the pg_class row,
2186  * as well as updates of a few simple fields of the pg_index row.
2187  *
2188  * We can't necessarily reread the catalog rows right away; we might be
2189  * in a failed transaction when we receive the SI notification. If so,
2190  * RelationClearRelation just marks the entry as invalid by setting
2191  * rd_isvalid to false. This routine is called to fix the entry when it
2192  * is next needed.
2193  *
2194  * We assume that at the time we are called, we have at least AccessShareLock
2195  * on the target index. (Note: in the calls from RelationClearRelation,
2196  * this is legitimate because we know the rel has positive refcount.)
2197  *
2198  * If the target index is an index on pg_class or pg_index, we'd better have
2199  * previously gotten at least AccessShareLock on its underlying catalog,
2200  * else we are at risk of deadlock against someone trying to exclusive-lock
2201  * the heap and index in that order. This is ensured in current usage by
2202  * only applying this to indexes being opened or having positive refcount.
2203  */
2204 static void
2206 {
2207  bool indexOK;
2208  HeapTuple pg_class_tuple;
2209  Form_pg_class relp;
2210 
2211  /* Should be called only for invalidated indexes */
2212  Assert(relation->rd_rel->relkind == RELKIND_INDEX &&
2213  !relation->rd_isvalid);
2214 
2215  /* Ensure it's closed at smgr level */
2216  RelationCloseSmgr(relation);
2217 
2218  /* Must free any AM cached data upon relcache flush */
2219  if (relation->rd_amcache)
2220  pfree(relation->rd_amcache);
2221  relation->rd_amcache = NULL;
2222 
2223  /*
2224  * If it's a shared index, we might be called before backend startup has
2225  * finished selecting a database, in which case we have no way to read
2226  * pg_class yet. However, a shared index can never have any significant
2227  * schema updates, so it's okay to ignore the invalidation signal. Just
2228  * mark it valid and return without doing anything more.
2229  */
2230  if (relation->rd_rel->relisshared && !criticalRelcachesBuilt)
2231  {
2232  relation->rd_isvalid = true;
2233  return;
2234  }
2235 
2236  /*
2237  * Read the pg_class row
2238  *
2239  * Don't try to use an indexscan of pg_class_oid_index to reload the info
2240  * for pg_class_oid_index ...
2241  */
2242  indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
2243  pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK, false);
2244  if (!HeapTupleIsValid(pg_class_tuple))
2245  elog(ERROR, "could not find pg_class tuple for index %u",
2246  RelationGetRelid(relation));
2247  relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
2248  memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE);
2249  /* Reload reloptions in case they changed */
2250  if (relation->rd_options)
2251  pfree(relation->rd_options);
2252  RelationParseRelOptions(relation, pg_class_tuple);
2253  /* done with pg_class tuple */
2254  heap_freetuple(pg_class_tuple);
2255  /* We must recalculate physical address in case it changed */
2256  RelationInitPhysicalAddr(relation);
2257 
2258  /*
2259  * For a non-system index, there are fields of the pg_index row that are
2260  * allowed to change, so re-read that row and update the relcache entry.
2261  * Most of the info derived from pg_index (such as support function lookup
2262  * info) cannot change, and indeed the whole point of this routine is to
2263  * update the relcache entry without clobbering that data; so wholesale
2264  * replacement is not appropriate.
2265  */
2266  if (!IsSystemRelation(relation))
2267  {
2268  HeapTuple tuple;
2270 
2271  tuple = SearchSysCache1(INDEXRELID,
2272  ObjectIdGetDatum(RelationGetRelid(relation)));
2273  if (!HeapTupleIsValid(tuple))
2274  elog(ERROR, "cache lookup failed for index %u",
2275  RelationGetRelid(relation));
2276  index = (Form_pg_index) GETSTRUCT(tuple);
2277 
2278  /*
2279  * Basically, let's just copy all the bool fields. There are one or
2280  * two of these that can't actually change in the current code, but
2281  * it's not worth it to track exactly which ones they are. None of
2282  * the array fields are allowed to change, though.
2283  */
2284  relation->rd_index->indisunique = index->indisunique;
2285  relation->rd_index->indisprimary = index->indisprimary;
2286  relation->rd_index->indisexclusion = index->indisexclusion;
2287  relation->rd_index->indimmediate = index->indimmediate;
2288  relation->rd_index->indisclustered = index->indisclustered;
2289  relation->rd_index->indisvalid = index->indisvalid;
2290  relation->rd_index->indcheckxmin = index->indcheckxmin;
2291  relation->rd_index->indisready = index->indisready;
2292  relation->rd_index->indislive = index->indislive;
2293 
2294  /* Copy xmin too, as that is needed to make sense of indcheckxmin */
2296  HeapTupleHeaderGetXmin(tuple->t_data));
2297 
2298  ReleaseSysCache(tuple);
2299  }
2300 
2301  /* Okay, now it's valid again */
2302  relation->rd_isvalid = true;
2303 }
2304 
2305 /*
2306  * RelationDestroyRelation
2307  *
2308  * Physically delete a relation cache entry and all subsidiary data.
2309  * Caller must already have unhooked the entry from the hash table.
2310  */
2311 static void
2312 RelationDestroyRelation(Relation relation, bool remember_tupdesc)
2313 {
2315 
2316  /*
2317  * Make sure smgr and lower levels close the relation's files, if they
2318  * weren't closed already. (This was probably done by caller, but let's
2319  * just be real sure.)
2320  */
2321  RelationCloseSmgr(relation);
2322 
2323  /*
2324  * Free all the subsidiary data structures of the relcache entry, then the
2325  * entry itself.
2326  */
2327  if (relation->rd_rel)
2328  pfree(relation->rd_rel);
2329  /* can't use DecrTupleDescRefCount here */
2330  Assert(relation->rd_att->tdrefcount > 0);
2331  if (--relation->rd_att->tdrefcount == 0)
2332  {
2333  /*
2334  * If we Rebuilt a relcache entry during a transaction then its
2335  * possible we did that because the TupDesc changed as the result of
2336  * an ALTER TABLE that ran at less than AccessExclusiveLock. It's
2337  * possible someone copied that TupDesc, in which case the copy would
2338  * point to free'd memory. So if we rebuild an entry we keep the
2339  * TupDesc around until end of transaction, to be safe.
2340  */
2341  if (remember_tupdesc)
2343  else
2344  FreeTupleDesc(relation->rd_att);
2345  }
2346  FreeTriggerDesc(relation->trigdesc);
2347  list_free_deep(relation->rd_fkeylist);
2348  list_free(relation->rd_indexlist);
2349  bms_free(relation->rd_indexattr);
2350  bms_free(relation->rd_keyattr);
2351  bms_free(relation->rd_pkattr);
2352  bms_free(relation->rd_idattr);
2353  if (relation->rd_pubactions)
2354  pfree(relation->rd_pubactions);
2355  if (relation->rd_options)
2356  pfree(relation->rd_options);
2357  if (relation->rd_indextuple)
2358  pfree(relation->rd_indextuple);
2359  if (relation->rd_indexcxt)
2360  MemoryContextDelete(relation->rd_indexcxt);
2361  if (relation->rd_rulescxt)
2362  MemoryContextDelete(relation->rd_rulescxt);
2363  if (relation->rd_rsdesc)
2364  MemoryContextDelete(relation->rd_rsdesc->rscxt);
2365  if (relation->rd_partkeycxt)
2367  if (relation->rd_pdcxt)
2368  MemoryContextDelete(relation->rd_pdcxt);
2369  if (relation->rd_partcheck)
2370  pfree(relation->rd_partcheck);
2371  if (relation->rd_fdwroutine)
2372  pfree(relation->rd_fdwroutine);
2373  pfree(relation);
2374 }
2375 
2376 /*
2377  * RelationClearRelation
2378  *
2379  * Physically blow away a relation cache entry, or reset it and rebuild
2380  * it from scratch (that is, from catalog entries). The latter path is
2381  * used when we are notified of a change to an open relation (one with
2382  * refcount > 0).
2383  *
2384  * NB: when rebuilding, we'd better hold some lock on the relation,
2385  * else the catalog data we need to read could be changing under us.
2386  * Also, a rel to be rebuilt had better have refcnt > 0. This is because
2387  * an sinval reset could happen while we're accessing the catalogs, and
2388  * the rel would get blown away underneath us by RelationCacheInvalidate
2389  * if it has zero refcnt.
2390  *
2391  * The "rebuild" parameter is redundant in current usage because it has
2392  * to match the relation's refcnt status, but we keep it as a crosscheck
2393  * that we're doing what the caller expects.
2394  */
2395 static void
2396 RelationClearRelation(Relation relation, bool rebuild)
2397 {
2398  /*
2399  * As per notes above, a rel to be rebuilt MUST have refcnt > 0; while of
2400  * course it would be an equally bad idea to blow away one with nonzero
2401  * refcnt, since that would leave someone somewhere with a dangling
2402  * pointer. All callers are expected to have verified that this holds.
2403  */
2404  Assert(rebuild ?
2405  !RelationHasReferenceCountZero(relation) :
2406  RelationHasReferenceCountZero(relation));
2407 
2408  /*
2409  * Make sure smgr and lower levels close the relation's files, if they
2410  * weren't closed already. If the relation is not getting deleted, the
2411  * next smgr access should reopen the files automatically. This ensures
2412  * that the low-level file access state is updated after, say, a vacuum
2413  * truncation.
2414  */
2415  RelationCloseSmgr(relation);
2416 
2417  /*
2418  * Never, never ever blow away a nailed-in system relation, because we'd
2419  * be unable to recover. However, we must redo RelationInitPhysicalAddr
2420  * in case it is a mapped relation whose mapping changed.
2421  *
2422  * If it's a nailed-but-not-mapped index, then we need to re-read the
2423  * pg_class row to see if its relfilenode changed. We do that immediately
2424  * if we're inside a valid transaction and the relation is open (not
2425  * counting the nailed refcount). Otherwise just mark the entry as
2426  * possibly invalid, and it'll be fixed when next opened.
2427  */
2428  if (relation->rd_isnailed)
2429  {
2430  RelationInitPhysicalAddr(relation);
2431 
2432  if (relation->rd_rel->relkind == RELKIND_INDEX)
2433  {
2434  relation->rd_isvalid = false; /* needs to be revalidated */
2435  if (relation->rd_refcnt > 1 && IsTransactionState())
2436  RelationReloadIndexInfo(relation);
2437  }
2438  return;
2439  }
2440 
2441  /*
2442  * Even non-system indexes should not be blown away if they are open and
2443  * have valid index support information. This avoids problems with active
2444  * use of the index support information. As with nailed indexes, we
2445  * re-read the pg_class row to handle possible physical relocation of the
2446  * index, and we check for pg_index updates too.
2447  */
2448  if (relation->rd_rel->relkind == RELKIND_INDEX &&
2449  relation->rd_refcnt > 0 &&
2450  relation->rd_indexcxt != NULL)
2451  {
2452  relation->rd_isvalid = false; /* needs to be revalidated */
2453  if (IsTransactionState())
2454  RelationReloadIndexInfo(relation);
2455  return;
2456  }
2457 
2458  /* Mark it invalid until we've finished rebuild */
2459  relation->rd_isvalid = false;
2460 
2461  /*
2462  * If we're really done with the relcache entry, blow it away. But if
2463  * someone is still using it, reconstruct the whole deal without moving
2464  * the physical RelationData record (so that the someone's pointer is
2465  * still valid).
2466  */
2467  if (!rebuild)
2468  {
2469  /* Remove it from the hash table */
2470  RelationCacheDelete(relation);
2471 
2472  /* And release storage */
2473  RelationDestroyRelation(relation, false);
2474  }
2475  else if (!IsTransactionState())
2476  {
2477  /*
2478  * If we're not inside a valid transaction, we can't do any catalog
2479  * access so it's not possible to rebuild yet. Just exit, leaving
2480  * rd_isvalid = false so that the rebuild will occur when the entry is
2481  * next opened.
2482  *
2483  * Note: it's possible that we come here during subtransaction abort,
2484  * and the reason for wanting to rebuild is that the rel is open in
2485  * the outer transaction. In that case it might seem unsafe to not
2486  * rebuild immediately, since whatever code has the rel already open
2487  * will keep on using the relcache entry as-is. However, in such a
2488  * case the outer transaction should be holding a lock that's
2489  * sufficient to prevent any significant change in the rel's schema,
2490  * so the existing entry contents should be good enough for its
2491  * purposes; at worst we might be behind on statistics updates or the
2492  * like. (See also CheckTableNotInUse() and its callers.) These same
2493  * remarks also apply to the cases above where we exit without having
2494  * done RelationReloadIndexInfo() yet.
2495  */
2496  return;
2497  }
2498  else
2499  {
2500  /*
2501  * Our strategy for rebuilding an open relcache entry is to build a
2502  * new entry from scratch, swap its contents with the old entry, and
2503  * finally delete the new entry (along with any infrastructure swapped
2504  * over from the old entry). This is to avoid trouble in case an
2505  * error causes us to lose control partway through. The old entry
2506  * will still be marked !rd_isvalid, so we'll try to rebuild it again
2507  * on next access. Meanwhile it's not any less valid than it was
2508  * before, so any code that might expect to continue accessing it
2509  * isn't hurt by the rebuild failure. (Consider for example a
2510  * subtransaction that ALTERs a table and then gets canceled partway
2511  * through the cache entry rebuild. The outer transaction should
2512  * still see the not-modified cache entry as valid.) The worst
2513  * consequence of an error is leaking the necessarily-unreferenced new
2514  * entry, and this shouldn't happen often enough for that to be a big
2515  * problem.
2516  *
2517  * When rebuilding an open relcache entry, we must preserve ref count,
2518  * rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state. Also
2519  * attempt to preserve the pg_class entry (rd_rel), tupledesc,
2520  * rewrite-rule, partition key, and partition descriptor substructures
2521  * in place, because various places assume that these structures won't
2522  * move while they are working with an open relcache entry. (Note:
2523  * the refcount mechanism for tupledescs might someday allow us to
2524  * remove this hack for the tupledesc.)
2525  *
2526  * Note that this process does not touch CurrentResourceOwner; which
2527  * is good because whatever ref counts the entry may have do not
2528  * necessarily belong to that resource owner.
2529  */
2530  Relation newrel;
2531  Oid save_relid = RelationGetRelid(relation);
2532  bool keep_tupdesc;
2533  bool keep_rules;
2534  bool keep_policies;
2535  bool keep_partkey;
2536  bool keep_partdesc;
2537 
2538  /* Build temporary entry, but don't link it into hashtable */
2539  newrel = RelationBuildDesc(save_relid, false);
2540  if (newrel == NULL)
2541  {
2542  /*
2543  * We can validly get here, if we're using a historic snapshot in
2544  * which a relation, accessed from outside logical decoding, is
2545  * still invisible. In that case it's fine to just mark the
2546  * relation as invalid and return - it'll fully get reloaded by
2547  * the cache reset at the end of logical decoding (or at the next
2548  * access). During normal processing we don't want to ignore this
2549  * case as it shouldn't happen there, as explained below.
2550  */
2551  if (HistoricSnapshotActive())
2552  return;
2553 
2554  /*
2555  * This shouldn't happen as dropping a relation is intended to be
2556  * impossible if still referenced (c.f. CheckTableNotInUse()). But
2557  * if we get here anyway, we can't just delete the relcache entry,
2558  * as it possibly could get accessed later (as e.g. the error
2559  * might get trapped and handled via a subtransaction rollback).
2560  */
2561  elog(ERROR, "relation %u deleted while still in use", save_relid);
2562  }
2563 
2564  keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
2565  keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
2566  keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc);
2567  keep_partkey = (relation->rd_partkey != NULL);
2568  keep_partdesc = equalPartitionDescs(relation->rd_partkey,
2569  relation->rd_partdesc,
2570  newrel->rd_partdesc);
2571 
2572  /*
2573  * Perform swapping of the relcache entry contents. Within this
2574  * process the old entry is momentarily invalid, so there *must* be no
2575  * possibility of CHECK_FOR_INTERRUPTS within this sequence. Do it in
2576  * all-in-line code for safety.
2577  *
2578  * Since the vast majority of fields should be swapped, our method is
2579  * to swap the whole structures and then re-swap those few fields we
2580  * didn't want swapped.
2581  */
2582 #define SWAPFIELD(fldtype, fldname) \
2583  do { \
2584  fldtype _tmp = newrel->fldname; \
2585  newrel->fldname = relation->fldname; \
2586  relation->fldname = _tmp; \
2587  } while (0)
2588 
2589  /* swap all Relation struct fields */
2590  {
2591  RelationData tmpstruct;
2592 
2593  memcpy(&tmpstruct, newrel, sizeof(RelationData));
2594  memcpy(newrel, relation, sizeof(RelationData));
2595  memcpy(relation, &tmpstruct, sizeof(RelationData));
2596  }
2597 
2598  /* rd_smgr must not be swapped, due to back-links from smgr level */
2599  SWAPFIELD(SMgrRelation, rd_smgr);
2600  /* rd_refcnt must be preserved */
2601  SWAPFIELD(int, rd_refcnt);
2602  /* isnailed shouldn't change */
2603  Assert(newrel->rd_isnailed == relation->rd_isnailed);
2604  /* creation sub-XIDs must be preserved */
2605  SWAPFIELD(SubTransactionId, rd_createSubid);
2606  SWAPFIELD(SubTransactionId, rd_newRelfilenodeSubid);
2607  /* un-swap rd_rel pointers, swap contents instead */
2608  SWAPFIELD(Form_pg_class, rd_rel);
2609  /* ... but actually, we don't have to update newrel->rd_rel */
2610  memcpy(relation->rd_rel, newrel->rd_rel, CLASS_TUPLE_SIZE);
2611  /* preserve old tupledesc and rules if no logical change */
2612  if (keep_tupdesc)
2613  SWAPFIELD(TupleDesc, rd_att);
2614  if (keep_rules)
2615  {
2616  SWAPFIELD(RuleLock *, rd_rules);
2617  SWAPFIELD(MemoryContext, rd_rulescxt);
2618  }
2619  if (keep_policies)
2620  SWAPFIELD(RowSecurityDesc *, rd_rsdesc);
2621  /* toast OID override must be preserved */
2622  SWAPFIELD(Oid, rd_toastoid);
2623  /* pgstat_info must be preserved */
2624  SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
2625  /* partition key must be preserved, if we have one */
2626  if (keep_partkey)
2627  {
2628  SWAPFIELD(PartitionKey, rd_partkey);
2629  SWAPFIELD(MemoryContext, rd_partkeycxt);
2630  }
2631  /* preserve old partdesc if no logical change */
2632  if (keep_partdesc)
2633  {
2634  SWAPFIELD(PartitionDesc, rd_partdesc);
2635  SWAPFIELD(MemoryContext, rd_pdcxt);
2636  }
2637 
2638 #undef SWAPFIELD
2639 
2640  /* And now we can throw away the temporary entry */
2641  RelationDestroyRelation(newrel, !keep_tupdesc);
2642  }
2643 }
2644 
2645 /*
2646  * RelationFlushRelation
2647  *
2648  * Rebuild the relation if it is open (refcount > 0), else blow it away.
2649  * This is used when we receive a cache invalidation event for the rel.
2650  */
2651 static void
2653 {
2654  if (relation->rd_createSubid != InvalidSubTransactionId ||
2656  {
2657  /*
2658  * New relcache entries are always rebuilt, not flushed; else we'd
2659  * forget the "new" status of the relation, which is a useful
2660  * optimization to have. Ditto for the new-relfilenode status.
2661  *
2662  * The rel could have zero refcnt here, so temporarily increment the
2663  * refcnt to ensure it's safe to rebuild it. We can assume that the
2664  * current transaction has some lock on the rel already.
2665  */
2667  RelationClearRelation(relation, true);
2669  }
2670  else
2671  {
2672  /*
2673  * Pre-existing rels can be dropped from the relcache if not open.
2674  */
2675  bool rebuild = !RelationHasReferenceCountZero(relation);
2676 
2677  RelationClearRelation(relation, rebuild);
2678  }
2679 }
2680 
2681 /*
2682  * RelationForgetRelation - unconditionally remove a relcache entry
2683  *
2684  * External interface for destroying a relcache entry when we
2685  * drop the relation.
2686  */
2687 void
2689 {
2690  Relation relation;
2691 
2692  RelationIdCacheLookup(rid, relation);
2693 
2694  if (!PointerIsValid(relation))
2695  return; /* not in cache, nothing to do */
2696 
2697  if (!RelationHasReferenceCountZero(relation))
2698  elog(ERROR, "relation %u is still open", rid);
2699 
2700  /* Unconditionally destroy the relcache entry */
2701  RelationClearRelation(relation, false);
2702 }
2703 
2704 /*
2705  * RelationCacheInvalidateEntry
2706  *
2707  * This routine is invoked for SI cache flush messages.
2708  *
2709  * Any relcache entry matching the relid must be flushed. (Note: caller has
2710  * already determined that the relid belongs to our database or is a shared
2711  * relation.)
2712  *
2713  * We used to skip local relations, on the grounds that they could
2714  * not be targets of cross-backend SI update messages; but it seems
2715  * safer to process them, so that our *own* SI update messages will
2716  * have the same effects during CommandCounterIncrement for both
2717  * local and nonlocal relations.
2718  */
2719 void
2721 {
2722  Relation relation;
2723 
2724  RelationIdCacheLookup(relationId, relation);
2725 
2726  if (PointerIsValid(relation))
2727  {
2729  RelationFlushRelation(relation);
2730  }
2731 }
2732 
2733 /*
2734  * RelationCacheInvalidate
2735  * Blow away cached relation descriptors that have zero reference counts,
2736  * and rebuild those with positive reference counts. Also reset the smgr
2737  * relation cache and re-read relation mapping data.
2738  *
2739  * This is currently used only to recover from SI message buffer overflow,
2740  * so we do not touch new-in-transaction relations; they cannot be targets
2741  * of cross-backend SI updates (and our own updates now go through a
2742  * separate linked list that isn't limited by the SI message buffer size).
2743  * Likewise, we need not discard new-relfilenode-in-transaction hints,
2744  * since any invalidation of those would be a local event.
2745  *
2746  * We do this in two phases: the first pass deletes deletable items, and
2747  * the second one rebuilds the rebuildable items. This is essential for
2748  * safety, because hash_seq_search only copes with concurrent deletion of
2749  * the element it is currently visiting. If a second SI overflow were to
2750  * occur while we are walking the table, resulting in recursive entry to
2751  * this routine, we could crash because the inner invocation blows away
2752  * the entry next to be visited by the outer scan. But this way is OK,
2753  * because (a) during the first pass we won't process any more SI messages,
2754  * so hash_seq_search will complete safely; (b) during the second pass we
2755  * only hold onto pointers to nondeletable entries.
2756  *
2757  * The two-phase approach also makes it easy to update relfilenodes for
2758  * mapped relations before we do anything else, and to ensure that the
2759  * second pass processes nailed-in-cache items before other nondeletable
2760  * items. This should ensure that system catalogs are up to date before
2761  * we attempt to use them to reload information about other open relations.
2762  */
2763 void
2765 {
2767  RelIdCacheEnt *idhentry;
2768  Relation relation;
2769  List *rebuildFirstList = NIL;
2770  List *rebuildList = NIL;
2771  ListCell *l;
2772 
2773  /*
2774  * Reload relation mapping data before starting to reconstruct cache.
2775  */
2777 
2778  /* Phase 1 */
2779  hash_seq_init(&status, RelationIdCache);
2780 
2781  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2782  {
2783  relation = idhentry->reldesc;
2784 
2785  /* Must close all smgr references to avoid leaving dangling ptrs */
2786  RelationCloseSmgr(relation);
2787 
2788  /*
2789  * Ignore new relations; no other backend will manipulate them before
2790  * we commit. Likewise, before replacing a relation's relfilenode, we
2791  * shall have acquired AccessExclusiveLock and drained any applicable
2792  * pending invalidations.
2793  */
2794  if (relation->rd_createSubid != InvalidSubTransactionId ||
2796  continue;
2797 
2799 
2800  if (RelationHasReferenceCountZero(relation))
2801  {
2802  /* Delete this entry immediately */
2803  Assert(!relation->rd_isnailed);
2804  RelationClearRelation(relation, false);
2805  }
2806  else
2807  {
2808  /*
2809  * If it's a mapped relation, immediately update its rd_node in
2810  * case its relfilenode changed. We must do this during phase 1
2811  * in case the relation is consulted during rebuild of other
2812  * relcache entries in phase 2. It's safe since consulting the
2813  * map doesn't involve any access to relcache entries.
2814  */
2815  if (RelationIsMapped(relation))
2816  RelationInitPhysicalAddr(relation);
2817 
2818  /*
2819  * Add this entry to list of stuff to rebuild in second pass.
2820  * pg_class goes to the front of rebuildFirstList while
2821  * pg_class_oid_index goes to the back of rebuildFirstList, so
2822  * they are done first and second respectively. Other nailed
2823  * relations go to the front of rebuildList, so they'll be done
2824  * next in no particular order; and everything else goes to the
2825  * back of rebuildList.
2826  */
2827  if (RelationGetRelid(relation) == RelationRelationId)
2828  rebuildFirstList = lcons(relation, rebuildFirstList);
2829  else if (RelationGetRelid(relation) == ClassOidIndexId)
2830  rebuildFirstList = lappend(rebuildFirstList, relation);
2831  else if (relation->rd_isnailed)
2832  rebuildList = lcons(relation, rebuildList);
2833  else
2834  rebuildList = lappend(rebuildList, relation);
2835  }
2836  }
2837 
2838  /*
2839  * Now zap any remaining smgr cache entries. This must happen before we
2840  * start to rebuild entries, since that may involve catalog fetches which
2841  * will re-open catalog files.
2842  */
2843  smgrcloseall();
2844 
2845  /* Phase 2: rebuild the items found to need rebuild in phase 1 */
2846  foreach(l, rebuildFirstList)
2847  {
2848  relation = (Relation) lfirst(l);
2849  RelationClearRelation(relation, true);
2850  }
2851  list_free(rebuildFirstList);
2852  foreach(l, rebuildList)
2853  {
2854  relation = (Relation) lfirst(l);
2855  RelationClearRelation(relation, true);
2856  }
2857  list_free(rebuildList);
2858 }
2859 
2860 /*
2861  * RelationCloseSmgrByOid - close a relcache entry's smgr link
2862  *
2863  * Needed in some cases where we are changing a relation's physical mapping.
2864  * The link will be automatically reopened on next use.
2865  */
2866 void
2868 {
2869  Relation relation;
2870 
2871  RelationIdCacheLookup(relationId, relation);
2872 
2873  if (!PointerIsValid(relation))
2874  return; /* not in cache, nothing to do */
2875 
2876  RelationCloseSmgr(relation);
2877 }
2878 
2879 static void
2881 {
2882  if (EOXactTupleDescArray == NULL)
2883  {
2884  MemoryContext oldcxt;
2885 
2887 
2888  EOXactTupleDescArray = (TupleDesc *) palloc(16 * sizeof(TupleDesc));
2891  MemoryContextSwitchTo(oldcxt);
2892  }
2894  {
2895  int32 newlen = EOXactTupleDescArrayLen * 2;
2896 
2898 
2899  EOXactTupleDescArray = (TupleDesc *) repalloc(EOXactTupleDescArray,
2900  newlen * sizeof(TupleDesc));
2901  EOXactTupleDescArrayLen = newlen;
2902  }
2903 
2904  EOXactTupleDescArray[NextEOXactTupleDescNum++] = td;
2905 }
2906 
2907 /*
2908  * AtEOXact_RelationCache
2909  *
2910  * Clean up the relcache at main-transaction commit or abort.
2911  *
2912  * Note: this must be called *before* processing invalidation messages.
2913  * In the case of abort, we don't want to try to rebuild any invalidated
2914  * cache entries (since we can't safely do database accesses). Therefore
2915  * we must reset refcnts before handling pending invalidations.
2916  *
2917  * As of PostgreSQL 8.1, relcache refcnts should get released by the
2918  * ResourceOwner mechanism. This routine just does a debugging
2919  * cross-check that no pins remain. However, we also need to do special
2920  * cleanup when the current transaction created any relations or made use
2921  * of forced index lists.
2922  */
2923 void
2925 {
2927  RelIdCacheEnt *idhentry;
2928  int i;
2929 
2930  /*
2931  * Unless the eoxact_list[] overflowed, we only need to examine the rels
2932  * listed in it. Otherwise fall back on a hash_seq_search scan.
2933  *
2934  * For simplicity, eoxact_list[] entries are not deleted till end of
2935  * top-level transaction, even though we could remove them at
2936  * subtransaction end in some cases, or remove relations from the list if
2937  * they are cleared for other reasons. Therefore we should expect the
2938  * case that list entries are not found in the hashtable; if not, there's
2939  * nothing to do for them.
2940  */
2942  {
2943  hash_seq_init(&status, RelationIdCache);
2944  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
2945  {
2946  AtEOXact_cleanup(idhentry->reldesc, isCommit);
2947  }
2948  }
2949  else
2950  {
2951  for (i = 0; i < eoxact_list_len; i++)
2952  {
2953  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
2954  (void *) &eoxact_list[i],
2955  HASH_FIND,
2956  NULL);
2957  if (idhentry != NULL)
2958  AtEOXact_cleanup(idhentry->reldesc, isCommit);
2959  }
2960  }
2961 
2962  if (EOXactTupleDescArrayLen > 0)
2963  {
2964  Assert(EOXactTupleDescArray != NULL);
2965  for (i = 0; i < NextEOXactTupleDescNum; i++)
2966  FreeTupleDesc(EOXactTupleDescArray[i]);
2967  pfree(EOXactTupleDescArray);
2968  EOXactTupleDescArray = NULL;
2969  }
2970 
2971  /* Now we're out of the transaction and can clear the lists */
2972  eoxact_list_len = 0;
2973  eoxact_list_overflowed = false;
2976 }
2977 
2978 /*
2979  * AtEOXact_cleanup
2980  *
2981  * Clean up a single rel at main-transaction commit or abort
2982  *
2983  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
2984  * bother to prevent duplicate entries in eoxact_list[].
2985  */
2986 static void
2987 AtEOXact_cleanup(Relation relation, bool isCommit)
2988 {
2989  /*
2990  * The relcache entry's ref count should be back to its normal
2991  * not-in-a-transaction state: 0 unless it's nailed in cache.
2992  *
2993  * In bootstrap mode, this is NOT true, so don't check it --- the
2994  * bootstrap code expects relations to stay open across start/commit
2995  * transaction calls. (That seems bogus, but it's not worth fixing.)
2996  *
2997  * Note: ideally this check would be applied to every relcache entry, not
2998  * just those that have eoxact work to do. But it's not worth forcing a
2999  * scan of the whole relcache just for this. (Moreover, doing so would
3000  * mean that assert-enabled testing never tests the hash_search code path
3001  * above, which seems a bad idea.)
3002  */
3003 #ifdef USE_ASSERT_CHECKING
3005  {
3006  int expected_refcnt;
3007 
3008  expected_refcnt = relation->rd_isnailed ? 1 : 0;
3009  Assert(relation->rd_refcnt == expected_refcnt);
3010  }
3011 #endif
3012 
3013  /*
3014  * Is it a relation created in the current transaction?
3015  *
3016  * During commit, reset the flag to zero, since we are now out of the
3017  * creating transaction. During abort, simply delete the relcache entry
3018  * --- it isn't interesting any longer. (NOTE: if we have forgotten the
3019  * new-ness of a new relation due to a forced cache flush, the entry will
3020  * get deleted anyway by shared-cache-inval processing of the aborted
3021  * pg_class insertion.)
3022  */
3023  if (relation->rd_createSubid != InvalidSubTransactionId)
3024  {
3025  if (isCommit)
3027  else if (RelationHasReferenceCountZero(relation))
3028  {
3029  RelationClearRelation(relation, false);
3030  return;
3031  }
3032  else
3033  {
3034  /*
3035  * Hmm, somewhere there's a (leaked?) reference to the relation.
3036  * We daren't remove the entry for fear of dereferencing a
3037  * dangling pointer later. Bleat, and mark it as not belonging to
3038  * the current transaction. Hopefully it'll get cleaned up
3039  * eventually. This must be just a WARNING to avoid
3040  * error-during-error-recovery loops.
3041  */
3043  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3044  RelationGetRelationName(relation));
3045  }
3046  }
3047 
3048  /*
3049  * Likewise, reset the hint about the relfilenode being new.
3050  */
3052 
3053  /*
3054  * Flush any temporary index list.
3055  */
3056  if (relation->rd_indexvalid == 2)
3057  {
3058  list_free(relation->rd_indexlist);
3059  relation->rd_indexlist = NIL;
3060  relation->rd_oidindex = InvalidOid;
3061  relation->rd_pkindex = InvalidOid;
3062  relation->rd_replidindex = InvalidOid;
3063  relation->rd_indexvalid = 0;
3064  }
3065 }
3066 
3067 /*
3068  * AtEOSubXact_RelationCache
3069  *
3070  * Clean up the relcache at sub-transaction commit or abort.
3071  *
3072  * Note: this must be called *before* processing invalidation messages.
3073  */
3074 void
3076  SubTransactionId parentSubid)
3077 {
3079  RelIdCacheEnt *idhentry;
3080  int i;
3081 
3082  /*
3083  * Unless the eoxact_list[] overflowed, we only need to examine the rels
3084  * listed in it. Otherwise fall back on a hash_seq_search scan. Same
3085  * logic as in AtEOXact_RelationCache.
3086  */
3088  {
3089  hash_seq_init(&status, RelationIdCache);
3090  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3091  {
3092  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3093  mySubid, parentSubid);
3094  }
3095  }
3096  else
3097  {
3098  for (i = 0; i < eoxact_list_len; i++)
3099  {
3100  idhentry = (RelIdCacheEnt *) hash_search(RelationIdCache,
3101  (void *) &eoxact_list[i],
3102  HASH_FIND,
3103  NULL);
3104  if (idhentry != NULL)
3105  AtEOSubXact_cleanup(idhentry->reldesc, isCommit,
3106  mySubid, parentSubid);
3107  }
3108  }
3109 
3110  /* Don't reset the list; we still need more cleanup later */
3111 }
3112 
3113 /*
3114  * AtEOSubXact_cleanup
3115  *
3116  * Clean up a single rel at subtransaction commit or abort
3117  *
3118  * NB: this processing must be idempotent, because EOXactListAdd() doesn't
3119  * bother to prevent duplicate entries in eoxact_list[].
3120  */
3121 static void
3122 AtEOSubXact_cleanup(Relation relation, bool isCommit,
3123  SubTransactionId mySubid, SubTransactionId parentSubid)
3124 {
3125  /*
3126  * Is it a relation created in the current subtransaction?
3127  *
3128  * During subcommit, mark it as belonging to the parent, instead. During
3129  * subabort, simply delete the relcache entry.
3130  */
3131  if (relation->rd_createSubid == mySubid)
3132  {
3133  if (isCommit)
3134  relation->rd_createSubid = parentSubid;
3135  else if (RelationHasReferenceCountZero(relation))
3136  {
3137  RelationClearRelation(relation, false);
3138  return;
3139  }
3140  else
3141  {
3142  /*
3143  * Hmm, somewhere there's a (leaked?) reference to the relation.
3144  * We daren't remove the entry for fear of dereferencing a
3145  * dangling pointer later. Bleat, and transfer it to the parent
3146  * subtransaction so we can try again later. This must be just a
3147  * WARNING to avoid error-during-error-recovery loops.
3148  */
3149  relation->rd_createSubid = parentSubid;
3150  elog(WARNING, "cannot remove relcache entry for \"%s\" because it has nonzero refcount",
3151  RelationGetRelationName(relation));
3152  }
3153  }
3154 
3155  /*
3156  * Likewise, update or drop any new-relfilenode-in-subtransaction hint.
3157  */
3158  if (relation->rd_newRelfilenodeSubid == mySubid)
3159  {
3160  if (isCommit)
3161  relation->rd_newRelfilenodeSubid = parentSubid;
3162  else
3164  }
3165 
3166  /*
3167  * Flush any temporary index list.
3168  */
3169  if (relation->rd_indexvalid == 2)
3170  {
3171  list_free(relation->rd_indexlist);
3172  relation->rd_indexlist = NIL;
3173  relation->rd_oidindex = InvalidOid;
3174  relation->rd_pkindex = InvalidOid;
3175  relation->rd_replidindex = InvalidOid;
3176  relation->rd_indexvalid = 0;
3177  }
3178 }
3179 
3180 
3181 /*
3182  * RelationBuildLocalRelation
3183  * Build a relcache entry for an about-to-be-created relation,
3184  * and enter it into the relcache.
3185  */
3186 Relation
3187 RelationBuildLocalRelation(const char *relname,
3188  Oid relnamespace,
3189  TupleDesc tupDesc,
3190  Oid relid,
3191  Oid relfilenode,
3192  Oid reltablespace,
3193  bool shared_relation,
3194  bool mapped_relation,
3195  char relpersistence,
3196  char relkind)
3197 {
3198  Relation rel;
3199  MemoryContext oldcxt;
3200  int natts = tupDesc->natts;
3201  int i;
3202  bool has_not_null;
3203  bool nailit;
3204 
3205  AssertArg(natts >= 0);
3206 
3207  /*
3208  * check for creation of a rel that must be nailed in cache.
3209  *
3210  * XXX this list had better match the relations specially handled in
3211  * RelationCacheInitializePhase2/3.
3212  */
3213  switch (relid)
3214  {
3215  case DatabaseRelationId:
3216  case AuthIdRelationId:
3217  case AuthMemRelationId:
3218  case RelationRelationId:
3219  case AttributeRelationId:
3220  case ProcedureRelationId:
3221  case TypeRelationId:
3222  nailit = true;
3223  break;
3224  default:
3225  nailit = false;
3226  break;
3227  }
3228 
3229  /*
3230  * check that hardwired list of shared rels matches what's in the
3231  * bootstrap .bki file. If you get a failure here during initdb, you
3232  * probably need to fix IsSharedRelation() to match whatever you've done
3233  * to the set of shared relations.
3234  */
3235  if (shared_relation != IsSharedRelation(relid))
3236  elog(ERROR, "shared_relation flag for \"%s\" does not match IsSharedRelation(%u)",
3237  relname, relid);
3238 
3239  /* Shared relations had better be mapped, too */
3240  Assert(mapped_relation || !shared_relation);
3241 
3242  /*
3243  * switch to the cache context to create the relcache entry.
3244  */
3245  if (!CacheMemoryContext)
3247 
3249 
3250  /*
3251  * allocate a new relation descriptor and fill in basic state fields.
3252  */
3253  rel = (Relation) palloc0(sizeof(RelationData));
3254 
3255  /* make sure relation is marked as having no open file yet */
3256  rel->rd_smgr = NULL;
3257 
3258  /* mark it nailed if appropriate */
3259  rel->rd_isnailed = nailit;
3260 
3261  rel->rd_refcnt = nailit ? 1 : 0;
3262 
3263  /* it's being created in this transaction */
3266 
3267  /*
3268  * create a new tuple descriptor from the one passed in. We do this
3269  * partly to copy it into the cache context, and partly because the new
3270  * relation can't have any defaults or constraints yet; they have to be
3271  * added in later steps, because they require additions to multiple system
3272  * catalogs. We can copy attnotnull constraints here, however.
3273  */
3274  rel->rd_att = CreateTupleDescCopy(tupDesc);
3275  rel->rd_att->tdrefcount = 1; /* mark as refcounted */
3276  has_not_null = false;
3277  for (i = 0; i < natts; i++)
3278  {
3279  Form_pg_attribute satt = TupleDescAttr(tupDesc, i);
3280  Form_pg_attribute datt = TupleDescAttr(rel->rd_att, i);
3281 
3282  datt->attidentity = satt->attidentity;
3283  datt->attnotnull = satt->attnotnull;
3284  has_not_null |= satt->attnotnull;
3285  }
3286 
3287  if (has_not_null)
3288  {
3289  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
3290 
3291  constr->has_not_null = true;
3292  rel->rd_att->constr = constr;
3293  }
3294 
3295  /*
3296  * initialize relation tuple form (caller may add/override data later)
3297  */
3299 
3300  namestrcpy(&rel->rd_rel->relname, relname);
3301  rel->rd_rel->relnamespace = relnamespace;
3302 
3303  rel->rd_rel->relkind = relkind;
3304  rel->rd_rel->relhasoids = rel->rd_att->tdhasoid;
3305  rel->rd_rel->relnatts = natts;
3306  rel->rd_rel->reltype = InvalidOid;
3307  /* needed when bootstrapping: */
3308  rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;
3309 
3310  /* set up persistence and relcache fields dependent on it */
3311  rel->rd_rel->relpersistence = relpersistence;
3312  switch (relpersistence)
3313  {
3317  rel->rd_islocaltemp = false;
3318  break;
3319  case RELPERSISTENCE_TEMP:
3320  Assert(isTempOrTempToastNamespace(relnamespace));
3322  rel->rd_islocaltemp = true;
3323  break;
3324  default:
3325  elog(ERROR, "invalid relpersistence: %c", relpersistence);
3326  break;
3327  }
3328 
3329  /* if it's a materialized view, it's not populated initially */
3330  if (relkind == RELKIND_MATVIEW)
3331  rel->rd_rel->relispopulated = false;
3332  else
3333  rel->rd_rel->relispopulated = true;
3334 
3335  /* system relations and non-table objects don't have one */
3336  if (!IsSystemNamespace(relnamespace) &&
3337  (relkind == RELKIND_RELATION ||
3338  relkind == RELKIND_MATVIEW ||
3339  relkind == RELKIND_PARTITIONED_TABLE))
3340  rel->rd_rel->relreplident = REPLICA_IDENTITY_DEFAULT;
3341  else
3342  rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
3343 
3344  /*
3345  * Insert relation physical and logical identifiers (OIDs) into the right
3346  * places. For a mapped relation, we set relfilenode to zero and rely on
3347  * RelationInitPhysicalAddr to consult the map.
3348  */
3349  rel->rd_rel->relisshared = shared_relation;
3350 
3351  RelationGetRelid(rel) = relid;
3352 
3353  for (i = 0; i < natts; i++)
3354  TupleDescAttr(rel->rd_att, i)->attrelid = relid;
3355 
3356  rel->rd_rel->reltablespace = reltablespace;
3357 
3358  if (mapped_relation)
3359  {
3360  rel->rd_rel->relfilenode = InvalidOid;
3361  /* Add it to the active mapping information */
3362  RelationMapUpdateMap(relid, relfilenode, shared_relation, true);
3363  }
3364  else
3365  rel->rd_rel->relfilenode = relfilenode;
3366 
3367  RelationInitLockInfo(rel); /* see lmgr.c */
3368 
3370 
3371  /*
3372  * Okay to insert into the relcache hash table.
3373  *
3374  * Ordinarily, there should certainly not be an existing hash entry for
3375  * the same OID; but during bootstrap, when we create a "real" relcache
3376  * entry for one of the bootstrap relations, we'll be overwriting the
3377  * phony one created with formrdesc. So allow that to happen for nailed
3378  * rels.
3379  */
3380  RelationCacheInsert(rel, nailit);
3381 
3382  /*
3383  * Flag relation as needing eoxact cleanup (to clear rd_createSubid). We
3384  * can't do this before storing relid in it.
3385  */
3386  EOXactListAdd(rel);
3387 
3388  /*
3389  * done building relcache entry.
3390  */
3391  MemoryContextSwitchTo(oldcxt);
3392 
3393  /* It's fully valid */
3394  rel->rd_isvalid = true;
3395 
3396  /*
3397  * Caller expects us to pin the returned entry.
3398  */
3400 
3401  return rel;
3402 }
3403 
3404 
3405 /*
3406  * RelationSetNewRelfilenode
3407  *
3408  * Assign a new relfilenode (physical file name) to the relation.
3409  *
3410  * This allows a full rewrite of the relation to be done with transactional
3411  * safety (since the filenode assignment can be rolled back). Note however
3412  * that there is no simple way to access the relation's old data for the
3413  * remainder of the current transaction. This limits the usefulness to cases
3414  * such as TRUNCATE or rebuilding an index from scratch.
3415  *
3416  * Caller must already hold exclusive lock on the relation.
3417  *
3418  * The relation is marked with relfrozenxid = freezeXid (InvalidTransactionId
3419  * must be passed for indexes and sequences). This should be a lower bound on
3420  * the XIDs that will be put into the new relation contents.
3421  *
3422  * The new filenode's persistence is set to the given value. This is useful
3423  * for the cases that are changing the relation's persistence; other callers
3424  * need to pass the original relpersistence value.
3425  */
3426 void
3427 RelationSetNewRelfilenode(Relation relation, char persistence,
3428  TransactionId freezeXid, MultiXactId minmulti)
3429 {
3430  Oid newrelfilenode;
3431  RelFileNodeBackend newrnode;
3432  Relation pg_class;
3433  HeapTuple tuple;
3434  Form_pg_class classform;
3435 
3436  /* Indexes, sequences must have Invalid frozenxid; other rels must not */
3437  Assert((relation->rd_rel->relkind == RELKIND_INDEX ||
3438  relation->rd_rel->relkind == RELKIND_SEQUENCE) ?
3439  freezeXid == InvalidTransactionId :
3440  TransactionIdIsNormal(freezeXid));
3441  Assert(TransactionIdIsNormal(freezeXid) == MultiXactIdIsValid(minmulti));
3442 
3443  /* Allocate a new relfilenode */
3444  newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL,
3445  persistence);
3446 
3447  /*
3448  * Get a writable copy of the pg_class tuple for the given relation.
3449  */
3451 
3452  tuple = SearchSysCacheCopy1(RELOID,
3453  ObjectIdGetDatum(RelationGetRelid(relation)));
3454  if (!HeapTupleIsValid(tuple))
3455  elog(ERROR, "could not find tuple for relation %u",
3456  RelationGetRelid(relation));
3457  classform = (Form_pg_class) GETSTRUCT(tuple);
3458 
3459  /*
3460  * Create storage for the main fork of the new relfilenode.
3461  *
3462  * NOTE: any conflict in relfilenode value will be caught here, if
3463  * GetNewRelFileNode messes up for any reason.
3464  */
3465  newrnode.node = relation->rd_node;
3466  newrnode.node.relNode = newrelfilenode;
3467  newrnode.backend = relation->rd_backend;
3468  RelationCreateStorage(newrnode.node, persistence);
3469  smgrclosenode(newrnode);
3470 
3471  /*
3472  * Schedule unlinking of the old storage at transaction commit.
3473  */
3474  RelationDropStorage(relation);
3475 
3476  /*
3477  * Now update the pg_class row. However, if we're dealing with a mapped
3478  * index, pg_class.relfilenode doesn't change; instead we have to send the
3479  * update to the relation mapper.
3480  */
3481  if (RelationIsMapped(relation))
3483  newrelfilenode,
3484  relation->rd_rel->relisshared,
3485  false);
3486  else
3487  classform->relfilenode = newrelfilenode;
3488 
3489  /* These changes are safe even for a mapped relation */
3490  if (relation->rd_rel->relkind != RELKIND_SEQUENCE)
3491  {
3492  classform->relpages = 0; /* it's empty until further notice */
3493  classform->reltuples = 0;
3494  classform->relallvisible = 0;
3495  }
3496  classform->relfrozenxid = freezeXid;
3497  classform->relminmxid = minmulti;
3498  classform->relpersistence = persistence;
3499 
3500  CatalogTupleUpdate(pg_class, &tuple->t_self, tuple);
3501 
3502  heap_freetuple(tuple);
3503 
3504  heap_close(pg_class, RowExclusiveLock);
3505 
3506  /*
3507  * Make the pg_class row change visible, as well as the relation map
3508  * change if any. This will cause the relcache entry to get updated, too.
3509  */
3511 
3512  /*
3513  * Mark the rel as having been given a new relfilenode in the current
3514  * (sub) transaction. This is a hint that can be used to optimize later
3515  * operations on the rel in the same transaction.
3516  */
3518 
3519  /* Flag relation as needing eoxact cleanup (to remove the hint) */
3520  EOXactListAdd(relation);
3521 }
3522 
3523 
3524 /*
3525  * RelationCacheInitialize
3526  *
3527  * This initializes the relation descriptor cache. At the time
3528  * that this is invoked, we can't do database access yet (mainly
3529  * because the transaction subsystem is not up); all we are doing
3530  * is making an empty cache hashtable. This must be done before
3531  * starting the initialization transaction, because otherwise
3532  * AtEOXact_RelationCache would crash if that transaction aborts
3533  * before we can get the relcache set up.
3534  */
3535 
3536 #define INITRELCACHESIZE 400
3537 
3538 void
3540 {
3541  HASHCTL ctl;
3542 
3543  /*
3544  * make sure cache memory context exists
3545  */
3546  if (!CacheMemoryContext)
3548 
3549  /*
3550  * create hashtable that indexes the relcache
3551  */
3552  MemSet(&ctl, 0, sizeof(ctl));
3553  ctl.keysize = sizeof(Oid);
3554  ctl.entrysize = sizeof(RelIdCacheEnt);
3555  RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
3556  &ctl, HASH_ELEM | HASH_BLOBS);
3557 
3558  /*
3559  * relation mapper needs to be initialized too
3560  */
3562 }
3563 
3564 /*
3565  * RelationCacheInitializePhase2
3566  *
3567  * This is called to prepare for access to shared catalogs during startup.
3568  * We must at least set up nailed reldescs for pg_database, pg_authid,
3569  * pg_auth_members, and pg_shseclabel. Ideally we'd like to have reldescs
3570  * for their indexes, too. We attempt to load this information from the
3571  * shared relcache init file. If that's missing or broken, just make
3572  * phony entries for the catalogs themselves.
3573  * RelationCacheInitializePhase3 will clean up as needed.
3574  */
3575 void
3577 {
3578  MemoryContext oldcxt;
3579 
3580  /*
3581  * relation mapper needs initialized too
3582  */
3584 
3585  /*
3586  * In bootstrap mode, the shared catalogs aren't there yet anyway, so do
3587  * nothing.
3588  */
3590  return;
3591 
3592  /*
3593  * switch to cache memory context
3594  */
3596 
3597  /*
3598  * Try to load the shared relcache cache file. If unsuccessful, bootstrap
3599  * the cache with pre-made descriptors for the critical shared catalogs.
3600  */
3601  if (!load_relcache_init_file(true))
3602  {
3603  formrdesc("pg_database", DatabaseRelation_Rowtype_Id, true,
3605  formrdesc("pg_authid", AuthIdRelation_Rowtype_Id, true,
3607  formrdesc("pg_auth_members", AuthMemRelation_Rowtype_Id, true,
3609  formrdesc("pg_shseclabel", SharedSecLabelRelation_Rowtype_Id, true,
3611  formrdesc("pg_subscription", SubscriptionRelation_Rowtype_Id, true,
3613 
3614 #define NUM_CRITICAL_SHARED_RELS 5 /* fix if you change list above */
3615  }
3616 
3617  MemoryContextSwitchTo(oldcxt);
3618 }
3619 
3620 /*
3621  * RelationCacheInitializePhase3
3622  *
3623  * This is called as soon as the catcache and transaction system
3624  * are functional and we have determined MyDatabaseId. At this point
3625  * we can actually read data from the database's system catalogs.
3626  * We first try to read pre-computed relcache entries from the local
3627  * relcache init file. If that's missing or broken, make phony entries
3628  * for the minimum set of nailed-in-cache relations. Then (unless
3629  * bootstrapping) make sure we have entries for the critical system
3630  * indexes. Once we've done all this, we have enough infrastructure to
3631  * open any system catalog or use any catcache. The last step is to
3632  * rewrite the cache files if needed.
3633  */
3634 void
3636 {
3638  RelIdCacheEnt *idhentry;
3639  MemoryContext oldcxt;
3640  bool needNewCacheFile = !criticalSharedRelcachesBuilt;
3641 
3642  /*
3643  * relation mapper needs initialized too
3644  */
3646 
3647  /*
3648  * switch to cache memory context
3649  */
3651 
3652  /*
3653  * Try to load the local relcache cache file. If unsuccessful, bootstrap
3654  * the cache with pre-made descriptors for the critical "nailed-in" system
3655  * catalogs.
3656  */
3657  if (IsBootstrapProcessingMode() ||
3658  !load_relcache_init_file(false))
3659  {
3660  needNewCacheFile = true;
3661 
3662  formrdesc("pg_class", RelationRelation_Rowtype_Id, false,
3663  true, Natts_pg_class, Desc_pg_class);
3664  formrdesc("pg_attribute", AttributeRelation_Rowtype_Id, false,
3666  formrdesc("pg_proc", ProcedureRelation_Rowtype_Id, false,
3667  true, Natts_pg_proc, Desc_pg_proc);
3668  formrdesc("pg_type", TypeRelation_Rowtype_Id, false,
3669  true, Natts_pg_type, Desc_pg_type);
3670 
3671 #define NUM_CRITICAL_LOCAL_RELS 4 /* fix if you change list above */
3672  }
3673 
3674  MemoryContextSwitchTo(oldcxt);
3675 
3676  /* In bootstrap mode, the faked-up formrdesc info is all we'll have */
3678  return;
3679 
3680  /*
3681  * If we didn't get the critical system indexes loaded into relcache, do
3682  * so now. These are critical because the catcache and/or opclass cache
3683  * depend on them for fetches done during relcache load. Thus, we have an
3684  * infinite-recursion problem. We can break the recursion by doing
3685  * heapscans instead of indexscans at certain key spots. To avoid hobbling
3686  * performance, we only want to do that until we have the critical indexes
3687  * loaded into relcache. Thus, the flag criticalRelcachesBuilt is used to
3688  * decide whether to do heapscan or indexscan at the key spots, and we set
3689  * it true after we've loaded the critical indexes.
3690  *
3691  * The critical indexes are marked as "nailed in cache", partly to make it
3692  * easy for load_relcache_init_file to count them, but mainly because we
3693  * cannot flush and rebuild them once we've set criticalRelcachesBuilt to
3694  * true. (NOTE: perhaps it would be possible to reload them by
3695  * temporarily setting criticalRelcachesBuilt to false again. For now,
3696  * though, we just nail 'em in.)
3697  *
3698  * RewriteRelRulenameIndexId and TriggerRelidNameIndexId are not critical
3699  * in the same way as the others, because the critical catalogs don't
3700  * (currently) have any rules or triggers, and so these indexes can be
3701  * rebuilt without inducing recursion. However they are used during
3702  * relcache load when a rel does have rules or triggers, so we choose to
3703  * nail them for performance reasons.
3704  */
3706  {
3712  IndexRelationId);
3721 
3722 #define NUM_CRITICAL_LOCAL_INDEXES 7 /* fix if you change list above */
3723 
3724  criticalRelcachesBuilt = true;
3725  }
3726 
3727  /*
3728  * Process critical shared indexes too.
3729  *
3730  * DatabaseNameIndexId isn't critical for relcache loading, but rather for
3731  * initial lookup of MyDatabaseId, without which we'll never find any
3732  * non-shared catalogs at all. Autovacuum calls InitPostgres with a
3733  * database OID, so it instead depends on DatabaseOidIndexId. We also
3734  * need to nail up some indexes on pg_authid and pg_auth_members for use
3735  * during client authentication. SharedSecLabelObjectIndexId isn't
3736  * critical for the core system, but authentication hooks might be
3737  * interested in it.
3738  */
3740  {
3753 
3754 #define NUM_CRITICAL_SHARED_INDEXES 6 /* fix if you change list above */
3755 
3757  }
3758 
3759  /*
3760  * Now, scan all the relcache entries and update anything that might be
3761  * wrong in the results from formrdesc or the relcache cache file. If we
3762  * faked up relcache entries using formrdesc, then read the real pg_class
3763  * rows and replace the fake entries with them. Also, if any of the
3764  * relcache entries have rules, triggers, or security policies, load that
3765  * info the hard way since it isn't recorded in the cache file.
3766  *
3767  * Whenever we access the catalogs to read data, there is a possibility of
3768  * a shared-inval cache flush causing relcache entries to be removed.
3769  * Since hash_seq_search only guarantees to still work after the *current*
3770  * entry is removed, it's unsafe to continue the hashtable scan afterward.
3771  * We handle this by restarting the scan from scratch after each access.
3772  * This is theoretically O(N^2), but the number of entries that actually
3773  * need to be fixed is small enough that it doesn't matter.
3774  */
3775  hash_seq_init(&status, RelationIdCache);
3776 
3777  while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
3778  {
3779  Relation relation = idhentry->reldesc;
3780  bool restart = false;
3781 
3782  /*
3783  * Make sure *this* entry doesn't get flushed while we work with it.
3784  */
3786 
3787  /*
3788  * If it's a faked-up entry, read the real pg_class tuple.
3789  */
3790  if (relation->rd_rel->relowner == InvalidOid)
3791  {
3792  HeapTuple htup;
3793  Form_pg_class relp;
3794 
3795  htup = SearchSysCache1(RELOID,
3796  ObjectIdGetDatum(RelationGetRelid(relation)));
3797  if (!HeapTupleIsValid(htup))
3798  elog(FATAL, "cache lookup failed for relation %u",
3799  RelationGetRelid(relation));
3800  relp = (Form_pg_class) GETSTRUCT(htup);
3801 
3802  /*
3803  * Copy tuple to relation->rd_rel. (See notes in
3804  * AllocateRelationDesc())
3805  */
3806  memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
3807 
3808  /* Update rd_options while we have the tuple */
3809  if (relation->rd_options)
3810  pfree(relation->rd_options);
3811  RelationParseRelOptions(relation, htup);
3812 
3813  /*
3814  * Check the values in rd_att were set up correctly. (We cannot
3815  * just copy them over now: formrdesc must have set up the rd_att
3816  * data correctly to start with, because it may already have been
3817  * copied into one or more catcache entries.)
3818  */
3819  Assert(relation->rd_att->tdtypeid == relp->reltype);
3820  Assert(relation->rd_att->tdtypmod == -1);
3821  Assert(relation->rd_att->tdhasoid == relp->relhasoids);
3822 
3823  ReleaseSysCache(htup);
3824 
3825  /* relowner had better be OK now, else we'll loop forever */
3826  if (relation->rd_rel->relowner == InvalidOid)
3827  elog(ERROR, "invalid relowner in pg_class entry for \"%s\"",
3828  RelationGetRelationName(relation));
3829 
3830  restart = true;
3831  }
3832 
3833  /*
3834  * Fix data that isn't saved in relcache cache file.
3835  *
3836  * relhasrules or relhastriggers could possibly be wrong or out of
3837  * date. If we don't actually find any rules or triggers, clear the
3838  * local copy of the flag so that we don't get into an infinite loop
3839  * here. We don't make any attempt to fix the pg_class entry, though.
3840  */
3841  if (relation->rd_rel->relhasrules && relation->rd_rules == NULL)
3842  {
3843  RelationBuildRuleLock(relation);
3844  if (relation->rd_rules == NULL)
3845  relation->rd_rel->relhasrules = false;
3846  restart = true;
3847  }
3848  if (relation->rd_rel->relhastriggers && relation->trigdesc == NULL)
3849  {
3850  RelationBuildTriggers(relation);
3851  if (relation->trigdesc == NULL)
3852  relation->rd_rel->relhastriggers = false;
3853  restart = true;
3854  }
3855 
3856  /*
3857  * Re-load the row security policies if the relation has them, since
3858  * they are not preserved in the cache. Note that we can never NOT
3859  * have a policy while relrowsecurity is true,
3860  * RelationBuildRowSecurity will create a single default-deny policy
3861  * if there is no policy defined in pg_policy.
3862  */
3863  if (relation->rd_rel->relrowsecurity && relation->rd_rsdesc == NULL)
3864  {
3865  RelationBuildRowSecurity(relation);
3866 
3867  Assert(relation->rd_rsdesc != NULL);
3868  restart = true;
3869  }
3870 
3871  /*
3872  * Reload the partition key and descriptor for a partitioned table.
3873  */
3874  if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
3875  relation->rd_partkey == NULL)
3876  {
3877  RelationBuildPartitionKey(relation);
3878  Assert(relation->rd_partkey != NULL);
3879 
3880  restart = true;
3881  }
3882 
3883  if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
3884  relation->rd_partdesc == NULL)
3885  {
3886  RelationBuildPartitionDesc(relation);
3887  Assert(relation->rd_partdesc != NULL);
3888 
3889  restart = true;
3890  }
3891 
3892  /* Release hold on the relation */
3894 
3895  /* Now, restart the hashtable scan if needed */
3896  if (restart)
3897  {
3898  hash_seq_term(&status);
3899  hash_seq_init(&status, RelationIdCache);
3900  }
3901  }
3902 
3903  /*
3904  * Lastly, write out new relcache cache files if needed. We don't bother
3905  * to distinguish cases where only one of the two needs an update.
3906  */
3907  if (needNewCacheFile)
3908  {
3909  /*
3910  * Force all the catcaches to finish initializing and thereby open the
3911  * catalogs and indexes they use. This will preload the relcache with
3912  * entries for all the most important system catalogs and indexes, so
3913  * that the init files will be most useful for future backends.
3914  */
3916 
3917  /* now write the files */
3919  write_relcache_init_file(false);
3920  }
3921 }
3922 
3923 /*
3924  * Load one critical system index into the relcache
3925  *
3926  * indexoid is the OID of the target index, heapoid is the OID of the catalog
3927  * it belongs to.
3928  */
3929 static void
3930 load_critical_index(Oid indexoid, Oid heapoid)
3931 {
3932  Relation ird;
3933 
3934  /*
3935  * We must lock the underlying catalog before locking the index to avoid
3936  * deadlock, since RelationBuildDesc might well need to read the catalog,
3937  * and if anyone else is exclusive-locking this catalog and index they'll
3938  * be doing it in that order.
3939  */
3940  LockRelationOid(heapoid, AccessShareLock);
3941  LockRelationOid(indexoid, AccessShareLock);
3942  ird = RelationBuildDesc(indexoid, true);
3943  if (ird == NULL)
3944  elog(PANIC, "could not open critical system index %u", indexoid);
3945  ird->rd_isnailed = true;
3946  ird->rd_refcnt = 1;
3949 }
3950 
3951 /*
3952  * GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
3953  * GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
3954  *
3955  * We need this kluge because we have to be able to access non-fixed-width
3956  * fields of pg_class and pg_index before we have the standard catalog caches
3957  * available. We use predefined data that's set up in just the same way as
3958  * the bootstrapped reldescs used by formrdesc(). The resulting tupdesc is
3959  * not 100% kosher: it does not have the correct rowtype OID in tdtypeid, nor
3960  * does it have a TupleConstr field. But it's good enough for the purpose of
3961  * extracting fields.
3962  */
3963 static TupleDesc
3965  bool hasoids)
3966 {
3967  TupleDesc result;
3968  MemoryContext oldcxt;
3969  int i;
3970 
3972 
3973  result = CreateTemplateTupleDesc(natts, hasoids);
3974  result->tdtypeid = RECORDOID; /* not right, but we don't care */
3975  result->tdtypmod = -1;
3976 
3977  for (i = 0; i < natts; i++)
3978  {
3979  memcpy(TupleDescAttr(result, i), &attrs[i], ATTRIBUTE_FIXED_PART_SIZE);
3980  /* make sure attcacheoff is valid */
3981  TupleDescAttr(result, i)->attcacheoff = -1;
3982  }
3983 
3984  /* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
3985  TupleDescAttr(result, 0)->attcacheoff = 0;
3986 
3987  /* Note: we don't bother to set up a TupleConstr entry */
3988 
3989  MemoryContextSwitchTo(oldcxt);
3990 
3991  return result;
3992 }
3993 
3994 static TupleDesc
3996 {
3997  static TupleDesc pgclassdesc = NULL;
3998 
3999  /* Already done? */
4000  if (pgclassdesc == NULL)
4002  Desc_pg_class,
4003  true);
4004 
4005  return pgclassdesc;
4006 }
4007 
4008 static TupleDesc
4010 {
4011  static TupleDesc pgindexdesc = NULL;
4012 
4013  /* Already done? */
4014  if (pgindexdesc == NULL)
4016  Desc_pg_index,
4017  false);
4018 
4019  return pgindexdesc;
4020 }
4021 
4022 /*
4023  * Load any default attribute value definitions for the relation.
4024  */
4025 static void
4027 {
4028  AttrDefault *attrdef = relation->rd_att->constr->defval;
4029  int ndef = relation->rd_att->constr->num_defval;
4030  Relation adrel;
4031  SysScanDesc adscan;
4032  ScanKeyData skey;
4033  HeapTuple htup;
4034  Datum val;
4035  bool isnull;
4036  int found;
4037  int i;
4038 
4039  ScanKeyInit(&skey,
4041  BTEqualStrategyNumber, F_OIDEQ,
4042  ObjectIdGetDatum(RelationGetRelid(relation)));
4043 
4045  adscan = systable_beginscan(adrel, AttrDefaultIndexId, true,
4046  NULL, 1, &skey);
4047  found = 0;
4048 
4049  while (HeapTupleIsValid(htup = systable_getnext(adscan)))
4050  {
4051  Form_pg_attrdef adform = (Form_pg_attrdef) GETSTRUCT(htup);
4052  Form_pg_attribute attr = TupleDescAttr(relation->rd_att, adform->adnum - 1);
4053 
4054  for (i = 0; i < ndef; i++)
4055  {
4056  if (adform->adnum != attrdef[i].adnum)
4057  continue;
4058  if (attrdef[i].adbin != NULL)
4059  elog(WARNING, "multiple attrdef records found for attr %s of rel %s",
4060  NameStr(attr->attname),
4061  RelationGetRelationName(relation));
4062  else
4063  found++;
4064 
4065  val = fastgetattr(htup,
4067  adrel->rd_att, &isnull);
4068  if (isnull)
4069  elog(WARNING, "null adbin for attr %s of rel %s",
4070  NameStr(attr->attname),
4071  RelationGetRelationName(relation));
4072  else
4073  {
4074  /* detoast and convert to cstring in caller's context */
4075  char *s = TextDatumGetCString(val);
4076 
4078  pfree(s);
4079  }
4080  break;
4081  }
4082 
4083  if (i >= ndef)
4084  elog(WARNING, "unexpected attrdef record found for attr %d of rel %s",
4085  adform->adnum, RelationGetRelationName(relation));
4086  }
4087 
4088  systable_endscan(adscan);
4089  heap_close(adrel, AccessShareLock);
4090 
4091  if (found != ndef)
4092  elog(WARNING, "%d attrdef record(s) missing for rel %s",
4093  ndef - found, RelationGetRelationName(relation));
4094 }
4095 
4096 /*
4097  * Load any check constraints for the relation.
4098  */
4099 static void
4101 {
4102  ConstrCheck *check = relation->rd_att->constr->check;
4103  int ncheck = relation->rd_att->constr->num_check;
4104  Relation conrel;
4105  SysScanDesc conscan;
4106  ScanKeyData skey[1];
4107  HeapTuple htup;
4108  int found = 0;
4109 
4110  ScanKeyInit(&skey[0],
4112  BTEqualStrategyNumber, F_OIDEQ,
4113  ObjectIdGetDatum(RelationGetRelid(relation)));
4114 
4116  conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
4117  NULL, 1, skey);
4118 
4119  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4120  {
4122  Datum val;
4123  bool isnull;
4124  char *s;
4125 
4126  /* We want check constraints only */
4127  if (conform->contype != CONSTRAINT_CHECK)
4128  continue;
4129 
4130  if (found >= ncheck)
4131  elog(ERROR, "unexpected constraint record found for rel %s",
4132  RelationGetRelationName(relation));
4133 
4134  check[found].ccvalid = conform->convalidated;
4135  check[found].ccnoinherit = conform->connoinherit;
4137  NameStr(conform->conname));
4138 
4139  /* Grab and test conbin is actually set */
4140  val = fastgetattr(htup,
4142  conrel->rd_att, &isnull);
4143  if (isnull)
4144  elog(ERROR, "null conbin for rel %s",
4145  RelationGetRelationName(relation));
4146 
4147  /* detoast and convert to cstring in caller's context */
4148  s = TextDatumGetCString(val);
4149  check[found].ccbin = MemoryContextStrdup(CacheMemoryContext, s);
4150  pfree(s);
4151 
4152  found++;
4153  }
4154 
4155  systable_endscan(conscan);
4156  heap_close(conrel, AccessShareLock);
4157 
4158  if (found != ncheck)
4159  elog(ERROR, "%d constraint record(s) missing for rel %s",
4160  ncheck - found, RelationGetRelationName(relation));
4161 
4162  /* Sort the records so that CHECKs are applied in a deterministic order */
4163  if (ncheck > 1)
4164  qsort(check, ncheck, sizeof(ConstrCheck), CheckConstraintCmp);
4165 }
4166 
4167 /*
4168  * qsort comparator to sort ConstrCheck entries by name
4169  */
4170 static int
4171 CheckConstraintCmp(const void *a, const void *b)
4172 {
4173  const ConstrCheck *ca = (const ConstrCheck *) a;
4174  const ConstrCheck *cb = (const ConstrCheck *) b;
4175 
4176  return strcmp(ca->ccname, cb->ccname);
4177 }
4178 
4179 /*
4180  * RelationGetFKeyList -- get a list of foreign key info for the relation
4181  *
4182  * Returns a list of ForeignKeyCacheInfo structs, one per FK constraining
4183  * the given relation. This data is a direct copy of relevant fields from
4184  * pg_constraint. The list items are in no particular order.
4185  *
4186  * CAUTION: the returned list is part of the relcache's data, and could
4187  * vanish in a relcache entry reset. Callers must inspect or copy it
4188  * before doing anything that might trigger a cache flush, such as
4189  * system catalog accesses. copyObject() can be used if desired.
4190  * (We define it this way because current callers want to filter and
4191  * modify the list entries anyway, so copying would be a waste of time.)
4192  */
4193 List *
4195 {
4196  List *result;
4197  Relation conrel;
4198  SysScanDesc conscan;
4199  ScanKeyData skey;
4200  HeapTuple htup;
4201  List *oldlist;
4202  MemoryContext oldcxt;
4203 
4204  /* Quick exit if we already computed the list. */
4205  if (relation->rd_fkeyvalid)
4206  return relation->rd_fkeylist;
4207 
4208  /* Fast path: if it doesn't have any triggers, it can't have FKs */
4209  if (!relation->rd_rel->relhastriggers)
4210  return NIL;
4211 
4212  /*
4213  * We build the list we intend to return (in the caller's context) while
4214  * doing the scan. After successfully completing the scan, we copy that
4215  * list into the relcache entry. This avoids cache-context memory leakage
4216  * if we get some sort of error partway through.
4217  */
4218  result = NIL;
4219 
4220  /* Prepare to scan pg_constraint for entries having conrelid = this rel. */
4221  ScanKeyInit(&skey,
4223  BTEqualStrategyNumber, F_OIDEQ,
4224  ObjectIdGetDatum(RelationGetRelid(relation)));
4225 
4227  conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
4228  NULL, 1, &skey);
4229 
4230  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
4231  {
4232  Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup);
4233  ForeignKeyCacheInfo *info;
4234  Datum adatum;
4235  bool isnull;
4236  ArrayType *arr;
4237  int nelem;
4238 
4239  /* consider only foreign keys */
4240  if (constraint->contype != CONSTRAINT_FOREIGN)
4241  continue;
4242 
4243  info = makeNode(ForeignKeyCacheInfo);
4244  info->conrelid = constraint->conrelid;
4245  info->confrelid = constraint->confrelid;
4246 
4247  /* Extract data from conkey field */
4248  adatum = fastgetattr(htup, Anum_pg_constraint_conkey,
4249  conrel->rd_att, &isnull);
4250  if (isnull)
4251  elog(ERROR, "null conkey for rel %s",
4252  RelationGetRelationName(relation));
4253 
4254  arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */
4255  nelem = ARR_DIMS(arr)[0];
4256  if (ARR_NDIM(arr) != 1 ||
4257  nelem < 1 ||
4258  nelem > INDEX_MAX_KEYS ||
4259  ARR_HASNULL(arr) ||
4260  ARR_ELEMTYPE(arr) != INT2OID)
4261  elog(ERROR, "conkey is not a 1-D smallint array");
4262 
4263  info->nkeys = nelem;
4264  memcpy(info->conkey, ARR_DATA_PTR(arr), nelem * sizeof(AttrNumber));
4265 
4266  /* Likewise for confkey */
4267  adatum = fastgetattr(htup, Anum_pg_constraint_confkey,
4268  conrel->rd_att, &isnull);
4269  if (isnull)
4270  elog(ERROR, "null confkey for rel %s",
4271  RelationGetRelationName(relation));
4272 
4273  arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */
4274  nelem = ARR_DIMS(arr)[0];
4275  if (ARR_NDIM(arr) != 1 ||
4276  nelem != info->nkeys ||
4277  ARR_HASNULL(arr) ||
4278  ARR_ELEMTYPE(arr) != INT2OID)
4279  elog(ERROR, "confkey is not a 1-D smallint array");
4280 
4281  memcpy(info->confkey, ARR_DATA_PTR(arr), nelem * sizeof(AttrNumber));
4282 
4283  /* Likewise for conpfeqop */
4285  conrel->rd_att, &isnull);
4286  if (isnull)
4287  elog(ERROR, "null conpfeqop for rel %s",
4288  RelationGetRelationName(relation));
4289 
4290  arr = DatumGetArrayTypeP(adatum); /* ensure not toasted */
4291  nelem = ARR_DIMS(arr)[0];
4292  if (ARR_NDIM(arr) != 1 ||
4293  nelem != info->nkeys ||
4294  ARR_HASNULL(arr) ||
4295  ARR_ELEMTYPE(arr) != OIDOID)
4296  elog(ERROR, "conpfeqop is not a 1-D OID array");
4297 
4298  memcpy(info->conpfeqop, ARR_DATA_PTR(arr), nelem * sizeof(Oid));
4299 
4300  /* Add FK's node to the result list */
4301  result = lappend(result, info);
4302  }
4303 
4304  systable_endscan(conscan);
4305  heap_close(conrel, AccessShareLock);
4306 
4307  /* Now save a copy of the completed list in the relcache entry. */
4309  oldlist = relation->rd_fkeylist;
4310  relation->rd_fkeylist = copyObject(result);
4311  relation->rd_fkeyvalid = true;
4312  MemoryContextSwitchTo(oldcxt);
4313 
4314  /* Don't leak the old list, if there is one */
4315  list_free_deep(oldlist);
4316 
4317  return result;
4318 }
4319 
4320 /*
4321  * RelationGetIndexList -- get a list of OIDs of indexes on this relation
4322  *
4323  * The index list is created only if someone requests it. We scan pg_index
4324  * to find relevant indexes, and add the list to the relcache entry so that
4325  * we won't have to compute it again. Note that shared cache inval of a
4326  * relcache entry will delete the old list and set rd_indexvalid to 0,
4327  * so that we must recompute the index list on next request. This handles
4328  * creation or deletion of an index.
4329  *
4330  * Indexes that are marked not IndexIsLive are omitted from the returned list.
4331  * Such indexes are expected to be dropped momentarily, and should not be
4332  * touched at all by any caller of this function.
4333  *
4334  * The returned list is guaranteed to be sorted in order by OID. This is
4335  * needed by the executor, since for index types that we obtain exclusive
4336  * locks on when updating the index, all backends must lock the indexes in
4337  * the same order or we will get deadlocks (see ExecOpenIndices()). Any
4338  * consistent ordering would do, but ordering by OID is easy.
4339  *
4340  * Since shared cache inval causes the relcache's copy of the list to go away,
4341  * we return a copy of the list palloc'd in the caller's context. The caller
4342  * may list_free() the returned list after scanning it. This is necessary
4343  * since the caller will typically be doing syscache lookups on the relevant
4344  * indexes, and syscache lookup could cause SI messages to be processed!
4345  *
4346  * We also update rd_oidindex, which this module treats as effectively part
4347  * of the index list. rd_oidindex is valid when rd_indexvalid isn't zero;
4348  * it is the pg_class OID of a unique index on OID when the relation has one,
4349  * and InvalidOid if there is no such index.
4350  *
4351  * In exactly the same way, we update rd_pkindex, which is the OID of the
4352  * relation's primary key index if any, else InvalidOid; and rd_replidindex,
4353  * which is the pg_class OID of an index to be used as the relation's
4354  * replication identity index, or InvalidOid if there is no such index.
4355  */
4356 List *
4358 {
4359  Relation indrel;
4360  SysScanDesc indscan;
4361  ScanKeyData skey;
4362  HeapTuple htup;
4363  List *result;
4364  List *oldlist;
4365  char replident = relation->rd_rel->relreplident;
4366  Oid oidIndex = InvalidOid;
4367  Oid pkeyIndex = InvalidOid;
4368  Oid candidateIndex = InvalidOid;
4369  MemoryContext oldcxt;
4370 
4371  /* Quick exit if we already computed the list. */
4372  if (relation->rd_indexvalid != 0)
4373  return list_copy(relation->rd_indexlist);
4374 
4375  /*
4376  * We build the list we intend to return (in the caller's context) while
4377  * doing the scan. After successfully completing the scan, we copy that
4378  * list into the relcache entry. This avoids cache-context memory leakage
4379  * if we get some sort of error partway through.
4380  */
4381  result = NIL;
4382  oidIndex = InvalidOid;
4383 
4384  /* Prepare to scan pg_index for entries having indrelid = this rel. */
4385  ScanKeyInit(&skey,
4387  BTEqualStrategyNumber, F_OIDEQ,
4388  ObjectIdGetDatum(RelationGetRelid(relation)));
4389 
4391  indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true,
4392  NULL, 1, &skey);
4393 
4394  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4395  {
4397  Datum indclassDatum;
4398  oidvector *indclass;
4399  bool isnull;
4400 
4401  /*
4402  * Ignore any indexes that are currently being dropped. This will
4403  * prevent them from being searched, inserted into, or considered in
4404  * HOT-safety decisions. It's unsafe to touch such an index at all
4405  * since its catalog entries could disappear at any instant.
4406  */
4407  if (!IndexIsLive(index))
4408  continue;
4409 
4410  /* Add index's OID to result list in the proper order */
4411  result = insert_ordered_oid(result, index->indexrelid);
4412 
4413  /*
4414  * indclass cannot be referenced directly through the C struct,
4415  * because it comes after the variable-width indkey field. Must
4416  * extract the datum the hard way...
4417  */
4418  indclassDatum = heap_getattr(htup,
4421  &isnull);
4422  Assert(!isnull);
4423  indclass = (oidvector *) DatumGetPointer(indclassDatum);
4424 
4425  /*
4426  * Invalid, non-unique, non-immediate or predicate indexes aren't
4427  * interesting for either oid indexes or replication identity indexes,
4428  * so don't check them.
4429  */
4430  if (!IndexIsValid(index) || !index->indisunique ||
4431  !index->indimmediate ||
4433  continue;
4434 
4435  /* Check to see if is a usable btree index on OID */
4436  if (index->indnatts == 1 &&
4437  index->indkey.values[0] == ObjectIdAttributeNumber &&
4438  indclass->values[0] == OID_BTREE_OPS_OID)
4439  oidIndex = index->indexrelid;
4440 
4441  /* remember primary key index if any */
4442  if (index->indisprimary)
4443  pkeyIndex = index->indexrelid;
4444 
4445  /* remember explicitly chosen replica index */
4446  if (index->indisreplident)
4447  candidateIndex = index->indexrelid;
4448  }
4449 
4450  systable_endscan(indscan);
4451 
4452  heap_close(indrel, AccessShareLock);
4453 
4454  /* Now save a copy of the completed list in the relcache entry. */
4456  oldlist = relation->rd_indexlist;
4457  relation->rd_indexlist = list_copy(result);
4458  relation->rd_oidindex = oidIndex;
4459  relation->rd_pkindex = pkeyIndex;
4460  if (replident == REPLICA_IDENTITY_DEFAULT && OidIsValid(pkeyIndex))
4461  relation->rd_replidindex = pkeyIndex;
4462  else if (replident == REPLICA_IDENTITY_INDEX && OidIsValid(candidateIndex))
4463  relation->rd_replidindex = candidateIndex;
4464  else
4465  relation->rd_replidindex = InvalidOid;
4466  relation->rd_indexvalid = 1;
4467  MemoryContextSwitchTo(oldcxt);
4468 
4469  /* Don't leak the old list, if there is one */
4470  list_free(oldlist);
4471 
4472  return result;
4473 }
4474 
4475 /*
4476  * RelationGetStatExtList
4477  * get a list of OIDs of statistics objects on this relation
4478  *
4479  * The statistics list is created only if someone requests it, in a way
4480  * similar to RelationGetIndexList(). We scan pg_statistic_ext to find
4481  * relevant statistics, and add the list to the relcache entry so that we
4482  * won't have to compute it again. Note that shared cache inval of a
4483  * relcache entry will delete the old list and set rd_statvalid to 0,
4484  * so that we must recompute the statistics list on next request. This
4485  * handles creation or deletion of a statistics object.
4486  *
4487  * The returned list is guaranteed to be sorted in order by OID, although
4488  * this is not currently needed.
4489  *
4490  * Since shared cache inval causes the relcache's copy of the list to go away,
4491  * we return a copy of the list palloc'd in the caller's context. The caller
4492  * may list_free() the returned list after scanning it. This is necessary
4493  * since the caller will typically be doing syscache lookups on the relevant
4494  * statistics, and syscache lookup could cause SI messages to be processed!
4495  */
4496 List *
4498 {
4499  Relation indrel;
4500  SysScanDesc indscan;
4501  ScanKeyData skey;
4502  HeapTuple htup;
4503  List *result;
4504  List *oldlist;
4505  MemoryContext oldcxt;
4506 
4507  /* Quick exit if we already computed the list. */
4508  if (relation->rd_statvalid != 0)
4509  return list_copy(relation->rd_statlist);
4510 
4511  /*
4512  * We build the list we intend to return (in the caller's context) while
4513  * doing the scan. After successfully completing the scan, we copy that
4514  * list into the relcache entry. This avoids cache-context memory leakage
4515  * if we get some sort of error partway through.
4516  */
4517  result = NIL;
4518 
4519  /*
4520  * Prepare to scan pg_statistic_ext for entries having stxrelid = this
4521  * rel.
4522  */
4523  ScanKeyInit(&skey,
4525  BTEqualStrategyNumber, F_OIDEQ,
4526  ObjectIdGetDatum(RelationGetRelid(relation)));
4527 
4529  indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true,
4530  NULL, 1, &skey);
4531 
4532  while (HeapTupleIsValid(htup = systable_getnext(indscan)))
4533  result = insert_ordered_oid(result, HeapTupleGetOid(htup));
4534 
4535  systable_endscan(indscan);
4536 
4537  heap_close(indrel, AccessShareLock);
4538 
4539  /* Now save a copy of the completed list in the relcache entry. */
4541  oldlist = relation->rd_statlist;
4542  relation->rd_statlist = list_copy(result);
4543 
4544  relation->rd_statvalid = true;
4545  MemoryContextSwitchTo(oldcxt);
4546 
4547  /* Don't leak the old list, if there is one */
4548  list_free(oldlist);
4549 
4550  return result;
4551 }
4552 
4553 /*
4554  * insert_ordered_oid
4555  * Insert a new Oid into a sorted list of Oids, preserving ordering
4556  *
4557  * Building the ordered list this way is O(N^2), but with a pretty small
4558  * constant, so for the number of entries we expect it will probably be
4559  * faster than trying to apply qsort(). Most tables don't have very many
4560  * indexes...
4561  */
4562 static List *
4564 {
4565  ListCell *prev;
4566 
4567  /* Does the datum belong at the front? */
4568  if (list == NIL || datum < linitial_oid(list))
4569  return lcons_oid(datum, list);
4570  /* No, so find the entry it belongs after */
4571  prev = list_head(list);
4572  for (;;)
4573  {
4574  ListCell *curr = lnext(prev);
4575 
4576  if (curr == NULL || datum < lfirst_oid(curr))
4577  break; /* it belongs after 'prev', before 'curr' */
4578 
4579  prev = curr;
4580  }
4581  /* Insert datum into list after 'prev' */
4582  lappend_cell_oid(list, prev, datum);
4583  return list;
4584 }
4585 
4586 /*
4587  * RelationSetIndexList -- externally force the index list contents
4588  *
4589  * This is used to temporarily override what we think the set of valid
4590  * indexes is (including the presence or absence of an OID index).
4591  * The forcing will be valid only until transaction commit or abort.
4592  *
4593  * This should only be applied to nailed relations, because in a non-nailed
4594  * relation the hacked index list could be lost at any time due to SI
4595  * messages. In practice it is only used on pg_class (see REINDEX).
4596  *
4597  * It is up to the caller to make sure the given list is correctly ordered.
4598  *
4599  * We deliberately do not change rd_indexattr here: even when operating
4600  * with a temporary partial index list, HOT-update decisions must be made
4601  * correctly with respect to the full index set. It is up to the caller
4602  * to ensure that a correct rd_indexattr set has been cached before first
4603  * calling RelationSetIndexList; else a subsequent inquiry might cause a
4604  * wrong rd_indexattr set to get computed and cached. Likewise, we do not
4605  * touch rd_keyattr, rd_pkattr or rd_idattr.
4606  */
4607 void
4608 RelationSetIndexList(Relation relation, List *indexIds, Oid oidIndex)
4609 {
4610  MemoryContext oldcxt;
4611 
4612  Assert(relation->rd_isnailed);
4613  /* Copy the list into the cache context (could fail for lack of mem) */
4615  indexIds = list_copy(indexIds);
4616  MemoryContextSwitchTo(oldcxt);
4617  /* Okay to replace old list */
4618  list_free(relation->rd_indexlist);
4619  relation->rd_indexlist = indexIds;
4620  relation->rd_oidindex = oidIndex;
4621 
4622  /*
4623  * For the moment, assume the target rel hasn't got a pk or replica index.
4624  * We'll load them on demand in the API that wraps access to them.
4625  */
4626  relation->rd_pkindex = InvalidOid;
4627  relation->rd_replidindex = InvalidOid;
4628  relation->rd_indexvalid = 2; /* mark list as forced */
4629  /* Flag relation as needing eoxact cleanup (to reset the list) */
4630  EOXactListAdd(relation);
4631 }
4632 
4633 /*
4634  * RelationGetOidIndex -- get the pg_class OID of the relation's OID index
4635  *
4636  * Returns InvalidOid if there is no such index.
4637  */
4638 Oid
4640 {
4641  List *ilist;
4642 
4643  /*
4644  * If relation doesn't have OIDs at all, caller is probably confused. (We
4645  * could just silently return InvalidOid, but it seems better to throw an
4646  * assertion.)
4647  */
4648  Assert(relation->rd_rel->relhasoids);
4649 
4650  if (relation->rd_indexvalid == 0)
4651  {
4652  /* RelationGetIndexList does the heavy lifting. */
4653  ilist = RelationGetIndexList(relation);
4654  list_free(ilist);
4655  Assert(relation->rd_indexvalid != 0);
4656  }
4657 
4658  return relation->rd_oidindex;
4659 }
4660 
4661 /*
4662  * RelationGetPrimaryKeyIndex -- get OID of the relation's primary key index
4663  *
4664  * Returns InvalidOid if there is no such index.
4665  */
4666 Oid
4668 {
4669  List *ilist;
4670 
4671  if (relation->rd_indexvalid == 0)
4672  {
4673  /* RelationGetIndexList does the heavy lifting. */
4674  ilist = RelationGetIndexList(relation);
4675  list_free(ilist);
4676  Assert(relation->rd_indexvalid != 0);
4677  }
4678 
4679  return relation->rd_pkindex;
4680 }
4681 
4682 /*
4683  * RelationGetReplicaIndex -- get OID of the relation's replica identity index
4684  *
4685  * Returns InvalidOid if there is no such index.
4686  */
4687 Oid
4689 {
4690  List *ilist;
4691 
4692  if (relation->rd_indexvalid == 0)
4693  {
4694  /* RelationGetIndexList does the heavy lifting. */
4695  ilist = RelationGetIndexList(relation);
4696  list_free(ilist);
4697  Assert(relation->rd_indexvalid != 0);
4698  }
4699 
4700  return relation->rd_replidindex;
4701 }
4702 
4703 /*
4704  * RelationGetIndexExpressions -- get the index expressions for an index
4705  *
4706  * We cache the result of transforming pg_index.indexprs into a node tree.
4707  * If the rel is not an index or has no expressional columns, we return NIL.
4708  * Otherwise, the returned tree is copied into the caller's memory context.
4709  * (We don't want to return a pointer to the relcache copy, since it could
4710  * disappear due to relcache invalidation.)
4711  */
4712 List *
4714 {
4715  List *result;
4716  Datum exprsDatum;
4717  bool isnull;
4718  char *exprsString;
4719  MemoryContext oldcxt;
4720 
4721  /* Quick exit if we already computed the result. */
4722  if (relation->rd_indexprs)
4723  return copyObject(relation->rd_indexprs);
4724 
4725  /* Quick exit if there is nothing to do. */
4726  if (relation->rd_indextuple == NULL ||
4728  return NIL;
4729 
4730  /*
4731  * We build the tree we intend to return in the caller's context. After
4732  * successfully completing the work, we copy it into the relcache entry.
4733  * This avoids problems if we get some sort of error partway through.
4734  */
4735  exprsDatum = heap_getattr(relation->rd_indextuple,
4738  &isnull);
4739  Assert(!isnull);
4740  exprsString = TextDatumGetCString(exprsDatum);
4741  result = (List *) stringToNode(exprsString);
4742  pfree(exprsString);
4743 
4744  /*
4745  * Run the expressions through eval_const_expressions. This is not just an
4746  * optimization, but is necessary, because the planner will be comparing
4747  * them to similarly-processed qual clauses, and may fail to detect valid
4748  * matches without this. We don't bother with canonicalize_qual, however.
4749  */
4750  result = (List *) eval_const_expressions(NULL, (Node *) result);
4751 
4752  result = (List *) canonicalize_qual((Expr *) result);
4753 
4754  /* May as well fix opfuncids too */
4755  fix_opfuncids((Node *) result);
4756 
4757  /* Now save a copy of the completed tree in the relcache entry. */
4758  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4759  relation->rd_indexprs = copyObject(result);
4760  MemoryContextSwitchTo(oldcxt);
4761 
4762  return result;
4763 }
4764 
4765 /*
4766  * RelationGetIndexPredicate -- get the index predicate for an index
4767  *
4768  * We cache the result of transforming pg_index.indpred into an implicit-AND
4769  * node tree (suitable for use in planning).
4770  * If the rel is not an index or has no predicate, we return NIL.
4771  * Otherwise, the returned tree is copied into the caller's memory context.
4772  * (We don't want to return a pointer to the relcache copy, since it could
4773  * disappear due to relcache invalidation.)
4774  */
4775 List *
4777 {
4778  List *result;
4779  Datum predDatum;
4780  bool isnull;
4781  char *predString;
4782  MemoryContext oldcxt;
4783 
4784  /* Quick exit if we already computed the result. */
4785  if (relation->rd_indpred)
4786  return copyObject(relation->rd_indpred);
4787 
4788  /* Quick exit if there is nothing to do. */
4789  if (relation->rd_indextuple == NULL ||
4791  return NIL;
4792 
4793  /*
4794  * We build the tree we intend to return in the caller's context. After
4795  * successfully completing the work, we copy it into the relcache entry.
4796  * This avoids problems if we get some sort of error partway through.
4797  */
4798  predDatum = heap_getattr(relation->rd_indextuple,
4801  &isnull);
4802  Assert(!isnull);
4803  predString = TextDatumGetCString(predDatum);
4804  result = (List *) stringToNode(predString);
4805  pfree(predString);
4806 
4807  /*
4808  * Run the expression through const-simplification and canonicalization.
4809  * This is not just an optimization, but is necessary, because the planner
4810  * will be comparing it to similarly-processed qual clauses, and may fail
4811  * to detect valid matches without this. This must match the processing
4812  * done to qual clauses in preprocess_expression()! (We can skip the
4813  * stuff involving subqueries, however, since we don't allow any in index
4814  * predicates.)
4815  */
4816  result = (List *) eval_const_expressions(NULL, (Node *) result);
4817 
4818  result = (List *) canonicalize_qual((Expr *) result);
4819 
4820  /* Also convert to implicit-AND format */
4821  result = make_ands_implicit((Expr *) result);
4822 
4823  /* May as well fix opfuncids too */
4824  fix_opfuncids((Node *) result);
4825 
4826  /* Now save a copy of the completed tree in the relcache entry. */
4827  oldcxt = MemoryContextSwitchTo(relation->rd_indexcxt);
4828  relation->rd_indpred = copyObject(result);
4829  MemoryContextSwitchTo(oldcxt);
4830 
4831  return result;
4832 }
4833 
4834 /*
4835  * RelationGetIndexAttrBitmap -- get a bitmap of index attribute numbers
4836  *
4837  * The result has a bit set for each attribute used anywhere in the index
4838  * definitions of all the indexes on this relation. (This includes not only
4839  * simple index keys, but attributes used in expressions and partial-index
4840  * predicates.)
4841  *
4842  * Depending on attrKind, a bitmap covering the attnums for all index columns,
4843  * for all potential foreign key columns, or for all columns in the configured
4844  * replica identity index is returned.
4845  *
4846  * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
4847  * we can include system attributes (e.g., OID) in the bitmap representation.
4848  *
4849  * Caller had better hold at least RowExclusiveLock on the target relation
4850  * to ensure it is safe (deadlock-free) for us to take locks on the relation's
4851  * indexes. Note that since the introduction of CREATE INDEX CONCURRENTLY,
4852  * that lock level doesn't guarantee a stable set of indexes, so we have to
4853  * be prepared to retry here in case of a change in the set of indexes.
4854  *
4855  * The returned result is palloc'd in the caller's memory context and should
4856  * be bms_free'd when not needed anymore.
4857  */
4858 Bitmapset *
4860 {
4861  Bitmapset *indexattrs; /* indexed columns */
4862  Bitmapset *uindexattrs; /* columns in unique indexes */
4863  Bitmapset *pkindexattrs; /* columns in the primary index */
4864  Bitmapset *idindexattrs; /* columns in the replica identity */
4865  List *indexoidlist;
4866  List *newindexoidlist;
4867  Oid relpkindex;
4868  Oid relreplindex;
4869  ListCell *l;
4870  MemoryContext oldcxt;
4871 
4872  /* Quick exit if we already computed the result. */
4873  if (relation->rd_indexattr != NULL)
4874  {
4875  switch (attrKind)
4876  {
4877  case INDEX_ATTR_BITMAP_ALL:
4878  return bms_copy(relation->rd_indexattr);
4879  case INDEX_ATTR_BITMAP_KEY:
4880  return bms_copy(relation->rd_keyattr);
4882  return bms_copy(relation->rd_pkattr);
4884  return bms_copy(relation->rd_idattr);
4885  default:
4886  elog(ERROR, "unknown attrKind %u", attrKind);
4887  }
4888  }
4889 
4890  /* Fast path if definitely no indexes */
4891  if (!RelationGetForm(relation)->relhasindex)
4892  return NULL;
4893 
4894  /*
4895  * Get cached list of index OIDs. If we have to start over, we do so here.
4896  */
4897 restart:
4898  indexoidlist = RelationGetIndexList(relation);
4899 
4900  /* Fall out if no indexes (but relhasindex was set) */
4901  if (indexoidlist == NIL)
4902  return NULL;
4903 
4904  /*
4905  * Copy the rd_pkindex and rd_replidindex values computed by
4906  * RelationGetIndexList before proceeding. This is needed because a
4907  * relcache flush could occur inside index_open below, resetting the
4908  * fields managed by RelationGetIndexList. We need to do the work with
4909  * stable values of these fields.
4910  */
4911  relpkindex = relation->rd_pkindex;
4912  relreplindex = relation->rd_replidindex;
4913 
4914  /*
4915  * For each index, add referenced attributes to indexattrs.
4916  *
4917  * Note: we consider all indexes returned by RelationGetIndexList, even if
4918  * they are not indisready or indisvalid. This is important because an
4919  * index for which CREATE INDEX CONCURRENTLY has just started must be
4920  * included in HOT-safety decisions (see README.HOT). If a DROP INDEX
4921  * CONCURRENTLY is far enough along that we should ignore the index, it
4922  * won't be returned at all by RelationGetIndexList.
4923  */
4924  indexattrs = NULL;
4925  uindexattrs = NULL;
4926  pkindexattrs = NULL;
4927  idindexattrs = NULL;
4928  foreach(l, indexoidlist)
4929  {
4930  Oid indexOid = lfirst_oid(l);
4931  Relation indexDesc;
4932  IndexInfo *indexInfo;
4933  int i;
4934  bool isKey; /* candidate key */
4935  bool isPK; /* primary key */
4936  bool isIDKey; /* replica identity index */
4937 
4938  indexDesc = index_open(indexOid, AccessShareLock);
4939 
4940  /* Extract index key information from the index's pg_index row */
4941  indexInfo = BuildIndexInfo(indexDesc);
4942 
4943  /* Can this index be referenced by a foreign key? */
4944  isKey = indexInfo->ii_Unique &&
4945  indexInfo->ii_Expressions == NIL &&
4946  indexInfo->ii_Predicate == NIL;
4947 
4948  /* Is this a primary key? */
4949  isPK = (indexOid == relpkindex);
4950 
4951  /* Is this index the configured (or default) replica identity? */
4952  isIDKey = (indexOid == relreplindex);
4953 
4954  /* Collect simple attribute references */
4955  for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
4956  {
4957  int attrnum = indexInfo->ii_KeyAttrNumbers[i];
4958 
4959  if (attrnum != 0)
4960  {
4961  indexattrs = bms_add_member(indexattrs,
4963 
4964  if (isKey)
4965  uindexattrs = bms_add_member(uindexattrs,
4967 
4968  if (isPK)
4969  pkindexattrs = bms_add_member(pkindexattrs,
4971 
4972  if (isIDKey)
4973  idindexattrs = bms_add_member(idindexattrs,
4975  }
4976  }
4977 
4978  /* Collect all attributes used in expressions, too */
4979  pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs);
4980 
4981  /* Collect all attributes in the index predicate, too */
4982  pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs);
4983 
4984  index_close(indexDesc, AccessShareLock);
4985  }
4986 
4987  /*
4988  * During one of the index_opens in the above loop, we might have received
4989  * a relcache flush event on this relcache entry, which might have been
4990  * signaling a change in the rel's index list. If so, we'd better start
4991  * over to ensure we deliver up-to-date attribute bitmaps.
4992  */
4993  newindexoidlist = RelationGetIndexList(relation);
4994  if (equal(indexoidlist, newindexoidlist) &&
4995  relpkindex == relation->rd_pkindex &&
4996  relreplindex == relation->rd_replidindex)
4997  {
4998  /* Still the same index set, so proceed */
4999  list_free(newindexoidlist);
5000  list_free(indexoidlist);
5001  }
5002  else
5003  {
5004  /* Gotta do it over ... might as well not leak memory */
5005  list_free(newindexoidlist);
5006  list_free(indexoidlist);
5007  bms_free(uindexattrs);
5008  bms_free(pkindexattrs);
5009  bms_free(idindexattrs);
5010  bms_free(indexattrs);
5011 
5012  goto restart;
5013  }
5014 
5015  /* Don't leak the old values of these bitmaps, if any */
5016  bms_free(relation->rd_indexattr);
5017  relation->rd_indexattr = NULL;
5018  bms_free(relation->rd_keyattr);
5019  relation->rd_keyattr = NULL;
5020  bms_free(relation->rd_pkattr);
5021  relation->rd_pkattr = NULL;
5022  bms_free(relation->rd_idattr);
5023  relation->rd_idattr = NULL;
5024 
5025  /*
5026  * Now save copies of the bitmaps in the relcache entry. We intentionally
5027  * set rd_indexattr last, because that's the one that signals validity of
5028  * the values; if we run out of memory before making that copy, we won't
5029  * leave the relcache entry looking like the other ones are valid but
5030  * empty.
5031  */
5033  relation->rd_keyattr = bms_copy(uindexattrs);
5034  relation->rd_pkattr = bms_copy(pkindexattrs);
5035  relation->rd_idattr = bms_copy(idindexattrs);
5036  relation->rd_indexattr = bms_copy(indexattrs);
5037  MemoryContextSwitchTo(oldcxt);
5038 
5039  /* We return our original working copy for caller to play with */
5040  switch (attrKind)
5041  {
5042  case INDEX_ATTR_BITMAP_ALL:
5043  return indexattrs;
5044  case INDEX_ATTR_BITMAP_KEY:
5045  return uindexattrs;
5047  return bms_copy(relation->rd_pkattr);
5049  return idindexattrs;
5050  default:
5051  elog(ERROR, "unknown attrKind %u", attrKind);
5052  return NULL;
5053  }
5054 }
5055 
5056 /*
5057  * RelationGetExclusionInfo -- get info about index's exclusion constraint
5058  *
5059  * This should be called only for an index that is known to have an
5060  * associated exclusion constraint. It returns arrays (palloc'd in caller's
5061  * context) of the exclusion operator OIDs, their underlying functions'
5062  * OIDs, and their strategy numbers in the index's opclasses. We cache
5063  * all this information since it requires a fair amount of work to get.
5064  */
5065 void
5067  Oid **operators,
5068  Oid **procs,
5069  uint16 **strategies)
5070 {
5071  int ncols = indexRelation->rd_rel->relnatts;
5072  Oid *ops;
5073  Oid *funcs;
5074  uint16 *strats;
5075  Relation conrel;
5076  SysScanDesc conscan;
5077  ScanKeyData skey[1];
5078  HeapTuple htup;
5079  bool found;
5080  MemoryContext oldcxt;
5081  int i;
5082 
5083  /* Allocate result space in caller context */
5084  *operators = ops = (Oid *) palloc(sizeof(Oid) * ncols);
5085  *procs = funcs = (Oid *) palloc(sizeof(Oid) * ncols);
5086  *strategies = strats = (uint16 *) palloc(sizeof(uint16) * ncols);
5087 
5088  /* Quick exit if we have the data cached already */
5089  if (indexRelation->rd_exclstrats != NULL)
5090  {
5091  memcpy(ops, indexRelation->rd_exclops, sizeof(Oid) * ncols);
5092  memcpy(funcs, indexRelation->rd_exclprocs, sizeof(Oid) * ncols);
5093  memcpy(strats, indexRelation->rd_exclstrats, sizeof(uint16) * ncols);
5094  return;
5095  }
5096 
5097  /*
5098  * Search pg_constraint for the constraint associated with the index. To
5099  * make this not too painfully slow, we use the index on conrelid; that
5100  * will hold the parent relation's OID not the index's own OID.
5101  */
5102  ScanKeyInit(&skey[0],
5104  BTEqualStrategyNumber, F_OIDEQ,
5105  ObjectIdGetDatum(indexRelation->rd_index->indrelid));
5106 
5108  conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
5109  NULL, 1, skey);
5110  found = false;
5111 
5112  while (HeapTupleIsValid(htup = systable_getnext(conscan)))
5113  {
5115  Datum val;
5116  bool isnull;
5117  ArrayType *arr;
5118  int nelem;
5119 
5120  /* We want the exclusion constraint owning the index */
5121  if (conform->contype != CONSTRAINT_EXCLUSION ||
5122  conform->conindid != RelationGetRelid(indexRelation))
5123  continue;
5124 
5125  /* There should be only one */
5126  if (found)
5127  elog(ERROR, "unexpected exclusion constraint record found for rel %s",
5128  RelationGetRelationName(indexRelation));
5129  found = true;
5130 
5131  /* Extract the operator OIDS from conexclop */
5132  val = fastgetattr(htup,
5134  conrel->rd_att, &isnull);
5135  if (isnull)
5136  elog(ERROR, "null conexclop for rel %s",
5137  RelationGetRelationName(indexRelation));
5138 
5139  arr = DatumGetArrayTypeP(val); /* ensure not toasted */
5140  nelem = ARR_DIMS(arr)[0];
5141  if (ARR_NDIM(arr) != 1 ||
5142  nelem != ncols ||
5143  ARR_HASNULL(arr) ||
5144  ARR_ELEMTYPE(arr) != OIDOID)
5145  elog(ERROR, "conexclop is not a 1-D Oid array");
5146 
5147  memcpy(ops, ARR_DATA_PTR(arr), sizeof(Oid) * ncols);
5148  }
5149 
5150  systable_endscan(conscan);
5151  heap_close(conrel, AccessShareLock);
5152 
5153  if (!found)
5154  elog(ERROR, "exclusion constraint record missing for rel %s",
5155  RelationGetRelationName(indexRelation));
5156 
5157  /* We need the func OIDs and strategy numbers too */
5158  for (i = 0; i < ncols; i++)
5159  {
5160  funcs[i] = get_opcode(ops[i]);
5161  strats[i] = get_op_opfamily_strategy(ops[i],
5162  indexRelation->rd_opfamily[i]);
5163  /* shouldn't fail, since it was checked at index creation */
5164  if (strats[i] == InvalidStrategy)
5165  elog(ERROR, "could not find strategy for operator %u in family %u",
5166  ops[i], indexRelation->rd_opfamily[i]);
5167  }
5168 
5169  /* Save a copy of the results in the relcache entry. */
5170  oldcxt = MemoryContextSwitchTo(indexRelation->rd_indexcxt);
5171  indexRelation->rd_exclops = (Oid *) palloc(sizeof(Oid) * ncols);
5172  indexRelation->rd_exclprocs = (Oid *) palloc(sizeof(Oid) * ncols);
5173  indexRelation->rd_exclstrats = (uint16 *) palloc(sizeof(uint16) * ncols);
5174  memcpy(indexRelation->rd_exclops, ops, sizeof(Oid) * ncols);
5175  memcpy(indexRelation->rd_exclprocs, funcs, sizeof(Oid) * ncols);
5176  memcpy(indexRelation->rd_exclstrats, strats, sizeof(uint16) * ncols);
5177  MemoryContextSwitchTo(oldcxt);
5178 }
5179 
5180 /*
5181  * Get publication actions for the given relation.
5182  */
5183 struct PublicationActions *
5185 {
5186  List *puboids;
5187  ListCell *lc;
5188  MemoryContext oldcxt;
5189  PublicationActions *pubactions = palloc0(sizeof(PublicationActions));
5190 
5191  if (relation->rd_pubactions)
5192  return memcpy(pubactions, relation->rd_pubactions,
5193  sizeof(PublicationActions));
5194 
5195  /* Fetch the publication membership info. */
5196  puboids = GetRelationPublications(RelationGetRelid(relation));
5197  puboids = list_concat_unique_oid(puboids, GetAllTablesPublications());
5198 
5199  foreach(lc, puboids)
5200  {
5201  Oid pubid = lfirst_oid(lc);
5202  HeapTuple tup;
5203  Form_pg_publication pubform;
5204 
5206 
5207  if (!HeapTupleIsValid(tup))
5208  elog(ERROR, "cache lookup failed for publication %u", pubid);
5209 
5210  pubform = (Form_pg_publication) GETSTRUCT(tup);
5211 
5212  pubactions->pubinsert |= pubform->pubinsert;
5213  pubactions->pubupdate |= pubform->pubupdate;
5214  pubactions->pubdelete |= pubform->pubdelete;
5215 
5216  ReleaseSysCache(tup);
5217 
5218  /*
5219  * If we know everything is replicated, there is no point to check for
5220  * other publications.
5221  */
5222  if (pubactions->pubinsert && pubactions->pubupdate &&
5223  pubactions->pubdelete)
5224  break;
5225  }
5226 
5227  if (relation->rd_pubactions)
5228  {
5229  pfree(relation->rd_pubactions);
5230  relation->rd_pubactions = NULL;
5231  }
5232 
5233  /* Now save copy of the actions in the relcache entry. */
5235  relation->rd_pubactions = palloc(sizeof(PublicationActions));
5236  memcpy(relation->rd_pubactions, pubactions, sizeof(PublicationActions));
5237  MemoryContextSwitchTo(oldcxt);
5238 
5239  return pubactions;
5240 }
5241 
5242 /*
5243  * Routines to support ereport() reports of relation-related errors
5244  *
5245  * These could have been put into elog.c, but it seems like a module layering
5246  * violation to have elog.c calling relcache or syscache stuff --- and we
5247  * definitely don't want elog.h including rel.h. So we put them here.
5248  */
5249 
5250 /*
5251  * errtable --- stores schema_name and table_name of a table
5252  * within the current errordata.
5253  */
5254 int
5256 {
5260 
5261  return 0; /* return value does not matter */
5262 }
5263 
5264 /*
5265  * errtablecol --- stores schema_name, table_name and column_name
5266  * of a table column within the current errordata.
5267  *
5268  * The column is specified by attribute number --- for most callers, this is
5269  * easier and less error-prone than getting the column name for themselves.
5270  */
5271 int
5272 errtablecol(Relation rel, int attnum)
5273 {
5274  TupleDesc reldesc = RelationGetDescr(rel);
5275  const char *colname;
5276 
5277  /* Use reldesc if it's a user attribute, else consult the catalogs */
5278  if (attnum > 0 && attnum <= reldesc->natts)
5279  colname = NameStr(TupleDescAttr(reldesc, attnum - 1)->attname);
5280  else
5281  colname = get_relid_attribute_name(RelationGetRelid(rel), attnum);
5282 
5283  return errtablecolname(rel, colname);
5284 }
5285 
5286 /*
5287  * errtablecolname --- stores schema_name, table_name and column_name
5288  * of a table column within the current errordata, where the column name is
5289  * given directly rather than extracted from the relation's catalog data.
5290  *
5291  * Don't use this directly unless errtablecol() is inconvenient for some
5292  * reason. This might possibly be needed during intermediate states in ALTER
5293  * TABLE, for instance.
5294  */
5295 int
5296 errtablecolname(Relation rel, const char *colname)
5297 {
5298  errtable(rel);
5300 
5301  return 0; /* return value does not matter */
5302 }
5303 
5304 /*
5305  * errtableconstraint --- stores schema_name, table_name and constraint_name
5306  * of a table-related constraint within the current errordata.
5307  */
5308 int
5309 errtableconstraint(Relation rel, const char *conname)
5310 {
5311  errtable(rel);
5313 
5314  return 0; /* return value does not matter */
5315 }
5316 
5317 
5318 /*
5319  * load_relcache_init_file, write_relcache_init_file
5320  *
5321  * In late 1992, we started regularly having databases with more than
5322  * a thousand classes in them. With this number of classes, it became
5323  * critical to do indexed lookups on the system catalogs.
5324  *
5325  * Bootstrapping these lookups is very hard. We want to be able to
5326  * use an index on pg_attribute, for example, but in order to do so,
5327  * we must have read pg_attribute for the attributes in the index,
5328  * which implies that we need to use the index.
5329  *
5330  * In order to get around the problem, we do the following:
5331  *
5332  * + When the database system is initialized (at initdb time), we
5333  * don't use indexes. We do sequential scans.
5334  *
5335  * + When the backend is started up in normal mode, we load an image
5336  * of the appropriate relation descriptors, in internal format,
5337  * from an initialization file in the data/base/... directory.
5338  *
5339  * + If the initialization file isn't there, then we create the
5340  * relation descriptors using sequential scans and write 'em to
5341  * the initialization file for use by subsequent backends.
5342  *
5343  * As of Postgres 9.0, there is one local initialization file in each
5344  * database, plus one shared initialization file for shared catalogs.
5345  *
5346  * We could dispense with the initialization files and just build the
5347  * critical reldescs the hard way on every backend startup, but that
5348  * slows down backend startup noticeably.
5349  *
5350  * We can in fact go further, and save more relcache entries than
5351  * just the ones that are absolutely critical; this allows us to speed
5352  * up backend startup by not having to build such entries the hard way.
5353  * Presently, all the catalog and index entries that are referred to
5354  * by catcaches are stored in the initialization files.
5355  *
5356  * The same mechanism that detects when catcache and relcache entries
5357  * need to be invalidated (due to catalog updates) also arranges to
5358  * unlink the initialization files when the contents may be out of date.
5359  * The files will then be rebuilt during the next backend startup.
5360  */
5361 
5362 /*
5363  * load_relcache_init_file -- attempt to load cache from the shared
5364  * or local cache init file
5365  *
5366  * If successful, return TRUE and set criticalRelcachesBuilt or
5367  * criticalSharedRelcachesBuilt to true.
5368  * If not successful, return FALSE.
5369  *
5370  * NOTE: we assume we are already switched into CacheMemoryContext.
5371  */
5372 static bool
5374 {
5375  FILE *fp;
5376  char initfilename[MAXPGPATH];
5377  Relation *rels;
5378  int relno,
5379  num_rels,
5380  max_rels,
5381  nailed_rels,
5382  nailed_indexes,
5383  magic;
5384  int i;
5385 
5386  if (shared)
5387  snprintf(initfilename, sizeof(initfilename), "global/%s",
5389  else
5390  snprintf(initfilename, sizeof(initfilename), "%s/%s",
5392 
5393  fp = AllocateFile(initfilename, PG_BINARY_R);
5394  if (fp == NULL)
5395  return false;
5396 
5397  /*
5398  * Read the index relcache entries from the file. Note we will not enter
5399  * any of them into the cache if the read fails partway through; this
5400  * helps to guard against broken init files.
5401  */
5402  max_rels = 100;
5403  rels = (Relation *) palloc(max_rels * sizeof(Relation));
5404  num_rels = 0;
5405  nailed_rels = nailed_indexes = 0;
5406 
5407  /* check for correct magic number (compatible version) */
5408  if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
5409  goto read_failed;
5410  if (magic != RELCACHE_INIT_FILEMAGIC)
5411  goto read_failed;
5412 
5413  for (relno = 0;; relno++)
5414  {
5415  Size len;
5416  size_t nread;
5417  Relation rel;
5418  Form_pg_class relform;
5419  bool has_not_null;
5420 
5421  /* first read the relation descriptor length */
5422  nread = fread(&len, 1, sizeof(len), fp);
5423  if (nread != sizeof(len))
5424  {
5425  if (nread == 0)
5426  break; /* end of file */
5427  goto read_failed;
5428  }
5429 
5430  /* safety check for incompatible relcache layout */
5431  if (len != sizeof(RelationData))
5432  goto read_failed;
5433 
5434  /* allocate another relcache header */
5435  if (num_rels >= max_rels)
5436  {
5437  max_rels *= 2;
5438  rels = (Relation *) repalloc(rels, max_rels * sizeof(Relation));
5439  }
5440 
5441  rel = rels[num_rels++] = (Relation) palloc(len);
5442 
5443  /* then, read the Relation structure */
5444  if (fread(rel, 1, len, fp) != len)
5445  goto read_failed;
5446 
5447  /* next read the relation tuple form */
5448  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5449  goto read_failed;
5450 
5451  relform = (Form_pg_class) palloc(len);
5452  if (fread(relform, 1, len, fp) != len)
5453  goto read_failed;
5454 
5455  rel->rd_rel = relform;
5456 
5457  /* initialize attribute tuple forms */
5458  rel->rd_att = CreateTemplateTupleDesc(relform->relnatts,
5459  relform->relhasoids);
5460  rel->rd_att->tdrefcount = 1; /* mark as refcounted */
5461 
5462  rel->rd_att->tdtypeid = relform->reltype;
5463  rel->rd_att->tdtypmod = -1; /* unnecessary, but... */
5464 
5465  /* next read all the attribute tuple form data entries */
5466  has_not_null = false;
5467  for (i = 0; i < relform->relnatts; i++)
5468  {
5469  Form_pg_attribute attr = TupleDescAttr(rel->rd_att, i);
5470 
5471  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5472  goto read_failed;
5473  if (len != ATTRIBUTE_FIXED_PART_SIZE)
5474  goto read_failed;
5475  if (fread(attr, 1, len, fp) != len)
5476  goto read_failed;
5477 
5478  has_not_null |= attr->attnotnull;
5479  }
5480 
5481  /* next read the access method specific field */
5482  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5483  goto read_failed;
5484  if (len > 0)
5485  {
5486  rel->rd_options = palloc(len);
5487  if (fread(rel->rd_options, 1, len, fp) != len)
5488  goto read_failed;
5489  if (len != VARSIZE(rel->rd_options))
5490  goto read_failed; /* sanity check */
5491  }
5492  else
5493  {
5494  rel->rd_options = NULL;
5495  }
5496 
5497  /* mark not-null status */
5498  if (has_not_null)
5499  {
5500  TupleConstr *constr = (TupleConstr *) palloc0(sizeof(TupleConstr));
5501 
5502  constr->has_not_null = true;
5503  rel->rd_att->constr = constr;
5504  }
5505 
5506  /* If it's an index, there's more to do */
5507  if (rel->rd_rel->relkind == RELKIND_INDEX)
5508  {
5509  MemoryContext indexcxt;
5510  Oid *opfamily;
5511  Oid *opcintype;
5512  RegProcedure *support;
5513  int nsupport;
5514  int16 *indoption;
5515  Oid *indcollation;
5516 
5517  /* Count nailed indexes to ensure we have 'em all */
5518  if (rel->rd_isnailed)
5519  nailed_indexes++;
5520 
5521  /* next, read the pg_index tuple */
5522  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5523  goto read_failed;
5524 
5525  rel->rd_indextuple = (HeapTuple) palloc(len);
5526  if (fread(rel->rd_indextuple, 1, len, fp) != len)
5527  goto read_failed;
5528 
5529  /* Fix up internal pointers in the tuple -- see heap_copytuple */
5530  rel->rd_indextuple->t_data = (HeapTupleHeader) ((char *) rel->rd_indextuple + HEAPTUPLESIZE);
5532 
5533  /*
5534  * prepare index info context --- parameters should match
5535  * RelationInitIndexAccessInfo
5536  */
5540  rel->rd_indexcxt = indexcxt;
5541 
5542  /*
5543  * Now we can fetch the index AM's API struct. (We can't store
5544  * that in the init file, since it contains function pointers that
5545  * might vary across server executions. Fortunately, it should be
5546  * safe to call the amhandler even while bootstrapping indexes.)
5547  */
5548  InitIndexAmRoutine(rel);
5549 
5550  /* next, read the vector of opfamily OIDs */
5551  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5552  goto read_failed;
5553 
5554  opfamily = (Oid *) MemoryContextAlloc(indexcxt, len);
5555  if (fread(opfamily, 1, len, fp) != len)
5556  goto read_failed;
5557 
5558  rel->rd_opfamily = opfamily;
5559 
5560  /* next, read the vector of opcintype OIDs */
5561  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5562  goto read_failed;
5563 
5564  opcintype = (Oid *) MemoryContextAlloc(indexcxt, len);
5565  if (fread(opcintype, 1, len, fp) != len)
5566  goto read_failed;
5567 
5568  rel->rd_opcintype = opcintype;
5569 
5570  /* next, read the vector of support procedure OIDs */
5571  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5572  goto read_failed;
5573  support = (RegProcedure *) MemoryContextAlloc(indexcxt, len);
5574  if (fread(support, 1, len, fp) != len)
5575  goto read_failed;
5576 
5577  rel->rd_support = support;
5578 
5579  /* next, read the vector of collation OIDs */
5580  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5581  goto read_failed;
5582 
5583  indcollation = (Oid *) MemoryContextAlloc(indexcxt, len);
5584  if (fread(indcollation, 1, len, fp) != len)
5585  goto read_failed;
5586 
5587  rel->rd_indcollation = indcollation;
5588 
5589  /* finally, read the vector of indoption values */
5590  if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
5591  goto read_failed;
5592 
5593  indoption = (int16 *) MemoryContextAlloc(indexcxt, len);
5594  if (fread(indoption, 1, len, fp) != len)
5595  goto read_failed;
5596 
5597  rel->rd_indoption = indoption;
5598 
5599  /* set up zeroed fmgr-info vector */
5600  nsupport = relform->relnatts * rel->rd_amroutine->amsupport;
5601  rel->rd_supportinfo = (FmgrInfo *)
5602  MemoryContextAllocZero(indexcxt, nsupport * sizeof(FmgrInfo));
5603  }
5604  else
5605  {
5606  /* Count nailed rels to ensure we have 'em all */
5607  if (rel->rd_isnailed)
5608  nailed_rels++;
5609 
5610  Assert(rel->rd_index == NULL);
5611  Assert(rel->rd_indextuple == NULL);
5612  Assert(rel->rd_indexcxt == NULL);
5613  Assert(rel->rd_amroutine == NULL);
5614  Assert(rel->rd_opfamily == NULL);
5615  Assert(rel->rd_opcintype == NULL);
5616  Assert(rel->rd_support == NULL);
5617  Assert(rel->rd_supportinfo == NULL);
5618  Assert(rel->rd_indoption == NULL);
5619  Assert(rel->rd_indcollation == NULL);
5620  }
5621 
5622  /*
5623  * Rules and triggers are not saved (mainly because the internal
5624  * format is complex and subject to change). They must be rebuilt if
5625  * needed by RelationCacheInitializePhase3. This is not expected to
5626  * be a big performance hit since few system catalogs have such. Ditto
5627  * for RLS policy data, index expressions, predicates, exclusion info,
5628  * and FDW info.
5629  */
5630  rel->rd_rules = NULL;
5631  rel->rd_rulescxt = NULL;
5632  rel->trigdesc = NULL;
5633  rel->rd_rsdesc = NULL;
5634  rel->rd_partkeycxt = NULL;
5635  rel->rd_partkey = NULL;
5636  rel->rd_pdcxt = NULL;
5637  rel->rd_partdesc = NULL;
5638  rel->rd_partcheck = NIL;
5639  rel->rd_indexprs = NIL;
5640  rel->rd_indpred = NIL;
5641  rel->rd_exclops = NULL;
5642  rel->rd_exclprocs = NULL;
5643  rel->rd_exclstrats = NULL;
5644  rel->rd_fdwroutine = NULL;
5645 
5646  /*
5647  * Reset transient-state fields in the relcache entry
5648  */
5649  rel->rd_smgr = NULL;
5650  if (rel->rd_isnailed)
5651  rel->rd_refcnt = 1;
5652  else
5653  rel->rd_refcnt = 0;
5654  rel->rd_indexvalid = 0;
5655  rel->rd_fkeylist = NIL;
5656  rel->rd_fkeyvalid = false;
5657  rel->rd_indexlist = NIL;
5658  rel->rd_oidindex = InvalidOid;
5659  rel->rd_pkindex = InvalidOid;
5660  rel->rd_replidindex = InvalidOid;
5661  rel->rd_indexattr = NULL;
5662  rel->rd_keyattr = NULL;
5663  rel->rd_pkattr = NULL;
5664  rel->rd_idattr = NULL;
5665  rel->rd_pubactions = NULL;
5666  rel->rd_statvalid = false;
5667  rel->rd_statlist = NIL;
5670  rel->rd_amcache = NULL;
5671  MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
5672 
5673  /*
5674  * Recompute lock and physical addressing info. This is needed in
5675  * case the pg_internal.init file was copied from some other database
5676  * by CREATE DATABASE.
5677  */
5678  RelationInitLockInfo(rel);
5680  }
5681 
5682  /*
5683  * We reached the end of the init file without apparent problem. Did we
5684  * get the right number of nailed items? This is a useful crosscheck in
5685  * case the set of critical rels or indexes changes. However, that should
5686  * not happen in a normally-running system, so let's bleat if it does.
5687  *
5688  * For the shared init file, we're called before client authentication is
5689  * done, which means that elog(WARNING) will go only to the postmaster
5690  * log, where it's easily missed. To ensure that developers notice bad
5691  * values of NUM_CRITICAL_SHARED_RELS/NUM_CRITICAL_SHARED_INDEXES, we put
5692  * an Assert(false) there.
5693  */
5694  if (shared)
5695  {
5696  if (nailed_rels != NUM_CRITICAL_SHARED_RELS ||
5697  nailed_indexes != NUM_CRITICAL_SHARED_INDEXES)
5698  {
5699  elog(WARNING, "found %d nailed shared rels and %d nailed shared indexes in init file, but expected %d and %d respectively",
5700  nailed_rels, nailed_indexes,
5702  /* Make sure we get developers' attention about this */
5703  Assert(false);
5704  /* In production builds, recover by bootstrapping the relcache */
5705  goto read_failed;
5706  }
5707  }
5708  else
5709  {
5710  if (nailed_rels != NUM_CRITICAL_LOCAL_RELS ||
5711  nailed_indexes != NUM_CRITICAL_LOCAL_INDEXES)
5712  {
5713  elog(WARNING, "found %d nailed rels and %d nailed indexes in init file, but expected %d and %d respectively",
5714  nailed_rels, nailed_indexes,
5716  /* We don't need an Assert() in this case */
5717  goto read_failed;
5718  }
5719  }
5720 
5721  /*
5722  * OK, all appears well.
5723  *
5724  * Now insert all the new relcache entries into the cache.
5725  */
5726  for (relno = 0; relno < num_rels; relno++)
5727  {
5728  RelationCacheInsert(rels[reln