PostgreSQL Source Code  git master
cluster.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * cluster.c
4  * CLUSTER a table on an index. This is now also used for VACUUM FULL.
5  *
6  * There is hardly anything left of Paul Brown's original implementation...
7  *
8  *
9  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
10  * Portions Copyright (c) 1994-5, Regents of the University of California
11  *
12  *
13  * IDENTIFICATION
14  * src/backend/commands/cluster.c
15  *
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 
20 #include "access/amapi.h"
21 #include "access/heapam.h"
22 #include "access/multixact.h"
23 #include "access/relscan.h"
24 #include "access/tableam.h"
25 #include "access/transam.h"
26 #include "access/toast_internals.h"
27 #include "access/xact.h"
28 #include "access/xlog.h"
29 #include "catalog/pg_am.h"
30 #include "catalog/catalog.h"
31 #include "catalog/dependency.h"
32 #include "catalog/heap.h"
33 #include "catalog/index.h"
34 #include "catalog/namespace.h"
35 #include "catalog/objectaccess.h"
36 #include "catalog/toasting.h"
37 #include "commands/cluster.h"
38 #include "commands/progress.h"
39 #include "commands/tablecmds.h"
40 #include "commands/vacuum.h"
41 #include "miscadmin.h"
42 #include "optimizer/optimizer.h"
43 #include "pgstat.h"
44 #include "storage/bufmgr.h"
45 #include "storage/lmgr.h"
46 #include "storage/predicate.h"
47 #include "utils/acl.h"
48 #include "utils/fmgroids.h"
49 #include "utils/inval.h"
50 #include "utils/lsyscache.h"
51 #include "utils/memutils.h"
52 #include "utils/pg_rusage.h"
53 #include "utils/relmapper.h"
54 #include "utils/snapmgr.h"
55 #include "utils/syscache.h"
56 #include "utils/tuplesort.h"
57 
58 
59 /*
60  * This struct is used to pass around the information on tables to be
61  * clustered. We need this so we can make a list of them when invoked without
62  * a specific table/index pair.
63  */
64 typedef struct
65 {
 /*
  * NOTE(review): embedded listing lines 66-67 are absent here, so the
  * member declarations are not visible.  cluster() reads rvtc->tableOid
  * and rvtc->indexOid through a RelToCluster pointer, so at least those two
  * members are expected -- confirm against the real source.
  */
68 } RelToCluster;
69 
70 
 /*
  * Forward declarations of file-local helpers.  rebuild_relation() and
  * copy_table_data() are defined further down; the definition of
  * get_tables_to_cluster() is not part of this excerpt.
  */
71 static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
72 static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
73  bool verbose, bool *pSwapToastByContent,
74  TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
75 static List *get_tables_to_cluster(MemoryContext cluster_context);
76 
77 
78 /*---------------------------------------------------------------------------
79  * This cluster code allows for clustering multiple tables at once. Because
80  * of this, we cannot just run everything on a single transaction, or we
81  * would be forced to acquire exclusive locks on all the tables being
82  * clustered, simultaneously --- very likely leading to deadlock.
83  *
84  * To solve this we follow a similar strategy to VACUUM code,
85  * clustering each relation in a separate transaction. For this to work,
86  * we need to:
87  * - provide a separate memory context so that we can pass information in
88  * a way that survives across transactions
89  * - start a new transaction every time a new relation is clustered
90  * - check for validity of the information on to-be-clustered relations,
91  * as someone might have deleted a relation behind our back, or
92  * clustered one on a different index
93  * - end the transaction
94  *
95  * The single-relation case does not have any such overhead.
96  *
97  * We also allow a relation to be specified without index. In that case,
98  * the indisclustered bit will be looked up, and an ERROR will be thrown
99  * if there is no index with the bit set.
100  *---------------------------------------------------------------------------
101  */
 /*
  * Entry point for the CLUSTER statement.  With stmt->relation set, one
  * table is resolved, validated and handed to cluster_rel(); otherwise the
  * list built by get_tables_to_cluster() is processed one entry at a time.
  */
102 void
103 cluster(ClusterStmt *stmt, bool isTopLevel)
104 {
105  if (stmt->relation != NULL)
106  {
107  /* This is the single-relation case. */
108  Oid tableOid,
109  indexOid = InvalidOid;
110  Relation rel;
111 
112  /* Find, lock, and check permissions on the table */
113  tableOid = RangeVarGetRelidExtended(stmt->relation,
 /* NOTE(review): embedded line 114 (an argument) is missing from this listing. */
115  0,
 /*
  * NOTE(review): embedded line 116 is missing, so the call above is left
  * unterminated in this listing.
  */
 /* NoLock: per the comment above, the lookup is expected to have locked the table already. */
117  rel = table_open(tableOid, NoLock);
118 
119  /*
120  * Reject clustering a remote temp table ... their local buffer
121  * manager is not going to cope.
122  */
123  if (RELATION_IS_OTHER_TEMP(rel))
124  ereport(ERROR,
125  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
126  errmsg("cannot cluster temporary tables of other sessions")));
127 
128  /*
129  * Reject clustering a partitioned table.
130  */
131  if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
132  ereport(ERROR,
133  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
134  errmsg("cannot cluster a partitioned table")));
135 
136  if (stmt->indexname == NULL)
137  {
138  ListCell *index;
139 
140  /* We need to find the index that has indisclustered set. */
 /*
  * On exit from the loop indexOid is either the OID of the first index
  * found with indisclustered set (the break path) or InvalidOid (reset at
  * the bottom of every non-matching iteration; also the initial value when
  * the index list is empty).
  */
141  foreach(index, RelationGetIndexList(rel))
142  {
143  HeapTuple idxtuple;
144  Form_pg_index indexForm;
145 
146  indexOid = lfirst_oid(index);
147  idxtuple = SearchSysCache1(INDEXRELID,
148  ObjectIdGetDatum(indexOid));
149  if (!HeapTupleIsValid(idxtuple))
150  elog(ERROR, "cache lookup failed for index %u", indexOid);
151  indexForm = (Form_pg_index) GETSTRUCT(idxtuple);
152  if (indexForm->indisclustered)
153  {
154  ReleaseSysCache(idxtuple);
155  break;
156  }
157  ReleaseSysCache(idxtuple);
158  indexOid = InvalidOid;
159  }
160 
161  if (!OidIsValid(indexOid))
162  ereport(ERROR,
163  (errcode(ERRCODE_UNDEFINED_OBJECT),
164  errmsg("there is no previously clustered index for table \"%s\"",
165  stmt->relation->relname)));
166  }
167  else
168  {
169  /*
170  * The index is expected to be in the same namespace as the
171  * relation.
172  */
173  indexOid = get_relname_relid(stmt->indexname,
174  rel->rd_rel->relnamespace);
175  if (!OidIsValid(indexOid))
176  ereport(ERROR,
177  (errcode(ERRCODE_UNDEFINED_OBJECT),
178  errmsg("index \"%s\" for table \"%s\" does not exist",
179  stmt->indexname, stmt->relation->relname)));
180  }
181 
182  /* close relation, keep lock till commit */
183  table_close(rel, NoLock);
184 
185  /* Do the job. */
186  cluster_rel(tableOid, indexOid, stmt->options);
187  }
188  else
189  {
190  /*
191  * This is the "multi relation" case. We need to cluster all tables
192  * that have some index with indisclustered set.
193  */
194  MemoryContext cluster_context;
195  List *rvs;
196  ListCell *rv;
197 
198  /*
199  * We cannot run this form of CLUSTER inside a user transaction block;
200  * we'd be holding locks way too long.
201  */
202  PreventInTransactionBlock(isTopLevel, "CLUSTER");
203 
204  /*
205  * Create special memory context for cross-transaction storage.
206  *
207  * Since it is a child of PortalContext, it will go away even in case
208  * of error.
209  */
210  cluster_context = AllocSetContextCreate(PortalContext,
211  "Cluster",
 /*
  * NOTE(review): embedded line 212 is missing, so the call above is left
  * unterminated in this listing.
  */
213 
214  /*
215  * Build the list of relations to cluster. Note that this lives in
216  * cluster_context.
217  */
218  rvs = get_tables_to_cluster(cluster_context);
219 
220  /* Commit to get out of starting transaction */
 /* NOTE(review): embedded lines 221-222 (the statements this comment describes) are missing. */
223 
224  /* Ok, now that we've got them all, cluster them one by one */
225  foreach(rv, rvs)
226  {
227  RelToCluster *rvtc = (RelToCluster *) lfirst(rv);
228 
229  /* Start a new transaction for each relation. */
 /* NOTE(review): embedded line 230 is missing. */
231  /* functions in indexes may want a snapshot set */
 /* NOTE(review): embedded line 232 is missing. */
233  /* Do the job. */
 /* CLUOPT_RECHECK makes cluster_rel() re-validate the entry before processing it. */
234  cluster_rel(rvtc->tableOid, rvtc->indexOid,
235  stmt->options | CLUOPT_RECHECK);
 /* NOTE(review): embedded lines 236-237 are missing. */
238  }
239 
240  /* Start a new transaction for the cleanup work. */
 /* NOTE(review): embedded line 241 is missing. */
242 
243  /* Clean up working storage */
244  MemoryContextDelete(cluster_context);
245  }
246 }
247 
248 /*
249  * cluster_rel
250  *
251  * This clusters the table by creating a new, clustered table and
252  * swapping the relfilenodes of the new table and the old table, so
253  * the OID of the original table is preserved. Thus we do not lose
254  * GRANT, inheritance nor references to this table (this was a bug
255  * in releases through 7.3).
256  *
257  * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
258  * the new table, it's better to create the indexes afterwards than to fill
259  * them incrementally while we load the table.
260  *
261  * If indexOid is InvalidOid, the table will be rewritten in physical order
262  * instead of index order. This is the new implementation of VACUUM FULL,
263  * and error messages should refer to the operation as VACUUM not CLUSTER.
264  */
 /*
  * NOTE(review): this listing of cluster_rel() has lost a number of lines
  * (see the individual notes below).  In particular, every early "return"
  * is preceded by one or two missing statements, so what happens before each
  * return cannot be determined from this excerpt.
  */
265 void
266 cluster_rel(Oid tableOid, Oid indexOid, int options)
267 {
268  Relation OldHeap;
269  bool verbose = ((options & CLUOPT_VERBOSE) != 0);
270  bool recheck = ((options & CLUOPT_RECHECK) != 0);
271 
272  /* Check for user-requested abort. */
 /* NOTE(review): embedded line 273 (the statement this comment describes) is missing. */
274 
 /* NOTE(review): embedded line 275 is missing. */
276  if (OidIsValid(indexOid))
 /* NOTE(review): embedded lines 277-278 (body of the "if") are missing. */
279  else
 /* NOTE(review): embedded lines 280-281 (body of the "else") are missing. */
282 
283  /*
284  * We grab exclusive access to the target rel and index for the duration
285  * of the transaction. (This is redundant for the single-transaction
286  * case, since cluster() already did it.) The index lock is taken inside
287  * check_index_is_clusterable.
288  */
289  OldHeap = try_relation_open(tableOid, AccessExclusiveLock);
290 
291  /* If the table has gone away, we can skip processing it */
292  if (!OldHeap)
293  {
 /* NOTE(review): embedded line 294 is missing. */
295  return;
296  }
297 
298  /*
299  * Since we may open a new transaction for each relation, we have to check
300  * that the relation still is what we think it is.
301  *
302  * If this is a single-transaction CLUSTER, we can skip these tests. We
303  * *must* skip the one on indisclustered since it would reject an attempt
304  * to cluster a not-previously-clustered index.
305  */
306  if (recheck)
307  {
308  HeapTuple tuple;
309  Form_pg_index indexForm;
310 
311  /* Check that the user still owns the relation */
312  if (!pg_class_ownercheck(tableOid, GetUserId()))
313  {
 /* NOTE(review): embedded lines 314-315 are missing. */
316  return;
317  }
318 
319  /*
320  * Silently skip a temp table for a remote session. Only doing this
321  * check in the "recheck" case is appropriate (which currently means
322  * somebody is executing a database-wide CLUSTER), because there is
323  * another check in cluster() which will stop any attempt to cluster
324  * remote temp tables by name. There is another check further down in
325  * this function which is redundant, but we leave it for extra safety.
326  */
327  if (RELATION_IS_OTHER_TEMP(OldHeap))
328  {
 /* NOTE(review): embedded lines 329-330 are missing. */
331  return;
332  }
333 
334  if (OidIsValid(indexOid))
335  {
336  /*
337  * Check that the index still exists
338  */
 /*
  * NOTE(review): embedded line 339 is missing; it presumably held the
  * "if" condition controlling the block below, which has no visible
  * controlling statement in this listing.
  */
340  {
 /* NOTE(review): embedded lines 341-342 are missing. */
343  return;
344  }
345 
346  /*
347  * Check that the index is still the one with indisclustered set.
348  */
349  tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexOid));
350  if (!HeapTupleIsValid(tuple)) /* probably can't happen */
351  {
 /* NOTE(review): embedded lines 352-353 are missing. */
354  return;
355  }
356  indexForm = (Form_pg_index) GETSTRUCT(tuple);
357  if (!indexForm->indisclustered)
358  {
359  ReleaseSysCache(tuple);
 /* NOTE(review): embedded lines 360-361 are missing. */
362  return;
363  }
364  ReleaseSysCache(tuple);
365  }
366  }
367 
368  /*
369  * We allow VACUUM FULL, but not CLUSTER, on shared catalogs. CLUSTER
370  * would work in most respects, but the index would only get marked as
371  * indisclustered in the current database, leading to unexpected behavior
372  * if CLUSTER were later invoked in another database.
373  */
374  if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
375  ereport(ERROR,
376  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
377  errmsg("cannot cluster a shared catalog")));
378 
379  /*
380  * Don't process temp tables of other backends ... their local buffer
381  * manager is not going to cope.
382  */
383  if (RELATION_IS_OTHER_TEMP(OldHeap))
384  {
 /* A valid indexOid means CLUSTER, otherwise VACUUM; word the error accordingly. */
385  if (OidIsValid(indexOid))
386  ereport(ERROR,
387  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
388  errmsg("cannot cluster temporary tables of other sessions")));
389  else
390  ereport(ERROR,
391  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
392  errmsg("cannot vacuum temporary tables of other sessions")));
393  }
394 
395  /*
396  * Also check for active uses of the relation in the current transaction,
397  * including open scans and pending AFTER trigger events.
398  */
399  CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM");
400 
401  /* Check heap and index are valid to cluster on */
402  if (OidIsValid(indexOid))
403  check_index_is_clusterable(OldHeap, indexOid, recheck, AccessExclusiveLock);
404 
405  /*
406  * Quietly ignore the request if this is a materialized view which has not
407  * been populated from its query. No harm is done because there is no data
408  * to deal with, and we don't want to throw an error if this is part of a
409  * multi-relation request -- for example, CLUSTER was run on the entire
410  * database.
411  */
412  if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW &&
413  !RelationIsPopulated(OldHeap))
414  {
 /* NOTE(review): embedded lines 415-416 are missing. */
417  return;
418  }
419 
420  /*
421  * All predicate locks on the tuples or pages are about to be made
422  * invalid, because we move tuples around. Promote them to relation
423  * locks. Predicate locks on indexes will be promoted when they are
424  * reindexed.
425  */
 /* NOTE(review): embedded line 426 (the statement this comment describes) is missing. */
427 
428  /* rebuild_relation does all the dirty work */
429  rebuild_relation(OldHeap, indexOid, verbose);
430 
431  /* NB: rebuild_relation does table_close() on OldHeap */
432 
 /* NOTE(review): embedded line 433 is missing. */
434 }
435 
436 /*
437  * Verify that the specified heap and index are valid to cluster on
438  *
439  * Side effect: obtains lock on the index. The caller may
440  * in some cases already have AccessExclusiveLock on the table, but
441  * not in all cases so we can't rely on the table-level lock for
442  * protection here.
443  */
444 void
445 check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMODE lockmode)
446 {
447  Relation OldIndex;
448 
449  OldIndex = index_open(indexOid, lockmode);
450 
451  /*
452  * Check that index is in fact an index on the given relation
453  */
454  if (OldIndex->rd_index == NULL ||
455  OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
456  ereport(ERROR,
457  (errcode(ERRCODE_WRONG_OBJECT_TYPE),
458  errmsg("\"%s\" is not an index for table \"%s\"",
459  RelationGetRelationName(OldIndex),
460  RelationGetRelationName(OldHeap))));
461 
462  /* Index AM must allow clustering */
463  if (!OldIndex->rd_indam->amclusterable)
464  ereport(ERROR,
465  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
466  errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
467  RelationGetRelationName(OldIndex))));
468 
469  /*
470  * Disallow clustering on incomplete indexes (those that might not index
471  * every row of the relation). We could relax this by making a separate
472  * seqscan pass over the table to copy the missing rows, but that seems
473  * expensive and tedious.
474  */
475  if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred, NULL))
476  ereport(ERROR,
477  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
478  errmsg("cannot cluster on partial index \"%s\"",
479  RelationGetRelationName(OldIndex))));
480 
481  /*
482  * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
483  * it might well not contain entries for every heap row, or might not even
484  * be internally consistent. (But note that we don't check indcheckxmin;
485  * the worst consequence of following broken HOT chains would be that we
486  * might put recently-dead tuples out-of-order in the new table, and there
487  * is little harm in that.)
488  */
489  if (!OldIndex->rd_index->indisvalid)
490  ereport(ERROR,
491  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
492  errmsg("cannot cluster on invalid index \"%s\"",
493  RelationGetRelationName(OldIndex))));
494 
495  /* Drop relcache refcnt on OldIndex, but keep lock */
496  index_close(OldIndex, NoLock);
497 }
498 
499 /*
500  * mark_index_clustered: mark the specified index as the one clustered on
501  *
502  * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
503  */
504 void
505 mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
506 {
507  HeapTuple indexTuple;
508  Form_pg_index indexForm;
509  Relation pg_index;
510  ListCell *index;
511 
512  /* Disallow applying to a partitioned table */
513  if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
514  ereport(ERROR,
515  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
516  errmsg("cannot mark index clustered in partitioned table")));
517 
518  /*
519  * If the index is already marked clustered, no need to do anything.
520  */
521  if (OidIsValid(indexOid))
522  {
523  indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexOid));
524  if (!HeapTupleIsValid(indexTuple))
525  elog(ERROR, "cache lookup failed for index %u", indexOid);
526  indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
527 
528  if (indexForm->indisclustered)
529  {
530  ReleaseSysCache(indexTuple);
531  return;
532  }
533 
534  ReleaseSysCache(indexTuple);
535  }
536 
537  /*
538  * Check each index of the relation and set/clear the bit as needed.
539  */
540  pg_index = table_open(IndexRelationId, RowExclusiveLock);
541 
542  foreach(index, RelationGetIndexList(rel))
543  {
544  Oid thisIndexOid = lfirst_oid(index);
545 
546  indexTuple = SearchSysCacheCopy1(INDEXRELID,
547  ObjectIdGetDatum(thisIndexOid));
548  if (!HeapTupleIsValid(indexTuple))
549  elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
550  indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
551 
552  /*
553  * Unset the bit if set. We know it's wrong because we checked this
554  * earlier.
555  */
556  if (indexForm->indisclustered)
557  {
558  indexForm->indisclustered = false;
559  CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
560  }
561  else if (thisIndexOid == indexOid)
562  {
563  /* this was checked earlier, but let's be real sure */
564  if (!indexForm->indisvalid)
565  elog(ERROR, "cannot cluster on invalid index %u", indexOid);
566  indexForm->indisclustered = true;
567  CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
568  }
569 
570  InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
571  InvalidOid, is_internal);
572 
573  heap_freetuple(indexTuple);
574  }
575 
576  table_close(pg_index, RowExclusiveLock);
577 }
578 
579 /*
580  * rebuild_relation: rebuild an existing relation in index or physical order
581  *
582  * OldHeap: table to rebuild --- must be opened and exclusive-locked!
583  * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
584  *
585  * NB: this routine closes OldHeap at the right time; caller should not.
586  */
 /*
  * Sequence: mark the index clustered (if any), remember what is needed from
  * OldHeap, close it, create the transient heap, copy the data into it, then
  * let finish_heap_swap() swap the files and finish up.
  */
587 static void
588 rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
589 {
590  Oid tableOid = RelationGetRelid(OldHeap);
591  Oid tableSpace = OldHeap->rd_rel->reltablespace;
592  Oid OIDNewHeap;
593  char relpersistence;
594  bool is_system_catalog;
595  bool swap_toast_by_content;
596  TransactionId frozenXid;
597  MultiXactId cutoffMulti;
598 
599  /* Mark the correct index as clustered */
600  if (OidIsValid(indexOid))
601  mark_index_clustered(OldHeap, indexOid, true);
602 
603  /* Remember info about rel before closing OldHeap */
604  relpersistence = OldHeap->rd_rel->relpersistence;
605  is_system_catalog = IsSystemRelation(OldHeap);
606 
607  /* Close relcache entry, but keep lock until transaction commit */
608  table_close(OldHeap, NoLock);
609 
610  /* Create the transient table that will receive the re-ordered data */
611  OIDNewHeap = make_new_heap(tableOid, tableSpace,
612  relpersistence,
 /*
  * NOTE(review): embedded line 613 is missing, so the call above is left
  * unterminated in this listing.  make_new_heap() takes a fourth
  * "lockmode" argument, which is presumably what was on that line.
  */
614 
615  /* Copy the heap data into the new table in the desired order */
 /* The three output arguments are filled in here and consumed by finish_heap_swap() below. */
616  copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
617  &swap_toast_by_content, &frozenXid, &cutoffMulti);
618 
619  /*
620  * Swap the physical files of the target and transient tables, then
621  * rebuild the target's indexes and throw away the transient table.
622  */
623  finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
624  swap_toast_by_content, false, true,
625  frozenXid, cutoffMulti,
626  relpersistence);
627 }
628 
629 
630 /*
631  * Create the transient table that will be filled with new data during
632  * CLUSTER, ALTER TABLE, and similar operations. The transient table
633  * duplicates the logical structure of the OldHeap, but is placed in
634  * NewTableSpace which might be different from OldHeap's. Also, it's built
635  * with the specified persistence, which might differ from the original's.
636  *
637  * After this, the caller should load the new heap with transferred/modified
638  * data, then call finish_heap_swap to complete the operation.
639  */
 /*
  * Returns the OID of the newly created transient heap.  OIDOldHeap is
  * opened with "lockmode" and closed with NoLock before returning.
  */
640 Oid
641 make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence,
642  LOCKMODE lockmode)
643 {
644  TupleDesc OldHeapDesc;
645  char NewHeapName[NAMEDATALEN];
646  Oid OIDNewHeap;
647  Oid toastid;
648  Relation OldHeap;
649  HeapTuple tuple;
650  Datum reloptions;
651  bool isNull;
652  Oid namespaceid;
653 
654  OldHeap = table_open(OIDOldHeap, lockmode);
655  OldHeapDesc = RelationGetDescr(OldHeap);
656 
657  /*
658  * Note that the NewHeap will not receive any of the defaults or
659  * constraints associated with the OldHeap; we don't need 'em, and there's
660  * no reason to spend cycles inserting them into the catalogs only to
661  * delete them.
662  */
663 
664  /*
665  * But we do want to use reloptions of the old heap for new heap.
666  */
 /*
  * The syscache tuple stays pinned until after heap_create_with_catalog()
  * below, since "reloptions" is fetched from it and passed to that call.
  */
667  tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(OIDOldHeap));
668  if (!HeapTupleIsValid(tuple))
669  elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
670  reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
671  &isNull);
672  if (isNull)
673  reloptions = (Datum) 0;
674 
 /* A temp-persistence heap goes into pg_temp; anything else into the old heap's namespace. */
675  if (relpersistence == RELPERSISTENCE_TEMP)
676  namespaceid = LookupCreationNamespace("pg_temp");
677  else
678  namespaceid = RelationGetNamespace(OldHeap);
679 
680  /*
681  * Create the new heap, using a temporary name in the same namespace as
682  * the existing table. NOTE: there is some risk of collision with user
683  * relnames. Working around this seems more trouble than it's worth; in
684  * particular, we can't create the new heap in a different namespace from
685  * the old, or we will have problems with the TEMP status of temp tables.
686  *
687  * Note: the new heap is not a shared relation, even if we are rebuilding
688  * a shared rel. However, we do make the new heap mapped if the source is
689  * mapped. This simplifies swap_relation_files, and is absolutely
690  * necessary for rebuilding pg_class, for reasons explained there.
691  */
692  snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
693 
694  OIDNewHeap = heap_create_with_catalog(NewHeapName,
695  namespaceid,
696  NewTableSpace,
697  InvalidOid,
698  InvalidOid,
699  InvalidOid,
700  OldHeap->rd_rel->relowner,
701  OldHeap->rd_rel->relam,
702  OldHeapDesc,
703  NIL,
704  RELKIND_RELATION,
705  relpersistence,
706  false,
707  RelationIsMapped(OldHeap),
 /* NOTE(review): embedded line 708 (one argument) is missing from this listing. */
709  reloptions,
710  false,
711  true,
712  true,
713  OIDOldHeap,
714  NULL);
715  Assert(OIDNewHeap != InvalidOid);
716 
717  ReleaseSysCache(tuple);
718 
719  /*
720  * Advance command counter so that the newly-created relation's catalog
721  * tuples will be visible to table_open.
722  */
 /* NOTE(review): embedded line 723 (the statement this comment describes) is missing. */
724 
725  /*
726  * If necessary, create a TOAST table for the new relation.
727  *
728  * If the relation doesn't have a TOAST table already, we can't need one
729  * for the new relation. The other way around is possible though: if some
730  * wide columns have been dropped, NewHeapCreateToastTable can decide that
731  * no TOAST table is needed for the new table.
732  *
733  * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
734  * that the TOAST table will be visible for insertion.
735  */
736  toastid = OldHeap->rd_rel->reltoastrelid;
737  if (OidIsValid(toastid))
738  {
739  /* keep the existing toast table's reloptions, if any */
740  tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
741  if (!HeapTupleIsValid(tuple))
742  elog(ERROR, "cache lookup failed for relation %u", toastid);
743  reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
744  &isNull);
745  if (isNull)
746  reloptions = (Datum) 0;
747 
748  NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode);
749 
750  ReleaseSysCache(tuple);
751  }
752 
 /* Drop the relcache reference only; the lock taken at table_open() is not released here. */
753  table_close(OldHeap, NoLock);
754 
755  return OIDNewHeap;
756 }
757 
758 /*
759  * Do the physical copying of table data.
760  *
761  * There are three output parameters:
762  * *pSwapToastByContent is set true if toast tables must be swapped by content.
763  * *pFreezeXid receives the TransactionId used as freeze cutoff point.
764  * *pCutoffMulti receives the MultiXactId used as a cutoff point.
765  */
 /*
  * NOTE(review): several lines of this function are missing from the listing
  * (see the individual notes below), including the declarations of
  * oldTupDesc, newTupDesc, OldestXmin and MultiXactCutoff, all of which are
  * used in the body.
  */
766 static void
767 copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
768  bool *pSwapToastByContent, TransactionId *pFreezeXid,
769  MultiXactId *pCutoffMulti)
770 {
771  Relation NewHeap,
772  OldHeap,
773  OldIndex;
774  Relation relRelation;
775  HeapTuple reltup;
776  Form_pg_class relform;
 /* NOTE(review): embedded lines 777-779 are missing. */
780  TransactionId FreezeXid;
 /* NOTE(review): embedded line 781 is missing. */
782  bool use_sort;
783  double num_tuples = 0,
784  tups_vacuumed = 0,
785  tups_recently_dead = 0;
786  BlockNumber num_pages;
 /* Progress messages are emitted at INFO when verbose, otherwise at DEBUG2. */
787  int elevel = verbose ? INFO : DEBUG2;
788  PGRUsage ru0;
789 
790  pg_rusage_init(&ru0);
791 
792  /*
793  * Open the relations we need.
794  */
795  NewHeap = table_open(OIDNewHeap, AccessExclusiveLock);
796  OldHeap = table_open(OIDOldHeap, AccessExclusiveLock);
797  if (OidIsValid(OIDOldIndex))
798  OldIndex = index_open(OIDOldIndex, AccessExclusiveLock);
799  else
800  OldIndex = NULL;
801 
802  /*
803  * Their tuple descriptors should be exactly alike, but here we only need
804  * assume that they have the same number of columns.
805  */
806  oldTupDesc = RelationGetDescr(OldHeap);
807  newTupDesc = RelationGetDescr(NewHeap);
808  Assert(newTupDesc->natts == oldTupDesc->natts);
809 
810  /*
811  * If the OldHeap has a toast table, get lock on the toast table to keep
812  * it from being vacuumed. This is needed because autovacuum processes
813  * toast tables independently of their main tables, with no lock on the
814  * latter. If an autovacuum were to start on the toast table after we
815  * compute our OldestXmin below, it would use a later OldestXmin, and then
816  * possibly remove as DEAD toast tuples belonging to main tuples we think
817  * are only RECENTLY_DEAD. Then we'd fail while trying to copy those
818  * tuples.
819  *
820  * We don't need to open the toast relation here, just lock it. The lock
821  * will be held till end of transaction.
822  */
823  if (OldHeap->rd_rel->reltoastrelid)
824  LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
825 
826  /*
827  * If both tables have TOAST tables, perform toast swap by content. It is
828  * possible that the old table has a toast table but the new one doesn't,
829  * if toastable columns have been dropped. In that case we have to do
830  * swap by links. This is okay because swap by content is only essential
831  * for system catalogs, and we don't support schema changes for them.
832  */
833  if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
834  {
835  *pSwapToastByContent = true;
836 
837  /*
838  * When doing swap by content, any toast pointers written into NewHeap
839  * must use the old toast table's OID, because that's where the toast
840  * data will eventually be found. Set this up by setting rd_toastoid.
841  * This also tells toast_save_datum() to preserve the toast value
842  * OIDs, which we want so as not to invalidate toast pointers in
843  * system catalog caches, and to avoid making multiple copies of a
844  * single toast value.
845  *
846  * Note that we must hold NewHeap open until we are done writing data,
847  * since the relcache will not guarantee to remember this setting once
848  * the relation is closed. Also, this technique depends on the fact
849  * that no one will try to read from the NewHeap until after we've
850  * finished writing it and swapping the rels --- otherwise they could
851  * follow the toast pointers to the wrong place. (It would actually
852  * work for values copied over from the old toast table, but not for
853  * any values that we toast which were previously not toasted.)
854  */
855  NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
856  }
857  else
858  *pSwapToastByContent = false;
859 
860  /*
861  * Compute xids used to freeze and weed out dead tuples and multixacts.
862  * Since we're going to rewrite the whole table anyway, there's no reason
863  * not to be aggressive about this.
864  */
865  vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0,
866  &OldestXmin, &FreezeXid, NULL, &MultiXactCutoff,
867  NULL);
868 
869  /*
870  * FreezeXid will become the table's new relfrozenxid, and that mustn't go
871  * backwards, so take the max.
872  */
873  if (TransactionIdIsValid(OldHeap->rd_rel->relfrozenxid) &&
874  TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid))
875  FreezeXid = OldHeap->rd_rel->relfrozenxid;
876 
877  /*
878  * MultiXactCutoff, similarly, shouldn't go backwards either.
879  */
880  if (MultiXactIdIsValid(OldHeap->rd_rel->relminmxid) &&
881  MultiXactIdPrecedes(MultiXactCutoff, OldHeap->rd_rel->relminmxid))
882  MultiXactCutoff = OldHeap->rd_rel->relminmxid;
883 
884  /*
885  * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
886  * the OldHeap. We know how to use a sort to duplicate the ordering of a
887  * btree index, and will use seqscan-and-sort for that case if the planner
888  * tells us it's cheaper. Otherwise, always indexscan if an index is
889  * provided, else plain seqscan.
890  */
891  if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
892  use_sort = plan_cluster_use_sort(OIDOldHeap, OIDOldIndex);
893  else
894  use_sort = false;
895 
896  /* Log what we're doing */
 /*
  * NOTE(review): each of the three messages below has one more "%s" in its
  * format string than visible arguments; the first argument of each
  * (embedded lines 900, 906 and 911) is missing from this listing.
  */
897  if (OldIndex != NULL && !use_sort)
898  ereport(elevel,
899  (errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
901  RelationGetRelationName(OldHeap),
902  RelationGetRelationName(OldIndex))));
903  else if (use_sort)
904  ereport(elevel,
905  (errmsg("clustering \"%s.%s\" using sequential scan and sort",
907  RelationGetRelationName(OldHeap))));
908  else
909  ereport(elevel,
910  (errmsg("vacuuming \"%s.%s\"",
912  RelationGetRelationName(OldHeap))));
913 
914  /*
915  * Hand off the actual copying to AM specific function, the generic code
916  * cannot know how to deal with visibility across AMs. Note that this
917  * routine is allowed to set FreezeXid / MultiXactCutoff to different
918  * values (e.g. because the AM doesn't use freezing).
919  */
920  table_relation_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
921  OldestXmin, &FreezeXid, &MultiXactCutoff,
922  &num_tuples, &tups_vacuumed,
923  &tups_recently_dead);
924 
925  /* return selected values to caller, get set as relfrozenxid/minmxid */
926  *pFreezeXid = FreezeXid;
927  *pCutoffMulti = MultiXactCutoff;
928 
929  /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
930  NewHeap->rd_toastoid = InvalidOid;
931 
 /* Size of the new heap; stored into pg_class.relpages further down. */
932  num_pages = RelationGetNumberOfBlocks(NewHeap);
933 
934  /* Log what we did */
 /* Note that the page count reported in this message is that of OldHeap, not num_pages. */
935  ereport(elevel,
936  (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
937  RelationGetRelationName(OldHeap),
938  tups_vacuumed, num_tuples,
939  RelationGetNumberOfBlocks(OldHeap)),
940  errdetail("%.0f dead row versions cannot be removed yet.\n"
941  "%s.",
942  tups_recently_dead,
943  pg_rusage_show(&ru0))));
944 
945  if (OldIndex != NULL)
946  index_close(OldIndex, NoLock);
947  table_close(OldHeap, NoLock);
948  table_close(NewHeap, NoLock);
949 
950  /* Update pg_class to reflect the correct values of pages and tuples. */
951  relRelation = table_open(RelationRelationId, RowExclusiveLock);
952 
 /* Modify a private copy of the new heap's pg_class row. */
953  reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDNewHeap));
954  if (!HeapTupleIsValid(reltup))
955  elog(ERROR, "cache lookup failed for relation %u", OIDNewHeap);
956  relform = (Form_pg_class) GETSTRUCT(reltup);
957 
958  relform->relpages = num_pages;
959  relform->reltuples = num_tuples;
960 
961  /* Don't update the stats for pg_class. See swap_relation_files. */
962  if (OIDOldHeap != RelationRelationId)
963  CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
964  else
 /*
  * NOTE(review): embedded line 965 (the body of this "else") is missing
  * from the listing; the statement after the comment below is not it.
  */
966 
967  /* Clean up. */
968  heap_freetuple(reltup);
969  table_close(relRelation, RowExclusiveLock);
970 
971  /* Make the update visible */
 /* NOTE(review): embedded line 972 (the statement this comment describes) is missing. */
973 }
974 
975 /*
976  * Swap the physical files of two given relations.
977  *
978  * We swap the physical identity (reltablespace, relfilenode) while keeping the
979  * same logical identities of the two relations. relpersistence is also
980  * swapped, which is critical since it determines where buffers live for each
981  * relation.
982  *
983  * We can swap associated TOAST data in either of two ways: recursively swap
984  * the physical content of the toast tables (and their indexes), or swap the
985  * TOAST links in the given relations' pg_class entries. The former is needed
986  * to manage rewrites of shared catalogs (where we cannot change the pg_class
987  * links) while the latter is the only way to handle cases in which a toast
988  * table is added or removed altogether.
989  *
990  * Additionally, the first relation is marked with relfrozenxid set to
991  * frozenXid. It seems a bit ugly to have this here, but the caller would
992  * have to do it anyway, so having it here saves a heap_update. Note: in
993  * the swap-toast-links case, we assume we don't need to change the toast
994  * table's relfrozenxid: the new version of the toast table should already
995  * have relfrozenxid set to RecentXmin, which is good enough.
996  *
997  * Lastly, if r2 and its toast table and toast index (if any) are mapped,
998  * their OIDs are emitted into mapped_tables[]. This is hacky but beats
999  * having to look the information up again later in finish_heap_swap.
1000  */
1001 static void
1002 swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
1003  bool swap_toast_by_content,
1004  bool is_internal,
1005  TransactionId frozenXid,
1006  MultiXactId cutoffMulti,
1007  Oid *mapped_tables)
1008 {
1009  Relation relRelation;
1010  HeapTuple reltup1,
1011  reltup2;
1012  Form_pg_class relform1,
1013  relform2;
1014  Oid relfilenode1,
1015  relfilenode2;
1016  Oid swaptemp;
1017  char swptmpchr;
1018 
1019  /* We need writable copies of both pg_class tuples. */
1020  relRelation = table_open(RelationRelationId, RowExclusiveLock);
1021 
1023  if (!HeapTupleIsValid(reltup1))
1024  elog(ERROR, "cache lookup failed for relation %u", r1);
1025  relform1 = (Form_pg_class) GETSTRUCT(reltup1);
1026 
1028  if (!HeapTupleIsValid(reltup2))
1029  elog(ERROR, "cache lookup failed for relation %u", r2);
1030  relform2 = (Form_pg_class) GETSTRUCT(reltup2);
1031 
1032  relfilenode1 = relform1->relfilenode;
1033  relfilenode2 = relform2->relfilenode;
1034 
1035  if (OidIsValid(relfilenode1) && OidIsValid(relfilenode2))
1036  {
1037  /*
1038  * Normal non-mapped relations: swap relfilenodes, reltablespaces,
1039  * relpersistence
1040  */
1041  Assert(!target_is_pg_class);
1042 
1043  swaptemp = relform1->relfilenode;
1044  relform1->relfilenode = relform2->relfilenode;
1045  relform2->relfilenode = swaptemp;
1046 
1047  swaptemp = relform1->reltablespace;
1048  relform1->reltablespace = relform2->reltablespace;
1049  relform2->reltablespace = swaptemp;
1050 
1051  swptmpchr = relform1->relpersistence;
1052  relform1->relpersistence = relform2->relpersistence;
1053  relform2->relpersistence = swptmpchr;
1054 
1055  /* Also swap toast links, if we're swapping by links */
1056  if (!swap_toast_by_content)
1057  {
1058  swaptemp = relform1->reltoastrelid;
1059  relform1->reltoastrelid = relform2->reltoastrelid;
1060  relform2->reltoastrelid = swaptemp;
1061  }
1062  }
1063  else
1064  {
1065  /*
1066  * Mapped-relation case. Here we have to swap the relation mappings
1067  * instead of modifying the pg_class columns. Both must be mapped.
1068  */
1069  if (OidIsValid(relfilenode1) || OidIsValid(relfilenode2))
1070  elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
1071  NameStr(relform1->relname));
1072 
1073  /*
1074  * We can't change the tablespace nor persistence of a mapped rel, and
1075  * we can't handle toast link swapping for one either, because we must
1076  * not apply any critical changes to its pg_class row. These cases
1077  * should be prevented by upstream permissions tests, so these checks
1078  * are non-user-facing emergency backstop.
1079  */
1080  if (relform1->reltablespace != relform2->reltablespace)
1081  elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
1082  NameStr(relform1->relname));
1083  if (relform1->relpersistence != relform2->relpersistence)
1084  elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
1085  NameStr(relform1->relname));
1086  if (!swap_toast_by_content &&
1087  (relform1->reltoastrelid || relform2->reltoastrelid))
1088  elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
1089  NameStr(relform1->relname));
1090 
1091  /*
1092  * Fetch the mappings --- shouldn't fail, but be paranoid
1093  */
1094  relfilenode1 = RelationMapOidToFilenode(r1, relform1->relisshared);
1095  if (!OidIsValid(relfilenode1))
1096  elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1097  NameStr(relform1->relname), r1);
1098  relfilenode2 = RelationMapOidToFilenode(r2, relform2->relisshared);
1099  if (!OidIsValid(relfilenode2))
1100  elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1101  NameStr(relform2->relname), r2);
1102 
1103  /*
1104  * Send replacement mappings to relmapper. Note these won't actually
1105  * take effect until CommandCounterIncrement.
1106  */
1107  RelationMapUpdateMap(r1, relfilenode2, relform1->relisshared, false);
1108  RelationMapUpdateMap(r2, relfilenode1, relform2->relisshared, false);
1109 
1110  /* Pass OIDs of mapped r2 tables back to caller */
1111  *mapped_tables++ = r2;
1112  }
1113 
1114  /*
1115  * In the case of a shared catalog, these next few steps will only affect
1116  * our own database's pg_class row; but that's okay, because they are all
1117  * noncritical updates. That's also an important fact for the case of a
1118  * mapped catalog, because it's possible that we'll commit the map change
1119  * and then fail to commit the pg_class update.
1120  */
1121 
1122  /* set rel1's frozen Xid and minimum MultiXid */
1123  if (relform1->relkind != RELKIND_INDEX)
1124  {
1125  Assert(!TransactionIdIsValid(frozenXid) ||
1126  TransactionIdIsNormal(frozenXid));
1127  relform1->relfrozenxid = frozenXid;
1128  relform1->relminmxid = cutoffMulti;
1129  }
1130 
1131  /* swap size statistics too, since new rel has freshly-updated stats */
1132  {
1133  int32 swap_pages;
1134  float4 swap_tuples;
1135  int32 swap_allvisible;
1136 
1137  swap_pages = relform1->relpages;
1138  relform1->relpages = relform2->relpages;
1139  relform2->relpages = swap_pages;
1140 
1141  swap_tuples = relform1->reltuples;
1142  relform1->reltuples = relform2->reltuples;
1143  relform2->reltuples = swap_tuples;
1144 
1145  swap_allvisible = relform1->relallvisible;
1146  relform1->relallvisible = relform2->relallvisible;
1147  relform2->relallvisible = swap_allvisible;
1148  }
1149 
1150  /*
1151  * Update the tuples in pg_class --- unless the target relation of the
1152  * swap is pg_class itself. In that case, there is zero point in making
1153  * changes because we'd be updating the old data that we're about to throw
1154  * away. Because the real work being done here for a mapped relation is
1155  * just to change the relation map settings, it's all right to not update
1156  * the pg_class rows in this case. The most important changes will instead
1157  * performed later, in finish_heap_swap() itself.
1158  */
1159  if (!target_is_pg_class)
1160  {
1161  CatalogIndexState indstate;
1162 
1163  indstate = CatalogOpenIndexes(relRelation);
1164  CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1,
1165  indstate);
1166  CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2,
1167  indstate);
1168  CatalogCloseIndexes(indstate);
1169  }
1170  else
1171  {
1172  /* no update ... but we do still need relcache inval */
1175  }
1176 
1177  /*
1178  * Post alter hook for modified relations. The change to r2 is always
1179  * internal, but r1 depends on the invocation context.
1180  */
1181  InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0,
1182  InvalidOid, is_internal);
1183  InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0,
1184  InvalidOid, true);
1185 
1186  /*
1187  * If we have toast tables associated with the relations being swapped,
1188  * deal with them too.
1189  */
1190  if (relform1->reltoastrelid || relform2->reltoastrelid)
1191  {
1192  if (swap_toast_by_content)
1193  {
1194  if (relform1->reltoastrelid && relform2->reltoastrelid)
1195  {
1196  /* Recursively swap the contents of the toast tables */
1197  swap_relation_files(relform1->reltoastrelid,
1198  relform2->reltoastrelid,
1199  target_is_pg_class,
1200  swap_toast_by_content,
1201  is_internal,
1202  frozenXid,
1203  cutoffMulti,
1204  mapped_tables);
1205  }
1206  else
1207  {
1208  /* caller messed up */
1209  elog(ERROR, "cannot swap toast files by content when there's only one");
1210  }
1211  }
1212  else
1213  {
1214  /*
1215  * We swapped the ownership links, so we need to change dependency
1216  * data to match.
1217  *
1218  * NOTE: it is possible that only one table has a toast table.
1219  *
1220  * NOTE: at present, a TOAST table's only dependency is the one on
1221  * its owning table. If more are ever created, we'd need to use
1222  * something more selective than deleteDependencyRecordsFor() to
1223  * get rid of just the link we want.
1224  */
1225  ObjectAddress baseobject,
1226  toastobject;
1227  long count;
1228 
1229  /*
1230  * We disallow this case for system catalogs, to avoid the
1231  * possibility that the catalog we're rebuilding is one of the
1232  * ones the dependency changes would change. It's too late to be
1233  * making any data changes to the target catalog.
1234  */
1235  if (IsSystemClass(r1, relform1))
1236  elog(ERROR, "cannot swap toast files by links for system catalogs");
1237 
1238  /* Delete old dependencies */
1239  if (relform1->reltoastrelid)
1240  {
1241  count = deleteDependencyRecordsFor(RelationRelationId,
1242  relform1->reltoastrelid,
1243  false);
1244  if (count != 1)
1245  elog(ERROR, "expected one dependency record for TOAST table, found %ld",
1246  count);
1247  }
1248  if (relform2->reltoastrelid)
1249  {
1250  count = deleteDependencyRecordsFor(RelationRelationId,
1251  relform2->reltoastrelid,
1252  false);
1253  if (count != 1)
1254  elog(ERROR, "expected one dependency record for TOAST table, found %ld",
1255  count);
1256  }
1257 
1258  /* Register new dependencies */
1259  baseobject.classId = RelationRelationId;
1260  baseobject.objectSubId = 0;
1261  toastobject.classId = RelationRelationId;
1262  toastobject.objectSubId = 0;
1263 
1264  if (relform1->reltoastrelid)
1265  {
1266  baseobject.objectId = r1;
1267  toastobject.objectId = relform1->reltoastrelid;
1268  recordDependencyOn(&toastobject, &baseobject,
1270  }
1271 
1272  if (relform2->reltoastrelid)
1273  {
1274  baseobject.objectId = r2;
1275  toastobject.objectId = relform2->reltoastrelid;
1276  recordDependencyOn(&toastobject, &baseobject,
1278  }
1279  }
1280  }
1281 
1282  /*
1283  * If we're swapping two toast tables by content, do the same for their
1284  * valid index. The swap can actually be safely done only if the relations
1285  * have indexes.
1286  */
1287  if (swap_toast_by_content &&
1288  relform1->relkind == RELKIND_TOASTVALUE &&
1289  relform2->relkind == RELKIND_TOASTVALUE)
1290  {
1291  Oid toastIndex1,
1292  toastIndex2;
1293 
1294  /* Get valid index for each relation */
1295  toastIndex1 = toast_get_valid_index(r1,
1297  toastIndex2 = toast_get_valid_index(r2,
1299 
1300  swap_relation_files(toastIndex1,
1301  toastIndex2,
1302  target_is_pg_class,
1303  swap_toast_by_content,
1304  is_internal,
1307  mapped_tables);
1308  }
1309 
1310  /* Clean up. */
1311  heap_freetuple(reltup1);
1312  heap_freetuple(reltup2);
1313 
1314  table_close(relRelation, RowExclusiveLock);
1315 
1316  /*
1317  * Close both relcache entries' smgr links. We need this kluge because
1318  * both links will be invalidated during upcoming CommandCounterIncrement.
1319  * Whichever of the rels is the second to be cleared will have a dangling
1320  * reference to the other's smgr entry. Rather than trying to avoid this
1321  * by ordering operations just so, it's easiest to close the links first.
1322  * (Fortunately, since one of the entries is local in our transaction,
1323  * it's sufficient to clear out our own relcache this way; the problem
1324  * cannot arise for other backends when they see our update on the
1325  * non-transient relation.)
1326  *
1327  * Caution: the placement of this step interacts with the decision to
1328  * handle toast rels by recursion. When we are trying to rebuild pg_class
1329  * itself, the smgr close on pg_class must happen after all accesses in
1330  * this function.
1331  */
1334 }
1335 
1336 /*
1337  * Remove the transient table that was built by make_new_heap, and finish
1338  * cleaning up (including rebuilding all indexes on the old heap).
1339  */
1340 void
1341 finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
1342  bool is_system_catalog,
1343  bool swap_toast_by_content,
1344  bool check_constraints,
1345  bool is_internal,
1346  TransactionId frozenXid,
1347  MultiXactId cutoffMulti,
1348  char newrelpersistence)
1349 {
1350  ObjectAddress object;
1351  Oid mapped_tables[4];
1352  int reindex_flags;
1353  int i;
1354 
1355  /* Report that we are now swapping relation files */
1358 
1359  /* Zero out possible results from swapped_relation_files */
1360  memset(mapped_tables, 0, sizeof(mapped_tables));
1361 
1362  /*
1363  * Swap the contents of the heap relations (including any toast tables).
1364  * Also set old heap's relfrozenxid to frozenXid.
1365  */
1366  swap_relation_files(OIDOldHeap, OIDNewHeap,
1367  (OIDOldHeap == RelationRelationId),
1368  swap_toast_by_content, is_internal,
1369  frozenXid, cutoffMulti, mapped_tables);
1370 
1371  /*
1372  * If it's a system catalog, queue a sinval message to flush all catcaches
1373  * on the catalog when we reach CommandCounterIncrement.
1374  */
1375  if (is_system_catalog)
1376  CacheInvalidateCatalog(OIDOldHeap);
1377 
1378  /*
1379  * Rebuild each index on the relation (but not the toast table, which is
1380  * all-new at this point). It is important to do this before the DROP
1381  * step because if we are processing a system catalog that will be used
1382  * during DROP, we want to have its indexes available. There is no
1383  * advantage to the other order anyway because this is all transactional,
1384  * so no chance to reclaim disk space before commit. We do not need a
1385  * final CommandCounterIncrement() because reindex_relation does it.
1386  *
1387  * Note: because index_build is called via reindex_relation, it will never
1388  * set indcheckxmin true for the indexes. This is OK even though in some
1389  * sense we are building new indexes rather than rebuilding existing ones,
1390  * because the new heap won't contain any HOT chains at all, let alone
1391  * broken ones, so it can't be necessary to set indcheckxmin.
1392  */
1393  reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
1394  if (check_constraints)
1395  reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;
1396 
1397  /*
1398  * Ensure that the indexes have the same persistence as the parent
1399  * relation.
1400  */
1401  if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
1402  reindex_flags |= REINDEX_REL_FORCE_INDEXES_UNLOGGED;
1403  else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
1404  reindex_flags |= REINDEX_REL_FORCE_INDEXES_PERMANENT;
1405 
1406  /* Report that we are now reindexing relations */
1409 
1410  reindex_relation(OIDOldHeap, reindex_flags, 0);
1411 
1412  /* Report that we are now doing clean up */
1415 
1416  /*
1417  * If the relation being rebuild is pg_class, swap_relation_files()
1418  * couldn't update pg_class's own pg_class entry (check comments in
1419  * swap_relation_files()), thus relfrozenxid was not updated. That's
1420  * annoying because a potential reason for doing a VACUUM FULL is a
1421  * imminent or actual anti-wraparound shutdown. So, now that we can
1422  * access the new relation using its indices, update relfrozenxid.
1423  * pg_class doesn't have a toast relation, so we don't need to update the
1424  * corresponding toast relation. Not that there's little point moving all
1425  * relfrozenxid updates here since swap_relation_files() needs to write to
1426  * pg_class for non-mapped relations anyway.
1427  */
1428  if (OIDOldHeap == RelationRelationId)
1429  {
1430  Relation relRelation;
1431  HeapTuple reltup;
1432  Form_pg_class relform;
1433 
1434  relRelation = table_open(RelationRelationId, RowExclusiveLock);
1435 
1436  reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDOldHeap));
1437  if (!HeapTupleIsValid(reltup))
1438  elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
1439  relform = (Form_pg_class) GETSTRUCT(reltup);
1440 
1441  relform->relfrozenxid = frozenXid;
1442  relform->relminmxid = cutoffMulti;
1443 
1444  CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
1445 
1446  table_close(relRelation, RowExclusiveLock);
1447  }
1448 
1449  /* Destroy new heap with old filenode */
1450  object.classId = RelationRelationId;
1451  object.objectId = OIDNewHeap;
1452  object.objectSubId = 0;
1453 
1454  /*
1455  * The new relation is local to our transaction and we know nothing
1456  * depends on it, so DROP_RESTRICT should be OK.
1457  */
1459 
1460  /* performDeletion does CommandCounterIncrement at end */
1461 
1462  /*
1463  * Now we must remove any relation mapping entries that we set up for the
1464  * transient table, as well as its toast table and toast index if any. If
1465  * we fail to do this before commit, the relmapper will complain about new
1466  * permanent map entries being added post-bootstrap.
1467  */
1468  for (i = 0; OidIsValid(mapped_tables[i]); i++)
1469  RelationMapRemoveMapping(mapped_tables[i]);
1470 
1471  /*
1472  * At this point, everything is kosher except that, if we did toast swap
1473  * by links, the toast table's name corresponds to the transient table.
1474  * The name is irrelevant to the backend because it's referenced by OID,
1475  * but users looking at the catalogs could be confused. Rename it to
1476  * prevent this problem.
1477  *
1478  * Note no lock required on the relation, because we already hold an
1479  * exclusive lock on it.
1480  */
1481  if (!swap_toast_by_content)
1482  {
1483  Relation newrel;
1484 
1485  newrel = table_open(OIDOldHeap, NoLock);
1486  if (OidIsValid(newrel->rd_rel->reltoastrelid))
1487  {
1488  Oid toastidx;
1489  char NewToastName[NAMEDATALEN];
1490 
1491  /* Get the associated valid index to be renamed */
1492  toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
1493  AccessShareLock);
1494 
1495  /* rename the toast table ... */
1496  snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
1497  OIDOldHeap);
1498  RenameRelationInternal(newrel->rd_rel->reltoastrelid,
1499  NewToastName, true, false);
1500 
1501  /* ... and its valid index too. */
1502  snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
1503  OIDOldHeap);
1504 
1505  RenameRelationInternal(toastidx,
1506  NewToastName, true, true);
1507  }
1508  relation_close(newrel, NoLock);
1509  }
1510 
1511  /* if it's not a catalog table, clear any missing attribute settings */
1512  if (!is_system_catalog)
1513  {
1514  Relation newrel;
1515 
1516  newrel = table_open(OIDOldHeap, NoLock);
1517  RelationClearMissing(newrel);
1518  relation_close(newrel, NoLock);
1519  }
1520 }
1521 
1522 
1523 /*
1524  * Get a list of tables that the current user owns and
1525  * have indisclustered set. Return the list in a List * of RelToCluster
1526  * with the tableOid and the indexOid on which the table is already
1527  * clustered.
1528  */
1529 static List *
1531 {
1532  Relation indRelation;
1533  TableScanDesc scan;
1534  ScanKeyData entry;
1535  HeapTuple indexTuple;
1537  MemoryContext old_context;
1538  RelToCluster *rvtc;
1539  List *rvs = NIL;
1540 
1541  /*
1542  * Get all indexes that have indisclustered set and are owned by
1543  * appropriate user. System relations or nailed-in relations cannot ever
1544  * have indisclustered set, because CLUSTER will refuse to set it when
1545  * called with one of them as argument.
1546  */
1547  indRelation = table_open(IndexRelationId, AccessShareLock);
1548  ScanKeyInit(&entry,
1549  Anum_pg_index_indisclustered,
1550  BTEqualStrategyNumber, F_BOOLEQ,
1551  BoolGetDatum(true));
1552  scan = table_beginscan_catalog(indRelation, 1, &entry);
1553  while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1554  {
1555  index = (Form_pg_index) GETSTRUCT(indexTuple);
1556 
1557  if (!pg_class_ownercheck(index->indrelid, GetUserId()))
1558  continue;
1559 
1560  /*
1561  * We have to build the list in a different memory context so it will
1562  * survive the cross-transaction processing
1563  */
1564  old_context = MemoryContextSwitchTo(cluster_context);
1565 
1566  rvtc = (RelToCluster *) palloc(sizeof(RelToCluster));
1567  rvtc->tableOid = index->indrelid;
1568  rvtc->indexOid = index->indexrelid;
1569  rvs = lappend(rvs, rvtc);
1570 
1571  MemoryContextSwitchTo(old_context);
1572  }
1573  table_endscan(scan);
1574 
1575  relation_close(indRelation, AccessShareLock);
1576 
1577  return rvs;
1578 }
#define RelationIsPopulated(relation)
Definition: rel.h:568
#define NIL
Definition: pg_list.h:65
struct IndexAmRoutine * rd_indam
Definition: rel.h:157
void RangeVarCallbackOwnsTable(const RangeVar *relation, Oid relId, Oid oldRelId, void *arg)
Definition: tablecmds.c:14776
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:211
#define AllocSetContextCreate
Definition: memutils.h:170
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:133
bool plan_cluster_use_sort(Oid tableOid, Oid indexOid)
Definition: planner.c:6126
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
bool IsSystemRelation(Relation relation)
Definition: catalog.c:70
void finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, bool is_system_catalog, bool swap_toast_by_content, bool check_constraints, bool is_internal, TransactionId frozenXid, MultiXactId cutoffMulti, char newrelpersistence)
Definition: cluster.c:1341
uint32 TransactionId
Definition: c.h:507
TableScanDesc table_beginscan_catalog(Relation relation, int nkeys, struct ScanKeyData *key)
Definition: tableam.c:98
#define RelationGetDescr(relation)
Definition: rel.h:445
int LOCKMODE
Definition: lockdefs.h:26
Oid GetUserId(void)
Definition: miscinit.c:380
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
Definition: pgstat.c:3199
Oid LookupCreationNamespace(const char *nspname)
Definition: namespace.c:2928
void pgstat_progress_update_param(int index, int64 val)
Definition: pgstat.c:3220
void CommitTransactionCommand(void)
Definition: xact.c:2895
long deleteDependencyRecordsFor(Oid classId, Oid objectId, bool skipExtensionDeps)
Definition: pg_depend.c:190
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define AccessShareLock
Definition: lockdefs.h:36
int errcode(int sqlerrcode)
Definition: elog.c:570
#define PROGRESS_CLUSTER_PHASE_SWAP_REL_FILES
Definition: progress.h:52
#define INFO
Definition: elog.h:33
bool heap_attisnull(HeapTuple tup, int attnum, TupleDesc tupleDesc)
Definition: heaptuple.c:359
void vacuum_set_xid_limits(Relation rel, int freeze_min_age, int freeze_table_age, int multixact_freeze_min_age, int multixact_freeze_table_age, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, MultiXactId *multiXactCutoff, MultiXactId *mxactFullScanLimit)
Definition: vacuum.c:880
uint32 BlockNumber
Definition: block.h:31
void PopActiveSnapshot(void)
Definition: snapmgr.c:814
void recordDependencyOn(const ObjectAddress *depender, const ObjectAddress *referenced, DependencyType behavior)
Definition: pg_depend.c:43
#define REINDEX_REL_SUPPRESS_INDEX_USE
Definition: index.h:137
Form_pg_class rd_rel
Definition: rel.h:83
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1338
unsigned int Oid
Definition: postgres_ext.h:31
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:306
#define OidIsValid(objectId)
Definition: c.h:638
#define InvokeObjectPostAlterHookArg(classId, objectId, subId, auxiliaryId, is_internal)
Definition: objectaccess.h:166
void cluster_rel(Oid tableOid, Oid indexOid, int options)
Definition: cluster.c:266
Relation try_relation_open(Oid relationId, LOCKMODE lockmode)
Definition: relation.c:89
Oid tableOid
Definition: cluster.c:66
signed int int32
Definition: c.h:346
bool IsSystemClass(Oid relid, Form_pg_class reltuple)
Definition: catalog.c:82
struct HeapTupleData * rd_indextuple
Definition: rel.h:145
MemoryContext PortalContext
Definition: mcxt.c:53
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
Definition: type.h:89
#define NAMEDATALEN
char * relname
Definition: primnodes.h:68
Form_pg_index rd_index
Definition: rel.h:143
#define SearchSysCacheExists1(cacheId, key1)
Definition: syscache.h:183
char * indexname
Definition: parsenodes.h:3174
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
#define ERROR
Definition: elog.h:43
static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
Definition: cluster.c:588
char relpersistence
Definition: pg_class.h:78
ItemPointerData t_self
Definition: htup.h:65
#define PROGRESS_CLUSTER_COMMAND_VACUUM_FULL
Definition: progress.h:58
Oid get_relname_relid(const char *relname, Oid relnamespace)
Definition: lsyscache.c:1687
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:192
#define DEBUG2
Definition: elog.h:24
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3094
#define NoLock
Definition: lockdefs.h:34
HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction)
Definition: heapam.c:1290
void PushActiveSnapshot(Snapshot snap)
Definition: snapmgr.c:735
Oid rd_toastoid
Definition: rel.h:201
#define RowExclusiveLock
Definition: lockdefs.h:38
int errdetail(const char *fmt,...)
Definition: elog.c:860
static MultiXactId MultiXactCutoff
Definition: vacuumlazy.c:147
void PreventInTransactionBlock(bool isTopLevel, const char *stmtType)
Definition: xact.c:3328
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
void performDeletion(const ObjectAddress *object, DropBehavior behavior, int flags)
Definition: dependency.c:315
#define InvalidTransactionId
Definition: transam.h:31
#define RelationGetRelationName(relation)
Definition: rel.h:453
static TransactionId OldestXmin
Definition: vacuumlazy.c:145
Oid RangeVarGetRelidExtended(const RangeVar *relation, LOCKMODE lockmode, uint32 flags, RangeVarGetRelidCallback callback, void *callback_arg)
Definition: namespace.c:228
void cluster(ClusterStmt *stmt, bool isTopLevel)
Definition: cluster.c:103
#define MultiXactIdIsValid(multi)
Definition: multixact.h:27
#define PROGRESS_CLUSTER_COMMAND_CLUSTER
Definition: progress.h:57
void RelationClearMissing(Relation rel)
Definition: heap.c:1997
void CheckTableNotInUse(Relation rel, const char *stmt)
Definition: tablecmds.c:3416
static List * get_tables_to_cluster(MemoryContext cluster_context)
Definition: cluster.c:1530
#define ereport(elevel, rest)
Definition: elog.h:141
#define PROGRESS_CLUSTER_COMMAND
Definition: progress.h:38
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
void TransferPredicateLocksToHeapRelation(Relation relation)
Definition: predicate.c:3097
List * lappend(List *list, void *datum)
Definition: list.c:322
static int verbose
Definition: pg_basebackup.c:90
void CatalogTupleUpdateWithInfo(Relation heapRel, ItemPointer otid, HeapTuple tup, CatalogIndexState indstate)
Definition: indexing.c:245
#define RelationIsMapped(relation)
Definition: rel.h:468
FormData_pg_index * Form_pg_index
Definition: pg_index.h:66
void mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
Definition: cluster.c:505
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:1124
float float4
Definition: c.h:490
static int elevel
Definition: vacuumlazy.c:143
void pgstat_progress_end_command(void)
Definition: pgstat.c:3271
Oid RelationMapOidToFilenode(Oid relationId, bool shared)
Definition: relmapper.c:159
uintptr_t Datum
Definition: postgres.h:367
void CommandCounterIncrement(void)
Definition: xact.c:1003
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1172
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:1385
#define InvalidMultiXactId
Definition: multixact.h:23
bool amclusterable
Definition: amapi.h:193
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:198
static void swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, bool swap_toast_by_content, bool is_internal, TransactionId frozenXid, MultiXactId cutoffMulti, Oid *mapped_tables)
Definition: cluster.c:1002
#define BoolGetDatum(X)
Definition: postgres.h:402
#define InvalidOid
Definition: postgres_ext.h:36
void RelationCloseSmgrByOid(Oid relationId)
Definition: relcache.c:2882
TransactionId MultiXactId
Definition: c.h:517
void CacheInvalidateCatalog(Oid catalogId)
Definition: inval.c:1246
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define REINDEX_REL_FORCE_INDEXES_UNLOGGED
Definition: index.h:139
void relation_close(Relation relation, LOCKMODE lockmode)
Definition: relation.c:206
#define Assert(condition)
Definition: c.h:732
#define lfirst(lc)
Definition: pg_list.h:190
void RelationMapRemoveMapping(Oid relationId)
Definition: relmapper.c:373
Oid heap_create_with_catalog(const char *relname, Oid relnamespace, Oid reltablespace, Oid relid, Oid reltypeid, Oid reloftypeid, Oid ownerid, Oid accessmtd, TupleDesc tupdesc, List *cooked_constraints, char relkind, char relpersistence, bool shared_relation, bool mapped_relation, OnCommitAction oncommit, Datum reloptions, bool use_user_acl, bool allow_system_table_mods, bool is_internal, Oid relrewrite, ObjectAddress *typaddress)
Definition: heap.c:1065
#define RELATION_IS_OTHER_TEMP(relation)
Definition: rel.h:549
bool pg_class_ownercheck(Oid class_oid, Oid roleid)
Definition: aclchk.c:4755
void StartTransactionCommand(void)
Definition: xact.c:2794
CatalogIndexState CatalogOpenIndexes(Relation heapRel)
Definition: indexing.c:42
void CatalogTupleUpdate(Relation heapRel, ItemPointer otid, HeapTuple tup)
Definition: indexing.c:224
#define REINDEX_REL_CHECK_CONSTRAINTS
Definition: index.h:138
#define PROGRESS_CLUSTER_PHASE_FINAL_CLEANUP
Definition: progress.h:54
static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, bool *pSwapToastByContent, TransactionId *pFreezeXid, MultiXactId *pCutoffMulti)
Definition: cluster.c:767
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3142
List * RelationGetIndexList(Relation relation)
Definition: relcache.c:4348
Oid indexOid
Definition: cluster.c:67
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:152
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:831
FormData_pg_class * Form_pg_class
Definition: pg_class.h:150
#define SearchSysCacheCopy1(cacheId, key1)
Definition: syscache.h:174
#define AccessExclusiveLock
Definition: lockdefs.h:45
void * palloc(Size size)
Definition: mcxt.c:949
int errmsg(const char *fmt,...)
Definition: elog.c:784
void check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMODE lockmode)
Definition: cluster.c:445
#define elog(elevel,...)
Definition: elog.h:226
int i
#define NameStr(name)
Definition: c.h:609
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition: scankey.c:76
#define REINDEX_REL_FORCE_INDEXES_PERMANENT
Definition: index.h:140
void CatalogCloseIndexes(CatalogIndexState indstate)
Definition: indexing.c:60
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
void CacheInvalidateRelcacheByTuple(HeapTuple classTuple)
Definition: inval.c:1306
#define TransactionIdIsValid(xid)
Definition: transam.h:41
void LockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:108
#define PROGRESS_CLUSTER_PHASE_REBUILD_INDEX
Definition: progress.h:53
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:123
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
RangeVar * relation
Definition: parsenodes.h:3173
void RenameRelationInternal(Oid myrelid, const char *newrelname, bool is_internal, bool is_index)
Definition: tablecmds.c:3314
#define PROGRESS_CLUSTER_PHASE
Definition: progress.h:39
bool reindex_relation(Oid relid, int flags, int options)
Definition: index.c:3580
Definition: pg_list.h:50
#define snprintf
Definition: port.h:192
#define RelationGetRelid(relation)
Definition: rel.h:419
static void table_relation_copy_for_cluster(Relation OldTable, Relation NewTable, Relation OldIndex, bool use_sort, TransactionId OldestXmin, TransactionId *xid_cutoff, MultiXactId *multi_cutoff, double *num_tuples, double *tups_vacuumed, double *tups_recently_dead)
Definition: tableam.h:1400
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:126
#define BTEqualStrategyNumber
Definition: stratnum.h:31
Oid make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence, LOCKMODE lockmode)
Definition: cluster.c:641
#define lfirst_oid(lc)
Definition: pg_list.h:192
void RelationMapUpdateMap(Oid relationId, Oid fileNode, bool shared, bool immediate)
Definition: relmapper.c:261
Oid toast_get_valid_index(Oid toastoid, LOCKMODE lock)
#define PERFORM_DELETION_INTERNAL
Definition: dependency.h:134
#define RelationGetNamespace(relation)
Definition: rel.h:460
void NewHeapCreateToastTable(Oid relOid, Datum reloptions, LOCKMODE lockmode)
Definition: toasting.c:66