PostgreSQL Source Code git master
Loading...
Searching...
No Matches
cluster.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * cluster.c
4 * REPACK a table; formerly known as CLUSTER. VACUUM FULL also uses
5 * parts of this code.
6 *
7 *
8 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
9 * Portions Copyright (c) 1994-5, Regents of the University of California
10 *
11 *
12 * IDENTIFICATION
13 * src/backend/commands/cluster.c
14 *
15 *-------------------------------------------------------------------------
16 */
17#include "postgres.h"
18
19#include "access/amapi.h"
20#include "access/heapam.h"
21#include "access/multixact.h"
22#include "access/relscan.h"
23#include "access/tableam.h"
25#include "access/transam.h"
26#include "access/xact.h"
27#include "catalog/catalog.h"
28#include "catalog/dependency.h"
29#include "catalog/heap.h"
30#include "catalog/index.h"
31#include "catalog/namespace.h"
33#include "catalog/pg_am.h"
34#include "catalog/pg_inherits.h"
35#include "catalog/toasting.h"
36#include "commands/cluster.h"
37#include "commands/defrem.h"
38#include "commands/progress.h"
39#include "commands/tablecmds.h"
40#include "commands/vacuum.h"
41#include "miscadmin.h"
42#include "optimizer/optimizer.h"
43#include "pgstat.h"
44#include "storage/bufmgr.h"
45#include "storage/lmgr.h"
46#include "storage/predicate.h"
47#include "utils/acl.h"
48#include "utils/fmgroids.h"
49#include "utils/guc.h"
50#include "utils/inval.h"
51#include "utils/lsyscache.h"
52#include "utils/memutils.h"
53#include "utils/pg_rusage.h"
54#include "utils/relmapper.h"
55#include "utils/snapmgr.h"
56#include "utils/syscache.h"
57
58/*
59 * This struct is used to pass around the information on tables to be
60 * clustered. We need this so we can make a list of them when invoked without
61 * a specific table/index pair.
62 */
63typedef struct
64{
68
70 Oid indexOid, Oid userid, int options);
73 bool verbose, bool *pSwapToastByContent,
75static List *get_tables_to_repack(RepackCommand cmd, bool usingindex,
78 Oid relid, bool rel_is_index,
81 Oid relid, Oid userid);
83 ClusterParams *params);
84static Oid determine_clustered_index(Relation rel, bool usingindex,
85 const char *indexname);
86static const char *RepackCommandAsString(RepackCommand cmd);
87
88
89/*
90 * The repack code allows for processing multiple tables at once. Because
91 * of this, we cannot just run everything on a single transaction, or we
92 * would be forced to acquire exclusive locks on all the tables being
93 * clustered, simultaneously --- very likely leading to deadlock.
94 *
95 * To solve this we follow a strategy similar to the VACUUM code, processing each
96 * relation in a separate transaction. For this to work, we need to:
97 *
98 * - provide a separate memory context so that we can pass information in
99 * a way that survives across transactions
100 * - start a new transaction every time a new relation is clustered
101 * - check for validity of the information on to-be-clustered relations,
102 * as someone might have deleted a relation behind our back, or
103 * clustered one on a different index
104 * - end the transaction
105 *
106 * The single-relation case does not have any such overhead.
107 *
108 * We also allow a relation to be repacked following an index, but without
109 * naming a specific one. In that case, the indisclustered bit will be
110 * looked up, and an ERROR will be thrown if no so-marked index is found.
111 */
112void
114{
115 ClusterParams params = {0};
116 Relation rel = NULL;
118 List *rtcs;
119
120 /* Parse option list */
121 foreach_node(DefElem, opt, stmt->params)
122 {
123 if (strcmp(opt->defname, "verbose") == 0)
124 params.options |= defGetBoolean(opt) ? CLUOPT_VERBOSE : 0;
125 else if (strcmp(opt->defname, "analyze") == 0 ||
126 strcmp(opt->defname, "analyse") == 0)
127 params.options |= defGetBoolean(opt) ? CLUOPT_ANALYZE : 0;
128 else
131 errmsg("unrecognized %s option \"%s\"",
132 RepackCommandAsString(stmt->command),
133 opt->defname),
134 parser_errposition(pstate, opt->location));
135 }
136
137 /*
138 * If a single relation is specified, process it and we're done ... unless
139 * the relation is a partitioned table, in which case we fall through.
140 */
141 if (stmt->relation != NULL)
142 {
143 rel = process_single_relation(stmt, &params);
144 if (rel == NULL)
145 return; /* all done */
146 }
147
148 /*
149 * Don't allow ANALYZE in the multiple-relation case for now. Maybe we
150 * can add support for this later.
151 */
152 if (params.options & CLUOPT_ANALYZE)
155 errmsg("cannot execute %s on multiple tables",
156 "REPACK (ANALYZE)"));
157
158 /*
159 * By here, we know we are in a multi-table situation. In order to avoid
160 * holding locks for too long, we want to process each table in its own
161 * transaction. This forces us to disallow running inside a user
162 * transaction block.
163 */
165
166 /* Also, we need a memory context to hold our list of relations */
168 "Repack",
170
171 params.options |= CLUOPT_RECHECK;
172
173 /*
174 * If we don't have a relation yet, determine a relation list. If we do,
175 * then it must be a partitioned table, and we want to process its
176 * partitions.
177 */
178 if (rel == NULL)
179 {
180 Assert(stmt->indexname == NULL);
181 rtcs = get_tables_to_repack(stmt->command, stmt->usingindex,
184 }
185 else
186 {
187 Oid relid;
188 bool rel_is_index;
189
190 Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
191
192 /*
193 * If USING INDEX was specified, resolve the index name now and pass
194 * it down.
195 */
196 if (stmt->usingindex)
197 {
198 /*
199 * If no index name was specified when repacking a partitioned
200 * table, punt for now. Maybe we can improve this later.
201 */
202 if (!stmt->indexname)
203 {
204 if (stmt->command == REPACK_COMMAND_CLUSTER)
207 errmsg("there is no previously clustered index for table \"%s\"",
209 else
212 /*- translator: first %s is name of a SQL command, eg. REPACK */
213 errmsg("cannot execute %s on partitioned table \"%s\" USING INDEX with no index name",
214 RepackCommandAsString(stmt->command),
216 }
217
218 relid = determine_clustered_index(rel, stmt->usingindex,
219 stmt->indexname);
220 if (!OidIsValid(relid))
221 elog(ERROR, "unable to determine index to cluster on");
223
224 rel_is_index = true;
225 }
226 else
227 {
228 relid = RelationGetRelid(rel);
229 rel_is_index = false;
230 }
231
233 relid, rel_is_index,
235
236 /* close parent relation, releasing lock on it */
238 rel = NULL;
239 }
240
241 /* Commit to get out of starting transaction */
244
245 /* Cluster the tables, each in a separate transaction */
246 Assert(rel == NULL);
248 {
249 /* Start a new transaction for each relation. */
251
252 /*
253 * Open the target table, coping with the case where it has been
254 * dropped.
255 */
256 rel = try_table_open(rtc->tableOid, AccessExclusiveLock);
257 if (rel == NULL)
258 {
260 continue;
261 }
262
263 /* functions in indexes may want a snapshot set */
265
266 /* Process this table */
267 cluster_rel(stmt->command, rel, rtc->indexOid, &params);
268 /* cluster_rel closes the relation, but keeps lock */
269
272 }
273
274 /* Start a new transaction for the cleanup work. */
276
277 /* Clean up working storage */
279}
280
281/*
282 * cluster_rel
283 *
284 * This clusters the table by creating a new, clustered table and
285 * swapping the relfilenumbers of the new table and the old table, so
286 * the OID of the original table is preserved. Thus we do not lose
287 * GRANTs, inheritance, or references to this table.
288 *
289 * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
290 * the new table, it's better to create the indexes afterwards than to fill
291 * them incrementally while we load the table.
292 *
293 * If indexOid is InvalidOid, the table will be rewritten in physical order
294 * instead of index order.
295 *
296 * 'cmd' indicates which command is being executed, to be used for error
297 * messages.
298 */
299void
301 ClusterParams *params)
302{
303 Oid tableOid = RelationGetRelid(OldHeap);
304 Oid save_userid;
305 int save_sec_context;
306 int save_nestlevel;
307 bool verbose = ((params->options & CLUOPT_VERBOSE) != 0);
308 bool recheck = ((params->options & CLUOPT_RECHECK) != 0);
310
312
313 /* Check for user-requested abort. */
315
318
319 /*
320 * Switch to the table owner's userid, so that any index functions are run
321 * as that user. Also lock down security-restricted operations and
322 * arrange to make GUC variable changes local to this command.
323 */
324 GetUserIdAndSecContext(&save_userid, &save_sec_context);
325 SetUserIdAndSecContext(OldHeap->rd_rel->relowner,
326 save_sec_context | SECURITY_RESTRICTED_OPERATION);
327 save_nestlevel = NewGUCNestLevel();
329
330 /*
331 * Since we may open a new transaction for each relation, we have to check
332 * that the relation still is what we think it is.
333 *
334 * If this is a single-transaction CLUSTER, we can skip these tests. We
335 * *must* skip the one on indisclustered since it would reject an attempt
336 * to cluster a not-previously-clustered index.
337 */
338 if (recheck &&
339 !cluster_rel_recheck(cmd, OldHeap, indexOid, save_userid,
340 params->options))
341 goto out;
342
343 /*
344 * We allow repacking shared catalogs only when not using an index. It
345 * would work to use an index in most respects, but the index would only
346 * get marked as indisclustered in the current database, leading to
347 * unexpected behavior if CLUSTER were later invoked in another database.
348 */
349 if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
352 /*- translator: first %s is name of a SQL command, eg. REPACK */
353 errmsg("cannot execute %s on a shared catalog",
355
356 /*
357 * Don't process temp tables of other backends ... their local buffer
358 * manager is not going to cope.
359 */
363 /*- translator: first %s is name of a SQL command, eg. REPACK */
364 errmsg("cannot execute %s on temporary tables of other sessions",
366
367 /*
368 * Also check for active uses of the relation in the current transaction,
369 * including open scans and pending AFTER trigger events.
370 */
372
373 /* Check heap and index are valid to cluster on */
374 if (OidIsValid(indexOid))
375 {
376 /* verify the index is good and lock it */
378 /* also open it */
379 index = index_open(indexOid, NoLock);
380 }
381 else
382 index = NULL;
383
384 /*
385 * When allow_system_table_mods is turned off, we disallow repacking a
386 * catalog on a particular index unless that's already the clustered index
387 * for that catalog.
388 *
389 * XXX We don't check for this in CLUSTER, because it's historically been
390 * allowed.
391 */
392 if (cmd != REPACK_COMMAND_CLUSTER &&
393 !allowSystemTableMods && OidIsValid(indexOid) &&
394 IsCatalogRelation(OldHeap) && !index->rd_index->indisclustered)
397 errmsg("permission denied: \"%s\" is a system catalog",
399 errdetail("System catalogs can only be clustered by the index they're already clustered on, if any, unless \"%s\" is enabled.",
400 "allow_system_table_mods"));
401
402 /*
403 * Quietly ignore the request if this is a materialized view which has not
404 * been populated from its query. No harm is done because there is no data
405 * to deal with, and we don't want to throw an error if this is part of a
406 * multi-relation request -- for example, CLUSTER was run on the entire
407 * database.
408 */
409 if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW &&
411 {
413 goto out;
414 }
415
416 Assert(OldHeap->rd_rel->relkind == RELKIND_RELATION ||
417 OldHeap->rd_rel->relkind == RELKIND_MATVIEW ||
418 OldHeap->rd_rel->relkind == RELKIND_TOASTVALUE);
419
420 /*
421 * All predicate locks on the tuples or pages are about to be made
422 * invalid, because we move tuples around. Promote them to relation
423 * locks. Predicate locks on indexes will be promoted when they are
424 * reindexed.
425 */
427
428 /* rebuild_relation does all the dirty work */
430 /* rebuild_relation closes OldHeap, and index if valid */
431
432out:
433 /* Roll back any GUC changes executed by index functions */
434 AtEOXact_GUC(false, save_nestlevel);
435
436 /* Restore userid and security context */
437 SetUserIdAndSecContext(save_userid, save_sec_context);
438
440}
441
442/*
443 * Check if the table (and its index) still meets the requirements of
444 * cluster_rel().
445 */
446static bool
448 Oid userid, int options)
449{
450 Oid tableOid = RelationGetRelid(OldHeap);
451
452 /* Check that the user still has privileges for the relation */
453 if (!repack_is_permitted_for_relation(cmd, tableOid, userid))
454 {
456 return false;
457 }
458
459 /*
460 * Silently skip a temp table for a remote session. Only doing this check
461 * in the "recheck" case is appropriate (which currently means somebody is
462 * executing a database-wide CLUSTER, or CLUSTER on a partitioned table), because
463 * there is another check in cluster() which will stop any attempt to
464 * cluster remote temp tables by name. There is another check in
465 * cluster_rel which is redundant, but we leave it for extra safety.
466 */
468 {
470 return false;
471 }
472
473 if (OidIsValid(indexOid))
474 {
475 /*
476 * Check that the index still exists
477 */
479 {
481 return false;
482 }
483
484 /*
485 * Check that the index is still the one with indisclustered set, if
486 * needed.
487 */
488 if ((options & CLUOPT_RECHECK_ISCLUSTERED) != 0 &&
489 !get_index_isclustered(indexOid))
490 {
492 return false;
493 }
494 }
495
496 return true;
497}
498
499/*
500 * Verify that the specified heap and index are valid to cluster on
501 *
502 * Side effect: obtains lock on the index. The caller may
503 * in some cases already have AccessExclusiveLock on the table, but
504 * not in all cases so we can't rely on the table-level lock for
505 * protection here.
506 */
507void
509{
511
512 OldIndex = index_open(indexOid, lockmode);
513
514 /*
515 * Check that index is in fact an index on the given relation
516 */
517 if (OldIndex->rd_index == NULL ||
518 OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
521 errmsg("\"%s\" is not an index for table \"%s\"",
524
525 /* Index AM must allow clustering */
526 if (!OldIndex->rd_indam->amclusterable)
529 errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
531
532 /*
533 * Disallow clustering on incomplete indexes (those that might not index
534 * every row of the relation). We could relax this by making a separate
535 * seqscan pass over the table to copy the missing rows, but that seems
536 * expensive and tedious.
537 */
538 if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred, NULL))
541 errmsg("cannot cluster on partial index \"%s\"",
543
544 /*
545 * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
546 * it might well not contain entries for every heap row, or might not even
547 * be internally consistent. (But note that we don't check indcheckxmin;
548 * the worst consequence of following broken HOT chains would be that we
549 * might put recently-dead tuples out-of-order in the new table, and there
550 * is little harm in that.)
551 */
552 if (!OldIndex->rd_index->indisvalid)
555 errmsg("cannot cluster on invalid index \"%s\"",
557
558 /* Drop relcache refcnt on OldIndex, but keep lock */
560}
561
562/*
563 * mark_index_clustered: mark the specified index as the one clustered on
564 *
565 * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
566 */
567void
568mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
569{
574
575 Assert(rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE);
576
577 /*
578 * If the index is already marked clustered, no need to do anything.
579 */
580 if (OidIsValid(indexOid))
581 {
582 if (get_index_isclustered(indexOid))
583 return;
584 }
585
586 /*
587 * Check each index of the relation and set/clear the bit as needed.
588 */
590
591 foreach(index, RelationGetIndexList(rel))
592 {
594
598 elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
600
601 /*
602 * Unset the bit if set. We know it's wrong because we checked this
603 * earlier.
604 */
605 if (indexForm->indisclustered)
606 {
607 indexForm->indisclustered = false;
609 }
610 else if (thisIndexOid == indexOid)
611 {
612 /* this was checked earlier, but let's be real sure */
613 if (!indexForm->indisvalid)
614 elog(ERROR, "cannot cluster on invalid index %u", indexOid);
615 indexForm->indisclustered = true;
617 }
618
620 InvalidOid, is_internal);
621
623 }
624
626}
627
628/*
629 * rebuild_relation: rebuild an existing relation in index or physical order
630 *
631 * OldHeap: table to rebuild.
632 * index: index to cluster by, or NULL to rewrite in physical order.
633 *
634 * On entry, heap and index (if one is given) must be open, and
635 * AccessExclusiveLock held on them.
636 * On exit, they are closed, but locks on them are not released.
637 */
638static void
640{
641 Oid tableOid = RelationGetRelid(OldHeap);
642 Oid accessMethod = OldHeap->rd_rel->relam;
643 Oid tableSpace = OldHeap->rd_rel->reltablespace;
646 char relpersistence;
651
654
655 /* for CLUSTER or REPACK USING INDEX, mark the index as the one to use */
656 if (index != NULL)
658
659 /* Remember info about rel before closing OldHeap */
660 relpersistence = OldHeap->rd_rel->relpersistence;
662
663 /*
664 * Create the transient table that will receive the re-ordered data.
665 *
666 * OldHeap is already locked, so no need to lock it again. make_new_heap
667 * obtains AccessExclusiveLock on the new heap and its toast table.
668 */
669 OIDNewHeap = make_new_heap(tableOid, tableSpace,
670 accessMethod,
671 relpersistence,
672 NoLock);
675
676 /* Copy the heap data into the new table in the desired order */
679
680
681 /* Close relcache entries, but keep lock until transaction commit */
683 if (index)
685
686 /*
687 * Close the new relation so it can be dropped as soon as the storage is
688 * swapped. The relation is not visible to others, so no need to unlock it
689 * explicitly.
690 */
692
693 /*
694 * Swap the physical files of the target and transient tables, then
695 * rebuild the target's indexes and throw away the transient table.
696 */
698 swap_toast_by_content, false, true,
700 relpersistence);
701}
702
703
704/*
705 * Create the transient table that will be filled with new data during
706 * CLUSTER, ALTER TABLE, and similar operations. The transient table
707 * duplicates the logical structure of the OldHeap; but will have the
708 * specified physical storage properties NewTableSpace, NewAccessMethod, and
709 * relpersistence.
710 *
711 * After this, the caller should load the new heap with transferred/modified
712 * data, then call finish_heap_swap to complete the operation.
713 */
714Oid
716 char relpersistence, LOCKMODE lockmode)
717{
721 Oid toastid;
723 HeapTuple tuple;
724 Datum reloptions;
725 bool isNull;
727
728 OldHeap = table_open(OIDOldHeap, lockmode);
730
731 /*
732 * Note that the NewHeap will not receive any of the defaults or
733 * constraints associated with the OldHeap; we don't need 'em, and there's
734 * no reason to spend cycles inserting them into the catalogs only to
735 * delete them.
736 */
737
738 /*
739 * But we do want to use reloptions of the old heap for new heap.
740 */
742 if (!HeapTupleIsValid(tuple))
743 elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
745 &isNull);
746 if (isNull)
747 reloptions = (Datum) 0;
748
749 if (relpersistence == RELPERSISTENCE_TEMP)
751 else
753
754 /*
755 * Create the new heap, using a temporary name in the same namespace as
756 * the existing table. NOTE: there is some risk of collision with user
757 * relnames. Working around this seems more trouble than it's worth; in
758 * particular, we can't create the new heap in a different namespace from
759 * the old, or we will have problems with the TEMP status of temp tables.
760 *
761 * Note: the new heap is not a shared relation, even if we are rebuilding
762 * a shared rel. However, we do make the new heap mapped if the source is
763 * mapped. This simplifies swap_relation_files, and is absolutely
764 * necessary for rebuilding pg_class, for reasons explained there.
765 */
766 snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
767
774 OldHeap->rd_rel->relowner,
777 NIL,
779 relpersistence,
780 false,
783 reloptions,
784 false,
785 true,
786 true,
788 NULL);
790
791 ReleaseSysCache(tuple);
792
793 /*
794 * Advance command counter so that the newly-created relation's catalog
795 * tuples will be visible to table_open.
796 */
798
799 /*
800 * If necessary, create a TOAST table for the new relation.
801 *
802 * If the relation doesn't have a TOAST table already, the new relation
803 * cannot need one either. The other way around is possible though: if some
804 * wide columns have been dropped, NewHeapCreateToastTable can decide that
805 * no TOAST table is needed for the new table.
806 *
807 * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
808 * that the TOAST table will be visible for insertion.
809 */
810 toastid = OldHeap->rd_rel->reltoastrelid;
811 if (OidIsValid(toastid))
812 {
813 /* keep the existing toast table's reloptions, if any */
815 if (!HeapTupleIsValid(tuple))
816 elog(ERROR, "cache lookup failed for relation %u", toastid);
818 &isNull);
819 if (isNull)
820 reloptions = (Datum) 0;
821
822 NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode, toastid);
823
824 ReleaseSysCache(tuple);
825 }
826
828
829 return OIDNewHeap;
830}
831
832/*
833 * Do the physical copying of table data.
834 *
835 * There are three output parameters:
836 * *pSwapToastByContent is set true if toast tables must be swapped by content.
837 * *pFreezeXid receives the TransactionId used as freeze cutoff point.
838 * *pCutoffMulti receives the MultiXactId used as a cutoff point.
839 */
840static void
844{
850 VacuumParams params;
851 struct VacuumCutoffs cutoffs;
852 bool use_sort;
853 double num_tuples = 0,
854 tups_vacuumed = 0,
856 BlockNumber num_pages;
857 int elevel = verbose ? INFO : DEBUG2;
859 char *nspname;
860
862
863 /* Store a copy of the namespace name for logging purposes */
865
866 /*
867 * Their tuple descriptors should be exactly alike, but here we only need
868 * to assume that they have the same number of columns.
869 */
872 Assert(newTupDesc->natts == oldTupDesc->natts);
873
874 /*
875 * If the OldHeap has a toast table, get lock on the toast table to keep
876 * it from being vacuumed. This is needed because autovacuum processes
877 * toast tables independently of their main tables, with no lock on the
878 * latter. If an autovacuum were to start on the toast table after we
879 * compute our OldestXmin below, it would use a later OldestXmin, and then
880 * possibly remove as DEAD toast tuples belonging to main tuples we think
881 * are only RECENTLY_DEAD. Then we'd fail while trying to copy those
882 * tuples.
883 *
884 * We don't need to open the toast relation here, just lock it. The lock
885 * will be held till end of transaction.
886 */
887 if (OldHeap->rd_rel->reltoastrelid)
888 LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
889
890 /*
891 * If both tables have TOAST tables, perform toast swap by content. It is
892 * possible that the old table has a toast table but the new one doesn't,
893 * if toastable columns have been dropped. In that case we have to do
894 * swap by links. This is okay because swap by content is only essential
895 * for system catalogs, and we don't support schema changes for them.
896 */
897 if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
898 {
899 *pSwapToastByContent = true;
900
901 /*
902 * When doing swap by content, any toast pointers written into NewHeap
903 * must use the old toast table's OID, because that's where the toast
904 * data will eventually be found. Set this up by setting rd_toastoid.
905 * This also tells toast_save_datum() to preserve the toast value
906 * OIDs, which we want so as not to invalidate toast pointers in
907 * system catalog caches, and to avoid making multiple copies of a
908 * single toast value.
909 *
910 * Note that we must hold NewHeap open until we are done writing data,
911 * since the relcache will not guarantee to remember this setting once
912 * the relation is closed. Also, this technique depends on the fact
913 * that no one will try to read from the NewHeap until after we've
914 * finished writing it and swapping the rels --- otherwise they could
915 * follow the toast pointers to the wrong place. (It would actually
916 * work for values copied over from the old toast table, but not for
917 * any values that we toast which were previously not toasted.)
918 */
919 NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
920 }
921 else
922 *pSwapToastByContent = false;
923
924 /*
925 * Compute xids used to freeze and weed out dead tuples and multixacts.
926 * Since we're going to rewrite the whole table anyway, there's no reason
927 * not to be aggressive about this.
928 */
929 memset(&params, 0, sizeof(VacuumParams));
930 vacuum_get_cutoffs(OldHeap, params, &cutoffs);
931
932 /*
933 * FreezeXid will become the table's new relfrozenxid, and that mustn't go
934 * backwards, so take the max.
935 */
936 {
937 TransactionId relfrozenxid = OldHeap->rd_rel->relfrozenxid;
938
941 cutoffs.FreezeLimit = relfrozenxid;
942 }
943
944 /*
945 * MultiXactCutoff, similarly, shouldn't go backwards either.
946 */
947 {
948 MultiXactId relminmxid = OldHeap->rd_rel->relminmxid;
949
952 cutoffs.MultiXactCutoff = relminmxid;
953 }
954
955 /*
956 * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
957 * the OldHeap. We know how to use a sort to duplicate the ordering of a
958 * btree index, and will use seqscan-and-sort for that case if the planner
959 * tells us it's cheaper. Otherwise, always indexscan if an index is
960 * provided, else plain seqscan.
961 */
962 if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
965 else
966 use_sort = false;
967
968 /* Log what we're doing */
969 if (OldIndex != NULL && !use_sort)
970 ereport(elevel,
971 errmsg("repacking \"%s.%s\" using index scan on \"%s\"",
972 nspname,
975 else if (use_sort)
976 ereport(elevel,
977 errmsg("repacking \"%s.%s\" using sequential scan and sort",
978 nspname,
980 else
981 ereport(elevel,
982 errmsg("repacking \"%s.%s\" in physical order",
983 nspname,
985
986 /*
987 * Hand off the actual copying to AM specific function, the generic code
988 * cannot know how to deal with visibility across AMs. Note that this
989 * routine is allowed to set FreezeXid / MultiXactCutoff to different
990 * values (e.g. because the AM doesn't use freezing).
991 */
993 cutoffs.OldestXmin, &cutoffs.FreezeLimit,
994 &cutoffs.MultiXactCutoff,
995 &num_tuples, &tups_vacuumed,
997
998 /* return selected values to caller, get set as relfrozenxid/minmxid */
999 *pFreezeXid = cutoffs.FreezeLimit;
1000 *pCutoffMulti = cutoffs.MultiXactCutoff;
1001
1002 /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
1003 NewHeap->rd_toastoid = InvalidOid;
1004
1006
1007 /* Log what we did */
1008 ereport(elevel,
1009 (errmsg("\"%s.%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
1010 nspname,
1012 tups_vacuumed, num_tuples,
1014 errdetail("%.0f dead row versions cannot be removed yet.\n"
1015 "%s.",
1017 pg_rusage_show(&ru0))));
1018
1019 /* Update pg_class to reflect the correct values of pages and tuples. */
1021
1025 elog(ERROR, "cache lookup failed for relation %u",
1028
1029 relform->relpages = num_pages;
1030 relform->reltuples = num_tuples;
1031
1032 /* Don't update the stats for pg_class. See swap_relation_files. */
1035 else
1037
1038 /* Clean up. */
1041
1042 /* Make the update visible */
1044}
1045
1046/*
1047 * Swap the physical files of two given relations.
1048 *
1049 * We swap the physical identity (reltablespace, relfilenumber) while keeping
1050 * the same logical identities of the two relations. relpersistence is also
1051 * swapped, which is critical since it determines where buffers live for each
1052 * relation.
1053 *
1054 * We can swap associated TOAST data in either of two ways: recursively swap
1055 * the physical content of the toast tables (and their indexes), or swap the
1056 * TOAST links in the given relations' pg_class entries. The former is needed
1057 * to manage rewrites of shared catalogs (where we cannot change the pg_class
1058 * links) while the latter is the only way to handle cases in which a toast
1059 * table is added or removed altogether.
1060 *
1061 * Additionally, the first relation is marked with relfrozenxid set to
1062 * frozenXid. It seems a bit ugly to have this here, but the caller would
1063 * have to do it anyway, so having it here saves a heap_update. Note: in
1064 * the swap-toast-links case, we assume we don't need to change the toast
1065 * table's relfrozenxid: the new version of the toast table should already
1066 * have relfrozenxid set to RecentXmin, which is good enough.
1067 *
1068 * Lastly, if r2 and its toast table and toast index (if any) are mapped,
1069 * their OIDs are emitted into mapped_tables[]. This is hacky but beats
1070 * having to look the information up again later in finish_heap_swap.
1071 */
1072static void
1075 bool is_internal,
1079{
1082 reltup2;
1084 relform2;
1088 char swptmpchr;
1089 Oid relam1,
1090 relam2;
1091
1092 /* We need writable copies of both pg_class tuples. */
1094
1097 elog(ERROR, "cache lookup failed for relation %u", r1);
1099
1102 elog(ERROR, "cache lookup failed for relation %u", r2);
1104
1105 relfilenumber1 = relform1->relfilenode;
1106 relfilenumber2 = relform2->relfilenode;
1107 relam1 = relform1->relam;
1108 relam2 = relform2->relam;
1109
1112 {
1113 /*
1114 * Normal non-mapped relations: swap relfilenumbers, reltablespaces,
1115 * relpersistence
1116 */
1118
1119 swaptemp = relform1->relfilenode;
1120 relform1->relfilenode = relform2->relfilenode;
1121 relform2->relfilenode = swaptemp;
1122
1123 swaptemp = relform1->reltablespace;
1124 relform1->reltablespace = relform2->reltablespace;
1125 relform2->reltablespace = swaptemp;
1126
1127 swaptemp = relform1->relam;
1128 relform1->relam = relform2->relam;
1129 relform2->relam = swaptemp;
1130
1131 swptmpchr = relform1->relpersistence;
1132 relform1->relpersistence = relform2->relpersistence;
1133 relform2->relpersistence = swptmpchr;
1134
1135 /* Also swap toast links, if we're swapping by links */
1137 {
1138 swaptemp = relform1->reltoastrelid;
1139 relform1->reltoastrelid = relform2->reltoastrelid;
1140 relform2->reltoastrelid = swaptemp;
1141 }
1142 }
1143 else
1144 {
1145 /*
1146 * Mapped-relation case. Here we have to swap the relation mappings
1147 * instead of modifying the pg_class columns. Both must be mapped.
1148 */
1151 elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
1152 NameStr(relform1->relname));
1153
1154 /*
1155 * We can't change the tablespace nor persistence of a mapped rel, and
1156 * we can't handle toast link swapping for one either, because we must
1157 * not apply any critical changes to its pg_class row. These cases
1158 * should be prevented by upstream permissions tests, so these checks
1159 * are a non-user-facing emergency backstop.
1160 */
1161 if (relform1->reltablespace != relform2->reltablespace)
1162 elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
1163 NameStr(relform1->relname));
1164 if (relform1->relpersistence != relform2->relpersistence)
1165 elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
1166 NameStr(relform1->relname));
1167 if (relform1->relam != relform2->relam)
1168 elog(ERROR, "cannot change access method of mapped relation \"%s\"",
1169 NameStr(relform1->relname));
1170 if (!swap_toast_by_content &&
1171 (relform1->reltoastrelid || relform2->reltoastrelid))
1172 elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
1173 NameStr(relform1->relname));
1174
1175 /*
1176 * Fetch the mappings --- shouldn't fail, but be paranoid
1177 */
1180 elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1181 NameStr(relform1->relname), r1);
1184 elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1185 NameStr(relform2->relname), r2);
1186
1187 /*
1188 * Send replacement mappings to relmapper. Note these won't actually
1189 * take effect until CommandCounterIncrement.
1190 */
1191 RelationMapUpdateMap(r1, relfilenumber2, relform1->relisshared, false);
1192 RelationMapUpdateMap(r2, relfilenumber1, relform2->relisshared, false);
1193
1194 /* Pass OIDs of mapped r2 tables back to caller */
1195 *mapped_tables++ = r2;
1196 }
1197
1198 /*
1199 * Recognize that rel1's relfilenumber (swapped from rel2) is new in this
1200 * subtransaction. The rel2 storage (swapped from rel1) may or may not be
1201 * new.
1202 */
1203 {
1204 Relation rel1,
1205 rel2;
1206
1209 rel2->rd_createSubid = rel1->rd_createSubid;
1210 rel2->rd_newRelfilelocatorSubid = rel1->rd_newRelfilelocatorSubid;
1211 rel2->rd_firstRelfilelocatorSubid = rel1->rd_firstRelfilelocatorSubid;
1215 }
1216
1217 /*
1218 * In the case of a shared catalog, these next few steps will only affect
1219 * our own database's pg_class row; but that's okay, because they are all
1220 * noncritical updates. That's also an important fact for the case of a
1221 * mapped catalog, because it's possible that we'll commit the map change
1222 * and then fail to commit the pg_class update.
1223 */
1224
1225 /* set rel1's frozen Xid and minimum MultiXid */
1226 if (relform1->relkind != RELKIND_INDEX)
1227 {
1230 relform1->relfrozenxid = frozenXid;
1231 relform1->relminmxid = cutoffMulti;
1232 }
1233
1234 /* swap size statistics too, since new rel has freshly-updated stats */
1235 {
1240
1241 swap_pages = relform1->relpages;
1242 relform1->relpages = relform2->relpages;
1243 relform2->relpages = swap_pages;
1244
1245 swap_tuples = relform1->reltuples;
1246 relform1->reltuples = relform2->reltuples;
1247 relform2->reltuples = swap_tuples;
1248
1249 swap_allvisible = relform1->relallvisible;
1250 relform1->relallvisible = relform2->relallvisible;
1251 relform2->relallvisible = swap_allvisible;
1252
1253 swap_allfrozen = relform1->relallfrozen;
1254 relform1->relallfrozen = relform2->relallfrozen;
1255 relform2->relallfrozen = swap_allfrozen;
1256 }
1257
1258 /*
1259 * Update the tuples in pg_class --- unless the target relation of the
1260 * swap is pg_class itself. In that case, there is zero point in making
1261 * changes because we'd be updating the old data that we're about to throw
1262 * away. Because the real work being done here for a mapped relation is
1263 * just to change the relation map settings, it's all right to not update
1264 * the pg_class rows in this case. The most important changes will instead
1265 * be performed later, in finish_heap_swap() itself.
1266 */
1267 if (!target_is_pg_class)
1268 {
1270
1273 indstate);
1275 indstate);
1277 }
1278 else
1279 {
1280 /* no update ... but we do still need relcache inval */
1283 }
1284
1285 /*
1286 * Now that pg_class has been updated with its relevant information for
1287 * the swap, update the dependency of the relations to point to their new
1288 * table AM, if it has changed.
1289 */
1290 if (relam1 != relam2)
1291 {
1293 r1,
1295 relam1,
1296 relam2) != 1)
1297 elog(ERROR, "could not change access method dependency for relation \"%s.%s\"",
1299 get_rel_name(r1));
1301 r2,
1303 relam2,
1304 relam1) != 1)
1305 elog(ERROR, "could not change access method dependency for relation \"%s.%s\"",
1307 get_rel_name(r2));
1308 }
1309
1310 /*
1311 * Post alter hook for modified relations. The change to r2 is always
1312 * internal, but r1 depends on the invocation context.
1313 */
1315 InvalidOid, is_internal);
1317 InvalidOid, true);
1318
1319 /*
1320 * If we have toast tables associated with the relations being swapped,
1321 * deal with them too.
1322 */
1323 if (relform1->reltoastrelid || relform2->reltoastrelid)
1324 {
1326 {
1327 if (relform1->reltoastrelid && relform2->reltoastrelid)
1328 {
1329 /* Recursively swap the contents of the toast tables */
1330 swap_relation_files(relform1->reltoastrelid,
1331 relform2->reltoastrelid,
1334 is_internal,
1335 frozenXid,
1338 }
1339 else
1340 {
1341 /* caller messed up */
1342 elog(ERROR, "cannot swap toast files by content when there's only one");
1343 }
1344 }
1345 else
1346 {
1347 /*
1348 * We swapped the ownership links, so we need to change dependency
1349 * data to match.
1350 *
1351 * NOTE: it is possible that only one table has a toast table.
1352 *
1353 * NOTE: at present, a TOAST table's only dependency is the one on
1354 * its owning table. If more are ever created, we'd need to use
1355 * something more selective than deleteDependencyRecordsFor() to
1356 * get rid of just the link we want.
1357 */
1360 long count;
1361
1362 /*
1363 * We disallow this case for system catalogs, to avoid the
1364 * possibility that the catalog we're rebuilding is one of the
1365 * ones the dependency changes would change. It's too late to be
1366 * making any data changes to the target catalog.
1367 */
1369 elog(ERROR, "cannot swap toast files by links for system catalogs");
1370
1371 /* Delete old dependencies */
1372 if (relform1->reltoastrelid)
1373 {
1375 relform1->reltoastrelid,
1376 false);
1377 if (count != 1)
1378 elog(ERROR, "expected one dependency record for TOAST table, found %ld",
1379 count);
1380 }
1381 if (relform2->reltoastrelid)
1382 {
1384 relform2->reltoastrelid,
1385 false);
1386 if (count != 1)
1387 elog(ERROR, "expected one dependency record for TOAST table, found %ld",
1388 count);
1389 }
1390
1391 /* Register new dependencies */
1393 baseobject.objectSubId = 0;
1395 toastobject.objectSubId = 0;
1396
1397 if (relform1->reltoastrelid)
1398 {
1399 baseobject.objectId = r1;
1400 toastobject.objectId = relform1->reltoastrelid;
1403 }
1404
1405 if (relform2->reltoastrelid)
1406 {
1407 baseobject.objectId = r2;
1408 toastobject.objectId = relform2->reltoastrelid;
1411 }
1412 }
1413 }
1414
1415 /*
1416 * If we're swapping two toast tables by content, do the same for their
1417 * valid index. The swap can actually be safely done only if the relations
1418 * have indexes.
1419 */
1421 relform1->relkind == RELKIND_TOASTVALUE &&
1422 relform2->relkind == RELKIND_TOASTVALUE)
1423 {
1426
1427 /* Get valid index for each relation */
1432
1437 is_internal,
1441 }
1442
1443 /* Clean up. */
1446
1448}
1449
1450/*
1451 * Remove the transient table that was built by make_new_heap, and finish
1452 * cleaning up (including rebuilding all indexes on the old heap).
1453 */
1454void
1456 bool is_system_catalog,
1458 bool check_constraints,
1459 bool is_internal,
1462 char newrelpersistence)
1463{
1464 ObjectAddress object;
1465 Oid mapped_tables[4];
1466 int reindex_flags;
1468 int i;
1469
1470 /* Report that we are now swapping relation files */
1473
1474 /* Zero out possible results from swap_relation_files */
1475 memset(mapped_tables, 0, sizeof(mapped_tables));
1476
1477 /*
1478 * Swap the contents of the heap relations (including any toast tables).
1479 * Also set old heap's relfrozenxid to frozenXid.
1480 */
1483 swap_toast_by_content, is_internal,
1485
1486 /*
1487 * If it's a system catalog, queue a sinval message to flush all catcaches
1488 * on the catalog when we reach CommandCounterIncrement.
1489 */
1492
1493 /*
1494 * Rebuild each index on the relation (but not the toast table, which is
1495 * all-new at this point). It is important to do this before the DROP
1496 * step because if we are processing a system catalog that will be used
1497 * during DROP, we want to have its indexes available. There is no
1498 * advantage to the other order anyway because this is all transactional,
1499 * so no chance to reclaim disk space before commit. We do not need a
1500 * final CommandCounterIncrement() because reindex_relation does it.
1501 *
1502 * Note: because index_build is called via reindex_relation, it will never
1503 * set indcheckxmin true for the indexes. This is OK even though in some
1504 * sense we are building new indexes rather than rebuilding existing ones,
1505 * because the new heap won't contain any HOT chains at all, let alone
1506 * broken ones, so it can't be necessary to set indcheckxmin.
1507 */
1511
1512 /*
1513 * Ensure that the indexes have the same persistence as the parent
1514 * relation.
1515 */
1516 if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
1518 else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
1520
1521 /* Report that we are now reindexing relations */
1524
1526
1527 /* Report that we are now doing clean up */
1530
1531 /*
1532 * If the relation being rebuilt is pg_class, swap_relation_files()
1533 * couldn't update pg_class's own pg_class entry (check comments in
1534 * swap_relation_files()), thus relfrozenxid was not updated. That's
1535 * annoying because a potential reason for doing a VACUUM FULL is an
1536 * imminent or actual anti-wraparound shutdown. So, now that we can
1537 * access the new relation using its indices, update relfrozenxid.
1538 * pg_class doesn't have a toast relation, so we don't need to update the
1539 * corresponding toast relation. Note that there's little point moving all
1540 * relfrozenxid updates here since swap_relation_files() needs to write to
1541 * pg_class for non-mapped relations anyway.
1542 */
1544 {
1548
1550
1553 elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
1555
1556 relform->relfrozenxid = frozenXid;
1557 relform->relminmxid = cutoffMulti;
1558
1560
1562 }
1563
1564 /* Destroy new heap with old filenumber */
1565 object.classId = RelationRelationId;
1566 object.objectId = OIDNewHeap;
1567 object.objectSubId = 0;
1568
1569 /*
1570 * The new relation is local to our transaction and we know nothing
1571 * depends on it, so DROP_RESTRICT should be OK.
1572 */
1574
1575 /* performDeletion does CommandCounterIncrement at end */
1576
1577 /*
1578 * Now we must remove any relation mapping entries that we set up for the
1579 * transient table, as well as its toast table and toast index if any. If
1580 * we fail to do this before commit, the relmapper will complain about new
1581 * permanent map entries being added post-bootstrap.
1582 */
1583 for (i = 0; OidIsValid(mapped_tables[i]); i++)
1585
1586 /*
1587 * At this point, everything is kosher except that, if we did toast swap
1588 * by links, the toast table's name corresponds to the transient table.
1589 * The name is irrelevant to the backend because it's referenced by OID,
1590 * but users looking at the catalogs could be confused. Rename it to
1591 * prevent this problem.
1592 *
1593 * Note no lock required on the relation, because we already hold an
1594 * exclusive lock on it.
1595 */
1597 {
1599
1601 if (OidIsValid(newrel->rd_rel->reltoastrelid))
1602 {
1603 Oid toastidx;
1605
1606 /* Get the associated valid index to be renamed */
1607 toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
1608 NoLock);
1609
1610 /* rename the toast table ... */
1611 snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
1612 OIDOldHeap);
1613 RenameRelationInternal(newrel->rd_rel->reltoastrelid,
1614 NewToastName, true, false);
1615
1616 /* ... and its valid index too. */
1617 snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
1618 OIDOldHeap);
1619
1621 NewToastName, true, true);
1622
1623 /*
1624 * Reset the relrewrite for the toast. The command-counter
1625 * increment is required here as we are about to update the tuple
1626 * that is updated as part of RenameRelationInternal.
1627 */
1629 ResetRelRewrite(newrel->rd_rel->reltoastrelid);
1630 }
1632 }
1633
1634 /* if it's not a catalog table, clear any missing attribute settings */
1635 if (!is_system_catalog)
1636 {
1638
1642 }
1643}
1644
1645/*
1646 * Determine which relations to process, when REPACK/CLUSTER is called
1647 * without specifying a table name. The exact process depends on whether
1648 * USING INDEX was given or not, and in any case we only return tables and
1649 * materialized views that the current user has privileges to repack/cluster.
1650 *
1651 * If USING INDEX was given, we scan pg_index to find those that have
1652 * indisclustered set; if it was not given, scan pg_class and return all
1653 * tables.
1654 *
1655 * Return it as a list of RelToCluster in the given memory context.
1656 */
1657static List *
1659{
1661 TableScanDesc scan;
1662 HeapTuple tuple;
1663 List *rtcs = NIL;
1664
1665 if (usingindex)
1666 {
1667 ScanKeyData entry;
1668
1669 /*
1670 * For USING INDEX, scan pg_index to find those with indisclustered.
1671 */
1673 ScanKeyInit(&entry,
1676 BoolGetDatum(true));
1677 scan = table_beginscan_catalog(catalog, 1, &entry);
1678 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1679 {
1683
1684 index = (Form_pg_index) GETSTRUCT(tuple);
1685
1686 /*
1687 * Try to obtain a light lock on the index's table, to ensure it
1688 * doesn't go away while we collect the list. If we cannot, just
1689 * disregard it. Be sure to release this if we ultimately decide
1690 * not to process the table!
1691 */
1693 continue;
1694
1695 /* Verify that the table still exists; skip if not */
1697 {
1699 continue;
1700 }
1701
1702 /* noisily skip rels which the user can't process */
1703 if (!repack_is_permitted_for_relation(cmd, index->indrelid,
1704 GetUserId()))
1705 {
1707 continue;
1708 }
1709
1710 /* Use a permanent memory context for the result list */
1713 rtc->tableOid = index->indrelid;
1714 rtc->indexOid = index->indexrelid;
1715 rtcs = lappend(rtcs, rtc);
1717 }
1718 }
1719 else
1720 {
1723
1724 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1725 {
1727 Form_pg_class class;
1729
1730 class = (Form_pg_class) GETSTRUCT(tuple);
1731
1732 /*
1733 * Try to obtain a light lock on the table, to ensure it doesn't
1734 * go away while we collect the list. If we cannot, just
1735 * disregard the table. Be sure to release this if we ultimately
1736 * decide not to process the table!
1737 */
1739 continue;
1740
1741 /* Verify that the table still exists */
1743 {
1745 continue;
1746 }
1747
1748 /* Can only process plain tables and matviews */
1749 if (class->relkind != RELKIND_RELATION &&
1750 class->relkind != RELKIND_MATVIEW)
1751 {
1753 continue;
1754 }
1755
1756 /* noisily skip rels which the user can't process */
1758 GetUserId()))
1759 {
1761 continue;
1762 }
1763
1764 /* Use a permanent memory context for the result list */
1767 rtc->tableOid = class->oid;
1768 rtc->indexOid = InvalidOid;
1769 rtcs = lappend(rtcs, rtc);
1771 }
1772 }
1773
1774 table_endscan(scan);
1776
1777 return rtcs;
1778}
1779
1780/*
1781 * Given a partitioned table or its index, return a list of RelToCluster for
1782 * all the leaf child tables/indexes.
1783 *
1784 * 'rel_is_index' tells whether 'relid' is that of an index (true) or of the
1785 * owning relation.
1786 */
1787static List *
1790{
1791 List *inhoids;
1792 List *rtcs = NIL;
1793
1794 /*
1795 * Do not lock the children until they're processed. Note that we do hold
1796 * a lock on the parent partitioned table.
1797 */
1800 {
1801 Oid table_oid,
1802 index_oid;
1805
1806 if (rel_is_index)
1807 {
1808 /* consider only leaf indexes */
1810 continue;
1811
1814 }
1815 else
1816 {
1817 /* consider only leaf relations */
1819 continue;
1820
1823 }
1824
1825 /*
1826 * It's possible that the user does not have privileges to CLUSTER the
1827 * leaf partition despite having them on the partitioned table. Skip
1828 * if so.
1829 */
1831 continue;
1832
1833 /* Use a permanent memory context for the result list */
1836 rtc->tableOid = table_oid;
1837 rtc->indexOid = index_oid;
1838 rtcs = lappend(rtcs, rtc);
1840 }
1841
1842 return rtcs;
1843}
1844
1845
1846/*
1847 * Return whether userid has privileges to REPACK relid. If not, this
1848 * function emits a WARNING.
1849 */
1850static bool
1852{
1854
1855 if (pg_class_aclcheck(relid, userid, ACL_MAINTAIN) == ACLCHECK_OK)
1856 return true;
1857
1859 errmsg("permission denied to execute %s on \"%s\", skipping it",
1861 get_rel_name(relid)));
1862
1863 return false;
1864}
1865
1866
1867/*
1868 * Given a RepackStmt with an indicated relation name, resolve the relation
1869 * name, obtain lock on it, then determine what to do based on the relation
1870 * type: if it's a table and not partitioned, repack it as indicated (using an
1871 * existing clustered index, or following the given one), and return NULL.
1872 *
1873 * On the other hand, if the table is partitioned, do nothing further and
1874 * instead return the opened and locked relcache entry, so that caller can
1875 * process the partitions using the multiple-table handling code. In this
1876 * case, if an index name is given, it's up to the caller to resolve it.
1877 */
1878static Relation
1880{
1881 Relation rel;
1882 Oid tableOid;
1883
1884 Assert(stmt->relation != NULL);
1885 Assert(stmt->command == REPACK_COMMAND_CLUSTER ||
1886 stmt->command == REPACK_COMMAND_REPACK);
1887
1888 /*
1889 * Make sure ANALYZE is specified if a column list is present.
1890 */
1891 if ((params->options & CLUOPT_ANALYZE) == 0 && stmt->relation->va_cols != NIL)
1892 ereport(ERROR,
1894 errmsg("ANALYZE option must be specified when a column list is provided"));
1895
1896 /*
1897 * Find, lock, and check permissions on the table. We obtain
1898 * AccessExclusiveLock right away to avoid lock-upgrade hazard in the
1899 * single-transaction case.
1900 */
1901 tableOid = RangeVarGetRelidExtended(stmt->relation->relation,
1903 0,
1905 NULL);
1906 rel = table_open(tableOid, NoLock);
1907
1908 /*
1909 * Reject clustering a remote temp table ... their local buffer manager is
1910 * not going to cope.
1911 */
1912 if (RELATION_IS_OTHER_TEMP(rel))
1913 ereport(ERROR,
1915 /*- translator: first %s is name of a SQL command, eg. REPACK */
1916 errmsg("cannot execute %s on temporary tables of other sessions",
1917 RepackCommandAsString(stmt->command)));
1918
1919 /*
1920 * For partitioned tables, let caller handle this. Otherwise, process it
1921 * here and we're done.
1922 */
1923 if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1924 return rel;
1925 else
1926 {
1927 Oid indexOid;
1928
1929 indexOid = determine_clustered_index(rel, stmt->usingindex,
1930 stmt->indexname);
1931 if (OidIsValid(indexOid))
1933 cluster_rel(stmt->command, rel, indexOid, params);
1934
1935 /*
1936 * Do an analyze, if requested. We close the transaction and start a
1937 * new one, so that we don't hold the stronger lock for longer than
1938 * needed.
1939 */
1940 if (params->options & CLUOPT_ANALYZE)
1941 {
1943
1946
1949
1950 vac_params.options |= VACOPT_ANALYZE;
1951 if (params->options & CLUOPT_VERBOSE)
1952 vac_params.options |= VACOPT_VERBOSE;
1953 analyze_rel(tableOid, NULL, vac_params,
1954 stmt->relation->va_cols, true, NULL);
1957 }
1958
1959 return NULL;
1960 }
1961}
1962
1963/*
1964 * Given a relation and the usingindex/indexname options in a
1965 * REPACK USING INDEX or CLUSTER command, return the OID of the
1966 * index to use for clustering the table.
1967 *
1968 * Caller must hold lock on the relation so that the set of indexes
1969 * doesn't change, and must call check_index_is_clusterable.
1970 */
1971static Oid
1972determine_clustered_index(Relation rel, bool usingindex, const char *indexname)
1973{
1974 Oid indexOid;
1975
1976 if (indexname == NULL && usingindex)
1977 {
1978 /*
1979 * If USING INDEX with no name is given, find a clustered index, or
1980 * error out if none.
1981 */
1982 indexOid = InvalidOid;
1984 {
1986 {
1987 indexOid = idxoid;
1988 break;
1989 }
1990 }
1991
1992 if (!OidIsValid(indexOid))
1993 ereport(ERROR,
1995 errmsg("there is no previously clustered index for table \"%s\"",
1997 }
1998 else if (indexname != NULL)
1999 {
2000 /* An index was specified; obtain its OID. */
2001 indexOid = get_relname_relid(indexname, rel->rd_rel->relnamespace);
2002 if (!OidIsValid(indexOid))
2003 ereport(ERROR,
2005 errmsg("index \"%s\" for table \"%s\" does not exist",
2006 indexname, RelationGetRelationName(rel)));
2007 }
2008 else
2009 indexOid = InvalidOid;
2010
2011 return indexOid;
2012}
2013
2014static const char *
2016{
2017 switch (cmd)
2018 {
2020 return "REPACK";
2022 return "VACUUM";
2024 return "CLUSTER";
2025 }
2026 return "???"; /* keep compiler quiet */
2027}
@ ACLCHECK_OK
Definition acl.h:184
AclResult pg_class_aclcheck(Oid table_oid, Oid roleid, AclMode mode)
Definition aclchk.c:4082
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_REPACK
uint32 BlockNumber
Definition block.h:31
#define RelationGetNumberOfBlocks(reln)
Definition bufmgr.h:307
#define NameStr(name)
Definition c.h:837
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:243
#define Assert(condition)
Definition c.h:945
TransactionId MultiXactId
Definition c.h:748
int32_t int32
Definition c.h:614
float float4
Definition c.h:715
uint32 TransactionId
Definition c.h:738
#define OidIsValid(objectId)
Definition c.h:860
bool IsSystemRelation(Relation relation)
Definition catalog.c:74
bool IsCatalogRelation(Relation relation)
Definition catalog.c:104
bool IsSystemClass(Oid relid, Form_pg_class reltuple)
Definition catalog.c:86
static Relation process_single_relation(RepackStmt *stmt, ClusterParams *params)
Definition cluster.c:1879
static List * get_tables_to_repack_partitioned(RepackCommand cmd, Oid relid, bool rel_is_index, MemoryContext permcxt)
Definition cluster.c:1788
static void copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex, bool verbose, bool *pSwapToastByContent, TransactionId *pFreezeXid, MultiXactId *pCutoffMulti)
Definition cluster.c:841
void check_index_is_clusterable(Relation OldHeap, Oid indexOid, LOCKMODE lockmode)
Definition cluster.c:508
void finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, bool is_system_catalog, bool swap_toast_by_content, bool check_constraints, bool is_internal, TransactionId frozenXid, MultiXactId cutoffMulti, char newrelpersistence)
Definition cluster.c:1455
static bool repack_is_permitted_for_relation(RepackCommand cmd, Oid relid, Oid userid)
Definition cluster.c:1851
void ExecRepack(ParseState *pstate, RepackStmt *stmt, bool isTopLevel)
Definition cluster.c:113
static List * get_tables_to_repack(RepackCommand cmd, bool usingindex, MemoryContext permcxt)
Definition cluster.c:1658
static const char * RepackCommandAsString(RepackCommand cmd)
Definition cluster.c:2015
static bool cluster_rel_recheck(RepackCommand cmd, Relation OldHeap, Oid indexOid, Oid userid, int options)
Definition cluster.c:447
Oid make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod, char relpersistence, LOCKMODE lockmode)
Definition cluster.c:715
static void rebuild_relation(Relation OldHeap, Relation index, bool verbose)
Definition cluster.c:639
void cluster_rel(RepackCommand cmd, Relation OldHeap, Oid indexOid, ClusterParams *params)
Definition cluster.c:300
void mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
Definition cluster.c:568
static void swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, bool swap_toast_by_content, bool is_internal, TransactionId frozenXid, MultiXactId cutoffMulti, Oid *mapped_tables)
Definition cluster.c:1073
static Oid determine_clustered_index(Relation rel, bool usingindex, const char *indexname)
Definition cluster.c:1972
#define CLUOPT_VERBOSE
Definition cluster.h:23
#define CLUOPT_ANALYZE
Definition cluster.h:26
#define CLUOPT_RECHECK_ISCLUSTERED
Definition cluster.h:25
#define CLUOPT_RECHECK
Definition cluster.h:24
void analyze_rel(Oid relid, RangeVar *relation, const VacuumParams params, List *va_cols, bool in_outer_xact, BufferAccessStrategy bstrategy)
Definition analyze.c:109
bool defGetBoolean(DefElem *def)
Definition define.c:93
void performDeletion(const ObjectAddress *object, DropBehavior behavior, int flags)
Definition dependency.c:279
@ DEPENDENCY_INTERNAL
Definition dependency.h:35
#define PERFORM_DELETION_INTERNAL
Definition dependency.h:92
int errcode(int sqlerrcode)
Definition elog.c:874
int errdetail(const char *fmt,...) pg_attribute_printf(1
#define WARNING
Definition elog.h:36
#define DEBUG2
Definition elog.h:29
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define INFO
Definition elog.h:34
#define ereport(elevel,...)
Definition elog.h:150
#define palloc_object(type)
Definition fe_memutils.h:74
bool allowSystemTableMods
Definition globals.c:130
int NewGUCNestLevel(void)
Definition guc.c:2142
void RestrictSearchPath(void)
Definition guc.c:2153
void AtEOXact_GUC(bool isCommit, int nestLevel)
Definition guc.c:2169
void RelationClearMissing(Relation rel)
Definition heap.c:1964
Oid heap_create_with_catalog(const char *relname, Oid relnamespace, Oid reltablespace, Oid relid, Oid reltypeid, Oid reloftypeid, Oid ownerid, Oid accessmtd, TupleDesc tupdesc, List *cooked_constraints, char relkind, char relpersistence, bool shared_relation, bool mapped_relation, OnCommitAction oncommit, Datum reloptions, bool use_user_acl, bool allow_system_table_mods, bool is_internal, Oid relrewrite, ObjectAddress *typaddress)
Definition heap.c:1122
HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction)
Definition heapam.c:1420
bool heap_attisnull(HeapTuple tup, int attnum, TupleDesc tupleDesc)
Definition heaptuple.c:456
void heap_freetuple(HeapTuple htup)
Definition heaptuple.c:1384
#define HeapTupleIsValid(tuple)
Definition htup.h:78
static void * GETSTRUCT(const HeapTupleData *tuple)
#define stmt
Oid IndexGetRelation(Oid indexId, bool missing_ok)
Definition index.c:3584
bool reindex_relation(const ReindexStmt *stmt, Oid relid, int flags, const ReindexParams *params)
Definition index.c:3949
#define REINDEX_REL_FORCE_INDEXES_UNLOGGED
Definition index.h:168
#define REINDEX_REL_SUPPRESS_INDEX_USE
Definition index.h:166
#define REINDEX_REL_FORCE_INDEXES_PERMANENT
Definition index.h:169
#define REINDEX_REL_CHECK_CONSTRAINTS
Definition index.h:167
void index_close(Relation relation, LOCKMODE lockmode)
Definition indexam.c:177
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition indexam.c:133
void CatalogTupleUpdateWithInfo(Relation heapRel, const ItemPointerData *otid, HeapTuple tup, CatalogIndexState indstate)
Definition indexing.c:337
void CatalogTupleUpdate(Relation heapRel, const ItemPointerData *otid, HeapTuple tup)
Definition indexing.c:313
void CatalogCloseIndexes(CatalogIndexState indstate)
Definition indexing.c:61
CatalogIndexState CatalogOpenIndexes(Relation heapRel)
Definition indexing.c:43
void CacheInvalidateCatalog(Oid catalogId)
Definition inval.c:1612
void CacheInvalidateRelcacheByTuple(HeapTuple classTuple)
Definition inval.c:1669
int i
Definition isn.c:77
List * lappend(List *list, void *datum)
Definition list.c:339
bool ConditionalLockRelationOid(Oid relid, LOCKMODE lockmode)
Definition lmgr.c:151
void UnlockRelationOid(Oid relid, LOCKMODE lockmode)
Definition lmgr.c:229
void LockRelationOid(Oid relid, LOCKMODE lockmode)
Definition lmgr.c:107
bool CheckRelationLockedByMe(Relation relation, LOCKMODE lockmode, bool orstronger)
Definition lmgr.c:334
bool CheckRelationOidLockedByMe(Oid relid, LOCKMODE lockmode, bool orstronger)
Definition lmgr.c:351
int LOCKMODE
Definition lockdefs.h:26
#define NoLock
Definition lockdefs.h:34
#define AccessExclusiveLock
Definition lockdefs.h:43
#define AccessShareLock
Definition lockdefs.h:36
#define RowExclusiveLock
Definition lockdefs.h:38
char * get_rel_name(Oid relid)
Definition lsyscache.c:2148
char get_rel_relkind(Oid relid)
Definition lsyscache.c:2223
Oid get_rel_namespace(Oid relid)
Definition lsyscache.c:2172
bool get_index_isclustered(Oid index_oid)
Definition lsyscache.c:3823
char * get_namespace_name(Oid nspid)
Definition lsyscache.c:3588
Oid get_relname_relid(const char *relname, Oid relnamespace)
Definition lsyscache.c:2105
void MemoryContextDelete(MemoryContext context)
Definition mcxt.c:472
MemoryContext PortalContext
Definition mcxt.c:175
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
#define SECURITY_RESTRICTED_OPERATION
Definition miscadmin.h:319
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
void GetUserIdAndSecContext(Oid *userid, int *sec_context)
Definition miscinit.c:613
Oid GetUserId(void)
Definition miscinit.c:470
void SetUserIdAndSecContext(Oid userid, int sec_context)
Definition miscinit.c:620
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2857
#define MultiXactIdIsValid(multi)
Definition multixact.h:29
#define InvalidMultiXactId
Definition multixact.h:25
Oid LookupCreationNamespace(const char *nspname)
Definition namespace.c:3500
Oid RangeVarGetRelidExtended(const RangeVar *relation, LOCKMODE lockmode, uint32 flags, RangeVarGetRelidCallback callback, void *callback_arg)
Definition namespace.c:442
static char * errmsg
#define InvokeObjectPostAlterHookArg(classId, objectId, subId, auxiliaryId, is_internal)
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
int parser_errposition(ParseState *pstate, int location)
Definition parse_node.c:106
RepackCommand
@ REPACK_COMMAND_REPACK
@ REPACK_COMMAND_CLUSTER
@ REPACK_COMMAND_VACUUMFULL
#define ACL_MAINTAIN
Definition parsenodes.h:90
@ DROP_RESTRICT
static int verbose
FormData_pg_class * Form_pg_class
Definition pg_class.h:160
#define NAMEDATALEN
void recordDependencyOn(const ObjectAddress *depender, const ObjectAddress *referenced, DependencyType behavior)
Definition pg_depend.c:47
long changeDependencyFor(Oid classId, Oid objectId, Oid refClassId, Oid oldRefObjectId, Oid newRefObjectId)
Definition pg_depend.c:459
long deleteDependencyRecordsFor(Oid classId, Oid objectId, bool skipExtensionDeps)
Definition pg_depend.c:303
END_CATALOG_STRUCT typedef FormData_pg_index * Form_pg_index
Definition pg_index.h:74
List * find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, List **numparents)
#define NIL
Definition pg_list.h:68
#define foreach_ptr(type, var, lst)
Definition pg_list.h:469
#define foreach_node(type, var, lst)
Definition pg_list.h:496
#define foreach_oid(var, lst)
Definition pg_list.h:471
#define lfirst_oid(lc)
Definition pg_list.h:174
const char * pg_rusage_show(const PGRUsage *ru0)
Definition pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition pg_rusage.c:27
bool plan_cluster_use_sort(Oid tableOid, Oid indexOid)
Definition planner.c:6899
#define snprintf
Definition port.h:260
static Datum BoolGetDatum(bool X)
Definition postgres.h:112
static Datum ObjectIdGetDatum(Oid X)
Definition postgres.h:252
uint64_t Datum
Definition postgres.h:70
#define InvalidOid
unsigned int Oid
void TransferPredicateLocksToHeapRelation(Relation relation)
Definition predicate.c:3132
static int fb(int x)
@ ONCOMMIT_NOOP
Definition primnodes.h:59
#define PROGRESS_REPACK_PHASE
Definition progress.h:86
#define PROGRESS_REPACK_COMMAND
Definition progress.h:85
#define PROGRESS_REPACK_PHASE_SWAP_REL_FILES
Definition progress.h:101
#define PROGRESS_REPACK_PHASE_FINAL_CLEANUP
Definition progress.h:103
#define PROGRESS_REPACK_PHASE_REBUILD_INDEX
Definition progress.h:102
#define RelationGetRelid(relation)
Definition rel.h:514
#define RelationGetDescr(relation)
Definition rel.h:540
#define RelationIsMapped(relation)
Definition rel.h:563
#define RelationGetRelationName(relation)
Definition rel.h:548
#define RelationIsPopulated(relation)
Definition rel.h:686
#define RELATION_IS_OTHER_TEMP(relation)
Definition rel.h:667
#define RelationGetNamespace(relation)
Definition rel.h:555
List * RelationGetIndexList(Relation relation)
Definition relcache.c:4826
void RelationAssumeNewRelfilelocator(Relation relation)
Definition relcache.c:3967
void RelationMapRemoveMapping(Oid relationId)
Definition relmapper.c:439
RelFileNumber RelationMapOidToFilenumber(Oid relationId, bool shared)
Definition relmapper.c:166
void RelationMapUpdateMap(Oid relationId, RelFileNumber fileNumber, bool shared, bool immediate)
Definition relmapper.c:326
Oid RelFileNumber
Definition relpath.h:25
#define RelFileNumberIsValid(relnumber)
Definition relpath.h:27
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition scankey.c:76
@ ForwardScanDirection
Definition sdir.h:28
Snapshot GetTransactionSnapshot(void)
Definition snapmgr.c:272
void PushActiveSnapshot(Snapshot snapshot)
Definition snapmgr.c:682
void PopActiveSnapshot(void)
Definition snapmgr.c:775
void relation_close(Relation relation, LOCKMODE lockmode)
Definition relation.c:205
Relation relation_open(Oid relationId, LOCKMODE lockmode)
Definition relation.c:47
#define BTEqualStrategyNumber
Definition stratnum.h:31
bits32 options
Definition cluster.h:31
Definition pg_list.h:54
Oid indexOid
Definition cluster.c:66
Oid tableOid
Definition cluster.c:65
Form_pg_class rd_rel
Definition rel.h:111
TransactionId FreezeLimit
Definition vacuum.h:289
TransactionId OldestXmin
Definition vacuum.h:279
TransactionId relfrozenxid
Definition vacuum.h:263
MultiXactId relminmxid
Definition vacuum.h:264
MultiXactId MultiXactCutoff
Definition vacuum.h:290
Definition type.h:96
void ReleaseSysCache(HeapTuple tuple)
Definition syscache.c:264
HeapTuple SearchSysCache1(SysCacheIdentifier cacheId, Datum key1)
Definition syscache.c:220
Datum SysCacheGetAttr(SysCacheIdentifier cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition syscache.c:595
#define SearchSysCacheCopy1(cacheId, key1)
Definition syscache.h:91
#define SearchSysCacheExists1(cacheId, key1)
Definition syscache.h:100
Relation try_table_open(Oid relationId, LOCKMODE lockmode)
Definition table.c:60
void table_close(Relation relation, LOCKMODE lockmode)
Definition table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition table.c:40
TableScanDesc table_beginscan_catalog(Relation relation, int nkeys, ScanKeyData *key)
Definition tableam.c:113
static void table_endscan(TableScanDesc scan)
Definition tableam.h:1004
static void table_relation_copy_for_cluster(Relation OldTable, Relation NewTable, Relation OldIndex, bool use_sort, TransactionId OldestXmin, TransactionId *xid_cutoff, MultiXactId *multi_cutoff, double *num_tuples, double *tups_vacuumed, double *tups_recently_dead)
Definition tableam.h:1668
void ResetRelRewrite(Oid myrelid)
Definition tablecmds.c:4394
void CheckTableNotInUse(Relation rel, const char *stmt)
Definition tablecmds.c:4447
void RenameRelationInternal(Oid myrelid, const char *newrelname, bool is_internal, bool is_index)
Definition tablecmds.c:4301
void RangeVarCallbackMaintainsTable(const RangeVar *relation, Oid relId, Oid oldRelId, void *arg)
Oid toast_get_valid_index(Oid toastoid, LOCKMODE lock)
void NewHeapCreateToastTable(Oid relOid, Datum reloptions, LOCKMODE lockmode, Oid OIDOldToast)
Definition toasting.c:64
#define InvalidTransactionId
Definition transam.h:31
#define TransactionIdIsValid(xid)
Definition transam.h:41
#define TransactionIdIsNormal(xid)
Definition transam.h:42
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
bool vacuum_get_cutoffs(Relation rel, const VacuumParams params, struct VacuumCutoffs *cutoffs)
Definition vacuum.c:1100
#define VACOPT_VERBOSE
Definition vacuum.h:182
#define VACOPT_ANALYZE
Definition vacuum.h:181
void CommandCounterIncrement(void)
Definition xact.c:1102
void PreventInTransactionBlock(bool isTopLevel, const char *stmtType)
Definition xact.c:3670
void StartTransactionCommand(void)
Definition xact.c:3081
void CommitTransactionCommand(void)
Definition xact.c:3179