PostgreSQL Source Code git master
Loading...
Searching...
No Matches
repack.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * repack.c
4 * REPACK a table; formerly known as CLUSTER. VACUUM FULL also uses
5 * parts of this code.
6 *
7 * There are two somewhat different ways to rewrite a table. In non-
8 * concurrent mode, it's easy: take AccessExclusiveLock, create a new
9 * transient relation, copy the tuples over to the relfilenode of the new
10 * relation, swap the relfilenodes, then drop the old relation.
11 *
12 * In concurrent mode, we lock the table with only ShareUpdateExclusiveLock,
13 * then do an initial copy as above. However, while the tuples are being
14 * copied, concurrent transactions could modify the table. To cope with those
15 * changes, we rely on logical decoding to obtain them from WAL. A bgworker
16 * consumes WAL while the initial copy is ongoing (to prevent excessive WAL
17 * from being reserved), and accumulates the changes in a file. Once the
18 * initial copy is complete, we read the changes from the file and re-apply
19 * them on the new heap. Then we upgrade our ShareUpdateExclusiveLock to
20 * AccessExclusiveLock and swap the relfilenodes. This way, the time we hold
21 * a strong lock on the table is much reduced, and the bloat is eliminated.
22 *
23 *
24 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
25 * Portions Copyright (c) 1994-5, Regents of the University of California
26 *
27 *
28 * IDENTIFICATION
29 * src/backend/commands/repack.c
30 *
31 *-------------------------------------------------------------------------
32 */
33#include "postgres.h"
34
35#include "access/amapi.h"
36#include "access/heapam.h"
37#include "access/multixact.h"
38#include "access/relscan.h"
39#include "access/tableam.h"
41#include "access/transam.h"
42#include "access/xact.h"
43#include "catalog/catalog.h"
44#include "catalog/dependency.h"
45#include "catalog/heap.h"
46#include "catalog/index.h"
47#include "catalog/namespace.h"
49#include "catalog/pg_am.h"
51#include "catalog/pg_inherits.h"
52#include "catalog/toasting.h"
53#include "commands/defrem.h"
54#include "commands/progress.h"
55#include "commands/repack.h"
57#include "commands/tablecmds.h"
58#include "commands/vacuum.h"
59#include "executor/executor.h"
60#include "libpq/pqformat.h"
61#include "libpq/pqmq.h"
62#include "miscadmin.h"
63#include "optimizer/optimizer.h"
64#include "pgstat.h"
65#include "storage/bufmgr.h"
66#include "storage/lmgr.h"
67#include "storage/predicate.h"
68#include "storage/proc.h"
69#include "utils/acl.h"
70#include "utils/fmgroids.h"
71#include "utils/guc.h"
73#include "utils/inval.h"
74#include "utils/lsyscache.h"
75#include "utils/memutils.h"
76#include "utils/pg_rusage.h"
77#include "utils/relmapper.h"
78#include "utils/snapmgr.h"
79#include "utils/syscache.h"
80#include "utils/wait_event_types.h"
81
82/*
83 * This struct is used to pass around the information on tables to be
84 * clustered. We need this so we can make a list of them when invoked without
85 * a specific table/index pair.
86 */
87typedef struct
88{
92
93/*
94 * The first file exported by the decoding worker must contain a snapshot, the
95 * following ones contain the data changes.
96 */
97#define WORKER_FILE_SNAPSHOT 0
98
99/*
100 * Information needed to apply concurrent data changes.
101 */
102typedef struct ChangeContext
103{
104 /* The relation the changes are applied to. */
106
107 /* Needed to update indexes of rel_dst. */
110
111 /*
112 * Existing tuples to UPDATE and DELETE are located via this index. We
113 * keep the scankey in partially initialized state to avoid repeated work.
114 * sk_argument is completed on the fly.
115 */
119
120 /* Sequential number of the file containing the changes. */
123
124/*
125 * Backend-local information to control the decoding worker.
126 */
127typedef struct DecodingWorker
128{
129 /* The worker. */
131
132 /* DecodingWorkerShared is in this segment. */
134
135 /* Handle of the error queue. */
138
139/* Pointer to currently running decoding worker. */
141
142/*
143 * Is there a message sent by a repack worker that the backend needs to
144 * receive?
145 */
147
148static LOCKMODE RepackLockLevel(bool concurrent);
150 Oid indexOid, Oid userid, LOCKMODE lmode,
151 int options);
155 Oid ident_idx);
157 Snapshot snapshot,
158 bool verbose,
162static List *get_tables_to_repack(RepackCommand cmd, bool usingindex,
165 Oid relid, bool rel_is_index,
168 Oid relid, Oid userid);
169
171static void apply_concurrent_insert(Relation rel, TupleTableSlot *slot,
176static void apply_concurrent_delete(Relation rel, TupleTableSlot *slot);
177static void restore_tuple(BufFile *file, Relation relation,
178 TupleTableSlot *slot);
179static void adjust_toast_pointers(Relation relation, TupleTableSlot *dest,
180 TupleTableSlot *src);
182 TupleTableSlot *locator,
184static void process_concurrent_changes(XLogRecPtr end_of_wal,
186 bool done);
188 Relation relation,
199 LOCKMODE lockmode,
200 bool isTopLevel,
201 ClusterParams *params);
202static Oid determine_clustered_index(Relation rel, bool usingindex,
203 const char *indexname);
204
205static void start_repack_decoding_worker(Oid relid);
206static void stop_repack_decoding_worker(void);
208
209static void ProcessRepackMessage(StringInfo msg);
210static const char *RepackCommandAsString(RepackCommand cmd);
211
212
213/*
214 * The repack code allows for processing multiple tables at once. Because
215 * of this, we cannot just run everything in a single transaction, or we
216 * would be forced to acquire exclusive locks on all the tables being
217 * clustered, simultaneously --- very likely leading to deadlock.
218 *
219 * To solve this we follow a similar strategy to VACUUM code, processing each
220 * relation in a separate transaction. For this to work, we need to:
221 *
222 * - provide a separate memory context so that we can pass information in
223 * a way that survives across transactions
224 * - start a new transaction every time a new relation is clustered
225 * - check for validity of the information on to-be-clustered relations,
226 * as someone might have deleted a relation behind our back, or
227 * clustered one on a different index
228 * - end the transaction
229 *
230 * The single-relation case does not have any such overhead.
231 *
232 * We also allow a relation to be repacked following an index, but without
233 * naming a specific one. In that case, the indisclustered bit will be
234 * looked up, and an ERROR will be thrown if no so-marked index is found.
235 */
236void
238{
239 ClusterParams params = {0};
240 Relation rel = NULL;
242 LOCKMODE lockmode;
243 List *rtcs;
244
245 /* Parse option list */
246 foreach_node(DefElem, opt, stmt->params)
247 {
248 if (strcmp(opt->defname, "verbose") == 0)
249 params.options |= defGetBoolean(opt) ? CLUOPT_VERBOSE : 0;
250 else if (strcmp(opt->defname, "analyze") == 0 ||
251 strcmp(opt->defname, "analyse") == 0)
252 params.options |= defGetBoolean(opt) ? CLUOPT_ANALYZE : 0;
253 else if (strcmp(opt->defname, "concurrently") == 0 &&
254 defGetBoolean(opt))
255 {
256 if (stmt->command != REPACK_COMMAND_REPACK)
259 errmsg("CONCURRENTLY option not supported for %s",
260 RepackCommandAsString(stmt->command)));
261 params.options |= CLUOPT_CONCURRENT;
262 }
263 else
266 errmsg("unrecognized %s option \"%s\"",
267 RepackCommandAsString(stmt->command),
268 opt->defname),
269 parser_errposition(pstate, opt->location));
270 }
271
272 /* Determine the lock mode to use. */
273 lockmode = RepackLockLevel((params.options & CLUOPT_CONCURRENT) != 0);
274
275 if ((params.options & CLUOPT_CONCURRENT) != 0)
276 {
277 /*
278 * Make sure we're not in a transaction block.
279 *
280 * The reason is that repack_setup_logical_decoding() could wait
281 * indefinitely for our XID to complete. (The deadlock detector would
282 * not recognize it because we'd be waiting for ourselves, i.e. no
283 * real lock conflict.) It would be possible to run in a transaction
284 * block if we had no XID, but this restriction is simpler for users
285 * to understand and we don't lose any functionality.
286 */
287 PreventInTransactionBlock(isTopLevel, "REPACK (CONCURRENTLY)");
288 }
289
290 /*
291 * If a single relation is specified, process it and we're done ... unless
292 * the relation is a partitioned table, in which case we fall through.
293 */
294 if (stmt->relation != NULL)
295 {
296 rel = process_single_relation(stmt, lockmode, isTopLevel, &params);
297 if (rel == NULL)
298 return; /* all done */
299 }
300
301 /*
302 * Don't allow ANALYZE in the multiple-relation case for now. Maybe we
303 * can add support for this later.
304 */
305 if (params.options & CLUOPT_ANALYZE)
308 errmsg("cannot execute %s on multiple tables",
309 "REPACK (ANALYZE)"));
310
311 /*
312 * By here, we know we are in a multi-table situation.
313 *
314 * Concurrent processing is currently considered rather special (e.g. in
315 * terms of resources consumed) so it is not performed in bulk.
316 */
317 if (params.options & CLUOPT_CONCURRENT)
318 {
319 if (rel != NULL)
320 {
321 Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
324 errmsg("REPACK (CONCURRENTLY) is not supported for partitioned tables"),
325 errhint("Consider running the command on individual partitions."));
326 }
327 else
330 errmsg("REPACK (CONCURRENTLY) requires an explicit table name"));
331 }
332
333 /*
334 * In order to avoid holding locks for too long, we want to process each
335 * table in its own transaction. This forces us to disallow running
336 * inside a user transaction block.
337 */
339
340 /* Also, we need a memory context to hold our list of relations */
342 "Repack",
344
345 /*
346 * Since we open a new transaction for each relation, we have to check
347 * that the relation still is what we think it is.
348 *
349 * In single-transaction CLUSTER, we don't need the overhead.
350 */
351 params.options |= CLUOPT_RECHECK;
352
353 /*
354 * If we don't have a relation yet, determine a relation list. If we do,
355 * then it must be a partitioned table, and we want to process its
356 * partitions.
357 */
358 if (rel == NULL)
359 {
360 Assert(stmt->indexname == NULL);
361 rtcs = get_tables_to_repack(stmt->command, stmt->usingindex,
364 }
365 else
366 {
367 Oid relid;
368 bool rel_is_index;
369
370 Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
371
372 /*
373 * If USING INDEX was specified, resolve the index name now and pass
374 * it down.
375 */
376 if (stmt->usingindex)
377 {
378 /*
379 * If no index name was specified when repacking a partitioned
380 * table, punt for now. Maybe we can improve this later.
381 */
382 if (!stmt->indexname)
383 {
384 if (stmt->command == REPACK_COMMAND_CLUSTER)
387 errmsg("there is no previously clustered index for table \"%s\"",
389 else
392 /*- translator: first %s is name of a SQL command, eg. REPACK */
393 errmsg("cannot execute %s on partitioned table \"%s\" USING INDEX with no index name",
394 RepackCommandAsString(stmt->command),
396 }
397
398 relid = determine_clustered_index(rel, stmt->usingindex,
399 stmt->indexname);
400 if (!OidIsValid(relid))
401 elog(ERROR, "unable to determine index to cluster on");
403
404 rel_is_index = true;
405 }
406 else
407 {
408 relid = RelationGetRelid(rel);
409 rel_is_index = false;
410 }
411
413 relid, rel_is_index,
415
416 /* close parent relation, releasing lock on it */
418 rel = NULL;
419 }
420
421 /* Commit to get out of starting transaction */
424
425 /* Cluster the tables, each in a separate transaction */
426 Assert(rel == NULL);
428 {
429 /* Start a new transaction for each relation. */
431
432 /*
433 * Open the target table, coping with the case where it has been
434 * dropped.
435 */
436 rel = try_table_open(rtc->tableOid, lockmode);
437 if (rel == NULL)
438 {
440 continue;
441 }
442
443 /* functions in indexes may want a snapshot set */
445
446 /* Process this table */
447 cluster_rel(stmt->command, rel, rtc->indexOid, &params, isTopLevel);
448 /* cluster_rel closes the relation, but keeps lock */
449
452 }
453
454 /* Start a new transaction for the cleanup work. */
456
457 /* Clean up working storage */
459}
460
461/*
462 * In the non-concurrent case, we obtain AccessExclusiveLock throughout the
463 * operation to avoid any lock-upgrade hazards. In the concurrent case, we
464 * grab ShareUpdateExclusiveLock (just like VACUUM) for most of the
465 * processing and only acquire AccessExclusiveLock at the end, to swap the
466 * relation -- supposedly for a short time.
467 */
468static LOCKMODE
469RepackLockLevel(bool concurrent)
470{
471 if (concurrent)
473 else
474 return AccessExclusiveLock;
475}
476
477/*
478 * cluster_rel
479 *
480 * This clusters the table by creating a new, clustered table and
481 * swapping the relfilenumbers of the new table and the old table, so
482 * the OID of the original table is preserved. Thus we do not lose
483 * GRANT, inheritance nor references to this table.
484 *
485 * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
486 * the new table, it's better to create the indexes afterwards than to fill
487 * them incrementally while we load the table.
488 *
489 * If indexOid is InvalidOid, the table will be rewritten in physical order
490 * instead of index order.
491 *
492 * Note that, in the concurrent case, the function releases the lock at some
493 * point, in order to get AccessExclusiveLock for the final steps (i.e. to
494 * swap the relation files). To make things simpler, the caller should expect
495 * OldHeap to be closed on return, regardless of CLUOPT_CONCURRENT. (The
496 * AccessExclusiveLock is kept till the end of the transaction.)
497 *
498 * 'cmd' indicates which command is being executed, to be used for error
499 * messages.
500 */
501void
503 ClusterParams *params, bool isTopLevel)
504{
505 Oid tableOid = RelationGetRelid(OldHeap);
508 Oid save_userid;
509 int save_sec_context;
510 int save_nestlevel;
511 bool verbose = ((params->options & CLUOPT_VERBOSE) != 0);
512 bool recheck = ((params->options & CLUOPT_RECHECK) != 0);
513 bool concurrent = ((params->options & CLUOPT_CONCURRENT) != 0);
515
516 /* Determine the lock mode to use. */
517 lmode = RepackLockLevel(concurrent);
518
519 /*
520 * Check some preconditions in the concurrent case. This also obtains the
521 * replica index OID.
522 */
523 if (concurrent)
525
526 /* Check for user-requested abort. */
528
531
532 /*
533 * Switch to the table owner's userid, so that any index functions are run
534 * as that user. Also lock down security-restricted operations and
535 * arrange to make GUC variable changes local to this command.
536 */
537 GetUserIdAndSecContext(&save_userid, &save_sec_context);
538 SetUserIdAndSecContext(OldHeap->rd_rel->relowner,
539 save_sec_context | SECURITY_RESTRICTED_OPERATION);
540 save_nestlevel = NewGUCNestLevel();
542
543 /*
544 * Recheck that the relation is still what it was when we started.
545 *
546 * Note that it's critical to skip this in single-relation CLUSTER;
547 * otherwise, we would reject an attempt to cluster using a
548 * not-previously-clustered index.
549 */
550 if (recheck &&
551 !cluster_rel_recheck(cmd, OldHeap, indexOid, save_userid,
552 lmode, params->options))
553 goto out;
554
555 /*
556 * We allow repacking shared catalogs only when not using an index. It
557 * would work to use an index in most respects, but the index would only
558 * get marked as indisclustered in the current database, leading to
559 * unexpected behavior if CLUSTER were later invoked in another database.
560 */
561 if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
564 /*- translator: first %s is name of a SQL command, eg. REPACK */
565 errmsg("cannot execute %s on a shared catalog",
567
568 /*
569 * The CONCURRENTLY case should have been rejected earlier because it does
570 * not support system catalogs.
571 */
572 Assert(!(OldHeap->rd_rel->relisshared && concurrent));
573
574 /*
575 * Don't process temp tables of other backends ... their local buffer
576 * manager is not going to cope.
577 */
581 /*- translator: first %s is name of a SQL command, eg. REPACK */
582 errmsg("cannot execute %s on temporary tables of other sessions",
584
585 /*
586 * Also check for active uses of the relation in the current transaction,
587 * including open scans and pending AFTER trigger events.
588 */
590
591 /* Check heap and index are valid to cluster on */
592 if (OidIsValid(indexOid))
593 {
594 /* verify the index is good and lock it */
596 /* also open it */
597 index = index_open(indexOid, NoLock);
598 }
599 else
600 index = NULL;
601
602 /*
603 * When allow_system_table_mods is turned off, we disallow repacking a
604 * catalog on a particular index unless that's already the clustered index
605 * for that catalog.
606 *
607 * XXX We don't check for this in CLUSTER, because it's historically been
608 * allowed.
609 */
610 if (cmd != REPACK_COMMAND_CLUSTER &&
611 !allowSystemTableMods && OidIsValid(indexOid) &&
612 IsCatalogRelation(OldHeap) && !index->rd_index->indisclustered)
615 errmsg("permission denied: \"%s\" is a system catalog",
617 errdetail("System catalogs can only be clustered by the index they're already clustered on, if any, unless \"%s\" is enabled.",
618 "allow_system_table_mods"));
619
620 /*
621 * Quietly ignore the request if this is a materialized view which has not
622 * been populated from its query. No harm is done because there is no data
623 * to deal with, and we don't want to throw an error if this is part of a
624 * multi-relation request -- for example, CLUSTER was run on the entire
625 * database.
626 */
627 if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW &&
629 {
630 if (index)
633 goto out;
634 }
635
636 Assert(OldHeap->rd_rel->relkind == RELKIND_RELATION ||
637 OldHeap->rd_rel->relkind == RELKIND_MATVIEW ||
638 OldHeap->rd_rel->relkind == RELKIND_TOASTVALUE);
639
640 /*
641 * All predicate locks on the tuples or pages are about to be made
642 * invalid, because we move tuples around. Promote them to relation
643 * locks. Predicate locks on indexes will be promoted when they are
644 * reindexed.
645 *
646 * During concurrent processing, the heap as well as its indexes stay in
647 * operation, so we postpone this step until they are locked using
648 * AccessExclusiveLock near the end of the processing.
649 */
650 if (!concurrent)
652
653 /* rebuild_relation does all the dirty work */
654 PG_TRY();
655 {
657 }
658 PG_FINALLY();
659 {
660 if (concurrent)
661 {
662 /*
663 * Since during normal operation the worker was already asked to
664 * exit, stopping it explicitly is especially important on ERROR.
665 * However it still seems a good practice to make sure that the
666 * worker never survives the REPACK command.
667 */
669 }
670 }
671 PG_END_TRY();
672
673 /* rebuild_relation closes OldHeap, and index if valid */
674
675out:
676 /* Roll back any GUC changes executed by index functions */
677 AtEOXact_GUC(false, save_nestlevel);
678
679 /* Restore userid and security context */
680 SetUserIdAndSecContext(save_userid, save_sec_context);
681
683}
684
685/*
686 * Check if the table (and its index) still meets the requirements of
687 * cluster_rel().
688 */
689static bool
691 Oid userid, LOCKMODE lmode, int options)
692{
693 Oid tableOid = RelationGetRelid(OldHeap);
694
695 /* Check that the user still has privileges for the relation */
696 if (!repack_is_permitted_for_relation(cmd, tableOid, userid))
697 {
699 return false;
700 }
701
702 /*
703 * Silently skip a temp table for a remote session. Only doing this check
704 * in the "recheck" case is appropriate (which currently means somebody is
705 * executing a database-wide CLUSTER or on a partitioned table), because
706 * there is another check in cluster() which will stop any attempt to
707 * cluster remote temp tables by name. There is another check in
708 * cluster_rel which is redundant, but we leave it for extra safety.
709 */
711 {
713 return false;
714 }
715
716 if (OidIsValid(indexOid))
717 {
718 /*
719 * Check that the index still exists
720 */
722 {
724 return false;
725 }
726
727 /*
728 * Check that the index is still the one with indisclustered set, if
729 * needed.
730 */
731 if ((options & CLUOPT_RECHECK_ISCLUSTERED) != 0 &&
732 !get_index_isclustered(indexOid))
733 {
735 return false;
736 }
737 }
738
739 return true;
740}
741
742/*
743 * Verify that the specified heap and index are valid to cluster on
744 *
745 * Side effect: obtains lock on the index. The caller may
746 * in some cases already have a lock of the same strength on the table, but
747 * not in all cases so we can't rely on the table-level lock for
748 * protection here.
749 */
750void
752{
754
755 OldIndex = index_open(indexOid, lockmode);
756
757 /*
758 * Check that index is in fact an index on the given relation
759 */
760 if (OldIndex->rd_index == NULL ||
761 OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
764 errmsg("\"%s\" is not an index for table \"%s\"",
767
768 /* Index AM must allow clustering */
769 if (!OldIndex->rd_indam->amclusterable)
772 errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
774
775 /*
776 * Disallow clustering on incomplete indexes (those that might not index
777 * every row of the relation). We could relax this by making a separate
778 * seqscan pass over the table to copy the missing rows, but that seems
779 * expensive and tedious.
780 */
781 if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred, NULL))
784 errmsg("cannot cluster on partial index \"%s\"",
786
787 /*
788 * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
789 * it might well not contain entries for every heap row, or might not even
790 * be internally consistent. (But note that we don't check indcheckxmin;
791 * the worst consequence of following broken HOT chains would be that we
792 * might put recently-dead tuples out-of-order in the new table, and there
793 * is little harm in that.)
794 */
795 if (!OldIndex->rd_index->indisvalid)
798 errmsg("cannot cluster on invalid index \"%s\"",
800
801 /* Drop relcache refcnt on OldIndex, but keep lock */
803}
804
805/*
806 * mark_index_clustered: mark the specified index as the one clustered on
807 *
808 * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
809 */
810void
811mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
812{
817
818 Assert(rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE);
819
820 /*
821 * If the index is already marked clustered, no need to do anything.
822 */
823 if (OidIsValid(indexOid))
824 {
825 if (get_index_isclustered(indexOid))
826 return;
827 }
828
829 /*
830 * Check each index of the relation and set/clear the bit as needed.
831 */
833
834 foreach(index, RelationGetIndexList(rel))
835 {
837
841 elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
843
844 /*
845 * Unset the bit if set. We know it's wrong because we checked this
846 * earlier.
847 */
848 if (indexForm->indisclustered)
849 {
850 indexForm->indisclustered = false;
852 }
853 else if (thisIndexOid == indexOid)
854 {
855 /* this was checked earlier, but let's be real sure */
856 if (!indexForm->indisvalid)
857 elog(ERROR, "cannot cluster on invalid index %u", indexOid);
858 indexForm->indisclustered = true;
860 }
861
863 InvalidOid, is_internal);
864
866 }
867
869}
870
871/*
872 * Check if the CONCURRENTLY option is legal for the relation.
873 *
874 * *Ident_idx_p receives the OID of the identity index.
875 */
876static void
878{
879 char relpersistence,
880 replident;
882
883 /* Data changes in system relations are not logically decoded. */
884 if (IsCatalogRelation(rel))
887 errmsg("cannot repack relation \"%s\"",
889 errhint("REPACK CONCURRENTLY is not supported for catalog relations."));
890
891 /*
892 * reorderbuffer.c does not seem to handle processing of a TOAST relation
893 * alone.
894 */
895 if (IsToastRelation(rel))
898 errmsg("cannot repack relation \"%s\"",
900 errhint("REPACK CONCURRENTLY is not supported for TOAST relations"));
901
902 relpersistence = rel->rd_rel->relpersistence;
903 if (relpersistence != RELPERSISTENCE_PERMANENT)
906 errmsg("cannot repack relation \"%s\"",
908 errhint("REPACK CONCURRENTLY is only allowed for permanent relations."));
909
910 /* With NOTHING, WAL does not contain the old tuple. */
911 replident = rel->rd_rel->relreplident;
912 if (replident == REPLICA_IDENTITY_NOTHING)
915 errmsg("cannot repack relation \"%s\"",
917 errhint("Relation \"%s\" has insufficient replication identity.",
919
920 /*
921 * Obtain the replica identity index -- either one that has been set
922 * explicitly, or the primary key. If none of these cases apply, the
923 * table cannot be repacked concurrently. It might be possible to have
924 * repack work with a FULL replica identity; however that requires more
925 * work and is not implemented yet.
926 */
929 ident_idx = rel->rd_pkindex;
930 if (!OidIsValid(ident_idx))
933 errmsg("cannot process relation \"%s\"",
935 errhint("Relation \"%s\" has no identity index.",
937
939}
940
941
942/*
943 * rebuild_relation: rebuild an existing relation in index or physical order
944 *
945 * OldHeap: table to rebuild. See cluster_rel() for comments on the required
946 * lock strength.
947 *
948 * index: index to cluster by, or NULL to rewrite in physical order.
949 *
950 * ident_idx: identity index, to handle replaying of concurrent data changes
951 * to the new heap. InvalidOid if there's no CONCURRENTLY option.
952 *
953 * On entry, heap and index (if one is given) must be open, and the
954 * appropriate lock held on them -- AccessExclusiveLock for exclusive
955 * processing and ShareUpdateExclusiveLock for concurrent processing.
956 *
957 * On exit, they are closed, but still locked with AccessExclusiveLock.
958 * (The function handles the lock upgrade if 'concurrent' is true.)
959 */
960static void
963{
964 Oid tableOid = RelationGetRelid(OldHeap);
965 Oid accessMethod = OldHeap->rd_rel->relam;
966 Oid tableSpace = OldHeap->rd_rel->reltablespace;
969 char relpersistence;
973 bool concurrent = OidIsValid(ident_idx);
974 Snapshot snapshot = NULL;
975#if USE_ASSERT_CHECKING
977
978 lmode = RepackLockLevel(concurrent);
979
982#endif
983
984 if (concurrent)
985 {
986 /*
987 * The worker needs to be a member of the locking group we're the leader
988 * of. We ought to become the leader before the worker starts. The
989 * worker will join the group as soon as it starts.
990 *
991 * This is to make sure that the deadlock described below is
992 * detectable by deadlock.c: if the worker waits for a transaction to
993 * complete and we are waiting for the worker output, then effectively
994 * we (i.e. this backend) are waiting for that transaction.
995 */
997
998 /*
999 * Start the worker that decodes data changes applied while we're
1000 * copying the table contents.
1001 *
1002 * Note that the worker has to wait for all transactions with XID
1003 * already assigned to finish. If one of those transactions is
1004 * waiting for a lock conflicting with ShareUpdateExclusiveLock on our
1005 * table (e.g. it runs CREATE INDEX), we can end up in a deadlock.
1006 * Not sure this risk is worth unlocking/locking the table (and its
1007 * clustering index) and checking again if it's still eligible for
1008 * REPACK CONCURRENTLY.
1009 */
1011
1012 /*
1013 * Wait until the worker has the initial snapshot and retrieve it.
1014 */
1016
1017 PushActiveSnapshot(snapshot);
1018 }
1019
1020 /* for CLUSTER or REPACK USING INDEX, mark the index as the one to use */
1021 if (index != NULL)
1023
1024 /* Remember info about rel before closing OldHeap */
1025 relpersistence = OldHeap->rd_rel->relpersistence;
1026
1027 /*
1028 * Create the transient table that will receive the re-ordered data.
1029 *
1030 * OldHeap is already locked, so no need to lock it again. make_new_heap
1031 * obtains AccessExclusiveLock on the new heap and its toast table.
1032 */
1033 OIDNewHeap = make_new_heap(tableOid, tableSpace,
1034 accessMethod,
1035 relpersistence,
1036 NoLock);
1039
1040 /* Copy the heap data into the new table in the desired order */
1043
1044 /* The historic snapshot won't be needed anymore. */
1045 if (snapshot)
1046 {
1049 }
1050
1051 if (concurrent)
1052 {
1054
1055 /*
1056 * Close the index, but keep the lock. Both heaps will be closed by
1057 * the following call.
1058 */
1059 if (index)
1061
1064
1067 }
1068 else
1069 {
1071
1072 /* Close relcache entries, but keep lock until transaction commit */
1074 if (index)
1076
1077 /*
1078 * Close the new relation so it can be dropped as soon as the storage
1079 * is swapped. The relation is not visible to others, so no need to
1080 * unlock it explicitly.
1081 */
1083
1084 /*
1085 * Swap the physical files of the target and transient tables, then
1086 * rebuild the target's indexes and throw away the transient table.
1087 */
1089 swap_toast_by_content, false, true,
1090 true, /* reindex */
1092 relpersistence);
1093 }
1094}
1095
1096
1097/*
1098 * Create the transient table that will be filled with new data during
1099 * CLUSTER, ALTER TABLE, and similar operations. The transient table
1100 * duplicates the logical structure of the OldHeap; but will have the
1101 * specified physical storage properties NewTableSpace, NewAccessMethod, and
1102 * relpersistence.
1103 *
1104 * After this, the caller should load the new heap with transferred/modified
1105 * data, then call finish_heap_swap to complete the operation.
1106 */
1107Oid
1109 char relpersistence, LOCKMODE lockmode)
1110{
1114 Oid toastid;
1116 HeapTuple tuple;
1117 Datum reloptions;
1118 bool isNull;
1120
1121 OldHeap = table_open(OIDOldHeap, lockmode);
1123
1124 /*
1125 * Note that the NewHeap will not receive any of the defaults or
1126 * constraints associated with the OldHeap; we don't need 'em, and there's
1127 * no reason to spend cycles inserting them into the catalogs only to
1128 * delete them.
1129 */
1130
1131 /*
1132 * But we do want to use reloptions of the old heap for new heap.
1133 */
1135 if (!HeapTupleIsValid(tuple))
1136 elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
1137 reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
1138 &isNull);
1139 if (isNull)
1140 reloptions = (Datum) 0;
1141
1142 if (relpersistence == RELPERSISTENCE_TEMP)
1144 else
1146
1147 /*
1148 * Create the new heap, using a temporary name in the same namespace as
1149 * the existing table. NOTE: there is some risk of collision with user
1150 * relnames. Working around this seems more trouble than it's worth; in
1151 * particular, we can't create the new heap in a different namespace from
1152 * the old, or we will have problems with the TEMP status of temp tables.
1153 *
1154 * Note: the new heap is not a shared relation, even if we are rebuilding
1155 * a shared rel. However, we do make the new heap mapped if the source is
1156 * mapped. This simplifies swap_relation_files, and is absolutely
1157 * necessary for rebuilding pg_class, for reasons explained there.
1158 */
1159 snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
1160
1164 InvalidOid,
1165 InvalidOid,
1166 InvalidOid,
1167 OldHeap->rd_rel->relowner,
1170 NIL,
1172 relpersistence,
1173 false,
1176 reloptions,
1177 false,
1178 true,
1179 true,
1180 OIDOldHeap,
1181 NULL);
1183
1184 ReleaseSysCache(tuple);
1185
1186 /*
1187 * Advance command counter so that the newly-created relation's catalog
1188 * tuples will be visible to table_open.
1189 */
1191
1192 /*
1193 * If necessary, create a TOAST table for the new relation.
1194 *
1195 * If the relation doesn't have a TOAST table already, we can't need one
1196 * for the new relation. The other way around is possible though: if some
1197 * wide columns have been dropped, NewHeapCreateToastTable can decide that
1198 * no TOAST table is needed for the new table.
1199 *
1200 * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
1201 * that the TOAST table will be visible for insertion.
1202 */
1203 toastid = OldHeap->rd_rel->reltoastrelid;
1204 if (OidIsValid(toastid))
1205 {
1206 /* keep the existing toast table's reloptions, if any */
1208 if (!HeapTupleIsValid(tuple))
1209 elog(ERROR, "cache lookup failed for relation %u", toastid);
1210 reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
1211 &isNull);
1212 if (isNull)
1213 reloptions = (Datum) 0;
1214
1215 NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode, toastid);
1216
1217 ReleaseSysCache(tuple);
1218 }
1219
1221
1222 return OIDNewHeap;
1223}
1224
1225/*
1226 * Do the physical copying of table data.
1227 *
1228 * 'snapshot' and 'decoding_ctx': see table_relation_copy_for_cluster(). Pass
1229 * iff concurrent processing is required.
1230 *
1231 * There are three output parameters:
1232 * *pSwapToastByContent is set true if toast tables must be swapped by content.
1233 * *pFreezeXid receives the TransactionId used as freeze cutoff point.
1234 * *pCutoffMulti receives the MultiXactId used as a cutoff point.
1235 */
1236static void
1238 Snapshot snapshot, bool verbose, bool *pSwapToastByContent,
1240{
1246 VacuumParams params;
1247 struct VacuumCutoffs cutoffs;
1248 bool use_sort;
1249 double num_tuples = 0,
1250 tups_vacuumed = 0,
1252 BlockNumber num_pages;
1253 int elevel = verbose ? INFO : DEBUG2;
1254 PGRUsage ru0;
1255 char *nspname;
1256 bool concurrent = snapshot != NULL;
1258
1259 lmode = RepackLockLevel(concurrent);
1260
1262
1263 /* Store a copy of the namespace name for logging purposes */
1265
1266 /*
1267 * Their tuple descriptors should be exactly alike, but here we only need
1268 * assume that they have the same number of columns.
1269 */
1272 Assert(newTupDesc->natts == oldTupDesc->natts);
1273
1274 /*
1275 * If the OldHeap has a toast table, get lock on the toast table to keep
1276 * it from being vacuumed. This is needed because autovacuum processes
1277 * toast tables independently of their main tables, with no lock on the
1278 * latter. If an autovacuum were to start on the toast table after we
1279 * compute our OldestXmin below, it would use a later OldestXmin, and then
1280 * possibly remove as DEAD toast tuples belonging to main tuples we think
1281 * are only RECENTLY_DEAD. Then we'd fail while trying to copy those
1282 * tuples.
1283 *
1284 * We don't need to open the toast relation here, just lock it. The lock
1285 * will be held till end of transaction.
1286 */
1287 if (OldHeap->rd_rel->reltoastrelid)
1288 LockRelationOid(OldHeap->rd_rel->reltoastrelid, lmode);
1289
1290 /*
1291 * If both tables have TOAST tables, perform toast swap by content. It is
1292 * possible that the old table has a toast table but the new one doesn't,
1293 * if toastable columns have been dropped. In that case we have to do
1294 * swap by links. This is okay because swap by content is only essential
1295 * for system catalogs, and we don't support schema changes for them.
1296 */
1297 if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid &&
1298 !concurrent)
1299 {
1300 *pSwapToastByContent = true;
1301
1302 /*
1303 * When doing swap by content, any toast pointers written into NewHeap
1304 * must use the old toast table's OID, because that's where the toast
1305 * data will eventually be found. Set this up by setting rd_toastoid.
1306 * This also tells toast_save_datum() to preserve the toast value
1307 * OIDs, which we want so as not to invalidate toast pointers in
1308 * system catalog caches, and to avoid making multiple copies of a
1309 * single toast value.
1310 *
1311 * Note that we must hold NewHeap open until we are done writing data,
1312 * since the relcache will not guarantee to remember this setting once
1313 * the relation is closed. Also, this technique depends on the fact
1314 * that no one will try to read from the NewHeap until after we've
1315 * finished writing it and swapping the rels --- otherwise they could
1316 * follow the toast pointers to the wrong place. (It would actually
1317 * work for values copied over from the old toast table, but not for
1318 * any values that we toast which were previously not toasted.)
1319 *
1320 * This would not work with CONCURRENTLY because we may need to delete
1321 * TOASTed tuples from the new heap. With this hack, we'd delete them
1322 * from the old heap.
1323 */
1324 NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
1325 }
1326 else
1327 *pSwapToastByContent = false;
1328
1329 /*
1330 * Compute xids used to freeze and weed out dead tuples and multixacts.
1331 * Since we're going to rewrite the whole table anyway, there's no reason
1332 * not to be aggressive about this.
1333 */
1334 memset(&params, 0, sizeof(VacuumParams));
1335 vacuum_get_cutoffs(OldHeap, &params, &cutoffs);
1336
1337 /*
1338 * FreezeXid will become the table's new relfrozenxid, and that mustn't go
1339 * backwards, so take the max.
1340 */
1341 {
1342 TransactionId relfrozenxid = OldHeap->rd_rel->relfrozenxid;
1343
1346 cutoffs.FreezeLimit = relfrozenxid;
1347 }
1348
1349 /*
1350 * MultiXactCutoff, similarly, shouldn't go backwards either.
1351 */
1352 {
1353 MultiXactId relminmxid = OldHeap->rd_rel->relminmxid;
1354
1357 cutoffs.MultiXactCutoff = relminmxid;
1358 }
1359
1360 /*
1361 * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
1362 * the OldHeap. We know how to use a sort to duplicate the ordering of a
1363 * btree index, and will use seqscan-and-sort for that case if the planner
1364 * tells us it's cheaper. Otherwise, always indexscan if an index is
1365 * provided, else plain seqscan.
1366 */
1367 if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
1370 else
1371 use_sort = false;
1372
1373 /* Log what we're doing */
1374 if (OldIndex != NULL && !use_sort)
1375 ereport(elevel,
1376 errmsg("repacking \"%s.%s\" using index scan on \"%s\"",
1377 nspname,
1380 else if (use_sort)
1381 ereport(elevel,
1382 errmsg("repacking \"%s.%s\" using sequential scan and sort",
1383 nspname,
1385 else
1386 ereport(elevel,
1387 errmsg("repacking \"%s.%s\" in physical order",
1388 nspname,
1390
1391 /*
1392 * Hand off the actual copying to the AM-specific function; the generic code
1393 * cannot know how to deal with visibility across AMs. Note that this
1394 * routine is allowed to set FreezeXid / MultiXactCutoff to different
1395 * values (e.g. because the AM doesn't use freezing).
1396 */
1398 cutoffs.OldestXmin, snapshot,
1399 &cutoffs.FreezeLimit,
1400 &cutoffs.MultiXactCutoff,
1401 &num_tuples, &tups_vacuumed,
1403
1404 /* return selected values to caller, get set as relfrozenxid/minmxid */
1405 *pFreezeXid = cutoffs.FreezeLimit;
1406 *pCutoffMulti = cutoffs.MultiXactCutoff;
1407
1408 /*
1409 * Reset rd_toastoid just to be tidy --- it shouldn't be looked at again.
1410 * In the CONCURRENTLY case, we need to set it again before applying the
1411 * concurrent changes.
1412 */
1413 NewHeap->rd_toastoid = InvalidOid;
1414
1416
1417 /* Log what we did */
1418 ereport(elevel,
1419 (errmsg("\"%s.%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
1420 nspname,
1422 tups_vacuumed, num_tuples,
1424 errdetail("%.0f dead row versions cannot be removed yet.\n"
1425 "%s.",
1427 pg_rusage_show(&ru0))));
1428
1429 /* Update pg_class to reflect the correct values of pages and tuples. */
1431
1435 elog(ERROR, "cache lookup failed for relation %u",
1438
1439 relform->relpages = num_pages;
1440 relform->reltuples = num_tuples;
1441
1442 /* Don't update the stats for pg_class. See swap_relation_files. */
1445 else
1447
1448 /* Clean up. */
1451
1452 /* Make the update visible */
1454}
1455
1456/*
1457 * Swap the physical files of two given relations.
1458 *
1459 * We swap the physical identity (reltablespace, relfilenumber) while keeping
1460 * the same logical identities of the two relations. relpersistence is also
1461 * swapped, which is critical since it determines where buffers live for each
1462 * relation.
1463 *
1464 * We can swap associated TOAST data in either of two ways: recursively swap
1465 * the physical content of the toast tables (and their indexes), or swap the
1466 * TOAST links in the given relations' pg_class entries. The former is needed
1467 * to manage rewrites of shared catalogs (where we cannot change the pg_class
1468 * links) while the latter is the only way to handle cases in which a toast
1469 * table is added or removed altogether.
1470 *
1471 * Additionally, the first relation is marked with relfrozenxid set to
1472 * frozenXid. It seems a bit ugly to have this here, but the caller would
1473 * have to do it anyway, so having it here saves a heap_update. Note: in
1474 * the swap-toast-links case, we assume we don't need to change the toast
1475 * table's relfrozenxid: the new version of the toast table should already
1476 * have relfrozenxid set to RecentXmin, which is good enough.
1477 *
1478 * Lastly, if r2 and its toast table and toast index (if any) are mapped,
1479 * their OIDs are emitted into mapped_tables[]. This is hacky but beats
1480 * having to look the information up again later in finish_heap_swap.
1481 */
1482static void
1485 bool is_internal,
1489{
1492 reltup2;
1494 relform2;
1498 char swptmpchr;
1499 Oid relam1,
1500 relam2;
1501
1502 /* We need writable copies of both pg_class tuples. */
1504
1507 elog(ERROR, "cache lookup failed for relation %u", r1);
1509
1512 elog(ERROR, "cache lookup failed for relation %u", r2);
1514
1515 relfilenumber1 = relform1->relfilenode;
1516 relfilenumber2 = relform2->relfilenode;
1517 relam1 = relform1->relam;
1518 relam2 = relform2->relam;
1519
1522 {
1523 /*
1524 * Normal non-mapped relations: swap relfilenumbers, reltablespaces,
1525 * relpersistence
1526 */
1528
1529 swaptemp = relform1->relfilenode;
1530 relform1->relfilenode = relform2->relfilenode;
1531 relform2->relfilenode = swaptemp;
1532
1533 swaptemp = relform1->reltablespace;
1534 relform1->reltablespace = relform2->reltablespace;
1535 relform2->reltablespace = swaptemp;
1536
1537 swaptemp = relform1->relam;
1538 relform1->relam = relform2->relam;
1539 relform2->relam = swaptemp;
1540
1541 swptmpchr = relform1->relpersistence;
1542 relform1->relpersistence = relform2->relpersistence;
1543 relform2->relpersistence = swptmpchr;
1544
1545 /* Also swap toast links, if we're swapping by links */
1547 {
1548 swaptemp = relform1->reltoastrelid;
1549 relform1->reltoastrelid = relform2->reltoastrelid;
1550 relform2->reltoastrelid = swaptemp;
1551 }
1552 }
1553 else
1554 {
1555 /*
1556 * Mapped-relation case. Here we have to swap the relation mappings
1557 * instead of modifying the pg_class columns. Both must be mapped.
1558 */
1561 elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
1562 NameStr(relform1->relname));
1563
1564 /*
1565 * We can't change the tablespace nor persistence of a mapped rel, and
1566 * we can't handle toast link swapping for one either, because we must
1567 * not apply any critical changes to its pg_class row. These cases
1568 * should be prevented by upstream permissions tests, so these checks
1569 * are a non-user-facing emergency backstop.
1570 */
1571 if (relform1->reltablespace != relform2->reltablespace)
1572 elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
1573 NameStr(relform1->relname));
1574 if (relform1->relpersistence != relform2->relpersistence)
1575 elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
1576 NameStr(relform1->relname));
1577 if (relform1->relam != relform2->relam)
1578 elog(ERROR, "cannot change access method of mapped relation \"%s\"",
1579 NameStr(relform1->relname));
1580 if (!swap_toast_by_content &&
1581 (relform1->reltoastrelid || relform2->reltoastrelid))
1582 elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
1583 NameStr(relform1->relname));
1584
1585 /*
1586 * Fetch the mappings --- shouldn't fail, but be paranoid
1587 */
1590 elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1591 NameStr(relform1->relname), r1);
1594 elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1595 NameStr(relform2->relname), r2);
1596
1597 /*
1598 * Send replacement mappings to relmapper. Note these won't actually
1599 * take effect until CommandCounterIncrement.
1600 */
1601 RelationMapUpdateMap(r1, relfilenumber2, relform1->relisshared, false);
1602 RelationMapUpdateMap(r2, relfilenumber1, relform2->relisshared, false);
1603
1604 /* Pass OIDs of mapped r2 tables back to caller */
1605 *mapped_tables++ = r2;
1606 }
1607
1608 /*
1609 * Recognize that rel1's relfilenumber (swapped from rel2) is new in this
1610 * subtransaction. The rel2 storage (swapped from rel1) may or may not be
1611 * new.
1612 */
1613 {
1614 Relation rel1,
1615 rel2;
1616
1619 rel2->rd_createSubid = rel1->rd_createSubid;
1620 rel2->rd_newRelfilelocatorSubid = rel1->rd_newRelfilelocatorSubid;
1621 rel2->rd_firstRelfilelocatorSubid = rel1->rd_firstRelfilelocatorSubid;
1625 }
1626
1627 /*
1628 * In the case of a shared catalog, these next few steps will only affect
1629 * our own database's pg_class row; but that's okay, because they are all
1630 * noncritical updates. That's also an important fact for the case of a
1631 * mapped catalog, because it's possible that we'll commit the map change
1632 * and then fail to commit the pg_class update.
1633 */
1634
1635 /* set rel1's frozen Xid and minimum MultiXid */
1636 if (relform1->relkind != RELKIND_INDEX)
1637 {
1640 relform1->relfrozenxid = frozenXid;
1641 relform1->relminmxid = cutoffMulti;
1642 }
1643
1644 /* swap size statistics too, since new rel has freshly-updated stats */
1645 {
1650
1651 swap_pages = relform1->relpages;
1652 relform1->relpages = relform2->relpages;
1653 relform2->relpages = swap_pages;
1654
1655 swap_tuples = relform1->reltuples;
1656 relform1->reltuples = relform2->reltuples;
1657 relform2->reltuples = swap_tuples;
1658
1659 swap_allvisible = relform1->relallvisible;
1660 relform1->relallvisible = relform2->relallvisible;
1661 relform2->relallvisible = swap_allvisible;
1662
1663 swap_allfrozen = relform1->relallfrozen;
1664 relform1->relallfrozen = relform2->relallfrozen;
1665 relform2->relallfrozen = swap_allfrozen;
1666 }
1667
1668 /*
1669 * Update the tuples in pg_class --- unless the target relation of the
1670 * swap is pg_class itself. In that case, there is zero point in making
1671 * changes because we'd be updating the old data that we're about to throw
1672 * away. Because the real work being done here for a mapped relation is
1673 * just to change the relation map settings, it's all right to not update
1674 * the pg_class rows in this case. The most important changes will instead
1675 * be performed later, in finish_heap_swap() itself.
1676 */
1677 if (!target_is_pg_class)
1678 {
1680
1683 indstate);
1685 indstate);
1687 }
1688 else
1689 {
1690 /* no update ... but we do still need relcache inval */
1693 }
1694
1695 /*
1696 * Now that pg_class has been updated with its relevant information for
1697 * the swap, update the dependency of the relations to point to their new
1698 * table AM, if it has changed.
1699 */
1700 if (relam1 != relam2)
1701 {
1703 r1,
1705 relam1,
1706 relam2) != 1)
1707 elog(ERROR, "could not change access method dependency for relation \"%s.%s\"",
1709 get_rel_name(r1));
1711 r2,
1713 relam2,
1714 relam1) != 1)
1715 elog(ERROR, "could not change access method dependency for relation \"%s.%s\"",
1717 get_rel_name(r2));
1718 }
1719
1720 /*
1721 * Post alter hook for modified relations. The change to r2 is always
1722 * internal, but r1 depends on the invocation context.
1723 */
1725 InvalidOid, is_internal);
1727 InvalidOid, true);
1728
1729 /*
1730 * If we have toast tables associated with the relations being swapped,
1731 * deal with them too.
1732 */
1733 if (relform1->reltoastrelid || relform2->reltoastrelid)
1734 {
1736 {
1737 if (relform1->reltoastrelid && relform2->reltoastrelid)
1738 {
1739 /* Recursively swap the contents of the toast tables */
1740 swap_relation_files(relform1->reltoastrelid,
1741 relform2->reltoastrelid,
1744 is_internal,
1745 frozenXid,
1748 }
1749 else
1750 {
1751 /* caller messed up */
1752 elog(ERROR, "cannot swap toast files by content when there's only one");
1753 }
1754 }
1755 else
1756 {
1757 /*
1758 * We swapped the ownership links, so we need to change dependency
1759 * data to match.
1760 *
1761 * NOTE: it is possible that only one table has a toast table.
1762 *
1763 * NOTE: at present, a TOAST table's only dependency is the one on
1764 * its owning table. If more are ever created, we'd need to use
1765 * something more selective than deleteDependencyRecordsFor() to
1766 * get rid of just the link we want.
1767 */
1770 long count;
1771
1772 /*
1773 * We disallow this case for system catalogs, to avoid the
1774 * possibility that the catalog we're rebuilding is one of the
1775 * ones the dependency changes would change. It's too late to be
1776 * making any data changes to the target catalog.
1777 */
1779 elog(ERROR, "cannot swap toast files by links for system catalogs");
1780
1781 /* Delete old dependencies */
1782 if (relform1->reltoastrelid)
1783 {
1785 relform1->reltoastrelid,
1786 false);
1787 if (count != 1)
1788 elog(ERROR, "expected one dependency record for TOAST table, found %ld",
1789 count);
1790 }
1791 if (relform2->reltoastrelid)
1792 {
1794 relform2->reltoastrelid,
1795 false);
1796 if (count != 1)
1797 elog(ERROR, "expected one dependency record for TOAST table, found %ld",
1798 count);
1799 }
1800
1801 /* Register new dependencies */
1803 baseobject.objectSubId = 0;
1805 toastobject.objectSubId = 0;
1806
1807 if (relform1->reltoastrelid)
1808 {
1809 baseobject.objectId = r1;
1810 toastobject.objectId = relform1->reltoastrelid;
1813 }
1814
1815 if (relform2->reltoastrelid)
1816 {
1817 baseobject.objectId = r2;
1818 toastobject.objectId = relform2->reltoastrelid;
1821 }
1822 }
1823 }
1824
1825 /*
1826 * If we're swapping two toast tables by content, do the same for their
1827 * valid index. The swap can actually be safely done only if the relations
1828 * have indexes.
1829 */
1831 relform1->relkind == RELKIND_TOASTVALUE &&
1832 relform2->relkind == RELKIND_TOASTVALUE)
1833 {
1836
1837 /* Get valid index for each relation */
1842
1847 is_internal,
1851 }
1852
1853 /* Clean up. */
1856
1858}
1859
1860/*
1861 * Remove the transient table that was built by make_new_heap, and finish
1862 * cleaning up (including rebuilding all indexes on the old heap).
1863 */
1864void
1866 bool is_system_catalog,
1868 bool check_constraints,
1869 bool is_internal,
1870 bool reindex,
1873 char newrelpersistence)
1874{
1875 ObjectAddress object;
1876 Oid mapped_tables[4];
1877 int i;
1878
1879 /* Report that we are now swapping relation files */
1882
1883 /* Zero out possible results from swap_relation_files */
1884 memset(mapped_tables, 0, sizeof(mapped_tables));
1885
1886 /*
1887 * Swap the contents of the heap relations (including any toast tables).
1888 * Also set old heap's relfrozenxid to frozenXid.
1889 */
1892 swap_toast_by_content, is_internal,
1894
1895 /*
1896 * If it's a system catalog, queue a sinval message to flush all catcaches
1897 * on the catalog when we reach CommandCounterIncrement.
1898 */
1901
1902 if (reindex)
1903 {
1904 int reindex_flags;
1906
1907 /*
1908 * Rebuild each index on the relation (but not the toast table, which
1909 * is all-new at this point). It is important to do this before the
1910 * DROP step because if we are processing a system catalog that will
1911 * be used during DROP, we want to have its indexes available. There
1912 * is no advantage to the other order anyway because this is all
1913 * transactional, so no chance to reclaim disk space before commit. We
1914 * do not need a final CommandCounterIncrement() because
1915 * reindex_relation does it.
1916 *
1917 * Note: because index_build is called via reindex_relation, it will
1918 * never set indcheckxmin true for the indexes. This is OK even
1919 * though in some sense we are building new indexes rather than
1920 * rebuilding existing ones, because the new heap won't contain any
1921 * HOT chains at all, let alone broken ones, so it can't be necessary
1922 * to set indcheckxmin.
1923 */
1927
1928 /*
1929 * Ensure that the indexes have the same persistence as the parent
1930 * relation.
1931 */
1932 if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
1934 else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
1936
1937 /* Report that we are now reindexing relations */
1940
1942 }
1943
1944 /* Report that we are now doing clean up */
1947
1948 /*
1949 * If the relation being rebuilt is pg_class, swap_relation_files()
1950 * couldn't update pg_class's own pg_class entry (check comments in
1951 * swap_relation_files()), thus relfrozenxid was not updated. That's
1952 * annoying because a potential reason for doing a VACUUM FULL is an
1953 * imminent or actual anti-wraparound shutdown. So, now that we can
1954 * access the new relation using its indices, update relfrozenxid.
1955 * pg_class doesn't have a toast relation, so we don't need to update the
1956 * corresponding toast relation. Note that there's little point moving all
1957 * relfrozenxid updates here since swap_relation_files() needs to write to
1958 * pg_class for non-mapped relations anyway.
1959 */
1961 {
1965
1967
1970 elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
1972
1973 relform->relfrozenxid = frozenXid;
1974 relform->relminmxid = cutoffMulti;
1975
1977
1979 }
1980
1981 /* Destroy new heap with old filenumber */
1982 object.classId = RelationRelationId;
1983 object.objectId = OIDNewHeap;
1984 object.objectSubId = 0;
1985
1986 if (!reindex)
1987 {
1988 /*
1989 * Make sure the changes in pg_class are visible. This is especially
1990 * important if !swap_toast_by_content, so that the correct TOAST
1991 * relation is dropped. (reindex_relation() above did not help in this
1992 * case.)
1993 */
1995 }
1996
1997 /*
1998 * The new relation is local to our transaction and we know nothing
1999 * depends on it, so DROP_RESTRICT should be OK.
2000 */
2002
2003 /* performDeletion does CommandCounterIncrement at end */
2004
2005 /*
2006 * Now we must remove any relation mapping entries that we set up for the
2007 * transient table, as well as its toast table and toast index if any. If
2008 * we fail to do this before commit, the relmapper will complain about new
2009 * permanent map entries being added post-bootstrap.
2010 */
2011 for (i = 0; OidIsValid(mapped_tables[i]); i++)
2013
2014 /*
2015 * At this point, everything is kosher except that, if we did toast swap
2016 * by links, the toast table's name corresponds to the transient table.
2017 * The name is irrelevant to the backend because it's referenced by OID,
2018 * but users looking at the catalogs could be confused. Rename it to
2019 * prevent this problem.
2020 *
2021 * Note no lock required on the relation, because we already hold an
2022 * exclusive lock on it.
2023 */
2025 {
2027
2029 if (OidIsValid(newrel->rd_rel->reltoastrelid))
2030 {
2031 Oid toastidx;
2033
2034 /* Get the associated valid index to be renamed */
2035 toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
2037
2038 /* rename the toast table ... */
2039 snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
2040 OIDOldHeap);
2041 RenameRelationInternal(newrel->rd_rel->reltoastrelid,
2042 NewToastName, true, false);
2043
2044 /* ... and its valid index too. */
2045 snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
2046 OIDOldHeap);
2047
2049 NewToastName, true, true);
2050
2051 /*
2052 * Reset the relrewrite for the toast. The command-counter
2053 * increment is required here as we are about to update the tuple
2054 * that is updated as part of RenameRelationInternal.
2055 */
2057 ResetRelRewrite(newrel->rd_rel->reltoastrelid);
2058 }
2060 }
2061
2062 /* if it's not a catalog table, clear any missing attribute settings */
2063 if (!is_system_catalog)
2064 {
2066
2070 }
2071}
2072
2073/*
2074 * Determine which relations to process, when REPACK/CLUSTER is called
2075 * without specifying a table name. The exact process depends on whether
2076 * USING INDEX was given or not, and in any case we only return tables and
2077 * materialized views that the current user has privileges to repack/cluster.
2078 *
2079 * If USING INDEX was given, we scan pg_index to find those that have
2080 * indisclustered set; if it was not given, scan pg_class and return all
2081 * tables.
2082 *
2083 * Return it as a list of RelToCluster in the given memory context.
2084 */
2085static List *
2087{
2089 TableScanDesc scan;
2090 HeapTuple tuple;
2091 List *rtcs = NIL;
2092
2093 if (usingindex)
2094 {
2095 ScanKeyData entry;
2096
2097 /*
2098 * For USING INDEX, scan pg_index to find those with indisclustered.
2099 */
2101 ScanKeyInit(&entry,
2104 BoolGetDatum(true));
2105 scan = table_beginscan_catalog(catalog, 1, &entry);
2106 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2107 {
2111
2112 index = (Form_pg_index) GETSTRUCT(tuple);
2113
2114 /*
2115 * Try to obtain a light lock on the index's table, to ensure it
2116 * doesn't go away while we collect the list. If we cannot, just
2117 * disregard it. Be sure to release this if we ultimately decide
2118 * not to process the table!
2119 */
2121 continue;
2122
2123 /* Verify that the table still exists; skip if not */
2125 {
2127 continue;
2128 }
2129
2130 /* noisily skip rels which the user can't process */
2131 if (!repack_is_permitted_for_relation(cmd, index->indrelid,
2132 GetUserId()))
2133 {
2135 continue;
2136 }
2137
2138 /* Use a permanent memory context for the result list */
2141 rtc->tableOid = index->indrelid;
2142 rtc->indexOid = index->indexrelid;
2143 rtcs = lappend(rtcs, rtc);
2145 }
2146 }
2147 else
2148 {
2151
2152 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2153 {
2155 Form_pg_class class;
2157
2158 class = (Form_pg_class) GETSTRUCT(tuple);
2159
2160 /*
2161 * Try to obtain a light lock on the table, to ensure it doesn't
2162 * go away while we collect the list. If we cannot, just
2163 * disregard the table. Be sure to release this if we ultimately
2164 * decide not to process the table!
2165 */
2167 continue;
2168
2169 /* Verify that the table still exists */
2171 {
2173 continue;
2174 }
2175
2176 /* Can only process plain tables and matviews */
2177 if (class->relkind != RELKIND_RELATION &&
2178 class->relkind != RELKIND_MATVIEW)
2179 {
2181 continue;
2182 }
2183
2184 /* noisily skip rels which the user can't process */
2186 GetUserId()))
2187 {
2189 continue;
2190 }
2191
2192 /* Use a permanent memory context for the result list */
2195 rtc->tableOid = class->oid;
2196 rtc->indexOid = InvalidOid;
2197 rtcs = lappend(rtcs, rtc);
2199 }
2200 }
2201
2202 table_endscan(scan);
2204
2205 return rtcs;
2206}
2207
2208/*
2209 * Given a partitioned table or its index, return a list of RelToCluster for
2210 * all the leaf child tables/indexes.
2211 *
2212 * 'rel_is_index' tells whether 'relid' is that of an index (true) or of the
2213 * owning relation.
2214 */
2215static List *
2218{
2219 List *inhoids;
2220 List *rtcs = NIL;
2221
2222 /*
2223 * Do not lock the children until they're processed. Note that we do hold
2224 * a lock on the parent partitioned table.
2225 */
2228 {
2229 Oid table_oid,
2230 index_oid;
2233
2234 if (rel_is_index)
2235 {
2236 /* consider only leaf indexes */
2238 continue;
2239
2242 }
2243 else
2244 {
2245 /* consider only leaf relations */
2247 continue;
2248
2251 }
2252
2253 /*
2254 * It's possible that the user does not have privileges to CLUSTER the
2255 * leaf partition despite having them on the partitioned table. Skip
2256 * if so.
2257 */
2259 continue;
2260
2261 /* Use a permanent memory context for the result list */
2264 rtc->tableOid = table_oid;
2265 rtc->indexOid = index_oid;
2266 rtcs = lappend(rtcs, rtc);
2268 }
2269
2270 return rtcs;
2271}
2272
2273
2274/*
2275 * Return whether userid has privileges to REPACK relid. If not, this
2276 * function emits a WARNING.
2277 */
2278static bool
2280{
2282
2283 if (pg_class_aclcheck(relid, userid, ACL_MAINTAIN) == ACLCHECK_OK)
2284 return true;
2285
2287 errmsg("permission denied to execute %s on \"%s\", skipping it",
2289 get_rel_name(relid)));
2290
2291 return false;
2292}
2293
2294
2295/*
2296 * Given a RepackStmt with an indicated relation name, resolve the relation
2297 * name, obtain lock on it, then determine what to do based on the relation
2298 * type: if it's a table and not partitioned, repack it as indicated (using an
2299 * existing clustered index, or following the given one), and return NULL.
2300 *
2301 * On the other hand, if the table is partitioned, do nothing further and
2302 * instead return the opened and locked relcache entry, so that caller can
2303 * process the partitions using the multiple-table handling code. In this
2304 * case, if an index name is given, it's up to the caller to resolve it.
2305 */
2306static Relation
2308 ClusterParams *params)
2309{
2310 Relation rel;
2311 Oid tableOid;
2312
2313 Assert(stmt->relation != NULL);
2314 Assert(stmt->command == REPACK_COMMAND_CLUSTER ||
2315 stmt->command == REPACK_COMMAND_REPACK);
2316
2317 /*
2318 * Make sure ANALYZE is specified if a column list is present.
2319 */
2320 if ((params->options & CLUOPT_ANALYZE) == 0 && stmt->relation->va_cols != NIL)
2321 ereport(ERROR,
2323 errmsg("ANALYZE option must be specified when a column list is provided"));
2324
2325 /* Find, lock, and check permissions on the table. */
2326 tableOid = RangeVarGetRelidExtended(stmt->relation->relation,
2327 lockmode,
2328 0,
2330 NULL);
2331 rel = table_open(tableOid, NoLock);
2332
2333 /*
2334 * Reject clustering a remote temp table ... their local buffer manager is
2335 * not going to cope.
2336 */
2337 if (RELATION_IS_OTHER_TEMP(rel))
2338 ereport(ERROR,
2340 /*- translator: first %s is name of a SQL command, eg. REPACK */
2341 errmsg("cannot execute %s on temporary tables of other sessions",
2342 RepackCommandAsString(stmt->command)));
2343
2344 /*
2345 * For partitioned tables, let caller handle this. Otherwise, process it
2346 * here and we're done.
2347 */
2348 if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
2349 return rel;
2350 else
2351 {
2352 Oid indexOid = InvalidOid;
2353
2354 indexOid = determine_clustered_index(rel, stmt->usingindex,
2355 stmt->indexname);
2356 if (OidIsValid(indexOid))
2357 check_index_is_clusterable(rel, indexOid, lockmode);
2358
2359 cluster_rel(stmt->command, rel, indexOid, params, isTopLevel);
2360
2361 /*
2362 * Do an analyze, if requested. We close the transaction and start a
2363 * new one, so that we don't hold the stronger lock for longer than
2364 * needed.
2365 */
2366 if (params->options & CLUOPT_ANALYZE)
2367 {
2369
2372
2375
2376 vac_params.options |= VACOPT_ANALYZE;
2377 if (params->options & CLUOPT_VERBOSE)
2378 vac_params.options |= VACOPT_VERBOSE;
2379 analyze_rel(tableOid, NULL, &vac_params,
2380 stmt->relation->va_cols, true, NULL);
2383 }
2384
2385 return NULL;
2386 }
2387}
2388
2389/*
2390 * Given a relation and the usingindex/indexname options in a
2391 * REPACK USING INDEX or CLUSTER command, return the OID of the
2392 * index to use for clustering the table.
2393 *
2394 * Caller must hold lock on the relation so that the set of indexes
2395 * doesn't change, and must call check_index_is_clusterable.
2396 */
2397static Oid
2398determine_clustered_index(Relation rel, bool usingindex, const char *indexname)
2399{
2400 Oid indexOid;
2401
 /*
  * Three cases are distinguished: USING INDEX without a name, an explicit
  * index name, and neither (in which case InvalidOid is returned).
  */
2402 if (indexname == NULL && usingindex)
2403 {
2404 /*
2405 * If USING INDEX with no name is given, find a clustered index, or
2406 * error out if none.
2407 */
2408 indexOid = InvalidOid;
 /*
  * NOTE(review): the loop header and the per-index test are not visible
  * in this listing; what can be seen is that the first index accepted by
  * that test is kept and the search stops.
  */
2410 {
2412 {
2413 indexOid = idxoid;
2414 break;
2415 }
2416 }
2417
2418 if (!OidIsValid(indexOid))
2419 ereport(ERROR,
2421 errmsg("there is no previously clustered index for table \"%s\"",
2423 }
2424 else if (indexname != NULL)
2425 {
2426 /*
 * An index was specified; obtain its OID. The name is looked up in the
 * namespace of the table itself (rel->rd_rel->relnamespace).
 */
2427 indexOid = get_relname_relid(indexname, rel->rd_rel->relnamespace);
2428 if (!OidIsValid(indexOid))
2429 ereport(ERROR,
2431 errmsg("index \"%s\" for table \"%s\" does not exist",
2432 indexname, RelationGetRelationName(rel)));
2433 }
2434 else
2435 indexOid = InvalidOid;
2436
2437 return indexOid;
2438}
2439
/*
 * Return a constant string naming the SQL command that 'cmd' stands for:
 * "REPACK", "VACUUM" or "CLUSTER". If no case matches, "???" is returned.
 *
 * NOTE(review): the line carrying the function name and parameter list, and
 * the case labels themselves, are not visible in this listing -- confirm
 * which command value maps to which string.
 */
2440static const char *
2442{
2443 switch (cmd)
2444 {
2446 return "REPACK";
2448 return "VACUUM";
2450 return "CLUSTER";
2451 }
2452 return "???"; /* keep compiler quiet */
2453}
2454
2455/*
2456 * Apply all the changes stored in 'file'.
2457 */
2458static void
2460{
2461 ConcurrentChangeKind kind = '\0';
2462 Relation rel = chgcxt->cc_rel;
2466 bool have_old_tuple = false;
2468
2470 &TTSOpsVirtual);
2474 &TTSOpsVirtual);
2475
2477
 /*
  * Each iteration consumes one change record: a one-byte kind followed by
  * the tuple data that restore_tuple() reads.
  */
2478 while (true)
2479 {
2480 size_t nread;
2482
2484
 /* A clean EOF is accepted only here, at a record boundary. */
2485 nread = BufFileReadMaybeEOF(file, &kind, 1, true);
2486 if (nread == 0) /* done with the file? */
2487 break;
2488
2489 /*
2490 * If this is the old tuple for an update, read it into the tuple slot
2491 * and go to the next one. The update itself will be executed on the
2492 * next iteration, when we receive the NEW tuple.
2493 */
2494 if (kind == CHANGE_UPDATE_OLD)
2495 {
2496 restore_tuple(file, rel, old_update_tuple);
2497 have_old_tuple = true;
2498 continue;
2499 }
2500
2501 /*
2502 * Just before an UPDATE or DELETE, we must update the command
2503 * counter, because the change could refer to a tuple that we have
2504 * just inserted; and before an INSERT, we have to do this also if the
2505 * previous command was either update or delete.
2506 *
2507 * With this approach we don't spend so many CCIs for long strings of
2508 * only INSERTs, which can't affect one another.
2509 */
2510 if (kind == CHANGE_UPDATE_NEW || kind == CHANGE_DELETE ||
2511 (kind == CHANGE_INSERT && (prevkind == CHANGE_UPDATE_NEW ||
2513 {
2516 }
2517
2518 /*
2519 * Now restore the tuple into the slot and execute the change.
2520 */
2521 restore_tuple(file, rel, spilled_tuple);
2522
2523 if (kind == CHANGE_INSERT)
2524 {
2526 }
2527 else if (kind == CHANGE_DELETE)
2528 {
2529 bool found;
2530
2531 /* Find the tuple to be deleted */
2533 if (!found)
2534 elog(ERROR, "failed to find target tuple");
2536 }
2537 else if (kind == CHANGE_UPDATE_NEW)
2538 {
2539 TupleTableSlot *key;
2540 bool found;
2541
 /*
  * If a CHANGE_UPDATE_OLD record preceded this one, locate the
  * target by the old tuple; otherwise the new tuple itself serves
  * as the lookup key.
  */
2542 if (have_old_tuple)
2543 key = old_update_tuple;
2544 else
2545 key = spilled_tuple;
2546
2547 /* Find the tuple to be updated. */
2548 found = find_target_tuple(rel, chgcxt, key, ondisk_tuple);
2549 if (!found)
2550 elog(ERROR, "failed to find target tuple");
2551
2552 /*
2553 * If 'tup' contains TOAST pointers, they point to the old
2554 * relation's toast. Copy the corresponding TOAST pointers for the
2555 * new relation from the existing tuple. (The fact that we
2556 * received a TOAST pointer here implies that the attribute hasn't
2557 * changed.)
2558 */
2560
2562
 /* The old tuple, if any, belongs to this update only. */
2564 have_old_tuple = false;
2565 }
2566 else
2567 elog(ERROR, "unrecognized kind of change: %d", kind);
2568
2569 ResetPerTupleExprContext(chgcxt->cc_estate);
2570 }
2571
2572 /* Cleanup. */
2576
2578}
2579
2580/*
2581 * Apply an insert from the spill of concurrent changes to the new copy of the
2582 * table.
2583 */
2584static void
2587{
2588 /* Put the tuple in the table, but make sure it won't be decoded */
2589 table_tuple_insert(rel, slot, GetCurrentCommandId(true),
2591
2592 /*
 * Update indexes with this new tuple. No flags are passed (0), there is
 * no list of arbiter indexes (NIL) and no speculative-conflict output
 * (NULL).
 */
2594 chgcxt->cc_estate,
2595 0,
2596 slot,
2597 NIL, NULL);
2599}
2600
2601/*
2602 * Apply an update from the spill of concurrent changes to the new copy of the
2603 * table.
2604 */
2605static void
2609{
2610 LockTupleMode lockmode;
2611 TM_FailureData tmfd;
2613 TM_Result res;
2614
2615 /*
2616 * Carry out the update, skipping logical decoding for it.
2617 */
2618 res = table_tuple_update(rel, &(ondisk_tuple->tts_tid), spilled_tuple,
2619 GetCurrentCommandId(true),
2623 false,
2624 &tmfd, &lockmode, &update_indexes);
 /* Any outcome other than TM_Ok is reported as an error. */
2625 if (res != TM_Ok)
2626 ereport(ERROR,
2627 errmsg("failed to apply concurrent UPDATE"));
2628
 /*
  * table_tuple_update() reports through update_indexes whether index
  * entries are needed for the new tuple version; nothing is inserted for
  * TU_None.
  */
2629 if (update_indexes != TU_None)
2630 {
2631 uint32 flags = EIIT_IS_UPDATE;
2632
 /*
  * NOTE(review): the condition guarding this flag is not visible in
  * this listing.
  */
2634 flags |= EIIT_ONLY_SUMMARIZING;
2636 chgcxt->cc_estate,
2637 flags,
2639 NIL, NULL);
2640 }
2641
2643}
2644
/*
 * Apply a delete from the spill of concurrent changes to the new copy of the
 * table. The tuple to delete is identified by the TID stored in 'slot'.
 */
2645static void
2647{
2648 TM_Result res;
2649 TM_FailureData tmfd;
2650
2651 /*
2652 * Delete tuple from the new heap, skipping logical decoding for it.
2653 */
2654 res = table_tuple_delete(rel, &(slot->tts_tid),
2655 GetCurrentCommandId(true),
2658 false,
2659 &tmfd);
2660
 /* Any outcome other than TM_Ok is reported as an error. */
2661 if (res != TM_Ok)
2662 ereport(ERROR,
2663 errmsg("failed to apply concurrent DELETE"));
2664
2666}
2667
2668/*
2669 * Read tuple from file and put it in the input slot. All memory is allocated
2670 * in the current memory context; caller is responsible for freeing it as
2671 * appropriate.
2672 *
2673 * External attributes are stored in separate memory chunks, in order to avoid
2674 * exceeding MaxAllocSize - that could happen if the individual attributes are
2675 * smaller than MaxAllocSize but the whole tuple is bigger.
2676 */
2677static void
2679{
2680 uint32 t_len;
2681 HeapTuple tup;
2682 int natt_ext;
2683
2684 /*
 * Read the tuple. The file holds its length first (uint32), then t_len
 * bytes of tuple data. The HeapTupleData header and the data are placed
 * in a single allocation, with t_data pointing just past the header.
 */
2685 BufFileReadExact(file, &t_len, sizeof(t_len));
2686 tup = (HeapTuple) palloc(HEAPTUPLESIZE + t_len);
2687 tup->t_data = (HeapTupleHeader) ((char *) tup + HEAPTUPLESIZE);
2688 BufFileReadExact(file, tup->t_data, t_len);
2689 tup->t_len = t_len;
2690 ItemPointerSetInvalid(&tup->t_self);
2691 tup->t_tableOid = RelationGetRelid(relation);
2692
2693 /*
2694 * Put the tuple we read in a slot. This deforms it, so that we can hack
2695 * the external attributes in place.
2696 */
2697 ExecForceStoreHeapTuple(tup, slot, false);
2698
2699 /*
2700 * Next, read any attributes we stored separately into the tts_values
2701 * array elements expecting them, if any. This matches
2702 * repack_store_change.
2703 */
2704 BufFileReadExact(file, &natt_ext, sizeof(natt_ext));
2705 if (natt_ext > 0)
2706 {
2707 TupleDesc desc = slot->tts_tupleDescriptor;
2708
2709 for (int i = 0; i < desc->natts; i++)
2710 {
2712 varlena *varlen;
2713 union
2714 {
2715 alignas(int32) varlena hdr;
2716 char data[sizeof(void *)];
2717 } chunk_header;
2718 void *value;
2719 Size varlensz;
2720
 /* Only non-dropped, non-null varlena attributes are candidates. */
2721 if (attr->attisdropped || attr->attlen != -1)
2722 continue;
2723 if (slot_attisnull(slot, i + 1))
2724 continue;
2727 continue;
2728 slot_getsomeattrs(slot, i + 1);
2729
2732
 /*
  * Read the payload that follows the varlena header.
  * NOTE(review): the statements that read the header, compute
  * varlensz and allocate 'value' are not visible in this listing.
  */
2735 BufFileReadExact(file, (char *) value + VARHDRSZ, varlensz - VARHDRSZ);
2736
 /*
  * One separately stored attribute has been consumed; finding more
  * candidates than the file announced is an error.
  *
  * NOTE(review): nothing after the loop checks that natt_ext
  * reached zero, so a file announcing more attributes than the
  * tuple has candidates is not detected here.
  */
2738 natt_ext--;
2739 if (natt_ext < 0)
2740 ereport(ERROR,
2742 errmsg("insufficient number of attributes stored separately"));
2743 }
2744 }
2745}
2746
2747/*
2748 * Adjust 'dest' replacing any EXTERNAL_ONDISK toast pointers with the
2749 * corresponding ones from 'src'.
2750 */
2751static void
2753{
2754 TupleDesc desc = dest->tts_tupleDescriptor;
2755
2756 for (int i = 0; i < desc->natts; i++)
2757 {
2760
 /* Only non-dropped, non-null varlena attributes can need adjusting. */
2761 if (attr->attisdropped)
2762 continue;
2763 if (attr->attlen != -1)
2764 continue;
2765 if (slot_attisnull(dest, i + 1))
2766 continue;
2767
2768 slot_getsomeattrs(dest, i + 1);
2769
2770 varlena_dst = (varlena *) DatumGetPointer(dest->tts_values[i]);
 /*
  * NOTE(review): the test that skips this attribute is not visible in
  * this listing; per the function's header comment, presumably only
  * EXTERNAL_ONDISK pointers get past it.
  */
2772 continue;
2773 slot_getsomeattrs(src, i + 1);
2774
 /*
  * The Datum is copied as is, so 'dest' ends up referring to the same
  * value as 'src' rather than to a private copy.
  */
2775 dest->tts_values[i] = src->tts_values[i];
2776 }
2777}
2778
2779/*
2780 * Find the tuple to be updated or deleted by the given data change, whose
2781 * tuple has already been loaded into locator.
2782 *
2783 * If the tuple is found, put it in retrieved and return true. If the tuple is
2784 * not found, return false.
2785 */
2786static bool
2789{
2790 Form_pg_index idx = chgcxt->cc_ident_index->rd_index;
2791 IndexScanDesc scan;
2792 bool retval;
2793
2794 /*
2795 * Scan key is passed by caller, so it does not have to be constructed
2796 * multiple times. Key entries have all fields initialized, except for
2797 * sk_argument.
2798 *
2799 * Use the incoming tuple to finalize the scan key.
2800 */
2801 for (int i = 0; i < chgcxt->cc_ident_key_nentries; i++)
2802 {
2803 ScanKey entry = &chgcxt->cc_ident_key[i];
 /* Table column number of the i-th key column of the identity index. */
2804 AttrNumber attno = idx->indkey.values[i];
2805
 /* attno is 1-based, the slot arrays are 0-based. */
2806 entry->sk_argument = locator->tts_values[attno - 1];
2807 Assert(!locator->tts_isnull[attno - 1]);
2808 }
2809
2810 /* XXX no instrumentation for now */
 /* A fresh index scan is started and ended on every call. */
2811 scan = index_beginscan(rel, chgcxt->cc_ident_index, GetActiveSnapshot(),
2812 NULL, chgcxt->cc_ident_key_nentries, 0, 0);
2813 index_rescan(scan, chgcxt->cc_ident_key, chgcxt->cc_ident_key_nentries, NULL, 0);
 /*
  * NOTE(review): the statement that fetches the tuple and sets retval is
  * not visible in this listing.
  */
2815 index_endscan(scan);
2816
2817 return retval;
2818}
2819
2820/*
2821 * Decode and apply concurrent changes, up to (and including) the record whose
2822 * LSN is 'end_of_wal'.
2823 *
2824 * XXX the names "process_concurrent_changes" and "apply_concurrent_changes"
2825 * are far too similar to each other.
2826 */
2827static void
2829{
2830 DecodingWorkerShared *shared;
2831 char fname[MAXPGPATH];
2832 BufFile *file;
2833
2836
2837 /*
 * Ask the worker for the file. The request (target LSN and the 'done'
 * flag) is published while holding the spinlock.
 */
2839 SpinLockAcquire(&shared->mutex);
2840 shared->lsn_upto = end_of_wal;
2841 shared->done = done;
2842 SpinLockRelease(&shared->mutex);
2843
2844 /*
2845 * The worker needs to finish processing of the current WAL record. Even
2846 * if it's idle, it'll need to close the output file. Thus we're likely to
2847 * wait, so prepare for sleep.
2848 */
2850 for (;;)
2851 {
2852 int last_exported;
2853
 /* Take a copy under the spinlock and test it outside. */
2854 SpinLockAcquire(&shared->mutex);
2855 last_exported = shared->last_exported;
2856 SpinLockRelease(&shared->mutex);
2857
2858 /*
2859 * Has the worker exported the file we are waiting for?
2860 */
2861 if (last_exported == chgcxt->cc_file_seq)
2862 break;
2863
2865 }
2867
2868 /*
 * Open the file. It is opened read-only and must exist (missing_ok is
 * false).
 */
2869 DecodingWorkerFileName(fname, shared->relid, chgcxt->cc_file_seq);
2870 file = BufFileOpenFileSet(&shared->sfs.fs, fname, O_RDONLY, false);
2872
2873 BufFileClose(file);
2874
2875 /* Get ready for the next file. */
2876 chgcxt->cc_file_seq++;
2877}
2878
2879/*
2880 * Initialize the ChangeContext struct for the given relation, with
2881 * the given index as identity index.
2882 */
2883static void
2885 Relation relation, Oid ident_index_id)
2886{
2887 chgcxt->cc_rel = relation;
2888
2889 /* Only initialize fields needed by ExecInsertIndexTuples(). */
2890 chgcxt->cc_estate = CreateExecutorState();
2891
2892 chgcxt->cc_rri = (ResultRelInfo *) palloc(sizeof(ResultRelInfo));
2893 InitResultRelInfo(chgcxt->cc_rri, relation, 0, 0, 0);
2894 ExecOpenIndices(chgcxt->cc_rri, false);
2895
2896 /*
2897 * ExecOpenIndices() has filled cc_rri with the descriptors of the
2898 * table's indexes; find the identity index among them.
2899 */
2900 chgcxt->cc_ident_index = NULL;
2901 for (int i = 0; i < chgcxt->cc_rri->ri_NumIndices; i++)
2902 {
2904
2905 ind_rel = chgcxt->cc_rri->ri_IndexRelationDescs[i];
2906 if (ind_rel->rd_id == ident_index_id)
2907 {
2908 chgcxt->cc_ident_index = ind_rel;
2909 break;
2910 }
2911 }
2912 if (chgcxt->cc_ident_index == NULL)
2913 elog(ERROR, "failed to find identity index");
2914
2915 /* Set up for scanning said identity index */
2916 {
2918
2919 indexForm = chgcxt->cc_ident_index->rd_index;
 /* One scan key entry per key column of the index (indnkeyatts). */
2920 chgcxt->cc_ident_key_nentries = indexForm->indnkeyatts;
2921 chgcxt->cc_ident_key = (ScanKey) palloc_array(ScanKeyData, indexForm->indnkeyatts);
2922 for (int i = 0; i < indexForm->indnkeyatts; i++)
2923 {
2924 ScanKey entry;
2925 Oid opfamily,
2926 opcintype,
2927 opno,
2928 opcode;
2929
2930 entry = &chgcxt->cc_ident_key[i];
2931
 /*
  * Look up the operator for this column in its operator family,
  * with the opclass input type on both sides, and then the
  * function implementing that operator.
  */
2932 opfamily = chgcxt->cc_ident_index->rd_opfamily[i];
2933 opcintype = chgcxt->cc_ident_index->rd_opcintype[i];
2934 opno = get_opfamily_member(opfamily, opcintype, opcintype,
2936 if (!OidIsValid(opno))
2937 elog(ERROR, "failed to find = operator for type %u", opcintype);
2938 opcode = get_opcode(opno);
2939 if (!OidIsValid(opcode))
2940 elog(ERROR, "failed to find = operator for operator %u", opno);
2941
2942 /*
 * Initialize everything but argument. The key refers to index
 * column i + 1; sk_argument is filled in later, for each lookup.
 */
2943 ScanKeyInit(entry,
2944 i + 1,
2945 BTEqualStrategyNumber, opcode,
2946 (Datum) NULL);
2947 entry->sk_collation = chgcxt->cc_ident_index->rd_indcollation[i];
2948 }
2949 }
2950
 /* The first file of changes is the one following WORKER_FILE_SNAPSHOT. */
2951 chgcxt->cc_file_seq = WORKER_FILE_SNAPSHOT + 1;
2952}
2953
2954/*
2955 * Free up resources taken by a ChangeContext.
2956 */
2957static void
2959{
 /*
  * cc_ident_index is not released separately: it was taken from the
  * index descriptors held by cc_rri.
  */
2960 ExecCloseIndices(chgcxt->cc_rri);
2961 FreeExecutorState(chgcxt->cc_estate);
2962 /* XXX are these pfrees necessary? */
2963 pfree(chgcxt->cc_rri);
2964 pfree(chgcxt->cc_ident_key);
2965}
2966
2967/*
2968 * The final steps of rebuild_relation() for concurrent processing.
2969 *
2970 * On entry, NewHeap is locked in AccessExclusiveLock mode. OldHeap and its
2971 * clustering index (if one is passed) are still locked in a mode that allows
2972 * concurrent data changes. On exit, both tables and their indexes are closed,
2973 * but locked in AccessExclusiveLock mode.
2974 */
2975static void
2979{
2984 ListCell *lc,
2985 *lc2;
2986 char relpersistence;
2987 bool is_system_catalog;
2989 XLogRecPtr end_of_wal;
2990 List *indexrels;
2992
2995
2996 /*
2997 * Unlike the exclusive case, we build new indexes for the new relation
2998 * rather than swapping the storage and reindexing the old relation. The
2999 * point is that the index build can take some time, so we do it before we
3000 * get AccessExclusiveLock on the old heap and therefore we cannot swap
3001 * the heap storage yet.
3002 *
3003 * index_create() will lock the new indexes using AccessExclusiveLock - no
3004 * need to change that. At the same time, we use ShareUpdateExclusiveLock
3005 * to lock the existing indexes - that should be enough to prevent others
3006 * from changing them while we're repacking the relation. The lock on
3007 * table should prevent others from changing the index column list, but
3008 * might not be enough for commands like ALTER INDEX ... SET ... (Those
3009 * are not necessarily dangerous, but can confuse the user if the
3010 * changes they make get lost due to REPACK.)
3011 */
3013
3014 /*
3015 * The identity index in the new relation appears in the same relative
3016 * position as the corresponding index in the old relation. Find it.
3017 */
3020 {
3021 if (identIdx == ind_old)
3022 {
3023 int pos = foreach_current_index(ind_old);
3024
 /*
  * NOTE(review): if pos is a zero-based position (as
  * foreach_current_index presumably yields), a list of exactly
  * 'pos' elements has no element at that position, yet it passes
  * this test. The comparison probably ought to be "<=" -- confirm
  * against the (not visible) statement that fetches the element.
  */
3025 if (unlikely(list_length(ind_oids_new) < pos))
3026 elog(ERROR, "list of new indexes too short");
3028 break;
3029 }
3030 }
3032 elog(ERROR, "could not find index matching \"%s\" at the new relation",
3034
3035 /* Gather information to apply concurrent changes. */
3037
3038 /*
3039 * During testing, wait for another backend to perform concurrent data
3040 * changes which we will process below.
3041 */
3042 INJECTION_POINT("repack-concurrently-before-lock", NULL);
3043
3044 /*
3045 * Flush all WAL records inserted so far (possibly except for the last
3046 * incomplete page; see GetInsertRecPtr), to minimize the amount of data
3047 * we need to flush while holding exclusive lock on the source table.
3048 */
3050 end_of_wal = GetFlushRecPtr(NULL);
3051
3052 /*
3053 * Apply concurrent changes first time, to minimize the time we need to
3054 * hold AccessExclusiveLock. (Quite some amount of WAL could have been
3055 * written during the data copying and index creation.)
3056 */
3057 process_concurrent_changes(end_of_wal, &chgcxt, false);
3058
3059 /*
3060 * Acquire AccessExclusiveLock on the table, its TOAST relation (if there
3061 * is one), all its indexes, so that we can swap the files.
3062 */
3064
3065 /*
3066 * Lock all indexes now, not only the clustering one: all indexes need to
3067 * have their files swapped. While doing that, store their relation
3068 * references in a list, to handle predicate locks below.
3069 */
3070 indexrels = NIL;
3072 {
3074
3076
3077 /*
3078 * Some things about the index may have changed before we locked the
3079 * index, such as ALTER INDEX RENAME. We don't need to do anything
3080 * here to absorb those changes in the new index.
3081 */
3083 }
3084
3085 /*
3086 * Lock the OldHeap's TOAST relation exclusively - again, the lock is
3087 * needed to swap the files.
3088 */
3089 if (OidIsValid(OldHeap->rd_rel->reltoastrelid))
3090 LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
3091
3092 /*
3093 * Tuples and pages of the old heap will be gone, but the heap will stay.
3094 */
3097 {
3100 }
3102
3103 /*
3104 * Flush WAL again, to make sure that all changes committed while we were
3105 * waiting for the exclusive lock are available for decoding.
3106 */
3108 end_of_wal = GetFlushRecPtr(NULL);
3109
3110 /*
3111 * Apply the concurrent changes again. Indicate that the decoding worker
3112 * won't be needed anymore.
3113 */
3114 process_concurrent_changes(end_of_wal, &chgcxt, true);
3115
3116 /* Remember info about rel before closing OldHeap */
3117 relpersistence = OldHeap->rd_rel->relpersistence;
3119
3122
3123 /*
3124 * Even ShareUpdateExclusiveLock should have prevented others from
3125 * creating / dropping indexes (even using the CONCURRENTLY option), so we
3126 * do not need to check whether the lists match.
3127 */
3129 {
 /*
  * Zero-initialized, so that the assertion-only loop below can tell
  * whether any entry was set.
  */
3132 Oid mapped_tables[4] = {0};
3133
3136 false, /* swap_toast_by_content */
3137 true,
3141
3142#ifdef USE_ASSERT_CHECKING
3143
3144 /*
3145 * Concurrent processing is not supported for system relations, so
3146 * there should be no mapped tables.
3147 */
3148 for (int i = 0; i < 4; i++)
3150#endif
3151 }
3152
3153 /* The new indexes must be visible for deletion. */
3155
3156 /* Close the old heap but keep lock until transaction commit. */
3158 /* Close the new heap. (We didn't have to open its indexes). */
3160
3161 /* Cleanup what we don't need anymore. (And close the identity index.) */
3163
3164 /*
3165 * Swap the relations and their TOAST relations and TOAST indexes. This
3166 * also drops the new relation and its indexes.
3167 *
3168 * (System catalogs are currently not supported.)
3169 */
3173 false, /* swap_toast_by_content */
3174 false,
3175 true,
3176 false, /* reindex */
3178 relpersistence);
3179}
3180
3181/*
3182 * Build indexes on NewHeap according to those on OldHeap.
3183 *
3184 * OldIndexes is the list of index OIDs on OldHeap. The contained indexes end
3185 * up locked using ShareUpdateExclusiveLock.
3186 *
3187 * A list of OIDs of the corresponding indexes created on NewHeap is
3188 * returned. The order of items does match, so we can use these lists to swap
3189 * index storage.
3190 */
3191static List *
3223
3224/*
3225 * Create a transient copy of a constraint -- supported by a transient
3226 * copy of the index that supports the original constraint.
3227 *
3228 * When repacking a table that contains exclusion constraints, the executor
3229 * relies on these constraints being properly catalogued. These copies are
3230 * to support that.
3231 *
3232 * We don't need the constraints for anything else (the original constraints
3233 * will be there once repack completes), so we add pg_depend entries so that
3234 * they are dropped when the transient table is dropped.
3235 */
3236static void
3238{
3240 Relation rel;
3241 TupleDesc desc;
3242 SysScanDesc scan;
3243 HeapTuple tup;
3245
3248
3249 /*
3250 * Retrieve the constraints supported by the old index and create an
3251 * identical one that points to the new index.
3252 */
3256 ObjectIdGetDatum(old_index->rd_index->indrelid));
3258 NULL, 1, &skey);
3259 desc = RelationGetDescr(rel);
 /*
  * The scan key is built from the old index's table (indrelid); the rows
  * that belong to other indexes are filtered out inside the loop.
  */
3260 while (HeapTupleIsValid(tup = systable_getnext(scan)))
3261 {
3263 Oid oid;
3265 bool nulls[Natts_pg_constraint] = {0};
3266 bool replaces[Natts_pg_constraint] = {0};
3269
 /* Skip constraints that are not supported by old_index. */
3270 if (conform->conindid != RelationGetRelid(old_index))
3271 continue;
3272
 /*
  * The copy gets its own OID.
  * NOTE(review): the other replaced columns are set on lines that are
  * not visible in this listing.
  */
3276 replaces[Anum_pg_constraint_oid - 1] = true;
3281
3282 new_tup = heap_modify_tuple(tup, desc, values, nulls, replaces);
3283
3284 /* Insert it into the catalog. */
3286
3287 /* Create a dependency so it's removed when we drop the new heap. */
3290 }
3291 systable_endscan(scan);
3292
3294
3296}
3297
3298/*
3299 * Try to start a background worker to perform logical decoding of data
3300 * changes applied to relation while REPACK CONCURRENTLY is copying its
3301 * contents to a new table.
3302 */
3303static void
3305{
3306 Size size;
3307 dsm_segment *seg;
3308 DecodingWorkerShared *shared;
3309 shm_mq *mq;
3312
3313 /*
 * Setup shared memory. The segment holds the DecodingWorkerShared struct;
 * the error queue starts at a buffer-aligned offset within it.
 */
3314 size = BUFFERALIGN(offsetof(DecodingWorkerShared, error_queue)) +
3316 seg = dsm_create(size, 0);
3317 shared = (DecodingWorkerShared *) dsm_segment_address(seg);
3318 shared->lsn_upto = InvalidXLogRecPtr;
3319 shared->done = false;
3320 SharedFileSetInit(&shared->sfs, seg);
 /* No file has been exported yet. */
3321 shared->last_exported = -1;
3322 SpinLockInit(&shared->mutex);
3323 shared->dbid = MyDatabaseId;
3324
3325 /*
3326 * This is the UserId set in cluster_rel(). Security context shouldn't be
3327 * needed for decoding worker.
3328 */
3329 shared->roleid = GetUserId();
3330 shared->relid = relid;
3331 ConditionVariableInit(&shared->cv);
3332 shared->backend_proc = MyProc;
3333 shared->backend_pid = MyProcPid;
3335
3336 mq = shm_mq_create((char *) BUFFERALIGN(shared->error_queue),
3339 mqh = shm_mq_attach(mq, seg, NULL);
3340
 /*
  * Describe the worker: it runs RepackWorkerMain from the "postgres"
  * library, is never restarted, and receives the DSM segment handle as its
  * argument. This backend asks to be notified about the worker's state
  * changes (bgw_notify_pid).
  */
3341 memset(&bgw, 0, sizeof(bgw));
3342 snprintf(bgw.bgw_name, BGW_MAXLEN,
3343 "REPACK decoding worker for relation \"%s\"",
3344 get_rel_name(relid));
3345 snprintf(bgw.bgw_type, BGW_MAXLEN, "REPACK decoding worker");
3346 bgw.bgw_flags = BGWORKER_SHMEM_ACCESS |
3348 bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
3349 bgw.bgw_restart_time = BGW_NEVER_RESTART;
3350 snprintf(bgw.bgw_library_name, MAXPGPATH, "postgres");
3351 snprintf(bgw.bgw_function_name, BGW_MAXLEN, "RepackWorkerMain");
3352 bgw.bgw_main_arg = UInt32GetDatum(dsm_segment_handle(seg));
3353 bgw.bgw_notify_pid = MyProcPid;
3354
3357 ereport(ERROR,
3359 errmsg("out of background worker slots"),
3360 errhint("You might need to increase \"%s\".", "max_worker_processes"));
3361
3362 decoding_worker->seg = seg;
3364
3365 /*
3366 * The decoding setup must be done before the caller can have XID assigned
3367 * for any reason, otherwise the worker might end up in a deadlock,
3368 * waiting for the caller's transaction to end. Therefore wait here until
3369 * the worker indicates that it has the logical decoding initialized.
3370 */
3372 for (;;)
3373 {
3374 bool initialized;
3375
 /*
  * NOTE(review): no visible line above sets shared->initialized to
  * false before the worker is registered -- confirm that one of the
  * lines missing from this listing does so.
  */
3376 SpinLockAcquire(&shared->mutex);
3377 initialized = shared->initialized;
3378 SpinLockRelease(&shared->mutex);
3379
3380 if (initialized)
3381 break;
3382
3384 }
3386}
3387
3388/*
3389 * Stop the decoding worker and cleanup the related resources.
3390 *
3391 * The worker stops on its own when it knows there is no more work to do, but
3392 * we need to stop it explicitly at least on ERROR in the launching backend.
3393 */
3394static void
3396{
3397 BgwHandleStatus status;
3398
 /* Both early exits below make this a no-op when there is no worker. */
3399 /* Haven't reached the worker startup? */
3400 if (decoding_worker == NULL)
3401 return;
3402
3403 /* Could not register the worker? */
3404 if (decoding_worker->handle == NULL)
3405 return;
3406
3408 /* The worker should really exit before the REPACK command does. */
3412
 /* Without the postmaster this backend cannot continue either. */
3413 if (status == BGWH_POSTMASTER_DIED)
3414 ereport(FATAL,
3416 errmsg("postmaster exited during REPACK command"));
3417
3419
3420 /*
3421 * If we could not cancel the current sleep due to ERROR, do that before
3422 * we detach from the shared memory the condition variable is located in.
3423 * If we did not, the bgworker ERROR handling code would try and fail
3424 * badly.
3425 */
3427
3431}
3432
3433/*
3434 * Get the initial snapshot from the decoding worker.
3435 */
3436static Snapshot
3438{
3439 DecodingWorkerShared *shared;
3440 char fname[MAXPGPATH];
3441 BufFile *file;
3443 char *snap_space;
3444 Snapshot snapshot;
3445
3446 shared = (DecodingWorkerShared *) dsm_segment_address(worker->seg);
3447
3448 /*
3449 * The worker needs to initialize the logical decoding, which usually
3450 * takes some time. Therefore it makes sense to prepare for the sleep
3451 * first.
3452 */
3454 for (;;)
3455 {
3456 int last_exported;
3457
 /* Take a copy under the spinlock and test it outside. */
3458 SpinLockAcquire(&shared->mutex);
3459 last_exported = shared->last_exported;
3460 SpinLockRelease(&shared->mutex);
3461
3462 /*
3463 * Has the worker exported the file we are waiting for?
3464 */
3465 if (last_exported == WORKER_FILE_SNAPSHOT)
3466 break;
3467
3469 }
3471
3472 /*
 * Read the snapshot from a file. The file starts with the size of the
 * serialized snapshot; a buffer of that size is allocated for the data.
 */
3474 file = BufFileOpenFileSet(&shared->sfs.fs, fname, O_RDONLY, false);
3475 BufFileReadExact(file, &snap_size, sizeof(snap_size));
3476 snap_space = (char *) palloc(snap_size);
3478 BufFileClose(file);
3479
3480 /* Restore it. */
3481 snapshot = RestoreSnapshot(snap_space);
3483
3484 return snapshot;
3485}
3486
3487/*
3488 * Generate worker's file name into 'fname', which must be of size MAXPGPATH.
3489 * If relations of the same 'relid' happen to be processed at the same time,
3490 * they must be from different databases and therefore different backends must
3491 * be involved.
3492 */
3493void
3495{
3496 /* The PID is already present in the fileset name, so we needn't add it */
 /* The name has the form "<relid>-<seq>", both printed as unsigned. */
3497 snprintf(fname, MAXPGPATH, "%u-%u", relid, seq);
3498}
3499
3500/*
3501 * Handle receipt of an interrupt indicating a repack worker message.
3502 *
3503 * Note: this is called within a signal handler! All we can do is set
3504 * a flag that will cause the next CHECK_FOR_INTERRUPTS() to invoke
3505 * ProcessRepackMessages().
3506 */
3507void
3509{
 /*
  * Only flags are set here; ProcessRepackMessages() clears
  * RepackMessagePending when it runs.
  */
3510 InterruptPending = true;
3511 RepackMessagePending = true;
3513}
3514
3515/*
3516 * Process any queued protocol messages received from the repack worker.
3517 */
3518void
3520{
3521 MemoryContext oldcontext;
3523
3524 /*
3525 * Nothing to do if we haven't launched the worker yet or have already
3526 * terminated it.
3527 */
3528 if (decoding_worker == NULL)
3529 return;
3530
3531 /*
3532 * This is invoked from ProcessInterrupts(), and since some of the
3533 * functions it calls contain CHECK_FOR_INTERRUPTS(), there is a potential
3534 * for recursive calls if more signals are received while this runs. It's
3535 * unclear that recursive entry would be safe, and it doesn't seem useful
3536 * even if it is safe, so let's block interrupts until done.
3537 */
3539
3540 /*
3541 * Moreover, CurrentMemoryContext might be pointing almost anywhere. We
3542 * don't want to risk leaking data into long-lived contexts, so let's do
3543 * our work here in a private context that we can reset on each use.
3544 */
3545 if (hpm_context == NULL) /* first time through? */
3547 "ProcessRepackMessages",
3549 else
3551
3552 oldcontext = MemoryContextSwitchTo(hpm_context);
3553
3554 /* OK to process messages. Reset the flag saying there are more to do. */
3555 RepackMessagePending = false;
3556
3557 /*
3558 * Read as many messages as we can from the worker, but stop when no more
3559 * messages can be read from the worker without blocking.
3560 */
3561 while (true)
3562 {
3563 shm_mq_result res;
3564 Size nbytes;
3565 void *data;
3566
3568 &data, true);
3569 if (res == SHM_MQ_WOULD_BLOCK)
3570 break;
3571 else if (res == SHM_MQ_SUCCESS)
3572 {
3573 StringInfoData msg;
3574
 /*
  * Copy the received bytes into a StringInfo of our own; the copy
  * is freed once the message has been handled.
  */
3575 initStringInfo(&msg);
3576 appendBinaryStringInfo(&msg, data, nbytes);
3578 pfree(msg.data);
3579 }
3580 else
3581 {
3582 /*
3583 * The decoding worker is special in that it exits as soon as it
3584 * has its work done. Thus the DETACHED result code is fine.
3585 */
3586 Assert(res == SHM_MQ_DETACHED);
3587
3588 break;
3589 }
3590 }
3591
3592 MemoryContextSwitchTo(oldcontext);
3593
3594 /* Might as well clear the context on our way out */
3596
3598}
3599
3600/*
3601 * Process a single protocol message received from the REPACK decoding worker.
3602 */
3603static void
3605{
3606 char msgtype;
3607
 /* The first byte of the message identifies its type. */
3608 msgtype = pq_getmsgbyte(msg);
3609
3610 switch (msgtype)
3611 {
3614 {
3616
3617 /* Parse ErrorResponse or NoticeResponse. */
3619
3620 /* Death of a worker isn't enough justification for suicide. */
3621 edata.elevel = Min(edata.elevel, ERROR);
3622
3623 /*
3624 * Add a context line to show that this is a message
3625 * propagated from the worker. Otherwise, it can sometimes be
3626 * confusing to understand what actually happened.
3627 */
3628 if (edata.context)
3629 edata.context = psprintf("%s\n%s", edata.context,
3630 _("REPACK decoding worker"));
3631 else
3632 edata.context = pstrdup(_("REPACK decoding worker"));
3633
3634 /* Rethrow error or print notice. */
3636
3637 break;
3638 }
3639
 /* Any other message type is reported as an error. */
3640 default:
3641 {
3642 elog(ERROR, "unrecognized message type received from decoding worker: %c (message length %d bytes)",
3643 msgtype, msg->len);
3644 }
3645 }
3646}
Datum idx(PG_FUNCTION_ARGS)
Definition _int_op.c:262
@ ACLCHECK_OK
Definition acl.h:184
AclResult pg_class_aclcheck(Oid table_oid, Oid roleid, AclMode mode)
Definition aclchk.c:4082
int16 AttrNumber
Definition attnum.h:21
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_incr_param(int index, int64 incr)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_REPACK
void TerminateBackgroundWorker(BackgroundWorkerHandle *handle)
Definition bgworker.c:1319
BgwHandleStatus WaitForBackgroundWorkerShutdown(BackgroundWorkerHandle *handle)
Definition bgworker.c:1280
bool RegisterDynamicBackgroundWorker(BackgroundWorker *worker, BackgroundWorkerHandle **handle)
Definition bgworker.c:1068
#define BGW_NEVER_RESTART
Definition bgworker.h:92
BgwHandleStatus
Definition bgworker.h:111
@ BGWH_POSTMASTER_DIED
Definition bgworker.h:115
@ BgWorkerStart_RecoveryFinished
Definition bgworker.h:88
#define BGWORKER_BACKEND_DATABASE_CONNECTION
Definition bgworker.h:60
#define BGWORKER_SHMEM_ACCESS
Definition bgworker.h:53
#define BGW_MAXLEN
Definition bgworker.h:93
uint32 BlockNumber
Definition block.h:31
static Datum values[MAXATTR]
Definition bootstrap.c:190
BufFile * BufFileOpenFileSet(FileSet *fileset, const char *name, int mode, bool missing_ok)
Definition buffile.c:292
void BufFileReadExact(BufFile *file, void *ptr, size_t size)
Definition buffile.c:655
size_t BufFileReadMaybeEOF(BufFile *file, void *ptr, size_t size, bool eofOK)
Definition buffile.c:665
void BufFileClose(BufFile *file)
Definition buffile.c:413
#define RelationGetNumberOfBlocks(reln)
Definition bufmgr.h:309
#define NameStr(name)
Definition c.h:835
#define Min(x, y)
Definition c.h:1091
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:249
#define BUFFERALIGN(LEN)
Definition c.h:898
#define VARHDRSZ
Definition c.h:781
#define Assert(condition)
Definition c.h:943
TransactionId MultiXactId
Definition c.h:746
int32_t int32
Definition c.h:620
#define unlikely(x)
Definition c.h:438
uint32_t uint32
Definition c.h:624
float float4
Definition c.h:713
uint32 TransactionId
Definition c.h:736
#define OidIsValid(objectId)
Definition c.h:858
size_t Size
Definition c.h:689
bool IsToastRelation(Relation relation)
Definition catalog.c:206
bool IsSystemRelation(Relation relation)
Definition catalog.c:74
Oid GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn)
Definition catalog.c:448
bool IsCatalogRelation(Relation relation)
Definition catalog.c:104
bool IsSystemClass(Oid relid, Form_pg_class reltuple)
Definition catalog.c:86
uint32 result
void analyze_rel(Oid relid, RangeVar *relation, const VacuumParams *params, List *va_cols, bool in_outer_xact, BufferAccessStrategy bstrategy)
Definition analyze.c:109
bool ConditionVariableCancelSleep(void)
void ConditionVariablePrepareToSleep(ConditionVariable *cv)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
bool defGetBoolean(DefElem *def)
Definition define.c:93
void performDeletion(const ObjectAddress *object, DropBehavior behavior, int flags)
Definition dependency.c:279
@ DEPENDENCY_AUTO
Definition dependency.h:34
@ DEPENDENCY_INTERNAL
Definition dependency.h:35
#define PERFORM_DELETION_INTERNAL
Definition dependency.h:92
dsm_handle dsm_segment_handle(dsm_segment *seg)
Definition dsm.c:1131
void dsm_detach(dsm_segment *seg)
Definition dsm.c:811
void * dsm_segment_address(dsm_segment *seg)
Definition dsm.c:1103
dsm_segment * dsm_create(Size size, int flags)
Definition dsm.c:524
void ThrowErrorData(ErrorData *edata)
Definition elog.c:2090
int errcode(int sqlerrcode)
Definition elog.c:874
#define _(x)
Definition elog.c:95
int errhint(const char *fmt,...) pg_attribute_printf(1
int errdetail(const char *fmt,...) pg_attribute_printf(1
#define FATAL
Definition elog.h:41
#define PG_TRY(...)
Definition elog.h:373
#define WARNING
Definition elog.h:36
#define DEBUG2
Definition elog.h:29
#define PG_END_TRY(...)
Definition elog.h:398
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:227
#define PG_FINALLY(...)
Definition elog.h:390
#define INFO
Definition elog.h:34
#define ereport(elevel,...)
Definition elog.h:151
void ExecCloseIndices(ResultRelInfo *resultRelInfo)
List * ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, EState *estate, uint32 flags, TupleTableSlot *slot, List *arbiterIndexes, bool *specConflict)
void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative)
void InitResultRelInfo(ResultRelInfo *resultRelInfo, Relation resultRelationDesc, Index resultRelationIndex, ResultRelInfo *partition_root_rri, int instrument_options)
Definition execMain.c:1262
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
const TupleTableSlotOps TTSOpsVirtual
Definition execTuples.c:84
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
void ExecForceStoreHeapTuple(HeapTuple tuple, TupleTableSlot *slot, bool shouldFree)
void FreeExecutorState(EState *estate)
Definition execUtils.c:197
EState * CreateExecutorState(void)
Definition execUtils.c:90
#define ResetPerTupleExprContext(estate)
Definition executor.h:669
#define EIIT_IS_UPDATE
Definition executor.h:750
#define GetPerTupleMemoryContext(estate)
Definition executor.h:665
#define EIIT_ONLY_SUMMARIZING
Definition executor.h:752
#define palloc_object(type)
Definition fe_memutils.h:74
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_object(type)
Definition fe_memutils.h:75
void systable_endscan(SysScanDesc sysscan)
Definition genam.c:604
HeapTuple systable_getnext(SysScanDesc sysscan)
Definition genam.c:515
SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key)
Definition genam.c:388
volatile sig_atomic_t InterruptPending
Definition globals.c:32
int MyProcPid
Definition globals.c:47
ProcNumber MyProcNumber
Definition globals.c:90
bool allowSystemTableMods
Definition globals.c:130
struct Latch * MyLatch
Definition globals.c:63
Oid MyDatabaseId
Definition globals.c:94
int NewGUCNestLevel(void)
Definition guc.c:2142
void RestrictSearchPath(void)
Definition guc.c:2153
void AtEOXact_GUC(bool isCommit, int nestLevel)
Definition guc.c:2169
void RelationClearMissing(Relation rel)
Definition heap.c:1964
Oid heap_create_with_catalog(const char *relname, Oid relnamespace, Oid reltablespace, Oid relid, Oid reltypeid, Oid reloftypeid, Oid ownerid, Oid accessmtd, TupleDesc tupdesc, List *cooked_constraints, char relkind, char relpersistence, bool shared_relation, bool mapped_relation, OnCommitAction oncommit, Datum reloptions, bool use_user_acl, bool allow_system_table_mods, bool is_internal, Oid relrewrite, ObjectAddress *typaddress)
Definition heap.c:1122
HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction)
Definition heapam.c:1422
HeapTuple heap_modify_tuple(HeapTuple tuple, TupleDesc tupleDesc, const Datum *replValues, const bool *replIsnull, const bool *doReplace)
Definition heaptuple.c:1118
bool heap_attisnull(HeapTuple tup, int attnum, TupleDesc tupleDesc)
Definition heaptuple.c:456
void heap_freetuple(HeapTuple htup)
Definition heaptuple.c:1372
#define HEAPTUPLESIZE
Definition htup.h:73
HeapTupleData * HeapTuple
Definition htup.h:71
HeapTupleHeaderData * HeapTupleHeader
Definition htup.h:23
#define HeapTupleIsValid(tuple)
Definition htup.h:78
static void * GETSTRUCT(const HeapTupleData *tuple)
#define stmt
Oid IndexGetRelation(Oid indexId, bool missing_ok)
Definition index.c:3604
bool reindex_relation(const ReindexStmt *stmt, Oid relid, int flags, const ReindexParams *params)
Definition index.c:3969
Oid index_create_copy(Relation heapRelation, uint16 flags, Oid oldIndexId, Oid tablespaceOid, const char *newName)
Definition index.c:1306
#define INDEX_CREATE_SUPPRESS_PROGRESS
Definition index.h:74
#define REINDEX_REL_FORCE_INDEXES_UNLOGGED
Definition index.h:169
#define REINDEX_REL_SUPPRESS_INDEX_USE
Definition index.h:167
#define REINDEX_REL_FORCE_INDEXES_PERMANENT
Definition index.h:170
#define REINDEX_REL_CHECK_CONSTRAINTS
Definition index.h:168
bool index_getnext_slot(IndexScanDesc scan, ScanDirection direction, TupleTableSlot *slot)
Definition indexam.c:737
IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, IndexScanInstrumentation *instrument, int nkeys, int norderbys, uint32 flags)
Definition indexam.c:257
void index_close(Relation relation, LOCKMODE lockmode)
Definition indexam.c:178
void index_endscan(IndexScanDesc scan)
Definition indexam.c:394
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition indexam.c:134
void index_rescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys)
Definition indexam.c:368
char * ChooseRelationName(const char *name1, const char *name2, const char *label, Oid namespaceid, bool isconstraint)
Definition indexcmds.c:2634
void CatalogTupleUpdateWithInfo(Relation heapRel, const ItemPointerData *otid, HeapTuple tup, CatalogIndexState indstate)
Definition indexing.c:337
void CatalogTupleUpdate(Relation heapRel, const ItemPointerData *otid, HeapTuple tup)
Definition indexing.c:313
void CatalogCloseIndexes(CatalogIndexState indstate)
Definition indexing.c:61
void CatalogTupleInsert(Relation heapRel, HeapTuple tup)
Definition indexing.c:233
CatalogIndexState CatalogOpenIndexes(Relation heapRel)
Definition indexing.c:43
static struct @177 value
#define INJECTION_POINT(name, arg)
void CacheInvalidateCatalog(Oid catalogId)
Definition inval.c:1612
void CacheInvalidateRelcacheByTuple(HeapTuple classTuple)
Definition inval.c:1669
int i
Definition isn.c:77
static void ItemPointerSetInvalid(ItemPointerData *pointer)
Definition itemptr.h:184
void SetLatch(Latch *latch)
Definition latch.c:290
List * lappend(List *list, void *datum)
Definition list.c:339
List * lappend_oid(List *list, Oid datum)
Definition list.c:375
void list_free(List *list)
Definition list.c:1546
bool ConditionalLockRelationOid(Oid relid, LOCKMODE lockmode)
Definition lmgr.c:151
void UnlockRelationOid(Oid relid, LOCKMODE lockmode)
Definition lmgr.c:229
void LockRelationOid(Oid relid, LOCKMODE lockmode)
Definition lmgr.c:107
bool CheckRelationLockedByMe(Relation relation, LOCKMODE lockmode, bool orstronger)
Definition lmgr.c:334
bool CheckRelationOidLockedByMe(Oid relid, LOCKMODE lockmode, bool orstronger)
Definition lmgr.c:351
int LOCKMODE
Definition lockdefs.h:26
#define NoLock
Definition lockdefs.h:34
#define AccessExclusiveLock
Definition lockdefs.h:43
#define AccessShareLock
Definition lockdefs.h:36
#define ShareUpdateExclusiveLock
Definition lockdefs.h:39
#define RowExclusiveLock
Definition lockdefs.h:38
LockTupleMode
Definition lockoptions.h:51
char * get_rel_name(Oid relid)
Definition lsyscache.c:2148
char get_rel_relkind(Oid relid)
Definition lsyscache.c:2223
Oid get_rel_namespace(Oid relid)
Definition lsyscache.c:2172
RegProcedure get_opcode(Oid opno)
Definition lsyscache.c:1505
bool get_index_isclustered(Oid index_oid)
Definition lsyscache.c:3848
Oid get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, int16 strategy)
Definition lsyscache.c:170
char * get_namespace_name(Oid nspid)
Definition lsyscache.c:3588
Oid get_relname_relid(const char *relname, Oid relnamespace)
Definition lsyscache.c:2105
void MemoryContextReset(MemoryContext context)
Definition mcxt.c:403
char * pstrdup(const char *in)
Definition mcxt.c:1781
void pfree(void *pointer)
Definition mcxt.c:1616
MemoryContext TopMemoryContext
Definition mcxt.c:166
void * palloc(Size size)
Definition mcxt.c:1387
void MemoryContextDelete(MemoryContext context)
Definition mcxt.c:472
MemoryContext PortalContext
Definition mcxt.c:175
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
#define RESUME_INTERRUPTS()
Definition miscadmin.h:136
#define SECURITY_RESTRICTED_OPERATION
Definition miscadmin.h:320
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
#define HOLD_INTERRUPTS()
Definition miscadmin.h:134
void GetUserIdAndSecContext(Oid *userid, int *sec_context)
Definition miscinit.c:613
Oid GetUserId(void)
Definition miscinit.c:470
void SetUserIdAndSecContext(Oid userid, int sec_context)
Definition miscinit.c:620
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2865
#define MultiXactIdIsValid(multi)
Definition multixact.h:29
#define InvalidMultiXactId
Definition multixact.h:25
Oid LookupCreationNamespace(const char *nspname)
Definition namespace.c:3500
Oid RangeVarGetRelidExtended(const RangeVar *relation, LOCKMODE lockmode, uint32 flags, RangeVarGetRelidCallback callback, void *callback_arg)
Definition namespace.c:442
static char * errmsg
#define InvokeObjectPostAlterHookArg(classId, objectId, subId, auxiliaryId, is_internal)
#define ObjectAddressSet(addr, class_id, object_id)
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
int parser_errposition(ParseState *pstate, int location)
Definition parse_node.c:106
RepackCommand
@ REPACK_COMMAND_REPACK
@ REPACK_COMMAND_CLUSTER
@ REPACK_COMMAND_VACUUMFULL
#define ACL_MAINTAIN
Definition parsenodes.h:90
@ DROP_RESTRICT
static int verbose
#define ERRCODE_DATA_CORRUPTED
FormData_pg_class * Form_pg_class
Definition pg_class.h:160
#define NAMEDATALEN
#define MAXPGPATH
END_CATALOG_STRUCT typedef FormData_pg_constraint * Form_pg_constraint
const void * data
void recordDependencyOn(const ObjectAddress *depender, const ObjectAddress *referenced, DependencyType behavior)
Definition pg_depend.c:47
long changeDependencyFor(Oid classId, Oid objectId, Oid refClassId, Oid oldRefObjectId, Oid newRefObjectId)
Definition pg_depend.c:459
long deleteDependencyRecordsFor(Oid classId, Oid objectId, bool skipExtensionDeps)
Definition pg_depend.c:303
END_CATALOG_STRUCT typedef FormData_pg_index * Form_pg_index
Definition pg_index.h:74
List * find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, List **numparents)
static int list_length(const List *l)
Definition pg_list.h:152
#define NIL
Definition pg_list.h:68
#define forboth(cell1, list1, cell2, list2)
Definition pg_list.h:550
#define foreach_current_index(var_or_cell)
Definition pg_list.h:435
static Oid list_nth_oid(const List *list, int n)
Definition pg_list.h:353
#define foreach_ptr(type, var, lst)
Definition pg_list.h:501
#define foreach_node(type, var, lst)
Definition pg_list.h:528
#define foreach_oid(var, lst)
Definition pg_list.h:503
#define lfirst_oid(lc)
Definition pg_list.h:174
const char * pg_rusage_show(const PGRUsage *ru0)
Definition pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition pg_rusage.c:27
bool plan_cluster_use_sort(Oid tableOid, Oid indexOid)
Definition planner.c:6919
#define snprintf
Definition port.h:260
static Datum PointerGetDatum(const void *X)
Definition postgres.h:342
static Datum BoolGetDatum(bool X)
Definition postgres.h:112
static Datum ObjectIdGetDatum(Oid X)
Definition postgres.h:252
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:332
static Datum UInt32GetDatum(uint32 X)
Definition postgres.h:232
#define InvalidOid
unsigned int Oid
int pq_getmsgbyte(StringInfo msg)
Definition pqformat.c:398
void pq_parse_errornotice(StringInfo msg, ErrorData *edata)
Definition pqmq.c:229
void TransferPredicateLocksToHeapRelation(Relation relation)
Definition predicate.c:3053
static int fb(int x)
@ ONCOMMIT_NOOP
Definition primnodes.h:59
#define PROGRESS_REPACK_PHASE_CATCH_UP
Definition progress.h:103
#define PROGRESS_REPACK_PHASE
Definition progress.h:86
#define PROGRESS_REPACK_COMMAND
Definition progress.h:85
#define PROGRESS_REPACK_PHASE_SWAP_REL_FILES
Definition progress.h:104
#define PROGRESS_REPACK_HEAP_TUPLES_DELETED
Definition progress.h:91
#define PROGRESS_REPACK_HEAP_TUPLES_UPDATED
Definition progress.h:90
#define PROGRESS_REPACK_PHASE_FINAL_CLEANUP
Definition progress.h:106
#define PROGRESS_REPACK_PHASE_REBUILD_INDEX
Definition progress.h:105
#define PROGRESS_REPACK_HEAP_TUPLES_INSERTED
Definition progress.h:89
#define PqMsg_ErrorResponse
Definition protocol.h:44
#define PqMsg_NoticeResponse
Definition protocol.h:49
char * psprintf(const char *fmt,...)
Definition psprintf.c:43
#define RelationGetRelid(relation)
Definition rel.h:516
#define RelationGetDescr(relation)
Definition rel.h:542
#define RelationIsMapped(relation)
Definition rel.h:565
#define RelationGetRelationName(relation)
Definition rel.h:550
#define RelationIsPopulated(relation)
Definition rel.h:688
#define RELATION_IS_OTHER_TEMP(relation)
Definition rel.h:669
#define RelationGetNamespace(relation)
Definition rel.h:557
List * RelationGetIndexList(Relation relation)
Definition relcache.c:4827
Oid RelationGetReplicaIndex(Relation relation)
Definition relcache.c:5063
void RelationAssumeNewRelfilelocator(Relation relation)
Definition relcache.c:3968
void RelationMapRemoveMapping(Oid relationId)
Definition relmapper.c:439
RelFileNumber RelationMapOidToFilenumber(Oid relationId, bool shared)
Definition relmapper.c:166
void RelationMapUpdateMap(Oid relationId, RelFileNumber fileNumber, bool shared, bool immediate)
Definition relmapper.c:326
Oid RelFileNumber
Definition relpath.h:25
#define RelFileNumberIsValid(relnumber)
Definition relpath.h:27
static bool cluster_rel_recheck(RepackCommand cmd, Relation OldHeap, Oid indexOid, Oid userid, LOCKMODE lmode, int options)
Definition repack.c:690
static void restore_tuple(BufFile *file, Relation relation, TupleTableSlot *slot)
Definition repack.c:2678
static void start_repack_decoding_worker(Oid relid)
Definition repack.c:3304
static void check_concurrent_repack_requirements(Relation rel, Oid *ident_idx_p)
Definition repack.c:877
static List * get_tables_to_repack_partitioned(RepackCommand cmd, Oid relid, bool rel_is_index, MemoryContext permcxt)
Definition repack.c:2216
void finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, bool is_system_catalog, bool swap_toast_by_content, bool check_constraints, bool is_internal, bool reindex, TransactionId frozenXid, MultiXactId cutoffMulti, char newrelpersistence)
Definition repack.c:1865
static Relation process_single_relation(RepackStmt *stmt, LOCKMODE lockmode, bool isTopLevel, ClusterParams *params)
Definition repack.c:2307
void check_index_is_clusterable(Relation OldHeap, Oid indexOid, LOCKMODE lockmode)
Definition repack.c:751
static void release_change_context(ChangeContext *chgcxt)
Definition repack.c:2958
static bool find_target_tuple(Relation rel, ChangeContext *chgcxt, TupleTableSlot *locator, TupleTableSlot *received)
Definition repack.c:2787
static bool repack_is_permitted_for_relation(RepackCommand cmd, Oid relid, Oid userid)
Definition repack.c:2279
void ExecRepack(ParseState *pstate, RepackStmt *stmt, bool isTopLevel)
Definition repack.c:237
static void stop_repack_decoding_worker(void)
Definition repack.c:3395
static LOCKMODE RepackLockLevel(bool concurrent)
Definition repack.c:469
static void apply_concurrent_delete(Relation rel, TupleTableSlot *slot)
Definition repack.c:2646
static void ProcessRepackMessage(StringInfo msg)
Definition repack.c:3604
volatile sig_atomic_t RepackMessagePending
Definition repack.c:146
void cluster_rel(RepackCommand cmd, Relation OldHeap, Oid indexOid, ClusterParams *params, bool isTopLevel)
Definition repack.c:502
static List * get_tables_to_repack(RepackCommand cmd, bool usingindex, MemoryContext permcxt)
Definition repack.c:2086
static const char * RepackCommandAsString(RepackCommand cmd)
Definition repack.c:2441
void DecodingWorkerFileName(char *fname, Oid relid, uint32 seq)
Definition repack.c:3494
Oid make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod, char relpersistence, LOCKMODE lockmode)
Definition repack.c:1108
static void initialize_change_context(ChangeContext *chgcxt, Relation relation, Oid ident_index_id)
Definition repack.c:2884
static void process_concurrent_changes(XLogRecPtr end_of_wal, ChangeContext *chgcxt, bool done)
Definition repack.c:2828
static void copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex, Snapshot snapshot, bool verbose, bool *pSwapToastByContent, TransactionId *pFreezeXid, MultiXactId *pCutoffMulti)
Definition repack.c:1237
static void rebuild_relation_finish_concurrent(Relation NewHeap, Relation OldHeap, Oid identIdx, TransactionId frozenXid, MultiXactId cutoffMulti)
Definition repack.c:2976
static void apply_concurrent_insert(Relation rel, TupleTableSlot *slot, ChangeContext *chgcxt)
Definition repack.c:2585
static void apply_concurrent_changes(BufFile *file, ChangeContext *chgcxt)
Definition repack.c:2459
static void rebuild_relation(Relation OldHeap, Relation index, bool verbose, Oid ident_idx)
Definition repack.c:961
void HandleRepackMessageInterrupt(void)
Definition repack.c:3508
static Snapshot get_initial_snapshot(DecodingWorker *worker)
Definition repack.c:3437
void mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
Definition repack.c:811
static void adjust_toast_pointers(Relation relation, TupleTableSlot *dest, TupleTableSlot *src)
Definition repack.c:2752
#define WORKER_FILE_SNAPSHOT
Definition repack.c:97
static void swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, bool swap_toast_by_content, bool is_internal, TransactionId frozenXid, MultiXactId cutoffMulti, Oid *mapped_tables)
Definition repack.c:1483
static Oid determine_clustered_index(Relation rel, bool usingindex, const char *indexname)
Definition repack.c:2398
void ProcessRepackMessages(void)
Definition repack.c:3519
static void copy_index_constraints(Relation old_index, Oid new_index_id, Oid new_heap_id)
Definition repack.c:3237
static void apply_concurrent_update(Relation rel, TupleTableSlot *spilled_tuple, TupleTableSlot *ondisk_tuple, ChangeContext *chgcxt)
Definition repack.c:2606
static DecodingWorker * decoding_worker
Definition repack.c:140
static List * build_new_indexes(Relation NewHeap, Relation OldHeap, List *OldIndexes)
Definition repack.c:3192
#define CLUOPT_VERBOSE
Definition repack.h:25
#define CLUOPT_ANALYZE
Definition repack.h:28
#define CLUOPT_CONCURRENT
Definition repack.h:29
#define CLUOPT_RECHECK_ISCLUSTERED
Definition repack.h:27
#define CLUOPT_RECHECK
Definition repack.h:26
#define CHANGE_UPDATE_OLD
#define CHANGE_DELETE
#define CHANGE_UPDATE_NEW
char ConcurrentChangeKind
#define CHANGE_INSERT
#define REPACK_ERROR_QUEUE_SIZE
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Definition scankey.c:76
@ ForwardScanDirection
Definition sdir.h:28
void SharedFileSetInit(SharedFileSet *fileset, dsm_segment *seg)
shm_mq * shm_mq_create(void *address, Size size)
Definition shm_mq.c:179
void shm_mq_detach(shm_mq_handle *mqh)
Definition shm_mq.c:845
void shm_mq_set_receiver(shm_mq *mq, PGPROC *proc)
Definition shm_mq.c:208
shm_mq_result shm_mq_receive(shm_mq_handle *mqh, Size *nbytesp, void **datap, bool nowait)
Definition shm_mq.c:574
shm_mq_handle * shm_mq_attach(shm_mq *mq, dsm_segment *seg, BackgroundWorkerHandle *handle)
Definition shm_mq.c:292
shm_mq_result
Definition shm_mq.h:39
@ SHM_MQ_SUCCESS
Definition shm_mq.h:40
@ SHM_MQ_WOULD_BLOCK
Definition shm_mq.h:41
@ SHM_MQ_DETACHED
Definition shm_mq.h:42
ScanKeyData * ScanKey
Definition skey.h:75
Snapshot GetTransactionSnapshot(void)
Definition snapmgr.c:272
void PushActiveSnapshot(Snapshot snapshot)
Definition snapmgr.c:682
Snapshot RestoreSnapshot(char *start_address)
Definition snapmgr.c:1793
void UpdateActiveSnapshotCommandId(void)
Definition snapmgr.c:744
void PopActiveSnapshot(void)
Definition snapmgr.c:775
Snapshot GetActiveSnapshot(void)
Definition snapmgr.c:800
#define InvalidSnapshot
Definition snapshot.h:119
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56
static void SpinLockInit(volatile slock_t *lock)
Definition spin.h:50
void relation_close(Relation relation, LOCKMODE lockmode)
Definition relation.c:206
Relation relation_open(Oid relationId, LOCKMODE lockmode)
Definition relation.c:48
PGPROC * MyProc
Definition proc.c:71
void BecomeLockGroupLeader(void)
Definition proc.c:2042
#define BTEqualStrategyNumber
Definition stratnum.h:31
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition stringinfo.c:281
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
int cc_file_seq
Definition repack.c:121
int cc_ident_key_nentries
Definition repack.c:118
Relation cc_rel
Definition repack.c:105
Relation cc_ident_index
Definition repack.c:116
ScanKey cc_ident_key
Definition repack.c:117
EState * cc_estate
Definition repack.c:109
ResultRelInfo * cc_rri
Definition repack.c:108
uint32 options
Definition repack.h:34
bool attisdropped
Definition tupdesc.h:78
ConditionVariable cv
char error_queue[FLEXIBLE_ARRAY_MEMBER]
dsm_segment * seg
Definition repack.c:133
BackgroundWorkerHandle * handle
Definition repack.c:130
shm_mq_handle * error_mqh
Definition repack.c:136
Definition pg_list.h:54
Oid indexOid
Definition repack.c:90
Oid tableOid
Definition repack.c:89
Oid rd_pkindex
Definition rel.h:153
Form_pg_class rd_rel
Definition rel.h:111
Datum sk_argument
Definition skey.h:72
Oid sk_collation
Definition skey.h:70
TupleDesc tts_tupleDescriptor
Definition tuptable.h:129
bool * tts_isnull
Definition tuptable.h:133
ItemPointerData tts_tid
Definition tuptable.h:142
Datum * tts_values
Definition tuptable.h:131
TransactionId FreezeLimit
Definition vacuum.h:288
TransactionId OldestXmin
Definition vacuum.h:278
TransactionId relfrozenxid
Definition vacuum.h:262
MultiXactId relminmxid
Definition vacuum.h:263
MultiXactId MultiXactCutoff
Definition vacuum.h:289
Definition type.h:96
Definition c.h:776
void ReleaseSysCache(HeapTuple tuple)
Definition syscache.c:265
HeapTuple SearchSysCache1(SysCacheIdentifier cacheId, Datum key1)
Definition syscache.c:221
Datum SysCacheGetAttr(SysCacheIdentifier cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition syscache.c:596
#define SearchSysCacheCopy1(cacheId, key1)
Definition syscache.h:91
#define SearchSysCacheExists1(cacheId, key1)
Definition syscache.h:100
Relation try_table_open(Oid relationId, LOCKMODE lockmode)
Definition table.c:60
void table_close(Relation relation, LOCKMODE lockmode)
Definition table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition table.c:40
TableScanDesc table_beginscan_catalog(Relation relation, int nkeys, ScanKeyData *key)
Definition tableam.c:113
const TupleTableSlotOps * table_slot_callbacks(Relation relation)
Definition tableam.c:59
TU_UpdateIndexes
Definition tableam.h:130
@ TU_Summarizing
Definition tableam.h:138
@ TU_None
Definition tableam.h:132
static void table_endscan(TableScanDesc scan)
Definition tableam.h:1058
#define TABLE_INSERT_NO_LOGICAL
Definition tableam.h:283
TM_Result
Definition tableam.h:92
@ TM_Ok
Definition tableam.h:97
static void table_relation_copy_for_cluster(Relation OldTable, Relation NewTable, Relation OldIndex, bool use_sort, TransactionId OldestXmin, Snapshot snapshot, TransactionId *xid_cutoff, MultiXactId *multi_cutoff, double *num_tuples, double *tups_vacuumed, double *tups_recently_dead)
Definition tableam.h:1745
static void table_tuple_insert(Relation rel, TupleTableSlot *slot, CommandId cid, uint32 options, BulkInsertStateData *bistate)
Definition tableam.h:1455
#define TABLE_DELETE_NO_LOGICAL
Definition tableam.h:287
#define TABLE_UPDATE_NO_LOGICAL
Definition tableam.h:290
static TM_Result table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, CommandId cid, uint32 options, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
Definition tableam.h:1597
static TM_Result table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid, uint32 options, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd)
Definition tableam.h:1546
void ResetRelRewrite(Oid myrelid)
Definition tablecmds.c:4394
void CheckTableNotInUse(Relation rel, const char *stmt)
Definition tablecmds.c:4447
void RenameRelationInternal(Oid myrelid, const char *newrelname, bool is_internal, bool is_index)
Definition tablecmds.c:4301
void RangeVarCallbackMaintainsTable(const RangeVar *relation, Oid relId, Oid oldRelId, void *arg)
Oid toast_get_valid_index(Oid toastoid, LOCKMODE lock)
void NewHeapCreateToastTable(Oid relOid, Datum reloptions, LOCKMODE lockmode, Oid OIDOldToast)
Definition toasting.c:65
#define InvalidTransactionId
Definition transam.h:31
#define TransactionIdIsValid(xid)
Definition transam.h:41
#define TransactionIdIsNormal(xid)
Definition transam.h:42
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
static CompactAttribute * TupleDescCompactAttr(TupleDesc tupdesc, int i)
Definition tupdesc.h:195
static void slot_getsomeattrs(TupleTableSlot *slot, int attnum)
Definition tuptable.h:376
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition tuptable.h:476
static bool slot_attisnull(TupleTableSlot *slot, int attnum)
Definition tuptable.h:403
bool vacuum_get_cutoffs(Relation rel, const VacuumParams *params, struct VacuumCutoffs *cutoffs)
Definition vacuum.c:1101
#define VACOPT_VERBOSE
Definition vacuum.h:181
#define VACOPT_ANALYZE
Definition vacuum.h:180
static bool VARATT_IS_EXTERNAL_ONDISK(const void *PTR)
Definition varatt.h:361
static Size VARSIZE_ANY(const void *PTR)
Definition varatt.h:460
static bool VARATT_IS_EXTERNAL_INDIRECT(const void *PTR)
Definition varatt.h:368
static void SET_VARSIZE(void *PTR, Size len)
Definition varatt.h:432
static bool initialized
Definition win32ntdll.c:36
void CommandCounterIncrement(void)
Definition xact.c:1130
void PreventInTransactionBlock(bool isTopLevel, const char *stmtType)
Definition xact.c:3698
void StartTransactionCommand(void)
Definition xact.c:3109
void CommitTransactionCommand(void)
Definition xact.c:3207
CommandId GetCurrentCommandId(bool used)
Definition xact.c:831
XLogRecPtr GetFlushRecPtr(TimeLineID *insertTLI)
Definition xlog.c:6995
XLogRecPtr GetXLogInsertEndRecPtr(void)
Definition xlog.c:10124
void XLogFlush(XLogRecPtr record)
Definition xlog.c:2801
uint64 XLogRecPtr
Definition xlogdefs.h:21
#define InvalidXLogRecPtr
Definition xlogdefs.h:28