PostgreSQL Source Code git master
Loading...
Searching...
No Matches
repack_worker.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * repack_worker.c
4 * Implementation of the background worker for ad-hoc logical decoding
5 * during REPACK (CONCURRENTLY).
6 *
7 *
8 * Copyright (c) 2026, PostgreSQL Global Development Group
9 *
10 *
11 * IDENTIFICATION
12 * src/backend/commands/repack_worker.c
13 *
14 *-------------------------------------------------------------------------
15 */
16#include "postgres.h"
17
18#include "access/table.h"
20#include "access/xlogutils.h"
21#include "access/xlogwait.h"
22#include "commands/repack.h"
24#include "libpq/pqmq.h"
26#include "storage/ipc.h"
27#include "storage/proc.h"
28#include "tcop/tcopprot.h"
29#include "utils/memutils.h"
30
31#define PGREPACK_PLUGIN "pgrepack"
32
33static void RepackWorkerShutdown(int code, Datum arg);
36static void export_initial_snapshot(Snapshot snapshot,
37 DecodingWorkerShared *shared);
39 DecodingWorkerShared *shared);
40
41/* Is this process a REPACK worker? */
42static bool am_repack_worker = false;
43
44/* The WAL segment being decoded. */
46
47/* Our DSM segment, for shutting down */
49
50/*
51 * Keep track of the table we're processing, to skip logical decoding of data
52 * from other relations.
53 */
56
57
58/* REPACK decoding worker entry point */
59void
61{
62 dsm_segment *seg;
64 shm_mq *mq;
67 SharedFileSet *sfs;
68 Snapshot snapshot;
69
70 am_repack_worker = true;
71
73
75 if (seg == NULL)
78 errmsg("could not map dynamic shared memory segment"));
80
82
83 /* Arrange to signal the leader if we exit. */
85
86 /*
87 * Join locking group - see the comments around the call of
88 * start_repack_decoding_worker().
89 */
90 if (!BecomeLockGroupMember(shared->backend_proc, shared->backend_pid))
91 return; /* The leader is not running anymore. */
92
93 /*
94 * Setup a queue to send error messages to the backend that launched this
95 * worker.
96 */
97 mq = (shm_mq *) (char *) BUFFERALIGN(shared->error_queue);
99 mqh = shm_mq_attach(mq, seg, NULL);
102 shared->backend_proc_number);
103
104 /* Connect to the database. LOGIN is not required. */
107
108 /*
109 * Transaction is needed to open relation, and it also provides us with a
110 * resource owner.
111 */
113
115
116 /*
117 * Not sure the spinlock is needed here - the backend should not change
118 * anything in the shared memory until we have serialized the snapshot.
119 */
120 SpinLockAcquire(&shared->mutex);
122 sfs = &shared->sfs;
123 SpinLockRelease(&shared->mutex);
124
125 SharedFileSetAttach(sfs, seg);
126
127 /*
128 * Prepare to capture the concurrent data changes ourselves.
129 */
131
132 /* Announce that we're ready. */
133 SpinLockAcquire(&shared->mutex);
134 shared->initialized = true;
135 SpinLockRelease(&shared->mutex);
136 ConditionVariableSignal(&shared->cv);
137
138 /* There doesn't seem to be a nice API to set these */
140 XactReadOnly = true;
141
142 /* Build the initial snapshot and export it. */
143 snapshot = SnapBuildInitialSnapshot(decoding_ctx->snapshot_builder);
144 export_initial_snapshot(snapshot, shared);
145
146 /*
147 * Only historic snapshots should be used now. Do not let us restrict the
148 * progress of xmin horizon.
149 */
151
152 for (;;)
153 {
154 bool stop = decode_concurrent_changes(decoding_ctx, shared);
155
156 if (stop)
157 break;
158
159 }
160
161 /* Cleanup. */
164}
165
166/*
167 * See ParallelWorkerShutdown for details.
168 */
169static void
180
181bool
183{
184 return am_repack_worker;
185}
186
187/*
188 * This function is much like pg_create_logical_replication_slot() except that
189 * the new slot is neither released (if anyone else could read changes from
190 * our slot, we could miss changes other backends do while we copy the
191 * existing data into a temporary table), nor persisted (it's easier to handle
192 * a crash by restarting all the work from scratch).
193 */
196{
197 Relation rel;
200 char slotname[NAMEDATALEN];
203
204 /*
205 * REPACK CONCURRENTLY is not allowed in a transaction block, so this
206 * should never fire.
207 */
209
210 /* Make sure we can use logical decoding */
212
213 /*
214 * Create the replication slot we'll use, and enable logical decoding in
215 * case it isn't already on.
216 *
217 * Make the slot RS_TEMPORARY so that it's removed on ERROR. A backend
218 * cannot execute multiple REPACK commands at a time, so the PID is enough
219 * to make the slot name unique.
220 */
221 snprintf(slotname, NAMEDATALEN, "pg_repack_%d", MyProcPid);
222 ReplicationSlotCreate(slotname, true, RS_TEMPORARY, false, true,
223 false, false);
225
226 /*
227 * Set up repacked_rel_locator and repacked_rel_toast_locator, which we
228 * use to skip decoding of unrelated relations.
229 */
230 rel = table_open(relid, AccessShareLock);
232 toastrelid = rel->rd_rel->reltoastrelid;
234 {
236
237 /* Avoid logical decoding of other TOAST relations. */
241 }
243
244 /*
245 * Set up our logical decoding context. We initially use the blocking
246 * read_local_xlog_page until we find the start point, and switch to the
247 * non-blocking interface afterwards.
248 */
250 NIL,
251 true,
252 true,
254 XL_ROUTINE(.page_read = read_local_xlog_page,
255 .segment_open = wal_segment_open,
256 .segment_close = wal_segment_close),
257 NULL, NULL, NULL);
258
259 /* Complete setup of output_writer_private */
261 dstate->relid = relid;
262 dstate->worker_cxt = CurrentMemoryContext;
263 dstate->worker_resowner = CurrentResourceOwner;
264
265 /* We don't have control over fast_forward, but verify it's sane */
266 Assert(!ctx->fast_forward);
267
268 /* Find our decoding starting point. */
270
271 /* From this point on, we need non-blocking WAL reads */
273
274 /*
275 * Initialize repack_current_segment so that we can notice WAL segment
276 * boundaries.
277 */
280
281 /*
282 * Set up our reader private state to let the page-read callback notify
283 * when end-of-WAL has been reached. This lives in the same context as
284 * the logical decoding itself.
285 */
289
290 return ctx;
291}
292
293static void
305
306/*
307 * Make snapshot available to the backend that launched the decoding worker.
308 */
309static void
311{
312 char fname[MAXPGPATH];
313 BufFile *file;
315 char *snap_space;
316
318 snap_space = (char *) palloc(snap_size);
319 SerializeSnapshot(snapshot, snap_space);
320
321 DecodingWorkerFileName(fname, shared->relid, shared->last_exported + 1);
322 file = BufFileCreateFileSet(&shared->sfs.fs, fname);
323 /* To make restoration easier, write the snapshot size first. */
324 BufFileWrite(file, &snap_size, sizeof(snap_size));
326 BufFileClose(file);
328
329 /* Increase the counter to tell the backend that the file is available. */
330 SpinLockAcquire(&shared->mutex);
331 shared->last_exported++;
332 SpinLockRelease(&shared->mutex);
333 ConditionVariableSignal(&shared->cv);
334}
335
336/*
337 * Decode logical changes from the WAL sequence and store them to a file.
338 *
339 * If true is returned, there is no more work for the worker.
340 */
341static bool
343 DecodingWorkerShared *shared)
344{
346 XLogRecPtr lsn_upto;
347 bool done;
348 char fname[MAXPGPATH];
349
351
352 /* Open the output file. */
353 DecodingWorkerFileName(fname, shared->relid, shared->last_exported + 1);
354 dstate->file = BufFileCreateFileSet(&shared->sfs.fs, fname);
355
356 SpinLockAcquire(&shared->mutex);
357 lsn_upto = shared->lsn_upto;
358 done = shared->done;
359 SpinLockRelease(&shared->mutex);
360
361 while (true)
362 {
363 XLogRecord *record;
365 char *errm = NULL;
366 XLogRecPtr end_lsn;
367
369
370 record = XLogReadRecord(ctx->reader, &errm);
371 if (record)
372 {
374
375 /*
376 * We want to allow WAL to be recycled while REPACK is running.
377 *
378 * In normal usage of a replication slot, we need to be very
379 * careful not to advance the LSN until it's been confirmed as
380 * received by the remote. In REPACK's case, this is not needed:
381 * REPACK will never try to replay the same WAL after a crash, and
382 * if there _is_ a crash, the whole REPACK has to be started from
383 * scratch anyway.
384 *
385 * So here we disregard the careful LSN tracking and just move the
386 * LSN locations forward to what we've processed. Note that it
387 * would be bogus to move the xmin forward, though, so we don't
388 * touch that.
389 *
390 * This can be done on whatever schedule is convenient, but in
391 * order not to cause unnecessary load, we only do it as we cross
392 * each WAL segment boundary.
393 */
394 end_lsn = ctx->reader->EndRecPtr;
397 {
400 elog(DEBUG1, "REPACK: confirmed receive location %X/%X",
401 (uint32) (end_lsn >> 32), (uint32) end_lsn);
403 }
404 }
405 else
406 {
408
409 if (errm)
412 errmsg("could not read WAL from timeline %u at %X/%08X: %s",
413 ctx->reader->currTLI,
415 errm));
416
417 /*
418 * In the decoding loop we do not want to get blocked when there
419 * is no more WAL available, otherwise the loop would become
420 * uninterruptible.
421 */
423 if (priv->end_of_wal)
424 /* Do not miss the end of WAL condition next time. */
425 priv->end_of_wal = false;
426 else
429 errmsg("could not read WAL record"));
430 }
431
432 /*
433 * Whether or not we could read a new record, keep checking if
434 * 'lsn_upto' was specified.
435 */
436 if (!XLogRecPtrIsValid(lsn_upto))
437 {
438 SpinLockAcquire(&shared->mutex);
439 lsn_upto = shared->lsn_upto;
440 /* 'done' should be set at the same time as 'lsn_upto' */
441 done = shared->done;
442 SpinLockRelease(&shared->mutex);
443 }
444 if (XLogRecPtrIsValid(lsn_upto) &&
445 ctx->reader->EndRecPtr >= lsn_upto)
446 break;
447
448 if (record == NULL)
449 {
450 int64 timeout = 0;
451 WaitLSNResult res;
452
453 /*
454 * Before we retry reading, wait until new WAL is flushed.
455 *
456 * There is a race condition such that the backend executing
457 * REPACK determines 'lsn_upto', but before it sets the shared
458 * variable, we reach the end of WAL. In that case we'd need to
459 * wait until the next WAL flush (unrelated to REPACK). Although
460 * that should not be a problem in a busy system, it might be
461 * noticeable in other cases, including regression tests (which
462 * are not necessarily executed in parallel). Therefore it makes
463 * sense to use a timeout.
464 *
465 * If lsn_upto is valid, WAL records having LSN lower than that
466 * should already have been flushed to disk.
467 */
468 if (!XLogRecPtrIsValid(lsn_upto))
469 timeout = 100L;
471 ctx->reader->EndRecPtr + 1,
472 timeout);
473 if (res != WAIT_LSN_RESULT_SUCCESS &&
477 errmsg("waiting for WAL failed"));
478 }
479 }
480
481 /*
482 * Close the file so we can make it available to the backend.
483 */
484 BufFileClose(dstate->file);
485 dstate->file = NULL;
486 SpinLockAcquire(&shared->mutex);
487 shared->lsn_upto = InvalidXLogRecPtr;
488 shared->last_exported++;
489 SpinLockRelease(&shared->mutex);
490 ConditionVariableSignal(&shared->cv);
491
492 return done;
493}
494
495/*
496 * Does the WAL record contain a data change that this backend does not need
497 * to decode on behalf of REPACK (CONCURRENTLY)?
498 */
499bool
501{
503 RelFileLocator locator;
504
505 /* TOAST locator should not be set unless the main is. */
508
509 /*
510 * Backends not involved in REPACK (CONCURRENTLY) should not do the
511 * filtering.
512 */
514 return false;
515
516 /*
517 * If the record does not contain block 0, it's probably not INSERT /
518 * UPDATE / DELETE. In any case, we do not have enough information to
519 * filter the change out.
520 */
521 if (!XLogRecGetBlockTagExtended(r, 0, &locator, NULL, NULL, NULL))
522 return false;
523
524 /*
525 * Decode the change if it belongs to the table we are repacking, or if it
526 * belongs to its TOAST relation.
527 */
529 return false;
532 return false;
533
534 /* Filter out changes of other tables. */
535 return true;
536}
void BackgroundWorkerUnblockSignals(void)
Definition bgworker.c:949
void BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags)
Definition bgworker.c:909
#define BGWORKER_BYPASS_ROLELOGINCHECK
Definition bgworker.h:167
void BufFileWrite(BufFile *file, const void *ptr, size_t size)
Definition buffile.c:677
BufFile * BufFileCreateFileSet(FileSet *fileset, const char *name)
Definition buffile.c:268
void BufFileClose(BufFile *file)
Definition buffile.c:413
#define BUFFERALIGN(LEN)
Definition c.h:898
#define Assert(condition)
Definition c.h:943
int64_t int64
Definition c.h:621
uint32_t uint32
Definition c.h:624
#define OidIsValid(objectId)
Definition c.h:858
size_t Size
Definition c.h:689
void ConditionVariableSignal(ConditionVariable *cv)
void LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogReaderState *record)
Definition decode.c:89
void dsm_detach(dsm_segment *seg)
Definition dsm.c:811
void * dsm_segment_address(dsm_segment *seg)
Definition dsm.c:1103
dsm_segment * dsm_attach(dsm_handle h)
Definition dsm.c:673
Datum arg
Definition elog.c:1323
int errcode_for_file_access(void)
Definition elog.c:898
int errcode(int sqlerrcode)
Definition elog.c:875
#define DEBUG1
Definition elog.h:31
#define ERROR
Definition elog.h:40
#define elog(elevel,...)
Definition elog.h:228
#define ereport(elevel,...)
Definition elog.h:152
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
#define palloc0_object(type)
Definition fe_memutils.h:90
int MyProcPid
Definition globals.c:49
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:344
#define AccessShareLock
Definition lockdefs.h:36
void LogicalConfirmReceivedLocation(XLogRecPtr lsn)
Definition logical.c:1813
void FreeDecodingContext(LogicalDecodingContext *ctx)
Definition logical.c:670
void DecodingContextFindStartpoint(LogicalDecodingContext *ctx)
Definition logical.c:626
void LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart_lsn)
Definition logical.c:1737
LogicalDecodingContext * CreateInitDecodingContext(const char *plugin, List *output_plugin_options, bool need_full_snapshot, bool for_repack, XLogRecPtr restart_lsn, XLogReaderRoutine *xl_routine, LogicalOutputPluginWriterPrepareWrite prepare_write, LogicalOutputPluginWriterWrite do_write, LogicalOutputPluginWriterUpdateProgress update_progress)
Definition logical.c:322
void CheckLogicalDecodingRequirements(bool repack)
Definition logical.c:111
void EnsureLogicalDecodingEnabled(void)
Definition logicalctl.c:289
void pfree(void *pointer)
Definition mcxt.c:1619
void * palloc(Size size)
Definition mcxt.c:1390
MemoryContext CurrentMemoryContext
Definition mcxt.c:161
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:125
static char * errmsg
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:138
#define ERRCODE_DATA_CORRUPTED
#define NAMEDATALEN
#define MAXPGPATH
#define NIL
Definition pg_list.h:68
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define snprintf
Definition port.h:261
static uint32 DatumGetUInt32(Datum X)
Definition postgres.h:222
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:332
#define PointerGetDatum(X)
Definition postgres.h:354
#define InvalidOid
unsigned int Oid
void pq_set_parallel_leader(pid_t pid, ProcNumber procNumber)
Definition pqmq.c:85
void pq_redirect_to_shm_mq(dsm_segment *seg, shm_mq_handle *mqh)
Definition pqmq.c:56
static int fb(int x)
int SendProcSignal(pid_t pid, ProcSignalReason reason, ProcNumber procNumber)
Definition procsignal.c:296
@ PROCSIG_REPACK_MESSAGE
Definition procsignal.h:40
#define RelFileLocatorEquals(locator1, locator2)
void DecodingWorkerFileName(char *fname, Oid relid, uint32 seq)
Definition repack.c:3644
static void RepackWorkerShutdown(int code, Datum arg)
#define PGREPACK_PLUGIN
static void repack_cleanup_logical_decoding(LogicalDecodingContext *ctx)
static RelFileLocator repacked_rel_toast_locator
bool AmRepackWorker(void)
static bool decode_concurrent_changes(LogicalDecodingContext *ctx, DecodingWorkerShared *shared)
bool change_useless_for_repack(XLogRecordBuffer *buf)
static XLogSegNo repack_current_segment
static dsm_segment * worker_dsm_segment
static void export_initial_snapshot(Snapshot snapshot, DecodingWorkerShared *shared)
static LogicalDecodingContext * repack_setup_logical_decoding(Oid relid)
static RelFileLocator repacked_rel_locator
void RepackWorkerMain(Datum main_arg)
static bool am_repack_worker
ResourceOwner CurrentResourceOwner
Definition resowner.c:173
void SharedFileSetAttach(SharedFileSet *fileset, dsm_segment *seg)
void shm_mq_set_sender(shm_mq *mq, PGPROC *proc)
Definition shm_mq.c:226
shm_mq_handle * shm_mq_attach(shm_mq *mq, dsm_segment *seg, BackgroundWorkerHandle *handle)
Definition shm_mq.c:292
void ReplicationSlotCreate(const char *name, bool db_specific, ReplicationSlotPersistency persistency, bool two_phase, bool repack, bool failover, bool synced)
Definition slot.c:378
void ReplicationSlotDropAcquired(bool try_disable)
Definition slot.c:1031
@ RS_TEMPORARY
Definition slot.h:47
Snapshot SnapBuildInitialSnapshot(SnapBuild *builder)
Definition snapbuild.c:444
void SerializeSnapshot(Snapshot snapshot, char *start_address)
Definition snapmgr.c:1736
Size EstimateSnapshotSpace(Snapshot snapshot)
Definition snapmgr.c:1712
void InvalidateCatalogSnapshot(void)
Definition snapmgr.c:455
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56
PGPROC * MyProc
Definition proc.c:71
bool BecomeLockGroupMember(PGPROC *leader, int pid)
Definition proc.c:2105
ConditionVariable cv
char error_queue[FLEXIBLE_ARRAY_MEMBER]
XLogReaderState * reader
Definition logical.h:42
MemoryContext context
Definition logical.h:36
void * output_writer_private
Definition logical.h:81
RelFileNumber relNumber
RelFileLocator rd_locator
Definition rel.h:57
Form_pg_class rd_rel
Definition rel.h:111
XLogPageReadCB page_read
Definition xlogreader.h:94
DecodedXLogRecord * record
Definition xlogreader.h:235
XLogRecPtr EndRecPtr
Definition xlogreader.h:206
XLogReaderRoutine routine
Definition xlogreader.h:179
TimeLineID currTLI
Definition xlogreader.h:284
void * private_data
Definition xlogreader.h:195
void table_close(Relation relation, LOCKMODE lockmode)
Definition table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition table.c:40
#define TransactionIdIsValid(xid)
Definition transam.h:41
bool XactReadOnly
Definition xact.c:84
TransactionId GetTopTransactionIdIfAny(void)
Definition xact.c:443
void StartTransactionCommand(void)
Definition xact.c:3112
int XactIsoLevel
Definition xact.c:81
void CommitTransactionCommand(void)
Definition xact.c:3210
#define XACT_REPEATABLE_READ
Definition xact.h:38
int wal_segment_size
Definition xlog.c:150
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47
uint64 XLogRecPtr
Definition xlogdefs.h:21
#define InvalidXLogRecPtr
Definition xlogdefs.h:28
uint64 XLogSegNo
Definition xlogdefs.h:52
bool XLogRecGetBlockTagExtended(XLogReaderState *record, uint8 block_id, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum, Buffer *prefetch_buffer)
XLogRecord * XLogReadRecord(XLogReaderState *state, char **errormsg)
Definition xlogreader.c:391
#define XL_ROUTINE(...)
Definition xlogreader.h:117
void wal_segment_close(XLogReaderState *state)
Definition xlogutils.c:831
void wal_segment_open(XLogReaderState *state, XLogSegNo nextSegNo, TimeLineID *tli_p)
Definition xlogutils.c:806
int read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *cur_page)
Definition xlogutils.c:845
int read_local_xlog_page_no_wait(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *cur_page)
Definition xlogutils.c:857
WaitLSNResult WaitForLSN(WaitLSNType lsnType, XLogRecPtr targetLSN, int64 timeout)
Definition xlogwait.c:403
WaitLSNResult
Definition xlogwait.h:26
@ WAIT_LSN_RESULT_TIMEOUT
Definition xlogwait.h:30
@ WAIT_LSN_RESULT_SUCCESS
Definition xlogwait.h:27
@ WAIT_LSN_TYPE_PRIMARY_FLUSH
Definition xlogwait.h:44