PostgreSQL Source Code git master
Loading...
Searching...
No Matches
repack_worker.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * repack_worker.c
4 * Implementation of the background worker for ad-hoc logical decoding
5 * during REPACK (CONCURRENTLY).
6 *
7 *
8 * Copyright (c) 2026, PostgreSQL Global Development Group
9 *
10 *
11 * IDENTIFICATION
12 * src/backend/commands/repack_worker.c
13 *
14 *-------------------------------------------------------------------------
15 */
16#include "postgres.h"
17
18#include "access/table.h"
20#include "access/xlogutils.h"
21#include "access/xlogwait.h"
22#include "commands/repack.h"
24#include "libpq/pqmq.h"
26#include "storage/ipc.h"
27#include "storage/proc.h"
28#include "tcop/tcopprot.h"
29#include "utils/memutils.h"
30
31#define REPL_PLUGIN_NAME "pgrepack"
32
33static void RepackWorkerShutdown(int code, Datum arg);
36static void export_initial_snapshot(Snapshot snapshot,
37 DecodingWorkerShared *shared);
39 DecodingWorkerShared *shared);
40
41/* Is this process a REPACK worker? */
42static bool am_repack_worker = false;
43
44/* The WAL segment being decoded. */
46
47/*
48 * Keep track of the table we're processing, to skip logical decoding of data
49 * from other relations.
50 */
53
54
55/* REPACK decoding worker entry point */
56void
58{
59 dsm_segment *seg;
61 shm_mq *mq;
64 SharedFileSet *sfs;
65 Snapshot snapshot;
66
67 am_repack_worker = true;
68
69 /*
70 * Override the default bgworker_die() with die() so we can use
71 * CHECK_FOR_INTERRUPTS().
72 */
75
77 if (seg == NULL)
80 errmsg("could not map dynamic shared memory segment"));
81
83 shared->dsm_seg = seg;
84
85 /* Arrange to signal the leader if we exit. */
87
88 /*
89 * Join locking group - see the comments around the call of
90 * start_repack_decoding_worker().
91 */
92 if (!BecomeLockGroupMember(shared->backend_proc, shared->backend_pid))
93 return; /* The leader is not running anymore. */
94
95 /*
96 * Setup a queue to send error messages to the backend that launched this
97 * worker.
98 */
99 mq = (shm_mq *) (char *) BUFFERALIGN(shared->error_queue);
101 mqh = shm_mq_attach(mq, seg, NULL);
104 shared->backend_proc_number);
105
106 /* Connect to the database. */
108
109 /*
110 * A transaction is needed to open the relation, and it also provides us
111 * with a resource owner.
112 */
114
116
117 /*
118 * Not sure the spinlock is needed here - the backend should not change
119 * anything in the shared memory until we have serialized the snapshot.
120 */
121 SpinLockAcquire(&shared->mutex);
123 sfs = &shared->sfs;
124 SpinLockRelease(&shared->mutex);
125
126 SharedFileSetAttach(sfs, seg);
127
128 /*
129 * Prepare to capture the concurrent data changes ourselves.
130 */
132
133 /* Announce that we're ready. */
134 SpinLockAcquire(&shared->mutex);
135 shared->initialized = true;
136 SpinLockRelease(&shared->mutex);
137 ConditionVariableSignal(&shared->cv);
138
139 /* There doesn't seem to be a nice API to set these */
141 XactReadOnly = true;
142
143 /* Build the initial snapshot and export it. */
144 snapshot = SnapBuildInitialSnapshot(decoding_ctx->snapshot_builder);
145 export_initial_snapshot(snapshot, shared);
146
147 /*
148 * Only historic snapshots should be used now. Do not let us restrict the
149 * progress of the xmin horizon.
150 */
152
153 for (;;)
154 {
155 bool stop = decode_concurrent_changes(decoding_ctx, shared);
156
157 if (stop)
158 break;
159
160 }
161
162 /* Cleanup. */
165}
166
167/*
168 * See ParallelWorkerShutdown for details.
169 */
170static void
181
182bool
184{
185 return am_repack_worker;
186}
187
188/*
189 * This function is much like pg_create_logical_replication_slot() except that
190 * the new slot is neither released (if anyone else could read changes from
191 * our slot, we could miss changes other backends do while we copy the
192 * existing data into a temporary table), nor persisted (it's easier to handle
193 * crash by restarting all the work from scratch).
194 */
197{
198 Relation rel;
201 NameData slotname;
204
205 /*
206 * REPACK CONCURRENTLY is not allowed in a transaction block, so this
207 * should never fire.
208 */
210
211 /*
212 * Make sure we can use logical decoding.
213 */
216
217 /*
218 * A single backend should not execute multiple REPACK commands at a time,
219 * so use PID to make the slot unique.
220 *
221 * RS_TEMPORARY so that the slot gets cleaned up on ERROR.
222 */
223 snprintf(NameStr(slotname), NAMEDATALEN, "repack_%d", MyProcPid);
224 ReplicationSlotCreate(NameStr(slotname), true, RS_TEMPORARY, false, false,
225 false);
226
228
229 /*
230 * Neither prepare_write nor do_write callback nor update_progress is
231 * useful for us.
232 */
234 NIL,
235 true,
237 XL_ROUTINE(.page_read = read_local_xlog_page,
238 .segment_open = wal_segment_open,
239 .segment_close = wal_segment_close),
240 NULL, NULL, NULL);
241
242 /*
243 * We have no control over the fast_forward setting, so at least check it.
244 */
245 Assert(!ctx->fast_forward);
246
247 /* Avoid logical decoding of other relations. */
248 rel = table_open(relid, AccessShareLock);
250 toastrelid = rel->rd_rel->reltoastrelid;
252 {
254
255 /* Avoid logical decoding of other TOAST relations. */
259 }
261
263
264 /*
265 * decode_concurrent_changes() needs a non-blocking callback.
266 */
268
269 /* Some WAL records should have been read. */
271
272 /*
273 * Initialize repack_current_segment so that we can notice WAL segment
274 * boundaries.
275 */
278
279 /* Our private state belongs to the decoding context. */
281
282 /*
283 * read_local_xlog_page_no_wait() needs to be able to indicate the end of
284 * WAL.
285 */
289
290#ifdef USE_ASSERT_CHECKING
291 dstate->relid = relid;
292#endif
293
294 dstate->change_cxt = AllocSetContextCreate(ctx->context,
295 "REPACK - change",
297
298 /* The file will be set as soon as we have it opened. */
299 dstate->file = NULL;
300
301 /*
302 * Memory context and resource owner for long-lived resources.
303 */
304 dstate->worker_cxt = CurrentMemoryContext;
305 dstate->worker_resowner = CurrentResourceOwner;
306
308
309 return ctx;
310}
311
312static void
324
325/*
326 * Make snapshot available to the backend that launched the decoding worker.
327 */
328static void
330{
331 char fname[MAXPGPATH];
332 BufFile *file;
334 char *snap_space;
335
337 snap_space = (char *) palloc(snap_size);
338 SerializeSnapshot(snapshot, snap_space);
339
340 DecodingWorkerFileName(fname, shared->relid, shared->last_exported + 1);
341 file = BufFileCreateFileSet(&shared->sfs.fs, fname);
342 /* To make restoration easier, write the snapshot size first. */
343 BufFileWrite(file, &snap_size, sizeof(snap_size));
345 BufFileClose(file);
347
348 /* Increase the counter to tell the backend that the file is available. */
349 SpinLockAcquire(&shared->mutex);
350 shared->last_exported++;
351 SpinLockRelease(&shared->mutex);
352 ConditionVariableSignal(&shared->cv);
353}
354
355/*
356 * Decode logical changes from the WAL sequence and store them to a file.
357 *
358 * If true is returned, there is no more work for the worker.
359 */
360static bool
362 DecodingWorkerShared *shared)
363{
365 XLogRecPtr lsn_upto;
366 bool done;
367 char fname[MAXPGPATH];
368
370
371 /* Open the output file. */
372 DecodingWorkerFileName(fname, shared->relid, shared->last_exported + 1);
373 dstate->file = BufFileCreateFileSet(&shared->sfs.fs, fname);
374
375 SpinLockAcquire(&shared->mutex);
376 lsn_upto = shared->lsn_upto;
377 done = shared->done;
378 SpinLockRelease(&shared->mutex);
379
380 while (true)
381 {
382 XLogRecord *record;
384 char *errm = NULL;
385 XLogRecPtr end_lsn;
386
388
389 record = XLogReadRecord(ctx->reader, &errm);
390 if (record)
391 {
393
394 /*
395 * If a WAL segment boundary has been crossed, inform the decoding
396 * system that the catalog_xmin can advance.
397 */
398 end_lsn = ctx->reader->EndRecPtr;
401 {
403 elog(DEBUG1, "REPACK: confirmed receive location %X/%X",
404 (uint32) (end_lsn >> 32), (uint32) end_lsn);
406 }
407 }
408 else
409 {
411
412 if (errm)
414 errmsg("%s", errm));
415
416 /*
417 * In the decoding loop we do not want to get blocked when there
418 * is no more WAL available, otherwise the loop would become
419 * uninterruptible.
420 */
422 if (priv->end_of_wal)
423 /* Do not miss the end of WAL condition next time. */
424 priv->end_of_wal = false;
425 else
427 errmsg("could not read WAL record"));
428 }
429
430 /*
431 * Whether or not we could read a new record, keep checking if
432 * 'lsn_upto' was specified.
433 */
434 if (!XLogRecPtrIsValid(lsn_upto))
435 {
436 SpinLockAcquire(&shared->mutex);
437 lsn_upto = shared->lsn_upto;
438 /* 'done' should be set at the same time as 'lsn_upto' */
439 done = shared->done;
440 SpinLockRelease(&shared->mutex);
441 }
442 if (XLogRecPtrIsValid(lsn_upto) &&
443 ctx->reader->EndRecPtr >= lsn_upto)
444 break;
445
446 if (record == NULL)
447 {
448 int64 timeout = 0;
449 WaitLSNResult res;
450
451 /*
452 * Before we retry reading, wait until new WAL is flushed.
453 *
454 * There is a race condition such that the backend executing
455 * REPACK determines 'lsn_upto', but before it sets the shared
456 * variable, we reach the end of WAL. In that case we'd need to
457 * wait until the next WAL flush (unrelated to REPACK). Although
458 * that should not be a problem in a busy system, it might be
459 * noticeable in other cases, including regression tests (which
460 * are not necessarily executed in parallel). Therefore it makes
461 * sense to use a timeout.
462 *
463 * If lsn_upto is valid, WAL records having LSN lower than that
464 * should already have been flushed to disk.
465 */
466 if (!XLogRecPtrIsValid(lsn_upto))
467 timeout = 100L;
469 ctx->reader->EndRecPtr + 1,
470 timeout);
471 if (res != WAIT_LSN_RESULT_SUCCESS &&
474 errmsg("waiting for WAL failed"));
475 }
476 }
477
478 /*
479 * Close the file so we can make it available to the backend.
480 */
481 BufFileClose(dstate->file);
482 dstate->file = NULL;
483 SpinLockAcquire(&shared->mutex);
484 shared->lsn_upto = InvalidXLogRecPtr;
485 shared->last_exported++;
486 SpinLockRelease(&shared->mutex);
487 ConditionVariableSignal(&shared->cv);
488
489 return done;
490}
491
492/*
493 * Does the WAL record contain a data change that this backend does not need
494 * to decode on behalf of REPACK (CONCURRENTLY)?
495 */
496bool
498{
500 RelFileLocator locator;
501
502 /* The TOAST locator should not be set unless the main relation's locator is. */
505
506 /*
507 * Backends not involved in REPACK (CONCURRENTLY) should not do the
508 * filtering.
509 */
511 return false;
512
513 /*
514 * If the record does not contain block 0, it's probably not an INSERT /
515 * UPDATE / DELETE. In any case, we do not have enough information to
516 * filter the change out.
517 */
518 if (!XLogRecGetBlockTagExtended(r, 0, &locator, NULL, NULL, NULL))
519 return false;
520
521 /*
522 * Decode the change if it belongs to the table we are repacking, or if it
523 * belongs to its TOAST relation.
524 */
526 return false;
529 return false;
530
531 /* Filter out changes of other tables. */
532 return true;
533}
void BackgroundWorkerUnblockSignals(void)
Definition bgworker.c:949
void BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags)
Definition bgworker.c:909
void BufFileWrite(BufFile *file, const void *ptr, size_t size)
Definition buffile.c:677
BufFile * BufFileCreateFileSet(FileSet *fileset, const char *name)
Definition buffile.c:268
void BufFileClose(BufFile *file)
Definition buffile.c:413
#define NameStr(name)
Definition c.h:835
#define BUFFERALIGN(LEN)
Definition c.h:898
#define Assert(condition)
Definition c.h:943
int64_t int64
Definition c.h:621
uint32_t uint32
Definition c.h:624
#define OidIsValid(objectId)
Definition c.h:858
size_t Size
Definition c.h:689
void ConditionVariableSignal(ConditionVariable *cv)
void LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogReaderState *record)
Definition decode.c:89
void dsm_detach(dsm_segment *seg)
Definition dsm.c:811
void * dsm_segment_address(dsm_segment *seg)
Definition dsm.c:1103
dsm_segment * dsm_attach(dsm_handle h)
Definition dsm.c:673
Datum arg
Definition elog.c:1322
int errcode(int sqlerrcode)
Definition elog.c:874
#define DEBUG1
Definition elog.h:30
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:227
#define ereport(elevel,...)
Definition elog.h:151
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
#define palloc0_object(type)
Definition fe_memutils.h:75
int MyProcPid
Definition globals.c:47
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:344
#define AccessShareLock
Definition lockdefs.h:36
void LogicalConfirmReceivedLocation(XLogRecPtr lsn)
Definition logical.c:1814
void FreeDecodingContext(LogicalDecodingContext *ctx)
Definition logical.c:671
void DecodingContextFindStartpoint(LogicalDecodingContext *ctx)
Definition logical.c:627
LogicalDecodingContext * CreateInitDecodingContext(const char *plugin, List *output_plugin_options, bool need_full_snapshot, XLogRecPtr restart_lsn, XLogReaderRoutine *xl_routine, LogicalOutputPluginWriterPrepareWrite prepare_write, LogicalOutputPluginWriterWrite do_write, LogicalOutputPluginWriterUpdateProgress update_progress)
Definition logical.c:324
void CheckLogicalDecodingRequirements(void)
Definition logical.c:111
void EnsureLogicalDecodingEnabled(void)
Definition logicalctl.c:303
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
MemoryContext CurrentMemoryContext
Definition mcxt.c:160
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
static char * errmsg
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
#define NAMEDATALEN
#define MAXPGPATH
#define NIL
Definition pg_list.h:68
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define die(msg)
#define pqsignal
Definition port.h:547
#define snprintf
Definition port.h:260
static uint32 DatumGetUInt32(Datum X)
Definition postgres.h:222
static Datum PointerGetDatum(const void *X)
Definition postgres.h:342
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:332
#define InvalidOid
unsigned int Oid
void pq_set_parallel_leader(pid_t pid, ProcNumber procNumber)
Definition pqmq.c:85
void pq_redirect_to_shm_mq(dsm_segment *seg, shm_mq_handle *mqh)
Definition pqmq.c:56
static int fb(int x)
int SendProcSignal(pid_t pid, ProcSignalReason reason, ProcNumber procNumber)
Definition procsignal.c:287
@ PROCSIG_REPACK_MESSAGE
Definition procsignal.h:39
#define RelFileLocatorEquals(locator1, locator2)
void DecodingWorkerFileName(char *fname, Oid relid, uint32 seq)
Definition repack.c:3494
static void RepackWorkerShutdown(int code, Datum arg)
static void repack_cleanup_logical_decoding(LogicalDecodingContext *ctx)
static RelFileLocator repacked_rel_toast_locator
bool AmRepackWorker(void)
static bool decode_concurrent_changes(LogicalDecodingContext *ctx, DecodingWorkerShared *shared)
bool change_useless_for_repack(XLogRecordBuffer *buf)
static XLogSegNo repack_current_segment
#define REPL_PLUGIN_NAME
static void export_initial_snapshot(Snapshot snapshot, DecodingWorkerShared *shared)
static LogicalDecodingContext * repack_setup_logical_decoding(Oid relid)
static RelFileLocator repacked_rel_locator
void RepackWorkerMain(Datum main_arg)
static bool am_repack_worker
ResourceOwner CurrentResourceOwner
Definition resowner.c:173
void SharedFileSetAttach(SharedFileSet *fileset, dsm_segment *seg)
void shm_mq_set_sender(shm_mq *mq, PGPROC *proc)
Definition shm_mq.c:226
shm_mq_handle * shm_mq_attach(shm_mq *mq, dsm_segment *seg, BackgroundWorkerHandle *handle)
Definition shm_mq.c:292
void ReplicationSlotCreate(const char *name, bool db_specific, ReplicationSlotPersistency persistency, bool two_phase, bool failover, bool synced)
Definition slot.c:374
void ReplicationSlotDropAcquired(void)
Definition slot.c:1029
void CheckSlotPermissions(void)
Definition slot.c:1674
@ RS_TEMPORARY
Definition slot.h:47
Snapshot SnapBuildInitialSnapshot(SnapBuild *builder)
Definition snapbuild.c:458
void SerializeSnapshot(Snapshot snapshot, char *start_address)
Definition snapmgr.c:1736
Size EstimateSnapshotSpace(Snapshot snapshot)
Definition snapmgr.c:1712
void InvalidateCatalogSnapshot(void)
Definition snapmgr.c:455
static void SpinLockRelease(volatile slock_t *lock)
Definition spin.h:62
static void SpinLockAcquire(volatile slock_t *lock)
Definition spin.h:56
PGPROC * MyProc
Definition proc.c:71
bool BecomeLockGroupMember(PGPROC *leader, int pid)
Definition proc.c:2072
ConditionVariable cv
char error_queue[FLEXIBLE_ARRAY_MEMBER]
XLogReaderState * reader
Definition logical.h:42
MemoryContext context
Definition logical.h:36
void * output_writer_private
Definition logical.h:81
RelFileNumber relNumber
RelFileLocator rd_locator
Definition rel.h:57
Form_pg_class rd_rel
Definition rel.h:111
XLogPageReadCB page_read
Definition xlogreader.h:94
DecodedXLogRecord * record
Definition xlogreader.h:235
XLogRecPtr EndRecPtr
Definition xlogreader.h:206
XLogReaderRoutine routine
Definition xlogreader.h:179
void * private_data
Definition xlogreader.h:195
Definition c.h:830
void table_close(Relation relation, LOCKMODE lockmode)
Definition table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition table.c:40
#define TransactionIdIsValid(xid)
Definition transam.h:41
bool XactReadOnly
Definition xact.c:84
TransactionId GetTopTransactionIdIfAny(void)
Definition xact.c:443
void StartTransactionCommand(void)
Definition xact.c:3109
int XactIsoLevel
Definition xact.c:81
void CommitTransactionCommand(void)
Definition xact.c:3207
#define XACT_REPEATABLE_READ
Definition xact.h:38
int wal_segment_size
Definition xlog.c:150
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
uint64 XLogRecPtr
Definition xlogdefs.h:21
#define InvalidXLogRecPtr
Definition xlogdefs.h:28
uint64 XLogSegNo
Definition xlogdefs.h:52
bool XLogRecGetBlockTagExtended(XLogReaderState *record, uint8 block_id, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum, Buffer *prefetch_buffer)
XLogRecord * XLogReadRecord(XLogReaderState *state, char **errormsg)
Definition xlogreader.c:391
#define XL_ROUTINE(...)
Definition xlogreader.h:117
void wal_segment_close(XLogReaderState *state)
Definition xlogutils.c:831
void wal_segment_open(XLogReaderState *state, XLogSegNo nextSegNo, TimeLineID *tli_p)
Definition xlogutils.c:806
int read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *cur_page)
Definition xlogutils.c:845
int read_local_xlog_page_no_wait(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *cur_page)
Definition xlogutils.c:857
WaitLSNResult WaitForLSN(WaitLSNType lsnType, XLogRecPtr targetLSN, int64 timeout)
Definition xlogwait.c:378
WaitLSNResult
Definition xlogwait.h:26
@ WAIT_LSN_RESULT_TIMEOUT
Definition xlogwait.h:30
@ WAIT_LSN_RESULT_SUCCESS
Definition xlogwait.h:27
@ WAIT_LSN_TYPE_PRIMARY_FLUSH
Definition xlogwait.h:44