PostgreSQL Source Code  git master
worker_internal.h File Reference
#include <signal.h>
#include "access/xlogdefs.h"
#include "catalog/pg_subscription.h"
#include "datatype/timestamp.h"
#include "storage/lock.h"
#include "storage/spin.h"
Include dependency graph for worker_internal.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  LogicalRepWorker
 

Typedefs

typedef struct LogicalRepWorker LogicalRepWorker
 

Functions

void logicalrep_worker_attach (int slot)
 
LogicalRepWorker * logicalrep_worker_find (Oid subid, Oid relid, bool only_running)
 
List * logicalrep_workers_find (Oid subid, bool only_running)
 
void logicalrep_worker_launch (Oid dbid, Oid subid, const char *subname, Oid userid, Oid relid)
 
void logicalrep_worker_stop (Oid subid, Oid relid)
 
void logicalrep_worker_wakeup (Oid subid, Oid relid)
 
void logicalrep_worker_wakeup_ptr (LogicalRepWorker *worker)
 
int logicalrep_sync_worker_count (Oid subid)
 
void ReplicationOriginNameForTablesync (Oid suboid, Oid relid, char *originname, int szorgname)
 
char * LogicalRepSyncTableStart (XLogRecPtr *origin_startpos)
 
bool AllTablesyncsReady (void)
 
void UpdateTwoPhaseState (Oid suboid, char new_state)
 
void process_syncing_tables (XLogRecPtr current_lsn)
 
void invalidate_syncing_table_states (Datum arg, int cacheid, uint32 hashvalue)
 
static bool am_tablesync_worker (void)
 

Variables

MemoryContext ApplyContext
 
struct WalReceiverConn * LogRepWorkerWalRcvConn
 
Subscription * MySubscription
 
LogicalRepWorker * MyLogicalRepWorker
 
bool in_remote_transaction
 

Typedef Documentation

◆ LogicalRepWorker

Function Documentation

◆ AllTablesyncsReady()

bool AllTablesyncsReady ( void  )

Definition at line 1230 of file tablesync.c.

References CommitTransactionCommand(), FetchTableStates(), list_length(), and pgstat_report_stat().

Referenced by ApplyWorkerMain(), and process_syncing_tables_for_apply().

1231 {
1232  bool started_tx = false;
1233  bool has_subrels = false;
1234 
1235  /* We need up-to-date sync state info for subscription tables here. */
1236  has_subrels = FetchTableStates(&started_tx);
1237 
1238  if (started_tx)
1239  {
1240  CommitTransactionCommand();
1241  pgstat_report_stat(false);
1242  }
1243 
1244  /*
1245  * Return false when there are no tables in subscription or not all tables
1246  * are in ready state; true otherwise.
1247  */
1248  return has_subrels && list_length(table_states_not_ready) == 0;
1249 }
static bool FetchTableStates(bool *started_tx)
Definition: tablesync.c:1169
void CommitTransactionCommand(void)
Definition: xact.c:2939
void pgstat_report_stat(bool disconnect)
Definition: pgstat.c:843
static List * table_states_not_ready
Definition: tablesync.c:121
static int list_length(const List *l)
Definition: pg_list.h:149

◆ am_tablesync_worker()

static bool am_tablesync_worker ( void  )
inline static

Definition at line 97 of file worker_internal.h.

References OidIsValid, and LogicalRepWorker::relid.

Referenced by apply_handle_begin_prepare(), apply_handle_origin(), apply_handle_stream_prepare(), ApplyWorkerMain(), process_syncing_tables(), and should_apply_changes_for_rel().

98 {
99  return OidIsValid(MyLogicalRepWorker->relid);
100 }
#define OidIsValid(objectId)
Definition: c.h:710
LogicalRepWorker * MyLogicalRepWorker
Definition: launcher.c:57

◆ invalidate_syncing_table_states()

void invalidate_syncing_table_states ( Datum  arg,
int  cacheid,
uint32  hashvalue 
)

Definition at line 268 of file tablesync.c.

References table_states_valid.

Referenced by ApplyWorkerMain().

269 {
270  table_states_valid = false;
271 }
static bool table_states_valid
Definition: tablesync.c:120

◆ logicalrep_sync_worker_count()

int logicalrep_sync_worker_count ( Oid  subid)

Definition at line 659 of file launcher.c.

References Assert, i, LWLockHeldByMe(), max_logical_replication_workers, OidIsValid, LogicalRepWorker::relid, LogicalRepWorker::subid, and LogicalRepCtxStruct::workers.

Referenced by logicalrep_worker_launch(), and process_syncing_tables_for_apply().

660 {
661  int i;
662  int res = 0;
663 
664  Assert(LWLockHeldByMe(LogicalRepWorkerLock));
665 
666  /* Search for attached worker for a given subscription id. */
667  for (i = 0; i < max_logical_replication_workers; i++)
668  {
669  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
670 
671  if (w->subid == subid && OidIsValid(w->relid))
672  res++;
673  }
674 
675  return res;
676 }
bool LWLockHeldByMe(LWLock *l)
Definition: lwlock.c:1919
LogicalRepWorker workers[FLEXIBLE_ARRAY_MEMBER]
Definition: launcher.c:65
#define OidIsValid(objectId)
Definition: c.h:710
#define Assert(condition)
Definition: c.h:804
int max_logical_replication_workers
Definition: launcher.c:54
LogicalRepCtxStruct * LogicalRepCtx
Definition: launcher.c:68
int i

◆ logicalrep_worker_attach()

void logicalrep_worker_attach ( int  slot)

Definition at line 564 of file launcher.c.

References Assert, before_shmem_exit(), ereport, errcode(), errmsg(), ERROR, LogicalRepWorker::in_use, logicalrep_worker_onexit(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), max_logical_replication_workers, MyProc, LogicalRepWorker::proc, and LogicalRepCtxStruct::workers.

Referenced by ApplyWorkerMain().

565 {
566  /* Block concurrent access. */
567  LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
568 
569  Assert(slot >= 0 && slot < max_logical_replication_workers);
570  MyLogicalRepWorker = &LogicalRepCtx->workers[slot];
571 
572  if (!MyLogicalRepWorker->in_use)
573  {
574  LWLockRelease(LogicalRepWorkerLock);
575  ereport(ERROR,
576  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
577  errmsg("logical replication worker slot %d is empty, cannot attach",
578  slot)));
579  }
580 
581  if (MyLogicalRepWorker->proc)
582  {
583  LWLockRelease(LogicalRepWorkerLock);
584  ereport(ERROR,
585  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
586  errmsg("logical replication worker slot %d is already used by "
587  "another worker, cannot attach", slot)));
588  }
589 
590  MyLogicalRepWorker->proc = MyProc;
591  before_shmem_exit(logicalrep_worker_onexit, (Datum) 0);
592 
593  LWLockRelease(LogicalRepWorkerLock);
594 }
PGPROC * MyProc
Definition: proc.c:68
LogicalRepWorker workers[FLEXIBLE_ARRAY_MEMBER]
Definition: launcher.c:65
int errcode(int sqlerrcode)
Definition: elog.c:698
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1803
LogicalRepWorker * MyLogicalRepWorker
Definition: launcher.c:57
#define ERROR
Definition: elog.h:46
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:333
static void logicalrep_worker_onexit(int code, Datum arg)
Definition: launcher.c:643
uintptr_t Datum
Definition: postgres.h:411
#define ereport(elevel,...)
Definition: elog.h:157
#define Assert(condition)
Definition: c.h:804
int max_logical_replication_workers
Definition: launcher.c:54
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1199
int errmsg(const char *fmt,...)
Definition: elog.c:909
LogicalRepCtxStruct * LogicalRepCtx
Definition: launcher.c:68

◆ logicalrep_worker_find()

LogicalRepWorker* logicalrep_worker_find ( Oid  subid,
Oid  relid,
bool  only_running 
)

Definition at line 215 of file launcher.c.

References Assert, i, LogicalRepWorker::in_use, LWLockHeldByMe(), max_logical_replication_workers, LogicalRepWorker::proc, LogicalRepWorker::relid, LogicalRepWorker::subid, and LogicalRepCtxStruct::workers.

Referenced by ApplyLauncherMain(), logicalrep_worker_stop(), logicalrep_worker_wakeup(), process_syncing_tables_for_apply(), wait_for_relation_state_change(), and wait_for_worker_state_change().

216 {
217  int i;
218  LogicalRepWorker *res = NULL;
219 
220  Assert(LWLockHeldByMe(LogicalRepWorkerLock));
221 
222  /* Search for attached worker for a given subscription id. */
223  for (i = 0; i < max_logical_replication_workers; i++)
224  {
225  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
226 
227  if (w->in_use && w->subid == subid && w->relid == relid &&
228  (!only_running || w->proc))
229  {
230  res = w;
231  break;
232  }
233  }
234 
235  return res;
236 }
bool LWLockHeldByMe(LWLock *l)
Definition: lwlock.c:1919
LogicalRepWorker workers[FLEXIBLE_ARRAY_MEMBER]
Definition: launcher.c:65
#define Assert(condition)
Definition: c.h:804
int max_logical_replication_workers
Definition: launcher.c:54
LogicalRepCtxStruct * LogicalRepCtx
Definition: launcher.c:68
int i

◆ logicalrep_worker_launch()

void logicalrep_worker_launch ( Oid  dbid,
Oid  subid,
const char *  subname,
Oid  userid,
Oid  relid 
)

Definition at line 266 of file launcher.c.

References Assert, BackgroundWorker::bgw_flags, BackgroundWorker::bgw_function_name, BackgroundWorker::bgw_library_name, BackgroundWorker::bgw_main_arg, BGW_MAXLEN, BackgroundWorker::bgw_name, BGW_NEVER_RESTART, BackgroundWorker::bgw_notify_pid, BackgroundWorker::bgw_restart_time, BackgroundWorker::bgw_start_time, BackgroundWorker::bgw_type, BGWORKER_BACKEND_DATABASE_CONNECTION, BGWORKER_SHMEM_ACCESS, BgWorkerStart_RecoveryFinished, LogicalRepWorker::dbid, DEBUG1, elog, ereport, errcode(), errhint(), errmsg(), errmsg_internal(), ERROR, LogicalRepWorker::generation, GetCurrentTimestamp(), i, LogicalRepWorker::in_use, Int32GetDatum, InvalidXLogRecPtr, LogicalRepWorker::last_lsn, LogicalRepWorker::last_recv_time, LogicalRepWorker::last_send_time, LogicalRepWorker::launch_time, logicalrep_sync_worker_count(), logicalrep_worker_cleanup(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), max_logical_replication_workers, max_replication_slots, max_sync_workers_per_subscription, MyProcPid, now(), OidIsValid, LogicalRepWorker::proc, RegisterDynamicBackgroundWorker(), LogicalRepWorker::relid, LogicalRepWorker::relstate, LogicalRepWorker::relstate_lsn, LogicalRepWorker::reply_lsn, LogicalRepWorker::reply_time, snprintf, LogicalRepWorker::subid, TIMESTAMP_NOBEGIN, TimestampDifferenceExceeds(), LogicalRepWorker::userid, WaitForReplicationWorkerAttach(), wal_receiver_timeout, WARNING, and LogicalRepCtxStruct::workers.

Referenced by ApplyLauncherMain(), and process_syncing_tables_for_apply().

268 {
269  BackgroundWorker bgw;
270  BackgroundWorkerHandle *bgw_handle;
271  uint16 generation;
272  int i;
273  int slot = 0;
274  LogicalRepWorker *worker = NULL;
275  int nsyncworkers;
277 
278  ereport(DEBUG1,
279  (errmsg_internal("starting logical replication worker for subscription \"%s\"",
280  subname)));
281 
282  /* Report this after the initial starting message for consistency. */
283  if (max_replication_slots == 0)
284  ereport(ERROR,
285  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
286  errmsg("cannot start logical replication workers when max_replication_slots = 0")));
287 
288  /*
289  * We need to do the modification of the shared memory under lock so that
290  * we have consistent view.
291  */
292  LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
293 
294 retry:
295  /* Find unused worker slot. */
296  for (i = 0; i < max_logical_replication_workers; i++)
297  {
298  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
299 
300  if (!w->in_use)
301  {
302  worker = w;
303  slot = i;
304  break;
305  }
306  }
307 
308  nsyncworkers = logicalrep_sync_worker_count(subid);
309 
310  now = GetCurrentTimestamp();
311 
312  /*
313  * If we didn't find a free slot, try to do garbage collection. The
314  * reason we do this is because if some worker failed to start up and its
315  * parent has crashed while waiting, the in_use state was never cleared.
316  */
317  if (worker == NULL || nsyncworkers >= max_sync_workers_per_subscription)
318  {
319  bool did_cleanup = false;
320 
321  for (i = 0; i < max_logical_replication_workers; i++)
322  {
323  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
324 
325  /*
326  * If the worker was marked in use but didn't manage to attach in
327  * time, clean it up.
328  */
329  if (w->in_use && !w->proc &&
332  {
333  elog(WARNING,
334  "logical replication worker for subscription %u took too long to start; canceled",
335  w->subid);
336 
338  did_cleanup = true;
339  }
340  }
341 
342  if (did_cleanup)
343  goto retry;
344  }
345 
346  /*
347  * If we reached the sync worker limit per subscription, just exit
348  * silently as we might get here because of an otherwise harmless race
349  * condition.
350  */
351  if (nsyncworkers >= max_sync_workers_per_subscription)
352  {
353  LWLockRelease(LogicalRepWorkerLock);
354  return;
355  }
356 
357  /*
358  * However if there are no more free worker slots, inform user about it
359  * before exiting.
360  */
361  if (worker == NULL)
362  {
363  LWLockRelease(LogicalRepWorkerLock);
365  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
366  errmsg("out of logical replication worker slots"),
367  errhint("You might need to increase max_logical_replication_workers.")));
368  return;
369  }
370 
371  /* Prepare the worker slot. */
372  worker->launch_time = now;
373  worker->in_use = true;
374  worker->generation++;
375  worker->proc = NULL;
376  worker->dbid = dbid;
377  worker->userid = userid;
378  worker->subid = subid;
379  worker->relid = relid;
380  worker->relstate = SUBREL_STATE_UNKNOWN;
382  worker->last_lsn = InvalidXLogRecPtr;
385  worker->reply_lsn = InvalidXLogRecPtr;
386  TIMESTAMP_NOBEGIN(worker->reply_time);
387 
388  /* Before releasing lock, remember generation for future identification. */
389  generation = worker->generation;
390 
391  LWLockRelease(LogicalRepWorkerLock);
392 
393  /* Register the new dynamic worker. */
394  memset(&bgw, 0, sizeof(bgw));
398  snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres");
399  snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ApplyWorkerMain");
400  if (OidIsValid(relid))
402  "logical replication worker for subscription %u sync %u", subid, relid);
403  else
405  "logical replication worker for subscription %u", subid);
406  snprintf(bgw.bgw_type, BGW_MAXLEN, "logical replication worker");
407 
410  bgw.bgw_main_arg = Int32GetDatum(slot);
411 
412  if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle))
413  {
414  /* Failed to start worker, so clean up the worker slot. */
415  LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
416  Assert(generation == worker->generation);
418  LWLockRelease(LogicalRepWorkerLock);
419 
421  (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
422  errmsg("out of background worker slots"),
423  errhint("You might need to increase max_worker_processes.")));
424  return;
425  }
426 
427  /* Now wait until it attaches. */
428  WaitForReplicationWorkerAttach(worker, generation, bgw_handle);
429 }
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define DEBUG1
Definition: elog.h:25
int MyProcPid
Definition: globals.c:43
int errhint(const char *fmt,...)
Definition: elog.c:1156
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1580
int64 TimestampTz
Definition: timestamp.h:39
LogicalRepWorker workers[FLEXIBLE_ARRAY_MEMBER]
Definition: launcher.c:65
TimestampTz last_send_time
XLogRecPtr last_lsn
int bgw_restart_time
Definition: bgworker.h:94
int errcode(int sqlerrcode)
Definition: elog.c:698
NameData subname
#define BGWORKER_SHMEM_ACCESS
Definition: bgworker.h:52
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1711
#define OidIsValid(objectId)
Definition: c.h:710
char bgw_function_name[BGW_MAXLEN]
Definition: bgworker.h:96
int wal_receiver_timeout
Definition: walreceiver.c:90
XLogRecPtr relstate_lsn
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1803
Datum bgw_main_arg
Definition: bgworker.h:97
unsigned short uint16
Definition: c.h:440
#define ERROR
Definition: elog.h:46
int max_sync_workers_per_subscription
Definition: launcher.c:55
XLogRecPtr reply_lsn
static void logicalrep_worker_cleanup(LogicalRepWorker *worker)
Definition: launcher.c:614
#define BGW_NEVER_RESTART
Definition: bgworker.h:84
#define TIMESTAMP_NOBEGIN(j)
Definition: timestamp.h:112
#define WARNING
Definition: elog.h:40
int logicalrep_sync_worker_count(Oid subid)
Definition: launcher.c:659
static void WaitForReplicationWorkerAttach(LogicalRepWorker *worker, uint16 generation, BackgroundWorkerHandle *handle)
Definition: launcher.c:157
#define ereport(elevel,...)
Definition: elog.h:157
TimestampTz launch_time
int errmsg_internal(const char *fmt,...)
Definition: elog.c:996
int max_replication_slots
Definition: slot.c:99
TimestampTz last_recv_time
char bgw_name[BGW_MAXLEN]
Definition: bgworker.h:90
#define Assert(condition)
Definition: c.h:804
#define BGWORKER_BACKEND_DATABASE_CONNECTION
Definition: bgworker.h:59
int max_logical_replication_workers
Definition: launcher.c:54
#define BGW_MAXLEN
Definition: bgworker.h:85
BgWorkerStartTime bgw_start_time
Definition: bgworker.h:93
bool RegisterDynamicBackgroundWorker(BackgroundWorker *worker, BackgroundWorkerHandle **handle)
Definition: bgworker.c:973
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1199
#define Int32GetDatum(X)
Definition: postgres.h:523
char bgw_type[BGW_MAXLEN]
Definition: bgworker.h:91
int errmsg(const char *fmt,...)
Definition: elog.c:909
pid_t bgw_notify_pid
Definition: bgworker.h:99
#define elog(elevel,...)
Definition: elog.h:232
LogicalRepCtxStruct * LogicalRepCtx
Definition: launcher.c:68
int i
char bgw_library_name[BGW_MAXLEN]
Definition: bgworker.h:95
#define snprintf
Definition: port.h:216
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1544
TimestampTz reply_time

◆ logicalrep_worker_stop()

void logicalrep_worker_stop ( Oid  subid,
Oid  relid 
)

Definition at line 436 of file launcher.c.

References CHECK_FOR_INTERRUPTS, LogicalRepWorker::generation, LogicalRepWorker::in_use, kill, logicalrep_worker_find(), LW_SHARED, LWLockAcquire(), LWLockRelease(), MyLatch, PGPROC::pid, LogicalRepWorker::proc, ResetLatch(), WAIT_EVENT_BGWORKER_SHUTDOWN, WAIT_EVENT_BGWORKER_STARTUP, WaitLatch(), WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, and WL_TIMEOUT.

Referenced by AlterSubscription_refresh(), and DropSubscription().

437 {
438  LogicalRepWorker *worker;
439  uint16 generation;
440 
441  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
442 
443  worker = logicalrep_worker_find(subid, relid, false);
444 
445  /* No worker, nothing to do. */
446  if (!worker)
447  {
448  LWLockRelease(LogicalRepWorkerLock);
449  return;
450  }
451 
452  /*
453  * Remember which generation was our worker so we can check if what we see
454  * is still the same one.
455  */
456  generation = worker->generation;
457 
458  /*
459  * If we found a worker but it does not have proc set then it is still
460  * starting up; wait for it to finish starting and then kill it.
461  */
462  while (worker->in_use && !worker->proc)
463  {
464  int rc;
465 
466  LWLockRelease(LogicalRepWorkerLock);
467 
468  /* Wait a bit --- we don't expect to have to wait long. */
469  rc = WaitLatch(MyLatch,
472 
473  if (rc & WL_LATCH_SET)
474  {
477  }
478 
479  /* Recheck worker status. */
480  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
481 
482  /*
483  * Check whether the worker slot is no longer used, which would mean
484  * that the worker has exited, or whether the worker generation is
485  * different, meaning that a different worker has taken the slot.
486  */
487  if (!worker->in_use || worker->generation != generation)
488  {
489  LWLockRelease(LogicalRepWorkerLock);
490  return;
491  }
492 
493  /* Worker has assigned proc, so it has started. */
494  if (worker->proc)
495  break;
496  }
497 
498  /* Now terminate the worker ... */
499  kill(worker->proc->pid, SIGTERM);
500 
501  /* ... and wait for it to die. */
502  for (;;)
503  {
504  int rc;
505 
506  /* is it gone? */
507  if (!worker->proc || worker->generation != generation)
508  break;
509 
510  LWLockRelease(LogicalRepWorkerLock);
511 
512  /* Wait a bit --- we don't expect to have to wait long. */
513  rc = WaitLatch(MyLatch,
514  WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
516 
517  if (rc & WL_LATCH_SET)
518  {
521  }
522 
523  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
524  }
525 
526  LWLockRelease(LogicalRepWorkerLock);
527 }
#define WL_TIMEOUT
Definition: latch.h:128
#define kill(pid, sig)
Definition: win32_port.h:454
void ResetLatch(Latch *latch)
Definition: latch.c:660
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:452
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1803
unsigned short uint16
Definition: c.h:440
LogicalRepWorker * logicalrep_worker_find(Oid subid, Oid relid, bool only_running)
Definition: launcher.c:215
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1199
struct Latch * MyLatch
Definition: globals.c:57
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:120
int pid
Definition: proc.h:146
#define WL_LATCH_SET
Definition: latch.h:125
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:130

◆ logicalrep_worker_wakeup()

void logicalrep_worker_wakeup ( Oid  subid,
Oid  relid 
)

Definition at line 533 of file launcher.c.

References logicalrep_worker_find(), logicalrep_worker_wakeup_ptr(), LW_SHARED, LWLockAcquire(), and LWLockRelease().

Referenced by pg_attribute_noreturn().

534 {
535  LogicalRepWorker *worker;
536 
537  LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
538 
539  worker = logicalrep_worker_find(subid, relid, true);
540 
541  if (worker)
542  logicalrep_worker_wakeup_ptr(worker);
543 
544  LWLockRelease(LogicalRepWorkerLock);
545 }
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1803
void logicalrep_worker_wakeup_ptr(LogicalRepWorker *worker)
Definition: launcher.c:553
LogicalRepWorker * logicalrep_worker_find(Oid subid, Oid relid, bool only_running)
Definition: launcher.c:215
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1199

◆ logicalrep_worker_wakeup_ptr()

void logicalrep_worker_wakeup_ptr ( LogicalRepWorker *  worker)

Definition at line 553 of file launcher.c.

References Assert, LWLockHeldByMe(), LogicalRepWorker::proc, PGPROC::procLatch, and SetLatch().

Referenced by logicalrep_worker_wakeup(), process_syncing_tables_for_apply(), and wait_for_worker_state_change().

554 {
555  Assert(LWLockHeldByMe(LogicalRepWorkerLock));
556 
557  SetLatch(&worker->proc->procLatch);
558 }
bool LWLockHeldByMe(LWLock *l)
Definition: lwlock.c:1919
void SetLatch(Latch *latch)
Definition: latch.c:567
Latch procLatch
Definition: proc.h:130
#define Assert(condition)
Definition: c.h:804

◆ logicalrep_workers_find()

List* logicalrep_workers_find ( Oid  subid,
bool  only_running 
)

Definition at line 243 of file launcher.c.

References Assert, i, LogicalRepWorker::in_use, lappend(), LWLockHeldByMe(), max_logical_replication_workers, NIL, LogicalRepWorker::proc, LogicalRepWorker::subid, and LogicalRepCtxStruct::workers.

Referenced by DropSubscription().

244 {
245  int i;
246  List *res = NIL;
247 
248  Assert(LWLockHeldByMe(LogicalRepWorkerLock));
249 
250  /* Search for attached worker for a given subscription id. */
251  for (i = 0; i < max_logical_replication_workers; i++)
252  {
253  LogicalRepWorker *w = &LogicalRepCtx->workers[i];
254 
255  if (w->in_use && w->subid == subid && (!only_running || w->proc))
256  res = lappend(res, w);
257  }
258 
259  return res;
260 }
#define NIL
Definition: pg_list.h:65
bool LWLockHeldByMe(LWLock *l)
Definition: lwlock.c:1919
LogicalRepWorker workers[FLEXIBLE_ARRAY_MEMBER]
Definition: launcher.c:65
List * lappend(List *list, void *datum)
Definition: list.c:336
#define Assert(condition)
Definition: c.h:804
int max_logical_replication_workers
Definition: launcher.c:54
LogicalRepCtxStruct * LogicalRepCtx
Definition: launcher.c:68
int i
Definition: pg_list.h:50

◆ LogicalRepSyncTableStart()

char* LogicalRepSyncTableStart ( XLogRecPtr *  origin_startpos)

Definition at line 920 of file tablesync.c.

References Assert, CommandCounterIncrement(), CommitTransactionCommand(), Subscription::conninfo, copy_table(), CRS_USE_SNAPSHOT, DEBUG1, elog, ereport, WalRcvExecResult::err, errcode(), ERRCODE_DUPLICATE_OBJECT, errmsg(), ERROR, GetSubscriptionRelState(), GetTransactionSnapshot(), HOLD_INTERRUPTS, InvalidXLogRecPtr, LockRelationOid(), LogRepWorkerWalRcvConn, LSN_FORMAT_ARGS, MyLogicalRepWorker, MySubscription, NAMEDATALEN, NoLock, Subscription::oid, OidIsValid, palloc(), pgstat_report_stat(), PopActiveSnapshot(), PushActiveSnapshot(), LogicalRepWorker::relid, LogicalRepWorker::relmutex, LogicalRepWorker::relstate, LogicalRepWorker::relstate_lsn, ReplicationOriginNameForTablesync(), ReplicationSlotDropAtPubNode(), ReplicationSlotNameForTablesync(), replorigin_advance(), replorigin_by_name(), replorigin_create(), replorigin_session_get_progress(), replorigin_session_origin, replorigin_session_setup(), RESUME_INTERRUPTS, RowExclusiveLock, SpinLockAcquire, SpinLockRelease, StartTransactionCommand(), WalRcvExecResult::status, LogicalRepWorker::subid, table_close(), table_open(), UnlockRelationOid(), UpdateSubscriptionRelState(), wait_for_worker_state_change(), walrcv_clear_result(), walrcv_connect, walrcv_create_slot, walrcv_exec, and WALRCV_OK_COMMAND.

Referenced by ApplyWorkerMain().

921 {
922  char *slotname;
923  char *err;
924  char relstate;
925  XLogRecPtr relstate_lsn;
926  Relation rel;
927  WalRcvExecResult *res;
928  char originname[NAMEDATALEN];
929  RepOriginId originid;
930 
931  /* Check the state of the table synchronization. */
935  &relstate_lsn);
937 
939  MyLogicalRepWorker->relstate = relstate;
940  MyLogicalRepWorker->relstate_lsn = relstate_lsn;
942 
943  /*
944  * If synchronization is already done or no longer necessary, exit now
945  * that we've updated shared memory state.
946  */
947  switch (relstate)
948  {
949  case SUBREL_STATE_SYNCDONE:
950  case SUBREL_STATE_READY:
951  case SUBREL_STATE_UNKNOWN:
952  finish_sync_worker(); /* doesn't return */
953  }
954 
955  /* Calculate the name of the tablesync slot. */
956  slotname = (char *) palloc(NAMEDATALEN);
959  slotname,
960  NAMEDATALEN);
961 
962  /*
963  * Here we use the slot name instead of the subscription name as the
964  * application_name, so that it is different from the main apply worker,
965  * so that synchronous replication can distinguish them.
966  */
968  walrcv_connect(MySubscription->conninfo, true, slotname, &err);
969  if (LogRepWorkerWalRcvConn == NULL)
970  ereport(ERROR,
971  (errcode(ERRCODE_CONNECTION_FAILURE),
972  errmsg("could not connect to the publisher: %s", err)));
973 
974  Assert(MyLogicalRepWorker->relstate == SUBREL_STATE_INIT ||
975  MyLogicalRepWorker->relstate == SUBREL_STATE_DATASYNC ||
976  MyLogicalRepWorker->relstate == SUBREL_STATE_FINISHEDCOPY);
977 
978  /* Assign the origin tracking record name. */
981  originname,
982  sizeof(originname));
983 
984  if (MyLogicalRepWorker->relstate == SUBREL_STATE_DATASYNC)
985  {
986  /*
987  * We have previously errored out before finishing the copy so the
988  * replication slot might exist. We want to remove the slot if it
989  * already exists and proceed.
990  *
991  * XXX We could also instead try to drop the slot, last time we failed
992  * but for that, we might need to clean up the copy state as it might
993  * be in the middle of fetching the rows. Also, if there is a network
994  * breakdown then it wouldn't have succeeded so trying it next time
995  * seems like a better bet.
996  */
998  }
999  else if (MyLogicalRepWorker->relstate == SUBREL_STATE_FINISHEDCOPY)
1000  {
1001  /*
1002  * The COPY phase was previously done, but tablesync then crashed
1003  * before it was able to finish normally.
1004  */
1006 
1007  /*
1008  * The origin tracking name must already exist. It was created first
1009  * time this tablesync was launched.
1010  */
1011  originid = replorigin_by_name(originname, false);
1012  replorigin_session_setup(originid);
1013  replorigin_session_origin = originid;
1014  *origin_startpos = replorigin_session_get_progress(false);
1015 
1017 
1018  goto copy_table_done;
1019  }
1020 
1022  MyLogicalRepWorker->relstate = SUBREL_STATE_DATASYNC;
1025 
1026  /* Update the state and make it visible to others. */
1033  pgstat_report_stat(false);
1034 
1036 
1037  /*
1038  * Use a standard write lock here. It might be better to disallow access
1039  * to the table while it's being synchronized. But we don't want to block
1040  * the main apply process from working and it has to open the relation in
1041  * RowExclusiveLock when remapping remote relation id to local one.
1042  */
1044 
1045  /*
1046  * Start a transaction in the remote node in REPEATABLE READ mode. This
1047  * ensures that both the replication slot we create (see below) and the
1048  * COPY are consistent with each other.
1049  */
1051  "BEGIN READ ONLY ISOLATION LEVEL REPEATABLE READ",
1052  0, NULL);
1053  if (res->status != WALRCV_OK_COMMAND)
1054  ereport(ERROR,
1055  (errcode(ERRCODE_CONNECTION_FAILURE),
1056  errmsg("table copy could not start transaction on publisher: %s",
1057  res->err)));
1058  walrcv_clear_result(res);
1059 
1060  /*
1061  * Create a new permanent logical decoding slot. This slot will be used
1062  * for the catchup phase after COPY is done, so tell it to use the
1063  * snapshot to make the final data consistent.
1064  *
1065  * Prevent cancel/die interrupts while creating slot here because it is
1066  * possible that before the server finishes this command, a concurrent
1067  * drop subscription happens which would complete without removing this
1068  * slot leading to a dangling slot on the server.
1069  */
1070  HOLD_INTERRUPTS();
1072  slotname, false /* permanent */ , false /* two_phase */ ,
1073  CRS_USE_SNAPSHOT, origin_startpos);
1075 
1076  /*
1077  * Setup replication origin tracking. The purpose of doing this before the
1078  * copy is to avoid doing the copy again due to any error in setting up
1079  * origin tracking.
1080  */
1081  originid = replorigin_by_name(originname, true);
1082  if (!OidIsValid(originid))
1083  {
1084  /*
1085  * Origin tracking does not exist, so create it now.
1086  *
1087  * Then advance to the LSN got from walrcv_create_slot. This is WAL
1088  * logged for the purpose of recovery. Locks are to prevent the
1089  * replication origin from vanishing while advancing.
1090  */
1091  originid = replorigin_create(originname);
1092 
1093  LockRelationOid(ReplicationOriginRelationId, RowExclusiveLock);
1094  replorigin_advance(originid, *origin_startpos, InvalidXLogRecPtr,
1095  true /* go backward */ , true /* WAL log */ );
1096  UnlockRelationOid(ReplicationOriginRelationId, RowExclusiveLock);
1097 
1098  replorigin_session_setup(originid);
1099  replorigin_session_origin = originid;
1100  }
1101  else
1102  {
1103  ereport(ERROR,
1105  errmsg("replication origin \"%s\" already exists",
1106  originname)));
1107  }
1108 
1109  /* Now do the initial data copy */
1111  copy_table(rel);
1113 
1114  res = walrcv_exec(LogRepWorkerWalRcvConn, "COMMIT", 0, NULL);
1115  if (res->status != WALRCV_OK_COMMAND)
1116  ereport(ERROR,
1117  (errcode(ERRCODE_CONNECTION_FAILURE),
1118  errmsg("table copy could not finish transaction on publisher: %s",
1119  res->err)));
1120  walrcv_clear_result(res);
1121 
1122  table_close(rel, NoLock);
1123 
1124  /* Make the copy visible. */
1126 
1127  /*
1128  * Update the persisted state to indicate the COPY phase is done; make it
1129  * visible to others.
1130  */
1133  SUBREL_STATE_FINISHEDCOPY,
1135 
1137 
1138 copy_table_done:
1139 
1140  elog(DEBUG1,
1141  "LogicalRepSyncTableStart: '%s' origin_startpos lsn %X/%X",
1142  originname, LSN_FORMAT_ARGS(*origin_startpos));
1143 
1144  /*
1145  * We are done with the initial data synchronization, update the state.
1146  */
1148  MyLogicalRepWorker->relstate = SUBREL_STATE_SYNCWAIT;
1149  MyLogicalRepWorker->relstate_lsn = *origin_startpos;
1151 
1152  /*
1153  * Finally, wait until the main apply worker tells us to catch up and then
1154  * return to let LogicalRepApplyLoop do it.
1155  */
1156  wait_for_worker_state_change(SUBREL_STATE_CATCHUP);
1157  return slotname;
1158 }
Subscription * MySubscription
Definition: worker.c:246
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define DEBUG1
Definition: elog.h:25
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:167
WalReceiverConn * LogRepWorkerWalRcvConn
Definition: worker.c:244
void ReplicationSlotDropAtPubNode(WalReceiverConn *wrconn, char *slotname, bool missing_ok)
void UnlockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:200
void CommitTransactionCommand(void)
Definition: xact.c:2939
RepOriginId replorigin_by_name(const char *roname, bool missing_ok)
Definition: origin.c:209
uint16 RepOriginId
Definition: xlogdefs.h:65
static void copy_table(Relation rel)
Definition: tablesync.c:808
XLogRecPtr replorigin_session_get_progress(bool flush)
Definition: origin.c:1206
int errcode(int sqlerrcode)
Definition: elog.c:698
RepOriginId replorigin_create(const char *roname)
Definition: origin.c:240
void PopActiveSnapshot(void)
Definition: snapmgr.c:759
void replorigin_advance(RepOriginId node, XLogRecPtr remote_commit, XLogRecPtr local_commit, bool go_backward, bool wal_log)
Definition: origin.c:872
void pgstat_report_stat(bool disconnect)
Definition: pgstat.c:843
void replorigin_session_setup(RepOriginId node)
Definition: origin.c:1071
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:250
#define OidIsValid(objectId)
Definition: c.h:710
XLogRecPtr relstate_lsn
#define RESUME_INTERRUPTS()
Definition: miscadmin.h:133
#define NAMEDATALEN
#define SpinLockAcquire(lock)
Definition: spin.h:62
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
static void walrcv_clear_result(WalRcvExecResult *walres)
Definition: walreceiver.h:436
LogicalRepWorker * MyLogicalRepWorker
Definition: launcher.c:57
#define ERROR
Definition: elog.h:46
#define NoLock
Definition: lockdefs.h:34
void PushActiveSnapshot(Snapshot snap)
Definition: snapmgr.c:680
#define RowExclusiveLock
Definition: lockdefs.h:38
#define SpinLockRelease(lock)
Definition: spin.h:64
char GetSubscriptionRelState(Oid subid, Oid relid, XLogRecPtr *sublsn)
void CommandCounterIncrement(void)
Definition: xact.c:1021
void UpdateSubscriptionRelState(Oid subid, Oid relid, char state, XLogRecPtr sublsn)
#define ereport(elevel,...)
Definition: elog.h:157
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:804
WalRcvExecStatus status
Definition: walreceiver.h:216
RepOriginId replorigin_session_origin
Definition: origin.c:154
void StartTransactionCommand(void)
Definition: xact.c:2838
void ReplicationOriginNameForTablesync(Oid suboid, Oid relid, char *originname, int szorgname)
Definition: tablesync.c:905
void * palloc(Size size)
Definition: mcxt.c:1062
int errmsg(const char *fmt,...)
Definition: elog.c:909
#define HOLD_INTERRUPTS()
Definition: miscadmin.h:131
#define elog(elevel,...)
Definition: elog.h:232
void LockRelationOid(Oid relid, LOCKMODE lockmode)
Definition: lmgr.c:109
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
#define ERRCODE_DUPLICATE_OBJECT
Definition: streamutil.c:32
static bool wait_for_worker_state_change(char expected_state)
Definition: tablesync.c:219
#define walrcv_exec(conn, exec, nRetTypes, retTypes)
Definition: walreceiver.h:430
void ReplicationSlotNameForTablesync(Oid suboid, Oid relid, char *syncslotname, int szslot)
Definition: tablesync.c:892
#define walrcv_create_slot(conn, slotname, temporary, two_phase, snapshot_action, lsn)
Definition: walreceiver.h:426
#define walrcv_connect(conninfo, logical, appname, err)
Definition: walreceiver.h:404

◆ process_syncing_tables()

void process_syncing_tables ( XLogRecPtr  current_lsn)

Definition at line 587 of file tablesync.c.

References am_tablesync_worker(), process_syncing_tables_for_apply(), and process_syncing_tables_for_sync().

Referenced by apply_handle_commit(), apply_handle_commit_prepared(), apply_handle_prepare(), apply_handle_rollback_prepared(), apply_handle_stream_commit(), apply_handle_stream_prepare(), and LogicalRepApplyLoop().

588 {
589  if (am_tablesync_worker())
590  process_syncing_tables_for_sync(current_lsn);
591  else
592  process_syncing_tables_for_apply(current_lsn);
593 }
static void process_syncing_tables_for_apply(XLogRecPtr current_lsn)
Definition: tablesync.c:362
static bool am_tablesync_worker(void)
static void process_syncing_tables_for_sync(XLogRecPtr current_lsn)
Definition: tablesync.c:282

◆ ReplicationOriginNameForTablesync()

void ReplicationOriginNameForTablesync ( Oid  suboid,
Oid  relid,
char *  originname,
int  szorgname 
)

Definition at line 905 of file tablesync.c.

References snprintf.

Referenced by AlterSubscription_refresh(), DropSubscription(), LogicalRepSyncTableStart(), and process_syncing_tables_for_apply().

907 {
908  snprintf(originname, szorgname, "pg_%u_%u", suboid, relid);
909 }
#define snprintf
Definition: port.h:216

◆ UpdateTwoPhaseState()

void UpdateTwoPhaseState ( Oid  suboid,
char  new_state 
)

Definition at line 1255 of file tablesync.c.

References Assert, CatalogTupleUpdate(), CharGetDatum, elog, ERROR, heap_freetuple(), heap_modify_tuple(), HeapTupleIsValid, LOGICALREP_TWOPHASE_STATE_DISABLED, LOGICALREP_TWOPHASE_STATE_ENABLED, LOGICALREP_TWOPHASE_STATE_PENDING, ObjectIdGetDatum, RelationGetDescr, RowExclusiveLock, SearchSysCacheCopy1, SUBSCRIPTIONOID, HeapTupleData::t_self, table_close(), table_open(), and values.

Referenced by ApplyWorkerMain(), and CreateSubscription().

1256 {
1257  Relation rel;
1258  HeapTuple tup;
1259  bool nulls[Natts_pg_subscription];
1260  bool replaces[Natts_pg_subscription];
1261  Datum values[Natts_pg_subscription];
1262 
1263  Assert(new_state == LOGICALREP_TWOPHASE_STATE_DISABLED ||
1264  new_state == LOGICALREP_TWOPHASE_STATE_PENDING ||
1265  new_state == LOGICALREP_TWOPHASE_STATE_ENABLED);
1266 
1267  rel = table_open(SubscriptionRelationId, RowExclusiveLock);
1268  tup = SearchSysCacheCopy1(SUBSCRIPTIONOID, ObjectIdGetDatum(suboid));
1269  if (!HeapTupleIsValid(tup))
1270  elog(ERROR,
1271  "cache lookup failed for subscription oid %u",
1272  suboid);
1273 
1274  /* Form a new tuple. */
1275  memset(values, 0, sizeof(values));
1276  memset(nulls, false, sizeof(nulls));
1277  memset(replaces, false, sizeof(replaces));
1278 
1279  /* And update/set two_phase state */
1280  values[Anum_pg_subscription_subtwophasestate - 1] = CharGetDatum(new_state);
1281  replaces[Anum_pg_subscription_subtwophasestate - 1] = true;
1282 
1283  tup = heap_modify_tuple(tup, RelationGetDescr(rel),
1284  values, nulls, replaces);
1285  CatalogTupleUpdate(rel, &tup->t_self, tup);
1286 
1287  heap_freetuple(tup);
1288  table_close(rel, RowExclusiveLock);
1289 }
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:167
#define RelationGetDescr(relation)
Definition: rel.h:503
#define LOGICALREP_TWOPHASE_STATE_DISABLED
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1338
#define LOGICALREP_TWOPHASE_STATE_ENABLED
#define ObjectIdGetDatum(X)
Definition: postgres.h:551
#define ERROR
Definition: elog.h:46
ItemPointerData t_self
Definition: htup.h:65
#define RowExclusiveLock
Definition: lockdefs.h:38
#define LOGICALREP_TWOPHASE_STATE_PENDING
uintptr_t Datum
Definition: postgres.h:411
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define Assert(condition)
Definition: c.h:804
void CatalogTupleUpdate(Relation heapRel, ItemPointer otid, HeapTuple tup)
Definition: indexing.c:301
#define CharGetDatum(X)
Definition: postgres.h:460
static Datum values[MAXATTR]
Definition: bootstrap.c:166
#define SearchSysCacheCopy1(cacheId, key1)
Definition: syscache.h:175
#define elog(elevel,...)
Definition: elog.h:232
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
HeapTuple heap_modify_tuple(HeapTuple tuple, TupleDesc tupleDesc, Datum *replValues, bool *replIsnull, bool *doReplace)
Definition: heaptuple.c:1113

Variable Documentation

◆ ApplyContext

MemoryContext ApplyContext

Definition at line 239 of file worker.c.

Referenced by apply_handle_stream_start().

◆ in_remote_transaction

◆ LogRepWorkerWalRcvConn

◆ MyLogicalRepWorker

◆ MySubscription