PostgreSQL Source Code git master
Loading...
Searching...
No Matches
twophase.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * twophase.c
4 * Two-phase commit support functions.
5 *
6 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 * IDENTIFICATION
10 * src/backend/access/transam/twophase.c
11 *
12 * NOTES
13 * Each global transaction is associated with a global transaction
14 * identifier (GID). The client assigns a GID to a postgres
15 * transaction with the PREPARE TRANSACTION command.
16 *
17 * We keep all active global transactions in a shared memory array.
18 * When the PREPARE TRANSACTION command is issued, the GID is
19 * reserved for the transaction in the array. This is done before
20 * a WAL entry is made, because the reservation checks for duplicate
21 * GIDs and aborts the transaction if there already is a global
22 * transaction in prepared state with the same GID.
23 *
24 * A global transaction (gxact) also has a dummy PGPROC; this is what keeps
25 * the XID considered running by TransactionIdIsInProgress. It is also
26 * convenient as a PGPROC to hook the gxact's locks to.
27 *
28 * Information to recover prepared transactions in case of crash is
29 * now stored in WAL for the common case. In some cases there will be
30 * an extended period between preparing a GXACT and commit/abort, in
31 * which case we need to separately record prepared transaction data
32 * in permanent storage. This includes locking information, pending
33 * notifications etc. All that state information is written to the
34 * per-transaction state file in the pg_twophase directory.
35 * All prepared transactions will be written prior to shutdown.
36 *
37 * The life cycle of the state data is as follows:
38 *
39 * * On PREPARE TRANSACTION, the backend writes state data only to the WAL
40 * and stores a pointer to the start of the WAL record in
41 * gxact->prepare_start_lsn.
42 * * If COMMIT occurs before a checkpoint, the backend reads the data from
43 * WAL using prepare_start_lsn.
44 * * On checkpoint, the state data is copied to files in the pg_twophase
45 * directory and fsynced.
46 * * If COMMIT happens after a checkpoint, the backend reads the state data
47 * from those files.
48 *
49 * During replay and replication, TwoPhaseState also holds information
50 * about active prepared transactions that haven't been moved to disk yet.
51 *
52 * Replay of twophase records happens by the following rules:
53 *
54 * * At the beginning of recovery, pg_twophase is scanned once, filling
55 * TwoPhaseState with entries marked with gxact->inredo and
56 * gxact->ondisk. Two-phase file data older than the XID horizon of
57 * the redo position are discarded.
58 * * On PREPARE redo, the transaction is added to TwoPhaseState->prepXacts.
59 * gxact->inredo is set to true for such entries.
60 * * On Checkpoint we iterate through TwoPhaseState->prepXacts entries
61 * that have gxact->inredo set and are behind the redo_horizon. We
62 * save them to disk and then switch gxact->ondisk to true.
63 * * On COMMIT/ABORT we delete the entry from TwoPhaseState->prepXacts.
64 * If gxact->ondisk is true, the corresponding entry from the disk
65 * is additionally deleted.
66 * * RecoverPreparedTransactions(), StandbyRecoverPreparedTransactions()
67 * and PrescanPreparedTransactions() have been modified to go through
68 * gxact->inredo entries that have not made it to disk.
69 *
70 *-------------------------------------------------------------------------
71 */
72#include "postgres.h"
73
74#include <fcntl.h>
75#include <sys/stat.h>
76#include <time.h>
77#include <unistd.h>
78
79#include "access/commit_ts.h"
80#include "access/htup_details.h"
81#include "access/subtrans.h"
82#include "access/transam.h"
83#include "access/twophase.h"
85#include "access/xact.h"
86#include "access/xlog.h"
87#include "access/xloginsert.h"
88#include "access/xlogreader.h"
89#include "access/xlogrecovery.h"
90#include "access/xlogutils.h"
91#include "catalog/pg_type.h"
92#include "catalog/storage.h"
93#include "funcapi.h"
94#include "miscadmin.h"
95#include "pg_trace.h"
96#include "pgstat.h"
97#include "replication/origin.h"
98#include "replication/syncrep.h"
99#include "storage/fd.h"
100#include "storage/ipc.h"
101#include "storage/md.h"
102#include "storage/predicate.h"
103#include "storage/proc.h"
104#include "storage/procarray.h"
105#include "storage/subsystems.h"
106#include "utils/builtins.h"
108#include "utils/memutils.h"
109#include "utils/timestamp.h"
110#include "utils/wait_event.h"
111
112/*
113 * Directory where Two-phase commit files reside within PGDATA
114 */
115#define TWOPHASE_DIR "pg_twophase"
116
117/* GUC variable, can't be changed after startup */
119
120/*
121 * This struct describes one global transaction that is in prepared state
122 * or attempting to become prepared.
123 *
124 * The lifecycle of a global transaction is:
125 *
126 * 1. After checking that the requested GID is not in use, set up an entry in
127 * the TwoPhaseState->prepXacts array with the correct GID and valid = false,
128 * and mark it as locked by my backend.
129 *
130 * 2. After successfully completing prepare, set valid = true and enter the
131 * referenced PGPROC into the global ProcArray.
132 *
133 * 3. To begin COMMIT PREPARED or ROLLBACK PREPARED, check that the entry is
134 * valid and not locked, then mark the entry as locked by storing my current
135 * proc number into locking_backend. This prevents concurrent attempts to
136 * commit or rollback the same prepared xact.
137 *
138 * 4. On completion of COMMIT PREPARED or ROLLBACK PREPARED, remove the entry
139 * from the ProcArray and the TwoPhaseState->prepXacts array and return it to
140 * the freelist.
141 *
142 * Note that if the preparing transaction fails between steps 1 and 2, the
143 * entry must be removed so that the GID and the GlobalTransaction struct
144 * can be reused. See AtAbort_Twophase().
145 *
146 * typedef struct GlobalTransactionData *GlobalTransaction appears in
147 * twophase.h
148 */
149
151{
152 GlobalTransaction next; /* list link for free list */
153 int pgprocno; /* ID of associated dummy PGPROC */
154 TimestampTz prepared_at; /* time of preparation */
155
156 /*
157 * Note that we need to keep track of two LSNs for each GXACT. We keep
158 * track of the start LSN because this is the address we must use to read
159 * state data back from WAL when committing a prepared GXACT. We keep
160 * track of the end LSN because that is the LSN we need to wait for prior
161 * to commit.
162 */
163 XLogRecPtr prepare_start_lsn; /* XLOG offset of prepare record start */
164 XLogRecPtr prepare_end_lsn; /* XLOG offset of prepare record end */
165 FullTransactionId fxid; /* The GXACT full xid */
166
167 Oid owner; /* ID of user that executed the xact */
168 ProcNumber locking_backend; /* backend currently working on the xact */
169 bool valid; /* true if PGPROC entry is in proc array */
170 bool ondisk; /* true if prepare state file is on disk */
171 bool inredo; /* true if entry was added via xlog_redo */
172 char gid[GIDSIZE]; /* The GID assigned to the prepared xact */
174
175/*
176 * Two Phase Commit shared state. Access to this struct is protected
177 * by TwoPhaseStateLock.
178 */
179typedef struct TwoPhaseStateData
180{
181 /* Head of linked list of free GlobalTransactionData structs */
183
184 /* Number of valid prepXacts entries. */
186
187 /* There are max_prepared_xacts items in this array */
190
192
193static void TwoPhaseShmemRequest(void *arg);
194static void TwoPhaseShmemInit(void *arg);
195
200
201/*
202 * Global transaction entry currently locked by us, if any. Note that any
203 * access to the entry pointed to by this variable must be protected by
204 * TwoPhaseStateLock, though obviously the pointer itself doesn't need to be
205 * (since it's just local memory).
206 */
208
209static bool twophaseExitRegistered = false;
210
213 int nchildren,
214 TransactionId *children,
215 int nrels,
216 RelFileLocator *rels,
217 int nstats,
218 xl_xact_stats_item *stats,
219 int ninvalmsgs,
221 bool initfileinval,
222 const char *gid);
224 int nchildren,
225 TransactionId *children,
226 int nrels,
227 RelFileLocator *rels,
228 int nstats,
229 xl_xact_stats_item *stats,
230 const char *gid);
231static void ProcessRecords(char *bufptr, FullTransactionId fxid,
232 const TwoPhaseCallback callbacks[]);
234
235static void XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len);
237 XLogRecPtr prepare_start_lsn,
238 bool fromdisk, bool setParent, bool setNextXid);
240 const char *gid, TimestampTz prepared_at, Oid owner,
242static void RemoveTwoPhaseFile(FullTransactionId fxid, bool giveWarning);
243static void RecreateTwoPhaseFile(FullTransactionId fxid, void *content, int len);
244
245/*
246 * Register shared memory for two-phase state.
247 */
248static void
250{
251 Size size;
252
253 /* Need the fixed struct, the array of pointers, and the GTD structs */
254 size = offsetof(TwoPhaseStateData, prepXacts);
256 sizeof(GlobalTransaction)));
257 size = MAXALIGN(size);
259 sizeof(GlobalTransactionData)));
260 ShmemRequestStruct(.name = "Prepared Transaction Table",
261 .size = size,
262 .ptr = (void **) &TwoPhaseState,
263 );
264}
265
266/*
267 * Initialize shared memory for two-phase state.
268 */
269static void
271{
273 int i;
274
277
278 /*
279 * Initialize the linked list of free GlobalTransactionData structs
280 */
282 ((char *) TwoPhaseState +
285 for (i = 0; i < max_prepared_xacts; i++)
286 {
287 /* insert into linked list */
290
291 /* associate it with a PGPROC assigned by ProcGlobalShmemInit */
293 }
294}
295
296/*
297 * Exit hook to unlock the global transaction entry we're working on.
298 */
299static void
301{
302 /* same logic as abort */
304}
305
306/*
307 * Abort hook to unlock the global transaction entry we're working on.
308 */
309void
311{
312 if (MyLockedGxact == NULL)
313 return;
314
315 /*
316 * What to do with the locked global transaction entry? If we were in the
317 * process of preparing the transaction, but haven't written the WAL
318 * record and state file yet, the transaction must not be considered as
319 * prepared. Likewise, if we are in the process of finishing an
320 * already-prepared transaction, and fail after having already written the
321 * 2nd phase commit or rollback record to the WAL, the transaction should
322 * not be considered as prepared anymore. In those cases, just remove the
323 * entry from shared memory.
324 *
325 * Otherwise, the entry must be left in place so that the transaction can
326 * be finished later, so just unlock it.
327 *
328 * If we abort during prepare, after having written the WAL record, we
329 * might not have transferred all locks and other state to the prepared
330 * transaction yet. Likewise, if we abort during commit or rollback,
331 * after having written the WAL record, we might not have released all the
332 * resources held by the transaction yet. In those cases, the in-memory
333 * state can be wrong, but it's too late to back out.
334 */
336 if (!MyLockedGxact->valid)
338 else
341
343}
344
345/*
346 * This is called after we have finished transferring state to the prepared
347 * PGPROC entry.
348 */
349void
358
359
360/*
361 * MarkAsPreparing
362 * Reserve the GID for the given transaction.
363 */
365MarkAsPreparing(FullTransactionId fxid, const char *gid,
366 TimestampTz prepared_at, Oid owner, Oid databaseid)
367{
369 int i;
370
371 if (strlen(gid) >= GIDSIZE)
374 errmsg("transaction identifier \"%s\" is too long",
375 gid)));
376
377 /* fail immediately if feature is disabled */
378 if (max_prepared_xacts == 0)
381 errmsg("prepared transactions are disabled"),
382 errhint("Set \"max_prepared_transactions\" to a nonzero value.")));
383
384 /* on first call, register the exit hook */
386 {
389 }
390
392
393 /* Check for conflicting GID */
394 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
395 {
397 if (strcmp(gxact->gid, gid) == 0)
398 {
401 errmsg("transaction identifier \"%s\" is already in use",
402 gid)));
403 }
404 }
405
406 /* Get a free gxact from the freelist */
410 errmsg("maximum number of prepared transactions reached"),
411 errhint("Increase \"max_prepared_transactions\" (currently %d).",
415
416 MarkAsPreparingGuts(gxact, fxid, gid, prepared_at, owner, databaseid);
417
418 gxact->ondisk = false;
419
420 /* And insert it into the active array */
423
425
426 return gxact;
427}
428
429/*
430 * MarkAsPreparingGuts
431 *
432 * This uses a gxact struct and puts it into the active array.
433 * NOTE: this is also used when reloading a gxact after a crash; so avoid
434 * assuming that we can use very much backend context.
435 *
436 * Note: This function should be called with appropriate locks held.
437 */
438static void
440 const char *gid, TimestampTz prepared_at, Oid owner,
442{
443 PGPROC *proc;
444 int i;
446
448
449 Assert(gxact != NULL);
450 proc = GetPGProcByNumber(gxact->pgprocno);
451
452 /* Initialize the PGPROC entry */
453 MemSet(proc, 0, sizeof(PGPROC));
456 {
457 /* clone VXID, for TwoPhaseGetXidByVirtualXID() to find */
458 proc->vxid.lxid = MyProc->vxid.lxid;
460 }
461 else
462 {
464 /* GetLockConflicts() uses this to specify a wait on the XID */
465 proc->vxid.lxid = xid;
467 }
468 proc->xid = xid;
470 proc->delayChkptFlags = 0;
471 proc->statusFlags = 0;
472 proc->pid = 0;
473 proc->databaseId = databaseid;
474 proc->roleId = owner;
476 proc->backendType = B_INVALID;
478 proc->lwWaitMode = 0;
479 proc->waitLock = NULL;
481 proc->waitProcLock = NULL;
482 pg_atomic_init_u64(&proc->waitStart, 0);
483 for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
484 dlist_init(&proc->myProcLocks[i]);
485 /* subxid data must be filled later by GXactLoadSubxactData */
486 proc->subxidStatus.overflowed = false;
487 proc->subxidStatus.count = 0;
488
489 gxact->prepared_at = prepared_at;
490 gxact->fxid = fxid;
491 gxact->owner = owner;
492 gxact->locking_backend = MyProcNumber;
493 gxact->valid = false;
494 gxact->inredo = false;
495 strcpy(gxact->gid, gid);
496
497 /*
498 * Remember that we have this GlobalTransaction entry locked for us. If we
499 * abort after this, we must release it.
500 */
502}
503
504/*
505 * GXactLoadSubxactData
506 *
507 * If the transaction being persisted had any subtransactions, this must
508 * be called before MarkAsPrepared() to load information into the dummy
509 * PGPROC.
510 */
511static void
513 TransactionId *children)
514{
515 PGPROC *proc = GetPGProcByNumber(gxact->pgprocno);
516
517 /* We need no extra lock since the GXACT isn't valid yet */
518 if (nsubxacts > PGPROC_MAX_CACHED_SUBXIDS)
519 {
520 proc->subxidStatus.overflowed = true;
521 nsubxacts = PGPROC_MAX_CACHED_SUBXIDS;
522 }
523 if (nsubxacts > 0)
524 {
525 memcpy(proc->subxids.xids, children,
526 nsubxacts * sizeof(TransactionId));
527 proc->subxidStatus.count = nsubxacts;
528 }
529}
530
531/*
532 * MarkAsPrepared
533 * Mark the GXACT as fully valid, and enter it into the global ProcArray.
534 *
535 * lock_held indicates whether caller already holds TwoPhaseStateLock.
536 */
537static void
539{
540 /* Lock here may be overkill, but I'm not convinced of that ... */
541 if (!lock_held)
543 Assert(!gxact->valid);
544 gxact->valid = true;
545 if (!lock_held)
547
548 /*
549 * Put it into the global ProcArray so TransactionIdIsInProgress considers
550 * the XID as still running.
551 */
553}
554
555/*
556 * LockGXact
557 * Locate the prepared transaction and mark it busy for COMMIT or ROLLBACK.
558 */
560LockGXact(const char *gid, Oid user)
561{
562 int i;
563
564 /* on first call, register the exit hook */
566 {
569 }
570
572
573 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
574 {
576 PGPROC *proc = GetPGProcByNumber(gxact->pgprocno);
577
578 /* Ignore not-yet-valid GIDs */
579 if (!gxact->valid)
580 continue;
581 if (strcmp(gxact->gid, gid) != 0)
582 continue;
583
584 /* Found it, but has someone else got it locked? */
585 if (gxact->locking_backend != INVALID_PROC_NUMBER)
588 errmsg("prepared transaction with identifier \"%s\" is busy",
589 gid)));
590
591 if (user != gxact->owner && !superuser_arg(user))
594 errmsg("permission denied to finish prepared transaction"),
595 errhint("Must be superuser or the user that prepared the transaction.")));
596
597 /*
598 * Note: it probably would be possible to allow committing from
599 * another database; but at the moment NOTIFY is known not to work and
600 * there may be some other issues as well. Hence disallow until
601 * someone gets motivated to make it work.
602 */
603 if (MyDatabaseId != proc->databaseId)
606 errmsg("prepared transaction belongs to another database"),
607 errhint("Connect to the database where the transaction was prepared to finish it.")));
608
609 /* OK for me to lock it */
610 gxact->locking_backend = MyProcNumber;
612
614
615 return gxact;
616 }
617
619
622 errmsg("prepared transaction with identifier \"%s\" does not exist",
623 gid)));
624
625 /* NOTREACHED */
626 return NULL;
627}
628
629/*
630 * RemoveGXact
631 * Remove the prepared transaction from the shared memory array.
632 *
633 * NB: caller should have already removed it from ProcArray
634 */
635static void
637{
638 int i;
639
641
642 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
643 {
645 {
646 /* remove from the active array */
649
650 /* and put it back in the freelist */
653
654 return;
655 }
656 }
657
658 elog(ERROR, "failed to find %p in GlobalTransaction array", gxact);
659}
660
661/*
662 * Returns an array of all prepared transactions for the user-level
663 * function pg_prepared_xact.
664 *
665 * The returned array and all its elements are copies of internal data
666 * structures, to minimize the time we need to hold the TwoPhaseStateLock.
667 *
668 * WARNING -- we return even those transactions that are not fully prepared
669 * yet. The caller should filter them out if they are not wanted.
670 *
671 * The returned array is palloc'd.
672 */
673static int
675{
676 GlobalTransaction array;
677 int num;
678 int i;
679
681
682 if (TwoPhaseState->numPrepXacts == 0)
683 {
685
686 *gxacts = NULL;
687 return 0;
688 }
689
692 *gxacts = array;
693 for (i = 0; i < num; i++)
694 memcpy(array + i, TwoPhaseState->prepXacts[i],
695 sizeof(GlobalTransactionData));
696
698
699 return num;
700}
701
702
703/* Working status for pg_prepared_xact */
710
711/*
712 * pg_prepared_xact
713 * Produce a view with one row per prepared transaction.
714 *
715 * This function is here so we don't have to export the
716 * GlobalTransactionData struct definition.
717 */
718Datum
720{
722 Working_State *status;
723
724 if (SRF_IS_FIRSTCALL())
725 {
726 TupleDesc tupdesc;
727 MemoryContext oldcontext;
728
729 /* create a function context for cross-call persistence */
731
732 /*
733 * Switch to memory context appropriate for multiple function calls
734 */
735 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
736
737 /* build tupdesc for result tuples */
738 /* this had better match pg_prepared_xacts view in system_views.sql */
739 tupdesc = CreateTemplateTupleDesc(5);
740 TupleDescInitEntry(tupdesc, (AttrNumber) 1, "transaction",
741 XIDOID, -1, 0);
742 TupleDescInitEntry(tupdesc, (AttrNumber) 2, "gid",
743 TEXTOID, -1, 0);
744 TupleDescInitEntry(tupdesc, (AttrNumber) 3, "prepared",
745 TIMESTAMPTZOID, -1, 0);
746 TupleDescInitEntry(tupdesc, (AttrNumber) 4, "ownerid",
747 OIDOID, -1, 0);
748 TupleDescInitEntry(tupdesc, (AttrNumber) 5, "dbid",
749 OIDOID, -1, 0);
750
751 TupleDescFinalize(tupdesc);
752 funcctx->tuple_desc = BlessTupleDesc(tupdesc);
753
754 /*
755 * Collect all the 2PC status information that we will format and send
756 * out as a result set.
757 */
759 funcctx->user_fctx = status;
760
761 status->ngxacts = GetPreparedTransactionList(&status->array);
762 status->currIdx = 0;
763
764 MemoryContextSwitchTo(oldcontext);
765 }
766
768 status = (Working_State *) funcctx->user_fctx;
769
770 while (status->array != NULL && status->currIdx < status->ngxacts)
771 {
772 GlobalTransaction gxact = &status->array[status->currIdx++];
773 PGPROC *proc = GetPGProcByNumber(gxact->pgprocno);
774 Datum values[5] = {0};
775 bool nulls[5] = {0};
776 HeapTuple tuple;
778
779 if (!gxact->valid)
780 continue;
781
782 /*
783 * Form tuple with appropriate data.
784 */
785
786 values[0] = TransactionIdGetDatum(proc->xid);
788 values[2] = TimestampTzGetDatum(gxact->prepared_at);
789 values[3] = ObjectIdGetDatum(gxact->owner);
791
792 tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
793 result = HeapTupleGetDatum(tuple);
795 }
796
798}
799
800/*
801 * TwoPhaseGetGXact
802 * Get the GlobalTransaction struct for a prepared transaction
803 * specified by XID
804 *
805 * If lock_held is set to true, TwoPhaseStateLock will not be taken, so the
806 * caller had better hold it.
807 */
810{
812 int i;
813
816
818
819 /*
820 * During a recovery, COMMIT PREPARED, or ABORT PREPARED, we'll be called
821 * repeatedly for the same XID. We can save work with a simple cache.
822 */
824 return cached_gxact;
825
826 if (!lock_held)
828
829 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
830 {
832
833 if (FullTransactionIdEquals(gxact->fxid, fxid))
834 {
835 result = gxact;
836 break;
837 }
838 }
839
840 if (!lock_held)
842
843 if (result == NULL) /* should not happen */
844 elog(ERROR, "failed to find GlobalTransaction for xid %u",
846
847 cached_fxid = fxid;
849
850 return result;
851}
852
853/*
854 * TwoPhaseGetXidByVirtualXID
855 * Lookup VXID among xacts prepared since last startup.
856 *
857 * (This won't find recovered xacts.) If more than one matches, return any
858 * and set "have_more" to true. To witness multiple matches, a single
859 * proc number must consume 2^32 LXIDs, with no intervening database restart.
860 */
863 bool *have_more)
864{
865 int i;
867
870
871 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
872 {
874 PGPROC *proc;
876
877 if (!gxact->valid)
878 continue;
879 proc = GetPGProcByNumber(gxact->pgprocno);
882 {
883 /*
884 * Startup process sets proc->vxid.procNumber to
885 * INVALID_PROC_NUMBER.
886 */
887 Assert(!gxact->inredo);
888
890 {
891 *have_more = true;
892 break;
893 }
895 }
896 }
897
899
900 return result;
901}
902
903/*
904 * TwoPhaseGetDummyProcNumber
905 * Get the dummy proc number for prepared transaction
906 *
907 * Dummy proc numbers are similar to proc numbers of real backends. They
908 * start at FIRST_PREPARED_XACT_PROC_NUMBER, and are unique across all
909 * currently active real backends and prepared transactions. If lock_held is
910 * set to true, TwoPhaseStateLock will not be taken, so the caller had better
911 * hold it.
912 */
920
921/*
922 * TwoPhaseGetDummyProc
923 * Get the PGPROC that represents a prepared transaction
924 *
925 * If lock_held is set to true, TwoPhaseStateLock will not be taken, so the
926 * caller had better hold it.
927 */
928PGPROC *
935
936/************************************************************************/
937/* State file support */
938/************************************************************************/
939
940/*
941 * Compute the FullTransactionId for the given TransactionId.
942 *
943 * This is safe if the xid has not yet reached COMMIT PREPARED or ROLLBACK
944 * PREPARED. After those commands, concurrent vac_truncate_clog() may make
945 * the xid cease to qualify as allowable. XXX Not all callers limit their
946 * calls accordingly.
947 */
948static inline FullTransactionId
954
955static inline int
957{
958 return snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%08X%08X",
961}
962
963/*
964 * 2PC state file format:
965 *
966 * 1. TwoPhaseFileHeader
967 * 2. TransactionId[] (subtransactions)
968 * 3. RelFileLocator[] (files to be deleted at commit)
969 * 4. RelFileLocator[] (files to be deleted at abort)
970 * 5. SharedInvalidationMessage[] (inval messages to be sent at commit)
971 * 6. TwoPhaseRecordOnDisk
972 * 7. ...
973 * 8. TwoPhaseRecordOnDisk (end sentinel, rmid == TWOPHASE_RM_END_ID)
974 * 9. checksum (CRC-32C)
975 *
976 * Each segment except the final checksum is MAXALIGN'd.
977 */
978
979/*
980 * Header for a 2PC state file
981 */
982#define TWOPHASE_MAGIC 0x57F94534 /* format identifier */
983
985
986/*
987 * Header for each record in a state file
988 *
989 * NOTE: len counts only the rmgr data, not the TwoPhaseRecordOnDisk header.
990 * The rmgr data will be stored starting on a MAXALIGN boundary.
991 */
993{
994 uint32 len; /* length of rmgr data */
995 TwoPhaseRmgrId rmid; /* resource manager for this record */
996 uint16 info; /* flag bits for use by rmgr */
998
999/*
1000 * During prepare, the state file is assembled in memory before writing it
1001 * to WAL and the actual state file. We use a chain of StateFileChunk blocks
1002 * for that.
1003 */
1010
1011static struct xllist
1012{
1013 StateFileChunk *head; /* first data block in the chain */
1014 StateFileChunk *tail; /* last block in chain */
1016 uint32 bytes_free; /* free bytes left in tail block */
1017 uint32 total_len; /* total data bytes in chain */
1019
1020
1021/*
1022 * Append a block of data to the records data structure.
1023 *
1024 * NB: each block is padded to a MAXALIGN multiple. This must be
1025 * accounted for when the file is later read!
1026 *
1027 * The data is copied, so the caller is free to modify it afterwards.
1028 */
1029static void
1031{
1033
1035 {
1038 records.tail->len = 0;
1039 records.tail->next = NULL;
1041
1042 records.bytes_free = Max(padlen, 512);
1044 }
1045
1047 records.tail->len += padlen;
1050}
1051
1052/*
1053 * Start preparing a state file.
1054 *
1055 * Initializes data structure and inserts the 2PC file header record.
1056 */
1057void
1059{
1060 PGPROC *proc = GetPGProcByNumber(gxact->pgprocno);
1063 TransactionId *children;
1066 xl_xact_stats_item *abortstats = NULL;
1069
1070 /* Initialize linked list */
1072 records.head->len = 0;
1073 records.head->next = NULL;
1074
1075 records.bytes_free = Max(sizeof(TwoPhaseFileHeader), 512);
1077
1079 records.num_chunks = 1;
1080
1081 records.total_len = 0;
1082
1083 /* Create header */
1084 hdr.magic = TWOPHASE_MAGIC;
1085 hdr.total_len = 0; /* EndPrepare will fill this in */
1086 hdr.xid = xid;
1087 hdr.database = proc->databaseId;
1088 hdr.prepared_at = gxact->prepared_at;
1089 hdr.owner = gxact->owner;
1090 hdr.nsubxacts = xactGetCommittedChildren(&children);
1093 hdr.ncommitstats =
1095 hdr.nabortstats =
1096 pgstat_get_transactional_drops(false, &abortstats);
1098 &hdr.initfileinval);
1099 hdr.gidlen = strlen(gxact->gid) + 1; /* Include '\0' */
1100 /* EndPrepare will fill the origin data, if necessary */
1102 hdr.origin_timestamp = 0;
1103
1104 save_state_data(&hdr, sizeof(TwoPhaseFileHeader));
1105 save_state_data(gxact->gid, hdr.gidlen);
1106
1107 /*
1108 * Add the additional info about subxacts, deletable files and cache
1109 * invalidation messages.
1110 */
1111 if (hdr.nsubxacts > 0)
1112 {
1113 save_state_data(children, hdr.nsubxacts * sizeof(TransactionId));
1114 /* While we have the child-xact data, stuff it in the gxact too */
1115 GXactLoadSubxactData(gxact, hdr.nsubxacts, children);
1116 }
1117 if (hdr.ncommitrels > 0)
1118 {
1121 }
1122 if (hdr.nabortrels > 0)
1123 {
1126 }
1127 if (hdr.ncommitstats > 0)
1128 {
1130 hdr.ncommitstats * sizeof(xl_xact_stats_item));
1132 }
1133 if (hdr.nabortstats > 0)
1134 {
1135 save_state_data(abortstats,
1136 hdr.nabortstats * sizeof(xl_xact_stats_item));
1137 pfree(abortstats);
1138 }
1139 if (hdr.ninvalmsgs > 0)
1140 {
1144 }
1145}
1146
1147/*
1148 * Finish preparing state data and writing it to WAL.
1149 */
1150void
1152{
1153 TwoPhaseFileHeader *hdr;
1154 StateFileChunk *record;
1155 bool replorigin;
1156
1157 /* Add the end sentinel to the list of 2PC records */
1159 NULL, 0);
1160
1161 /* Go back and fill in total_len in the file header record */
1163 Assert(hdr->magic == TWOPHASE_MAGIC);
1164 hdr->total_len = records.total_len + sizeof(pg_crc32c);
1165
1168
1169 if (replorigin)
1170 {
1173 }
1174
1175 /*
1176 * If the data size exceeds MaxAllocSize, we won't be able to read it in
1177 * ReadTwoPhaseFile. Check for that now, rather than fail in the case
1178 * where we write data to file and then re-read at commit time.
1179 */
1180 if (hdr->total_len > MaxAllocSize)
1181 ereport(ERROR,
1183 errmsg("two-phase state file maximum length exceeded")));
1184
1185 /*
1186 * Now writing 2PC state data to WAL. We let the WAL's CRC protection
1187 * cover us, so no need to calculate a separate CRC.
1188 *
1189 * We have to set DELAY_CHKPT_START here, too; otherwise a checkpoint
1190 * starting immediately after the WAL record is inserted could complete
1191 * without fsync'ing our state file. (This is essentially the same kind
1192 * of race condition as the COMMIT-to-clog-write case that
1193 * RecordTransactionCommit uses DELAY_CHKPT_IN_COMMIT for; see notes
1194 * there.) Note that DELAY_CHKPT_IN_COMMIT is used to find transactions in
1195 * the critical commit section. We need to know about such transactions
1196 * for conflict detection in logical replication. See
1197 * GetOldestActiveTransactionId(true, false) and its use.
1198 *
1199 * We save the PREPARE record's location in the gxact for later use by
1200 * CheckPointTwoPhase.
1201 */
1203
1205
1208
1210 for (record = records.head; record != NULL; record = record->next)
1211 XLogRegisterData(record->data, record->len);
1212
1214
1215 gxact->prepare_end_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE);
1216
1217 if (replorigin)
1218 {
1219 /* Move LSNs forward for this replication origin */
1221 gxact->prepare_end_lsn);
1222 }
1223
1224 XLogFlush(gxact->prepare_end_lsn);
1225
1226 /* If we crash now, we have prepared: WAL replay will fix things */
1227
1228 /* Store record's start location to read that later on Commit */
1229 gxact->prepare_start_lsn = ProcLastRecPtr;
1230
1231 /*
1232 * Mark the prepared transaction as valid. As soon as xact.c marks MyProc
1233 * as not running our XID (which it will do immediately after this
1234 * function returns), others can commit/rollback the xact.
1235 *
1236 * NB: a side effect of this is to make a dummy ProcArray entry for the
1237 * prepared XID. This must happen before we clear the XID from MyProc /
1238 * ProcGlobal->xids[], else there is a window where the XID is not running
1239 * according to TransactionIdIsInProgress, and onlookers would be entitled
1240 * to assume the xact crashed. Instead we have a window where the same
1241 * XID appears twice in ProcArray, which is OK.
1242 */
1243 MarkAsPrepared(gxact, false);
1244
1245 /*
1246 * Now we can mark ourselves as out of the commit critical section: a
1247 * checkpoint starting after this will certainly see the gxact as a
1248 * candidate for fsyncing.
1249 */
1251
1252 /*
1253 * Remember that we have this GlobalTransaction entry locked for us. If
1254 * we crash after this point, it's too late to abort, but we must unlock
1255 * it so that the prepared transaction can be committed or rolled back.
1256 */
1258
1260
1261 /*
1262 * Wait for synchronous replication, if required.
1263 *
1264 * Note that at this stage we have marked the prepare, but still show as
1265 * running in the procarray (twice!) and continue to hold locks.
1266 */
1267 SyncRepWaitForLSN(gxact->prepare_end_lsn, false);
1268
1270 records.num_chunks = 0;
1271}
1272
1273/*
1274 * Register a 2PC record to be written to state file.
1275 */
1276void
1278 const void *data, uint32 len)
1279{
1280 TwoPhaseRecordOnDisk record;
1281
1282 record.rmid = rmid;
1283 record.info = info;
1284 record.len = len;
1285 save_state_data(&record, sizeof(TwoPhaseRecordOnDisk));
1286 if (len > 0)
1288}
1289
1290
1291/*
1292 * Read and validate the state file for xid.
1293 *
1294 * If it looks OK (has a valid magic number and CRC), return the palloc'd
1295 * contents of the file, issuing an error when finding corrupted data. If
1296 * missing_ok is true, which indicates that missing files can be safely
1297 * ignored, return NULL when the file does not exist (ENOENT) instead of
1298 * raising an error. This state can be reached when doing recovery
 * after discarding two-phase files from frozen epochs.
 *
 * Every other failure (open, stat, read, close, bad size, bad magic number,
 * size mismatch, CRC mismatch) is reported with ereport(ERROR).
1299 */
1300static char *
1302{
1303 char path[MAXPGPATH];
1304 char *buf;
1305 TwoPhaseFileHeader *hdr;
1306 int fd;
1307 struct stat stat;
1310 file_crc;
1311 int r;
1312
1313 TwoPhaseFilePath(path, fxid);
1314
1316 if (fd < 0)
1317 {
1318 if (missing_ok && errno == ENOENT)
1319 return NULL;
1320
1321 ereport(ERROR,
1323 errmsg("could not open file \"%s\": %m", path)));
1324 }
1325
1326 /*
1327 * Check file length. We can determine a lower bound pretty easily. We
1328 * set an upper bound to avoid palloc() failure on a corrupt file, though
1329 * we can't guarantee that we won't get an out of memory error anyway,
1330 * even on a valid file.
1331 */
1332 if (fstat(fd, &stat))
1333 ereport(ERROR,
1335 errmsg("could not stat file \"%s\": %m", path)));
1336
1337 if (stat.st_size < (MAXALIGN(sizeof(TwoPhaseFileHeader)) +
1339 sizeof(pg_crc32c)) ||
1341 ereport(ERROR,
1343 errmsg_plural("incorrect size of file \"%s\": %lld byte",
1344 "incorrect size of file \"%s\": %lld bytes",
1345 (long long int) stat.st_size, path,
1346 (long long int) stat.st_size)));
1347
 /* The CRC occupies the last sizeof(pg_crc32c) bytes of the file. */
1348 crc_offset = stat.st_size - sizeof(pg_crc32c);
1350 ereport(ERROR,
1352 errmsg("incorrect alignment of CRC offset for file \"%s\"",
1353 path)));
1354
1355 /*
1356 * OK, slurp in the file.
1357 */
1358 buf = (char *) palloc(stat.st_size);
1359
1361 r = read(fd, buf, stat.st_size);
1362 if (r != stat.st_size) /* read error (r < 0) or short read */
1363 {
1364 if (r < 0)
1365 ereport(ERROR,
1367 errmsg("could not read file \"%s\": %m", path)));
1368 else
1369 ereport(ERROR,
1370 (errmsg("could not read file \"%s\": read %d of %lld",
1371 path, r, (long long int) stat.st_size)));
1372 }
1373
1375
1376 if (CloseTransientFile(fd) != 0)
1377 ereport(ERROR,
1379 errmsg("could not close file \"%s\": %m", path)));
1380
 /* Validate the header: magic number first, then the recorded length. */
1381 hdr = (TwoPhaseFileHeader *) buf;
1382 if (hdr->magic != TWOPHASE_MAGIC)
1383 ereport(ERROR,
1385 errmsg("invalid magic number stored in file \"%s\"",
1386 path)));
1387
 /* The length recorded in the header must match the actual file size. */
1388 if (hdr->total_len != stat.st_size)
1389 ereport(ERROR,
1391 errmsg("invalid size stored in file \"%s\"",
1392 path)));
1393
1397
 /* Fetch the CRC stored at the end of the file for comparison. */
1398 file_crc = *((pg_crc32c *) (buf + crc_offset));
1399
1401 ereport(ERROR,
1403 errmsg("calculated CRC checksum does not match value stored in file \"%s\"",
1404 path)));
1405
1406 return buf;
1407}
1408
1409
1410/*
1411 * Reads 2PC data from xlog. During checkpoint this data will be moved to
1412 * twophase files and ReadTwoPhaseFile should be used instead.
1413 *
1414 * Note that this function can access WAL during normal operation,
1415 * similarly to the way WALSender or Logical Decoding would do.
 *
 * An ERROR is raised if the WAL reader cannot be allocated, if the record
 * at "lsn" cannot be read, or if the expected two-phase state data is not
 * present there.
 *
 * NOTE(review): the statements that fill the output arguments are not
 * visible in this listing; "len" is optional, as shown by the NULL check
 * below and by callers passing NULL for it.
1416 */
1417static void
1419{
1420 XLogRecord *record;
1422 char *errormsg;
1423
1425 XL_ROUTINE(.page_read = &read_local_xlog_page,
1426 .segment_open = &wal_segment_open,
1427 .segment_close = &wal_segment_close),
1428 NULL);
1429 if (!xlogreader)
1430 ereport(ERROR,
1432 errmsg("out of memory"),
1433 errdetail("Failed while allocating a WAL reading processor.")));
1434
1436 record = XLogReadRecord(xlogreader, &errormsg);
1437
1438 if (record == NULL)
1439 {
 /* Include the reader's message in the error when it supplied one. */
1440 if (errormsg)
1441 ereport(ERROR,
1443 errmsg("could not read two-phase state from WAL at %X/%08X: %s",
1444 LSN_FORMAT_ARGS(lsn), errormsg)));
1445 else
1446 ereport(ERROR,
1448 errmsg("could not read two-phase state from WAL at %X/%08X",
1449 LSN_FORMAT_ARGS(lsn))));
1450 }
1451
1454 ereport(ERROR,
1456 errmsg("expected two-phase state data is not present in WAL at %X/%08X",
1457 LSN_FORMAT_ARGS(lsn))));
1458
1459 if (len != NULL)
1461
1464
1466}
1467
1468
1469/*
1470 * Confirms an xid is prepared, during recovery
 *
 * Returns false when prepared transactions are disabled
 * (max_prepared_xacts <= 0) or when no state file exists for the xid.
 * Otherwise returns whether the xid stored in the state file header
 * matches the given xid.
1471 */
1472bool
1474{
1475 char *buf;
1476 TwoPhaseFileHeader *hdr;
1477 bool result;
1478 FullTransactionId fxid;
1479
1481
1482 if (max_prepared_xacts <= 0)
1483 return false; /* nothing to do */
1484
1485 /* Read and validate file; a missing file is not an error here */
1486 fxid = AdjustToFullTransactionId(xid);
1487 buf = ReadTwoPhaseFile(fxid, true);
1488 if (buf == NULL)
1489 return false;
1490
1491 /* Check header also */
1492 hdr = (TwoPhaseFileHeader *) buf;
1493 result = TransactionIdEquals(hdr->xid, xid);
1494 pfree(buf);
1495
1496 return result;
1497}
1498
1499/*
1500 * FinishPreparedTransaction: execute COMMIT PREPARED or ROLLBACK PREPARED
 *
 * The prepared transaction is identified by "gid" and locked on behalf of
 * the current user (see the LockGXact() call below). "isCommit" selects
 * the commit path or the rollback path at each step.
1501 */
1502void
1504{
1506 PGPROC *proc;
1507 FullTransactionId fxid;
1508 TransactionId xid;
1509 bool ondisk;
1510 char *buf;
1511 char *bufptr;
1512 TwoPhaseFileHeader *hdr;
1514 TransactionId *children;
1518 int ndelrels;
1520 xl_xact_stats_item *abortstats;
1522
1523 /*
1524 * Validate the GID, and lock the GXACT to ensure that two backends do not
1525 * try to commit the same GID at once.
1526 */
1527 gxact = LockGXact(gid, GetUserId());
1528 proc = GetPGProcByNumber(gxact->pgprocno);
1529 fxid = gxact->fxid;
1530 xid = XidFromFullTransactionId(fxid);
1531
1532 /*
1533 * Read and validate 2PC state data. State data will typically be stored
1534 * in WAL files if the LSN is after the last checkpoint record, or moved
1535 * to disk if for some reason they have lived for a long time.
1536 */
1537 if (gxact->ondisk)
1538 buf = ReadTwoPhaseFile(fxid, false);
1539 else
1540 XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, NULL);
1541
1542
1543 /*
1544 * Disassemble the header area. The arrays follow the MAXALIGN'd header
 * and GID in the order walked below, each padded to a MAXALIGN boundary.
1545 */
1546 hdr = (TwoPhaseFileHeader *) buf;
1547 Assert(TransactionIdEquals(hdr->xid, xid));
1548 bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
1549 bufptr += MAXALIGN(hdr->gidlen);
1550 children = (TransactionId *) bufptr;
1551 bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
1552 commitrels = (RelFileLocator *) bufptr;
1553 bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileLocator));
1554 abortrels = (RelFileLocator *) bufptr;
1555 bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileLocator));
1556 commitstats = (xl_xact_stats_item *) bufptr;
1557 bufptr += MAXALIGN(hdr->ncommitstats * sizeof(xl_xact_stats_item));
1558 abortstats = (xl_xact_stats_item *) bufptr;
1559 bufptr += MAXALIGN(hdr->nabortstats * sizeof(xl_xact_stats_item));
1561 bufptr += MAXALIGN(hdr->ninvalmsgs * sizeof(SharedInvalidationMessage));
1562
1563 /* compute latestXid among all children */
1564 latestXid = TransactionIdLatest(xid, hdr->nsubxacts, children);
1565
1566 /* Prevent cancel/die interrupt while cleaning up */
1568
1569 /*
1570 * The order of operations here is critical: make the XLOG entry for
1571 * commit or abort, then mark the transaction committed or aborted in
1572 * pg_xact, then remove its PGPROC from the global ProcArray (which means
1573 * TransactionIdIsInProgress will stop saying the prepared xact is in
1574 * progress), then run the post-commit or post-abort callbacks. The
1575 * callbacks will release the locks the transaction held.
1576 */
1577 if (isCommit)
1579 hdr->nsubxacts, children,
1580 hdr->ncommitrels, commitrels,
1581 hdr->ncommitstats,
1583 hdr->ninvalmsgs, invalmsgs,
1584 hdr->initfileinval, gid);
1585 else
1587 hdr->nsubxacts, children,
1588 hdr->nabortrels, abortrels,
1589 hdr->nabortstats,
1590 abortstats,
1591 gid);
1592
1594
1595 /*
1596 * In case we fail while running the callbacks, mark the gxact invalid so
1597 * no one else will try to commit/rollback, and so it will be recycled if
1598 * we fail after this point. It is still locked by our backend so it
1599 * won't go away yet.
1600 *
1601 * (We assume it's safe to do this without taking TwoPhaseStateLock.)
1602 */
1603 gxact->valid = false;
1604
1605 /*
1606 * We have to remove any files that were supposed to be dropped. For
1607 * consistency with the regular xact.c code paths, must do this before
1608 * releasing locks, so do it before running the callbacks.
1609 *
1610 * NB: this code knows that we couldn't be dropping any temp rels ...
1611 */
1612 if (isCommit)
1613 {
1615 ndelrels = hdr->ncommitrels;
1616 }
1617 else
1618 {
1620 ndelrels = hdr->nabortrels;
1621 }
1622
1623 /* Make sure files supposed to be dropped are dropped */
1625
1626 if (isCommit)
1628 else
1629 pgstat_execute_transactional_drops(hdr->nabortstats, abortstats, false);
1630
1631 /*
1632 * Handle cache invalidation messages.
1633 *
1634 * Relcache init file invalidation requires processing both before and
1635 * after we send the SI messages, only when committing. See
1636 * AtEOXact_Inval().
1637 */
1638 if (isCommit)
1639 {
1640 if (hdr->initfileinval)
1643 if (hdr->initfileinval)
1645 }
1646
1647 /*
1648 * Acquire the two-phase lock. We want to work on the two-phase callbacks
1649 * while holding it to avoid potential conflicts with other transactions
1650 * attempting to use the same GID, so the lock is released once the shared
1651 * memory state is cleared.
1652 */
1654
1655 /* And now do the callbacks */
1656 if (isCommit)
1658 else
1660
1662
1663 /*
1664 * Read this value while holding the two-phase lock, as the on-disk 2PC
1665 * file is physically removed after the lock is released.
1666 */
1667 ondisk = gxact->ondisk;
1668
1669 /* Clear shared memory state */
1671
1672 /*
1673 * Release the lock as all callbacks are called and shared memory cleanup
1674 * is done.
1675 */
1677
1678 /* Count the prepared xact as committed or aborted */
1679 AtEOXact_PgStat(isCommit, false);
1680
1681 /*
1682 * And now we can clean up any files we may have left.
1683 */
1684 if (ondisk)
1685 RemoveTwoPhaseFile(fxid, true);
1686
1688
1690
1691 pfree(buf);
1692}
1693
1694/*
1695 * Scan 2PC state data in memory and call the indicated callbacks for each 2PC record.
 *
 * bufptr points at the first TwoPhaseRecordOnDisk header. Each header is
 * followed by record->len bytes of payload, and both header and payload are
 * padded to a MAXALIGN boundary. The scan stops at the record whose rmid is
 * TWOPHASE_RM_END_ID. Resource managers with a NULL entry in callbacks[]
 * are skipped.
1696 */
1697static void
1699 const TwoPhaseCallback callbacks[])
1700{
1701 for (;;)
1702 {
1703 TwoPhaseRecordOnDisk *record = (TwoPhaseRecordOnDisk *) bufptr;
1704
1705 Assert(record->rmid <= TWOPHASE_RM_MAX_ID);
1706 if (record->rmid == TWOPHASE_RM_END_ID)
1707 break;
1708
 /* step over the header; bufptr now points at the payload */
1709 bufptr += MAXALIGN(sizeof(TwoPhaseRecordOnDisk));
1710
1711 if (callbacks[record->rmid] != NULL)
1712 callbacks[record->rmid] (fxid, record->info, bufptr, record->len);
1713
 /* step over the payload to reach the next record header */
1714 bufptr += MAXALIGN(record->len);
1715 }
1716}
1717
1718/*
1719 * Remove the 2PC file.
1720 *
1721 * If giveWarning is false, do not complain about file-not-present;
1722 * this is an expected case during WAL replay.
1723 *
1724 * This routine is used at early stages of recovery where future and
1725 * past orphaned files are checked, hence the FullTransactionId to build
1726 * a complete file name fit for the removal.
1727 */
1728static void
1730{
1731 char path[MAXPGPATH];
1732
1733 TwoPhaseFilePath(path, fxid);
1734 if (unlink(path))
 /* report any failure other than ENOENT; report ENOENT only if asked to */
1735 if (errno != ENOENT || giveWarning)
1738 errmsg("could not remove file \"%s\": %m", path)));
1739}
1740
1741/*
1742 * Recreates a state file. This is used in WAL replay and during
1743 * checkpoint creation.
1744 *
1745 * Note: content and len don't include CRC. The CRC is computed here over
 * content and written to the file right after it. The file is fsync'd
 * before being closed, and any failure is reported with ereport(ERROR).
1746 */
1747static void
1749{
1750 char path[MAXPGPATH];
1752 int fd;
1753
1754 /* Recompute CRC */
1756 COMP_CRC32C(statefile_crc, content, len);
1758
1759 TwoPhaseFilePath(path, fxid);
1760
1761 fd = OpenTransientFile(path,
1763 if (fd < 0)
1764 ereport(ERROR,
1766 errmsg("could not recreate file \"%s\": %m", path)));
1767
1768 /* Write content and CRC */
 /* errno is cleared first so a short write that leaves it unset can be detected */
1769 errno = 0;
1771 if (write(fd, content, len) != len)
1772 {
1773 /* if write didn't set errno, assume problem is no disk space */
1774 if (errno == 0)
1775 errno = ENOSPC;
1776 ereport(ERROR,
1778 errmsg("could not write file \"%s\": %m", path)));
1779 }
1780 if (write(fd, &statefile_crc, sizeof(pg_crc32c)) != sizeof(pg_crc32c))
1781 {
1782 /* if write didn't set errno, assume problem is no disk space */
1783 if (errno == 0)
1784 errno = ENOSPC;
1785 ereport(ERROR,
1787 errmsg("could not write file \"%s\": %m", path)));
1788 }
1790
1791 /*
1792 * We must fsync the file because the end-of-replay checkpoint will not do
1793 * so, there being no GXACT in shared memory yet to tell it to.
1794 */
1796 if (pg_fsync(fd) != 0)
1797 ereport(ERROR,
1799 errmsg("could not fsync file \"%s\": %m", path)));
1801
1802 if (CloseTransientFile(fd) != 0)
1803 ereport(ERROR,
1805 errmsg("could not close file \"%s\": %m", path)));
1806}
1807
1808/*
1809 * CheckPointTwoPhase -- handle 2PC component of checkpointing.
1810 *
1811 * We must fsync the state file of any GXACT that is valid or has been
1812 * generated during redo and has a PREPARE LSN <= the checkpoint's redo
1813 * horizon. (If the gxact isn't valid yet, has not been generated in
1814 * redo, or has a later LSN, this checkpoint is not responsible for
1815 * fsyncing it.)
1816 *
1817 * This is deliberately run as late as possible in the checkpoint sequence,
1818 * because GXACTs ordinarily have short lifespans, and so it is quite
1819 * possible that GXACTs that were valid at checkpoint start will no longer
1820 * exist if we wait a little bit. With typical checkpoint settings this
1821 * will be about 3 minutes for an online checkpoint, so as a result we
1822 * expect that there will be no GXACTs that need to be copied to disk.
1823 *
1824 * If a GXACT remains valid across multiple checkpoints, it will already
1825 * be on disk so we don't bother to repeat that write.
1826 */
1827void
1829{
1830 int i;
1831 int serialized_xacts = 0;
1832
1833 if (max_prepared_xacts <= 0)
1834 return; /* nothing to do */
1835
1837
1838 /*
1839 * We are expecting there to be zero GXACTs that need to be copied to
1840 * disk, so we perform all I/O while holding TwoPhaseStateLock for
1841 * simplicity. This prevents any new xacts from preparing while this
1842 * occurs, which shouldn't be a problem since the presence of long-lived
1843 * prepared xacts indicates the transaction manager isn't active.
1844 *
1845 * It's also possible to move I/O out of the lock, but on every error we
1846 * would have to check whether somebody committed our transaction in a
1847 * different backend. Let's leave this optimization for the future, in
1848 * case somebody finds that this place causes a bottleneck.
1849 *
1850 * Note that it isn't possible for there to be a GXACT with a
1851 * prepare_end_lsn set prior to the last checkpoint yet is marked invalid,
1852 * because of the efforts with delayChkptFlags.
1853 */
1855 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
1856 {
1857 /*
1858 * Note that we are using gxact not PGPROC so this works in recovery
1859 * also
1860 */
1862
 /* only entries not yet on disk whose PREPARE ends at or before the horizon */
1863 if ((gxact->valid || gxact->inredo) &&
1864 !gxact->ondisk &&
1865 gxact->prepare_end_lsn <= redo_horizon)
1866 {
1867 char *buf;
1868 int len;
1869
1870 XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, &len);
 /* mark the entry as on disk and clear its WAL pointers */
1872 gxact->ondisk = true;
1873 gxact->prepare_start_lsn = InvalidXLogRecPtr;
1874 gxact->prepare_end_lsn = InvalidXLogRecPtr;
1875 pfree(buf);
1877 }
1878 }
1880
1881 /*
1882 * Flush unconditionally the parent directory to make any information
1883 * durable on disk. Two-phase files could have been removed and those
1884 * removals need to be made persistent as well as any files newly created
1885 * previously since the last checkpoint.
1886 */
1888
1890
1892 ereport(LOG,
1893 (errmsg_plural("%u two-phase state file was written "
1894 "for a long-running prepared transaction",
1895 "%u two-phase state files were written "
1896 "for long-running prepared transactions",
1899}
1900
1901/*
1902 * restoreTwoPhaseData
1903 *
1904 * Scan pg_twophase and fill TwoPhaseState depending on the on-disk data.
1905 * This is called once at the beginning of recovery, saving any extra
1906 * lookups in the future. Two-phase files that are newer than the
1907 * minimum XID horizon are discarded on the way.
1908 */
1909void
1911{
1912 DIR *cldir;
1913 struct dirent *clde;
1914
1917 while ((clde = ReadDir(cldir, TWOPHASE_DIR)) != NULL)
1918 {
 /* only consider names made of exactly 16 upper-case hex digits */
1919 if (strlen(clde->d_name) == 16 &&
1920 strspn(clde->d_name, "0123456789ABCDEF") == 16)
1921 {
1922 FullTransactionId fxid;
1923 char *buf;
1924
 /* the file name is the 64-bit full transaction ID in hexadecimal */
1925 fxid = FullTransactionIdFromU64(strtou64(clde->d_name, NULL, 16));
1926
1928 true, false, false);
1929 if (buf == NULL)
1930 continue;
1931
1934 }
1935 }
1937 FreeDir(cldir);
1938}
1939
1940/*
1941 * PrescanPreparedTransactions
1942 *
1943 * Scan the shared memory entries of TwoPhaseState and determine the range
1944 * of valid XIDs present. This is run during database startup, after we
1945 * have completed reading WAL. TransamVariables->nextXid has been set to
1946 * one more than the highest XID for which evidence exists in WAL.
1947 *
1948 * We throw away any prepared xacts with main XID beyond nextXid --- if any
1949 * are present, it suggests that the DBA has done a PITR recovery to an
1950 * earlier point in time without cleaning out pg_twophase. We dare not
1951 * try to recover such prepared xacts since they likely depend on database
1952 * state that doesn't exist now.
1953 *
1954 * However, we will advance nextXid beyond any subxact XIDs belonging to
1955 * valid prepared xacts. We need to do this since subxact commit doesn't
1956 * write a WAL entry, and so there might be no evidence in WAL of those
1957 * subxact XIDs.
1958 *
1959 * On corrupted two-phase files, fail immediately. Keeping broken entries
1960 * around and letting replay continue would harm the system; a new
1961 * backup should be rolled in instead.
1962 *
1963 * Our other responsibility is to determine and return the oldest valid XID
1964 * among the prepared xacts (if none, return TransamVariables->nextXid).
1965 * This is needed to synchronize pg_subtrans startup properly.
1966 *
1967 * If xids_p and nxids_p are not NULL, pointer to a palloc'd array of all
1968 * top-level xids is stored in *xids_p. The number of entries in the array
1969 * is returned in *nxids_p.
1970 */
1973{
1977 TransactionId *xids = NULL;
1978 int nxids = 0;
1979 int allocsize = 0;
1980 int i;
1981
1983 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
1984 {
1985 TransactionId xid;
1986 char *buf;
1988
1989 Assert(gxact->inredo);
1990
1992 gxact->prepare_start_lsn,
1993 gxact->ondisk, false, true);
1994
 /* NULL means the entry was discarded; nothing to account for */
1995 if (buf == NULL)
1996 continue;
1997
1998 /*
1999 * OK, we think this file is valid. Incorporate xid into the
2000 * running-minimum result.
2001 */
2002 xid = XidFromFullTransactionId(gxact->fxid);
2003 if (TransactionIdPrecedes(xid, result))
2004 result = xid;
2005
2006 if (xids_p)
2007 {
 /* grow the array on demand: 10 entries at first, then doubling */
2008 if (nxids == allocsize)
2009 {
2010 if (nxids == 0)
2011 {
2012 allocsize = 10;
2013 xids = palloc(allocsize * sizeof(TransactionId));
2014 }
2015 else
2016 {
2017 allocsize = allocsize * 2;
2018 xids = repalloc(xids, allocsize * sizeof(TransactionId));
2019 }
2020 }
2021 xids[nxids++] = xid;
2022 }
2023
2024 pfree(buf);
2025 }
2027
2028 if (xids_p)
2029 {
2030 *xids_p = xids;
2031 *nxids_p = nxids;
2032 }
2033
2034 return result;
2035}
2036
2037/*
2038 * StandbyRecoverPreparedTransactions
2039 *
2040 * Scan the shared memory entries of TwoPhaseState and setup all the required
2041 * information to allow standby queries to treat prepared transactions as still
2042 * active.
2043 *
2044 * This is never called at the end of recovery - we use
2045 * RecoverPreparedTransactions() at that point.
2046 *
2047 * This updates pg_subtrans, so that any subtransactions will be correctly
2048 * seen as in-progress in snapshots taken during recovery.
2049 */
2050void
2052{
2053 int i;
2054
2056 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
2057 {
2058 char *buf;
2060
2061 Assert(gxact->inredo);
2062
 /* setParent = true, setNextXid = false */
2064 gxact->prepare_start_lsn,
2065 gxact->ondisk, true, false);
 /* the buffer contents are not used here, so just free it */
2066 if (buf != NULL)
2067 pfree(buf);
2068 }
2070}
2071
2072/*
2073 * RecoverPreparedTransactions
2074 *
2075 * Scan the shared memory entries of TwoPhaseState and reload the state for
2076 * each prepared transaction (reacquire locks, etc).
2077 *
2078 * This is run at the end of recovery, but before we allow backends to write
2079 * WAL.
2080 *
2081 * At the end of recovery the way we take snapshots will change. We now need
2082 * to mark all running transactions with their full SubTransSetParent() info
2083 * to allow normal snapshots to work correctly if snapshots overflow.
2084 * We do this here because by definition prepared transactions are the only
2085 * type of write transaction still running, so this is necessary and
2086 * complete.
2087 */
2088void
2090{
2091 int i;
2092
2094 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
2095 {
2096 char *buf;
2098 FullTransactionId fxid = gxact->fxid;
2099 char *bufptr;
2100 TwoPhaseFileHeader *hdr;
2101 TransactionId *subxids;
2102 const char *gid;
2103
2104 /*
2105 * Reconstruct subtrans state for the transaction --- needed because
2106 * pg_subtrans is not preserved over a restart. Note that we are
2107 * linking all the subtransactions directly to the top-level XID;
2108 * there may originally have been a more complex hierarchy, but
2109 * there's no need to restore that exactly. It's possible that
2110 * SubTransSetParent has been set before, if the prepared transaction
2111 * generated xid assignment records.
2112 */
2114 gxact->prepare_start_lsn,
2115 gxact->ondisk, true, false);
 /* NULL means the entry was discarded as stale or from the future */
2116 if (buf == NULL)
2117 continue;
2118
2119 ereport(LOG,
2120 (errmsg("recovering prepared transaction %u of epoch %u from shared memory",
2123
 /*
 * Walk the buffer: the GID follows the MAXALIGN'd header, then the
 * subxact XIDs. The remaining arrays are only stepped over here.
 */
2124 hdr = (TwoPhaseFileHeader *) buf;
2127 bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
2128 gid = (const char *) bufptr;
2129 bufptr += MAXALIGN(hdr->gidlen);
2130 subxids = (TransactionId *) bufptr;
2131 bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
2132 bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileLocator));
2133 bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileLocator));
2134 bufptr += MAXALIGN(hdr->ncommitstats * sizeof(xl_xact_stats_item));
2135 bufptr += MAXALIGN(hdr->nabortstats * sizeof(xl_xact_stats_item));
2136 bufptr += MAXALIGN(hdr->ninvalmsgs * sizeof(SharedInvalidationMessage));
2137
2138 /*
2139 * Recreate its GXACT and dummy PGPROC. But, check whether it was
2140 * added in redo and already has a shmem entry for it.
2141 */
2142 MarkAsPreparingGuts(gxact, gxact->fxid, gid,
2143 hdr->prepared_at,
2144 hdr->owner, hdr->database);
2145
2146 /* recovered, so reset the flag for entries generated by redo */
2147 gxact->inredo = false;
2148
2149 GXactLoadSubxactData(gxact, hdr->nsubxacts, subxids);
2150 MarkAsPrepared(gxact, true);
2151
2153
2154 /*
2155 * Recover other state (notably locks) using resource managers.
2156 */
2158
2159 /*
2160 * Release locks held by the standby process after we process each
2161 * prepared transaction. As a result, we don't need too many
2162 * additional locks at any one time.
2163 */
2164 if (InHotStandby)
2165 StandbyReleaseLockTree(hdr->xid, hdr->nsubxacts, subxids);
2166
2167 /*
2168 * We're done with recovering this transaction. Clear MyLockedGxact,
2169 * like we do in PrepareTransaction() during normal operation.
2170 */
2172
2173 pfree(buf);
2174
2176 }
2177
2179}
2180
2181/*
2182 * ProcessTwoPhaseBuffer
2183 *
2184 * Given a FullTransactionId, read it either from disk or read it directly
2185 * via shmem xlog record pointer using the provided "prepare_start_lsn".
2186 *
2187 * If setParent is true, set up subtransaction parent linkages.
2188 *
2189 * If setNextXid is true, set TransamVariables->nextXid to the newest
2190 * value scanned.
 *
 * Returns the two-phase state buffer, which the callers in this file
 * release with pfree(). Returns NULL when the entry was found to be
 * stale or from the future; in that case the state file (fromdisk) or
 * the shared memory entry (otherwise) has been removed. Corrupted state
 * raises an ERROR.
2191 */
2192static char *
2194 XLogRecPtr prepare_start_lsn,
2195 bool fromdisk,
2196 bool setParent, bool setNextXid)
2197{
2199 TransactionId *subxids;
2200 char *buf;
2201 TwoPhaseFileHeader *hdr;
2202 int i;
2203
2205
 /* state kept in WAL must come with a valid record pointer */
2206 if (!fromdisk)
2207 Assert(XLogRecPtrIsValid(prepare_start_lsn));
2208
2209 /* Already processed? */
2212 {
2213 if (fromdisk)
2214 {
2216 (errmsg("removing stale two-phase state file for transaction %u of epoch %u",
2219 RemoveTwoPhaseFile(fxid, true);
2220 }
2221 else
2222 {
2224 (errmsg("removing stale two-phase state from memory for transaction %u of epoch %u",
2227 PrepareRedoRemoveFull(fxid, true);
2228 }
2229 return NULL;
2230 }
2231
2232 /* Reject XID if too new */
2233 if (FullTransactionIdFollowsOrEquals(fxid, nextXid))
2234 {
2235 if (fromdisk)
2236 {
2238 (errmsg("removing future two-phase state file for transaction %u of epoch %u",
2241 RemoveTwoPhaseFile(fxid, true);
2242 }
2243 else
2244 {
2246 (errmsg("removing future two-phase state from memory for transaction %u of epoch %u",
2249 PrepareRedoRemoveFull(fxid, true);
2250 }
2251 return NULL;
2252 }
2253
2254 if (fromdisk)
2255 {
2256 /* Read and validate file */
2257 buf = ReadTwoPhaseFile(fxid, false);
2258 }
2259 else
2260 {
2261 /* Read xlog data */
2262 XlogReadTwoPhaseData(prepare_start_lsn, &buf, NULL);
2263 }
2264
2265 /* Deconstruct header */
2266 hdr = (TwoPhaseFileHeader *) buf;
2268 {
2269 if (fromdisk)
2270 ereport(ERROR,
2272 errmsg("corrupted two-phase state file for transaction %u of epoch %u",
2275 else
2276 ereport(ERROR,
2278 errmsg("corrupted two-phase state in memory for transaction %u of epoch %u",
2281 }
2282
2283 /*
2284 * Examine subtransaction XIDs ... they should all follow main XID, and
2285 * they may force us to advance nextXid.
 *
 * The subxact array starts right after the MAXALIGN'd header and GID.
2286 */
2287 subxids = (TransactionId *) (buf +
2288 MAXALIGN(sizeof(TwoPhaseFileHeader)) +
2289 MAXALIGN(hdr->gidlen));
2290 for (i = 0; i < hdr->nsubxacts; i++)
2291 {
2292 TransactionId subxid = subxids[i];
2293
2295
2296 /* update nextXid if needed */
2297 if (setNextXid)
2299
2300 if (setParent)
2302 }
2303
2304 return buf;
2305}
2306
2307
2308/*
2309 * RecordTransactionCommitPrepared
2310 *
2311 * This is basically the same as RecordTransactionCommit (q.v. if you change
2312 * this function): in particular, we must set DELAY_CHKPT_IN_COMMIT to avoid a
2313 * race condition.
2314 *
2315 * We know the transaction made at least one XLOG entry (its PREPARE),
2316 * so it is never possible to optimize out the commit record.
 *
 * xid and the nchildren subtransaction XIDs in children[] are marked
 * committed in pg_xact after the "Flush XLOG to disk" step. The rels,
 * stats and invalidation message arrays, initfileinval and gid are passed
 * through to the commit record.
2317 */
2318static void
2320 int nchildren,
2321 TransactionId *children,
2322 int nrels,
2323 RelFileLocator *rels,
2324 int nstats,
2325 xl_xact_stats_item *stats,
2326 int ninvalmsgs,
2328 bool initfileinval,
2329 const char *gid)
2330{
2333 bool replorigin;
2334
2335 /*
2336 * Are we using the replication origins feature? Or, in other words, are
2337 * we replaying remote actions?
2338 */
2341
2342 /* Load the injection point before entering the critical section */
2343 INJECTION_POINT_LOAD("commit-after-delay-checkpoint");
2344
2346
2347 /* See notes in RecordTransactionCommit */
2350
2351 INJECTION_POINT_CACHED("commit-after-delay-checkpoint", NULL);
2352
2353 /*
2354 * Ensures the DELAY_CHKPT_IN_COMMIT flag write is globally visible before
2355 * commit time is written.
2356 */
2358
2359 /*
2360 * Note it is important to set committs value after marking ourselves as
2361 * in the commit critical section (DELAY_CHKPT_IN_COMMIT). This is because
2362 * we want to ensure all transactions that have acquired commit timestamp
2363 * are finished before we allow the logical replication client to advance
2364 * its xid which is used to hold back dead rows for conflict detection.
2365 * See comments atop worker.c.
2366 */
2368
2369 /*
2370 * Emit the XLOG commit record. Note that we mark 2PC commits as
2371 * potentially having AccessExclusiveLocks since we don't know whether or
2372 * not they do.
2373 */
2375 nchildren, children, nrels, rels,
2376 nstats, stats,
2377 ninvalmsgs, invalmsgs,
2378 initfileinval,
2380 xid, gid);
2381
2382
2383 if (replorigin)
2384 /* Move LSNs forward for this replication origin */
2387
2388 /*
2389 * Record commit timestamp. The value comes from plain commit timestamp
2390 * if replorigin is not enabled, or replorigin already set a value for us
2391 * in replorigin_xact_state.origin_timestamp otherwise.
2392 *
2393 * We don't need to WAL-log anything here, as the commit record written
2394 * above already contains the data.
2395 */
2398
2402
2403 /*
2404 * We don't currently try to sleep before flush here ... nor is there any
2405 * support for async commit of a prepared xact (the very idea is probably
2406 * a contradiction)
2407 */
2408
2409 /* Flush XLOG to disk */
2411
2412 /* Mark the transaction committed in pg_xact */
2413 TransactionIdCommitTree(xid, nchildren, children);
2414
2415 /* Checkpoint can proceed now */
2417
2419
2420 /*
2421 * Wait for synchronous replication, if required.
2422 *
2423 * Note that at this stage we have marked clog, but still show as running
2424 * in the procarray and continue to hold locks.
2425 */
2427}
2428
2429/*
2430 * RecordTransactionAbortPrepared
2431 *
2432 * This is basically the same as RecordTransactionAbort.
2433 *
2434 * We know the transaction made at least one XLOG entry (its PREPARE),
2435 * so it is never possible to optimize out the abort record.
 *
 * PANICs if xid is already marked committed. Otherwise xid and the
 * nchildren subtransaction XIDs in children[] are marked aborted in clog,
 * and we then wait for synchronous replication if required.
2436 */
2437static void
2439 int nchildren,
2440 TransactionId *children,
2441 int nrels,
2442 RelFileLocator *rels,
2443 int nstats,
2444 xl_xact_stats_item *stats,
2445 const char *gid)
2446{
2448 bool replorigin;
2449
2450 /*
2451 * Are we using the replication origins feature? Or, in other words, are
2452 * we replaying remote actions?
2453 */
2456
2457 /*
2458 * Catch the scenario where we aborted partway through
2459 * RecordTransactionCommitPrepared ...
2460 */
2461 if (TransactionIdDidCommit(xid))
2462 elog(PANIC, "cannot abort transaction %u, it was already committed",
2463 xid);
2464
2466
2467 /*
2468 * Emit the XLOG abort record. Note that we mark 2PC aborts as
2469 * potentially having AccessExclusiveLocks since we don't know whether or
2470 * not they do.
2471 */
2473 nchildren, children,
2474 nrels, rels,
2475 nstats, stats,
2477 xid, gid);
2478
2479 if (replorigin)
2480 /* Move LSNs forward for this replication origin */
2483
2484 /* Always flush, since we're about to remove the 2PC state file */
2486
2487 /*
2488 * Mark the transaction aborted in clog. This is not absolutely necessary
2489 * but we may as well do it while we are here.
2490 */
2491 TransactionIdAbortTree(xid, nchildren, children);
2492
2494
2495 /*
2496 * Wait for synchronous replication, if required.
2497 *
2498 * Note that at this stage we have marked clog, but still show as running
2499 * in the procarray and continue to hold locks.
2500 */
2501 SyncRepWaitForLSN(recptr, false);
2502}
2503
2504/*
2505 * PrepareRedoAdd
2506 *
2507 * Store pointers to the start/end of the WAL record along with the xid in
2508 * a gxact entry in shared memory TwoPhaseState structure. If caller
2509 * specifies InvalidXLogRecPtr as WAL location to fetch the two-phase
2510 * data, the entry is marked as located on disk.
 *
 * If origin_id is not InvalidReplOriginId, the replication origin's apply
 * progress is advanced as well.
2511 */
2512void
2514 XLogRecPtr start_lsn, XLogRecPtr end_lsn,
2515 ReplOriginId origin_id)
2516{
2518 char *bufptr;
2519 const char *gid;
2521
2524
2525 if (!FullTransactionIdIsValid(fxid))
2526 {
2529 hdr->xid);
2530 }
2531
 /* the GID string is stored right after the MAXALIGN'd header */
2532 bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
2533 gid = (const char *) bufptr;
2534
2535 /*
2536 * Reserve the GID for the given transaction in the redo code path.
2537 *
2538 * This creates a gxact struct and puts it into the active array.
2539 *
2540 * In redo, this struct is mainly used to track PREPARE/COMMIT entries in
2541 * shared memory. Hence, we only fill up the bare minimum contents here.
2542 * The gxact also gets marked with gxact->inredo set to true to indicate
2543 * that it got added in the redo phase.
2544 */
2545
2546 /*
2547 * In the event of a crash while a checkpoint was running, it may be
2548 * possible that some two-phase data found its way to disk while its
2549 * corresponding record needs to be replayed in the follow-up recovery. As
2550 * the 2PC data was on disk, it has already been restored at the beginning
2551 * of recovery with restoreTwoPhaseData(), so skip this record to avoid
2552 * duplicates in TwoPhaseState. If a consistent state has been reached,
2553 * the record is added to TwoPhaseState and it should have no
2554 * corresponding file in pg_twophase.
2555 */
2556 if (XLogRecPtrIsValid(start_lsn))
2557 {
2558 char path[MAXPGPATH];
2559
2561 TwoPhaseFilePath(path, fxid);
2562
2563 if (access(path, F_OK) == 0)
2564 {
2566 (errmsg("could not recover two-phase state file for transaction %u",
2567 hdr->xid),
2568 errdetail("Two-phase state file has been found in WAL record %X/%08X, but this transaction has already been restored from disk.",
2569 LSN_FORMAT_ARGS(start_lsn))));
2570 return;
2571 }
2572
 /* a failure other than "file does not exist" is unexpected */
2573 if (errno != ENOENT)
2574 ereport(ERROR,
2576 errmsg("could not access file \"%s\": %m", path)));
2577 }
2578
2579 /* Get a free gxact from the freelist */
2581 ereport(ERROR,
2583 errmsg("maximum number of prepared transactions reached"),
2584 errhint("Increase \"max_prepared_transactions\" (currently %d).",
2588
2590 gxact->prepare_start_lsn = start_lsn;
2591 gxact->prepare_end_lsn = end_lsn;
2592 gxact->fxid = fxid;
2593 gxact->owner = hdr->owner;
2594 gxact->locking_backend = INVALID_PROC_NUMBER;
2595 gxact->valid = false;
2596 gxact->ondisk = !XLogRecPtrIsValid(start_lsn);
2597 gxact->inredo = true; /* yes, added in redo */
 /* NOTE(review): unbounded copy; assumes gid fits in gxact->gid -- the bound is not visible here, confirm */
2598 strcpy(gxact->gid, gid);
2599
2600 /* And insert it into the active array */
2603
2604 if (origin_id != InvalidReplOriginId)
2605 {
2606 /* recover apply progress */
2607 replorigin_advance(origin_id, hdr->origin_lsn, end_lsn,
2608 false /* backward */ , false /* WAL */ );
2609 }
2610
2611 elog(DEBUG2, "added 2PC data in shared memory for transaction %u of epoch %u",
2614}
2615
2616/*
2617 * PrepareRedoRemoveFull
2618 *
2619 * Remove the corresponding gxact entry from TwoPhaseState. Also remove
2620 * the 2PC file if a prepared transaction was saved via an earlier checkpoint.
2621 *
2622 * Caller must hold TwoPhaseStateLock in exclusive mode, because TwoPhaseState
2623 * is updated.
     *
     * fxid: full transaction ID identifying the entry to remove. If no entry
     * matches, the function returns without doing anything.
     * giveWarning: presumably forwarded to RemoveTwoPhaseFile() when the 2PC
     * file is removed -- TODO confirm.
2624 */
2625static void
2627{
2629 int i;
2630 bool found = false;
2631
2634
     /* Scan the active prepared transactions for the entry matching fxid. */
2635 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
2636 {
2638
2639 if (FullTransactionIdEquals(gxact->fxid, fxid))
2640 {
2641 Assert(gxact->inredo); /* a matching entry must have been added by redo */
2642 found = true;
2643 break;
2644 }
2645 }
2646
2647 /*
2648 * Just leave if there is nothing, this is expected during WAL replay.
2649 */
2650 if (!found)
2651 return;
2652
2653 /*
2654 * And now we can clean up any files we may have left.
2655 */
     /*
      * NOTE(review): the message below ends with a trailing blank, unlike the
      * matching DEBUG2 message emitted when 2PC data is added -- confirm
      * whether that is intentional.
      */
2656 elog(DEBUG2, "removing 2PC data for transaction %u of epoch %u ",
2659
     /* Only entries flagged as on disk have a state file to clean up. */
2660 if (gxact->ondisk)
2662
2664}
2665
2666/*
2667 * Wrapper of PrepareRedoRemoveFull(), for TransactionIds.
2668 */
2669void
2677
2678/*
2679 * LookupGXact
2680 * Check if the prepared transaction with the given GID, lsn and timestamp
2681 * exists.
2682 *
2683 * Note that we always compare with the LSN where prepare ends because that is
2684 * what is stored as origin_lsn in the 2PC file.
2685 *
2686 * This function is primarily used to check if the prepared transaction
2687 * received from the upstream (remote node) already exists. Checking only GID
2688 * is not sufficient because a different prepared xact with the same GID can
2689 * exist on the same node. So, we are ensuring to match origin_lsn and
2690 * origin_timestamp of prepared xact to avoid the possibility of a match of
2691 * prepared xact from two different nodes.
2692 */
2693bool
2694LookupGXact(const char *gid, XLogRecPtr prepare_end_lsn,
2696{
2697 int i;
2698 bool found = false;
2699
2701 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
2702 {
2704
2705 /* Ignore not-yet-valid GIDs. */
2706 if (gxact->valid && strcmp(gxact->gid, gid) == 0)
2707 {
2708 char *buf;
2709 TwoPhaseFileHeader *hdr;
2710
2711 /*
2712 * We are not expecting collisions of GXACTs (same gid) between
2713 * publisher and subscribers, so we perform all I/O while holding
2714 * TwoPhaseStateLock for simplicity.
2715 *
2716 * To move the I/O out of the lock, we need to ensure that no
2717 * other backend commits the prepared xact in the meantime. We can
2718 * do this optimization if we encounter many collisions in GID
2719 * between publisher and subscriber.
2720 */
2721 if (gxact->ondisk)
2722 buf = ReadTwoPhaseFile(gxact->fxid, false);
2723 else
2724 {
2725 Assert(gxact->prepare_start_lsn);
2726 XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, NULL);
2727 }
2728
2729 hdr = (TwoPhaseFileHeader *) buf;
2730
2731 if (hdr->origin_lsn == prepare_end_lsn &&
2733 {
2734 found = true;
2735 pfree(buf);
2736 break;
2737 }
2738
2739 pfree(buf);
2740 }
2741 }
2743 return found;
2744}
2745
2746/*
2747 * TwoPhaseTransactionGid
2748 * Form the prepared transaction GID for two_phase transactions.
2749 *
2750 * Return the GID in the supplied buffer.
     *
     * The GID has the form pg_gid_<subid>_<xid>, with both numbers printed as
     * unsigned decimals. It is written to gid_res, whose size in bytes is
     * given by szgid; snprintf() truncates the output if it does not fit.
     *
     * An invalid xid is reported with ereport(ERROR). A valid subid is only
     * checked by an assertion.
2751 */
2752void
2754{
2755 Assert(OidIsValid(subid));
2756
2757 if (!TransactionIdIsValid(xid))
2758 ereport(ERROR,
2760 errmsg_internal("invalid two-phase transaction ID")));
2761
2762 snprintf(gid_res, szgid, "pg_gid_%u_%u", subid, xid);
2763}
2764
2765/*
2766 * IsTwoPhaseTransactionGidForSubid
2767 * Check whether the given GID (as formed by TwoPhaseTransactionGid) is
2768 * for the specified 'subid'.
     *
     * Returns true only if the GID parses as pg_gid_<subid>_<xid>, the parsed
     * subid equals the one given, and the GID is identical to the string
     * rebuilt from the parsed values.
2769 */
2770static bool
2772{
2773 int ret;
2776 char gid_tmp[GIDSIZE];
2777
2778 /* Extract the subid and xid from the given GID */
2779 ret = sscanf(gid, "pg_gid_%u_%u", &subid_from_gid, &xid_from_gid);
2780
2781 /*
2782 * Check that the given GID has expected format, and at least the subid
2783 * matches.
     *
     * sscanf() returns the number of fields it converted, so anything other
     * than 2 means the GID does not follow the expected pattern.
2784 */
2785 if (ret != 2 || subid != subid_from_gid)
2786 return false;
2787
2788 /*
2789 * Reconstruct a temporary GID based on the subid and xid extracted from
2790 * the given GID and check whether the temporary GID and the given GID
2791 * match.
     *
     * This is needed because sscanf() stops after the second number and
     * ignores whatever follows it, so a GID with extra trailing characters
     * would otherwise be accepted.
2792 */
2794
2795 return strcmp(gid, gid_tmp) == 0;
2796}
2797
2798/*
2799 * LookupGXactBySubid
2800 * Check if the prepared transaction done by apply worker exists.
     *
     * Scans the active prepared transactions and returns true as soon as a
     * valid entry satisfying the match condition is found, false otherwise.
2801 */
2802bool
2804{
2805 bool found = false;
2806
2808 for (int i = 0; i < TwoPhaseState->numPrepXacts; i++)
2809 {
2811
2812 /* Ignore not-yet-valid GIDs. */
2813 if (gxact->valid &&
2815 {
     /* One match is enough; stop scanning. */
2816 found = true;
2817 break;
2818 }
2819 }
2821
2822 return found;
2823}
2824
2825/*
2826 * TwoPhaseGetOldestXidInCommit
2827 * Return the oldest transaction ID from prepared transactions that are
2828 * currently in the commit critical section.
2829 *
2830 * This function only considers transactions in the currently connected
2831 * database. If no matching transactions are found, it returns
2832 * InvalidTransactionId.
     *
     * A prepared transaction is considered only if its entry is valid, it is
     * locked by a backend, that backend belongs to MyDatabaseId, and that
     * backend has DELAY_CHKPT_IN_COMMIT set in delayChkptFlags. Among those,
     * the oldest XID is chosen using TransactionIdPrecedes().
2833 */
2836{
2837 TransactionId oldestRunningXid = InvalidTransactionId;
2838
2840
2841 for (int i = 0; i < TwoPhaseState->numPrepXacts; i++)
2842 {
2845 TransactionId xid;
2846
     /* Skip entries that are not valid yet. */
2847 if (!gxact->valid)
2848 continue;
2849
     /* Skip entries that no backend is currently working on. */
2850 if (gxact->locking_backend == INVALID_PROC_NUMBER)
2851 continue;
2852
2853 /*
2854 * Get the backend that is handling the transaction. It's safe to
2855 * access this backend while holding TwoPhaseStateLock, as the backend
2856 * can only be destroyed after either removing or unlocking the
2857 * current global transaction, both of which require an exclusive
2858 * TwoPhaseStateLock.
2859 */
2860 commitproc = GetPGProcByNumber(gxact->locking_backend);
2861
     /* Skip transactions handled by a backend of another database. */
2862 if (MyDatabaseId != commitproc->databaseId)
2863 continue;
2864
     /* Skip backends that are not in the commit critical section. */
2865 if ((commitproc->delayChkptFlags & DELAY_CHKPT_IN_COMMIT) == 0)
2866 continue;
2867
2868 xid = XidFromFullTransactionId(gxact->fxid);
2869
     /* Keep the first candidate seen, or any candidate older than the current one. */
2870 if (!TransactionIdIsValid(oldestRunningXid) ||
2871 TransactionIdPrecedes(xid, oldestRunningXid))
2872 oldestRunningXid = xid;
2873 }
2874
2876
2877 return oldestRunningXid;
2878}
#define pg_write_barrier()
Definition atomics.h:155
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:453
int16 AttrNumber
Definition attnum.h:21
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1639
static Datum values[MAXATTR]
Definition bootstrap.c:190
#define CStringGetTextDatum(s)
Definition builtins.h:98
#define MAXALIGN(LEN)
Definition c.h:896
#define Max(x, y)
Definition c.h:1085
#define Assert(condition)
Definition c.h:943
#define PG_BINARY
Definition c.h:1374
#define FLEXIBLE_ARRAY_MEMBER
Definition c.h:558
uint16_t uint16
Definition c.h:623
uint32_t uint32
Definition c.h:624
#define MemSet(start, val, len)
Definition c.h:1107
uint32 TransactionId
Definition c.h:736
#define OidIsValid(objectId)
Definition c.h:858
size_t Size
Definition c.h:689
uint32 result
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
void TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids, TransactionId *subxids, TimestampTz timestamp, ReplOriginId nodeid)
Definition commit_ts.c:150
int64 TimestampTz
Definition timestamp.h:39
Datum arg
Definition elog.c:1322
int errcode_for_file_access(void)
Definition elog.c:897
int errcode(int sqlerrcode)
Definition elog.c:874
#define LOG
Definition elog.h:31
int errhint(const char *fmt,...) pg_attribute_printf(1
int errdetail(const char *fmt,...) pg_attribute_printf(1
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define WARNING
Definition elog.h:36
int int int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...) pg_attribute_printf(1
#define DEBUG2
Definition elog.h:29
#define PANIC
Definition elog.h:43
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:227
#define ereport(elevel,...)
Definition elog.h:151
TupleDesc BlessTupleDesc(TupleDesc tupdesc)
int FreeDir(DIR *dir)
Definition fd.c:3009
int CloseTransientFile(int fd)
Definition fd.c:2855
void fsync_fname(const char *fname, bool isdir)
Definition fd.c:757
DIR * AllocateDir(const char *dirname)
Definition fd.c:2891
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition fd.c:2957
int pg_fsync(int fd)
Definition fd.c:390
int OpenTransientFile(const char *fileName, int fileFlags)
Definition fd.c:2678
#define ERRCODE_PROTOCOL_VIOLATION
Definition fe-connect.c:96
#define palloc_object(type)
Definition fe_memutils.h:74
#define MaxAllocSize
Definition fe_memutils.h:22
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_object(type)
Definition fe_memutils.h:75
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
#define SRF_IS_FIRSTCALL()
Definition funcapi.h:304
#define SRF_PERCALL_SETUP()
Definition funcapi.h:308
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition funcapi.h:310
#define SRF_FIRSTCALL_INIT()
Definition funcapi.h:306
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition funcapi.h:230
#define SRF_RETURN_DONE(_funcctx)
Definition funcapi.h:328
ProcNumber MyProcNumber
Definition globals.c:90
bool IsPostmasterEnvironment
Definition globals.c:119
Oid MyDatabaseId
Definition globals.c:94
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition heaptuple.c:1025
static void dlist_init(dlist_head *head)
Definition ilist.h:314
static void dlist_node_init(dlist_node *node)
Definition ilist.h:325
#define INJECTION_POINT_CACHED(name, arg)
#define INJECTION_POINT_LOAD(name)
#define write(a, b, c)
Definition win32.h:14
#define read(a, b, c)
Definition win32.h:13
int xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs, bool *RelcacheInitFileInval)
Definition inval.c:1012
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:344
int i
Definition isn.c:77
#define VirtualTransactionIdIsValid(vxid)
Definition lock.h:70
#define GET_VXID_FROM_PGPROC(vxid_dst, proc)
Definition lock.h:80
#define LocalTransactionIdIsValid(lxid)
Definition lock.h:69
#define VirtualTransactionIdEquals(vxid1, vxid2)
Definition lock.h:74
bool LWLockHeldByMe(LWLock *lock)
Definition lwlock.c:1885
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1150
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1929
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1767
@ LW_WS_NOT_WAITING
Definition lwlock.h:30
#define NUM_LOCK_PARTITIONS
Definition lwlock.h:87
@ LW_SHARED
Definition lwlock.h:105
@ LW_EXCLUSIVE
Definition lwlock.h:104
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
void DropRelationFiles(RelFileLocator *delrels, int ndelrels, bool isRedo)
Definition md.c:1612
#define RESUME_INTERRUPTS()
Definition miscadmin.h:136
#define AmStartupProcess()
Definition miscadmin.h:393
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define HOLD_INTERRUPTS()
Definition miscadmin.h:134
@ B_INVALID
Definition miscadmin.h:339
#define END_CRIT_SECTION()
Definition miscadmin.h:152
Oid GetUserId(void)
Definition miscinit.c:470
static char * errmsg
ReplOriginXactState replorigin_xact_state
Definition origin.c:168
void replorigin_advance(ReplOriginId node, XLogRecPtr remote_commit, XLogRecPtr local_commit, bool go_backward, bool wal_log)
Definition origin.c:928
void replorigin_session_advance(XLogRecPtr remote_commit, XLogRecPtr local_commit)
Definition origin.c:1335
#define DoNotReplicateId
Definition origin.h:34
#define InvalidReplOriginId
Definition origin.h:33
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
#define ERRCODE_DATA_CORRUPTED
#define MAXPGPATH
uint32 pg_crc32c
Definition pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition pg_crc32c.h:165
#define EQ_CRC32C(c1, c2)
Definition pg_crc32c.h:42
#define INIT_CRC32C(crc)
Definition pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition pg_crc32c.h:170
const void size_t len
const void * data
static char * user
Definition pg_regress.c:121
static char buf[DEFAULT_XLOG_SEG_SIZE]
void pgstat_execute_transactional_drops(int ndrops, struct xl_xact_stats_item *items, bool is_redo)
void AtEOXact_PgStat(bool isCommit, bool parallel)
Definition pgstat_xact.c:40
int pgstat_get_transactional_drops(bool isCommit, xl_xact_stats_item **items)
#define snprintf
Definition port.h:260
static Datum TransactionIdGetDatum(TransactionId X)
Definition postgres.h:292
static Datum ObjectIdGetDatum(Oid X)
Definition postgres.h:252
uint64_t Datum
Definition postgres.h:70
#define InvalidOid
unsigned int Oid
void PredicateLockTwoPhaseFinish(FullTransactionId fxid, bool isCommit)
Definition predicate.c:4812
static int fd(const char *x, int i)
static int fb(int x)
short access
#define DELAY_CHKPT_IN_COMMIT
Definition proc.h:141
#define GetPGProcByNumber(n)
Definition proc.h:504
#define PGPROC_MAX_CACHED_SUBXIDS
Definition proc.h:43
#define GetNumberFromPGProc(proc)
Definition proc.h:505
#define DELAY_CHKPT_START
Definition proc.h:139
@ PROC_WAIT_STATUS_OK
Definition proc.h:145
void ProcArrayAdd(PGPROC *proc)
Definition procarray.c:464
void ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
Definition procarray.c:561
#define INVALID_PROC_NUMBER
Definition procnumber.h:26
int ProcNumber
Definition procnumber.h:24
void RelationCacheInitFilePostInvalidate(void)
Definition relcache.c:6901
void RelationCacheInitFilePreInvalidate(void)
Definition relcache.c:6876
Size add_size(Size s1, Size s2)
Definition shmem.c:1043
Size mul_size(Size s1, Size s2)
Definition shmem.c:1058
#define ShmemRequestStruct(...)
Definition shmem.h:176
void SendSharedInvalidMessages(const SharedInvalidationMessage *msgs, int n)
Definition sinval.c:47
PGPROC * MyProc
Definition proc.c:71
PGPROC * PreparedXactProcs
Definition proc.c:78
void StandbyReleaseLockTree(TransactionId xid, int nsubxids, TransactionId *subxids)
Definition standby.c:1094
int smgrGetPendingDeletes(bool forCommit, RelFileLocator **ptr)
Definition storage.c:893
#define ERRCODE_DUPLICATE_OBJECT
Definition streamutil.c:30
Definition dirent.c:26
TimestampTz prepared_at
Definition twophase.c:154
XLogRecPtr prepare_start_lsn
Definition twophase.c:163
XLogRecPtr prepare_end_lsn
Definition twophase.c:164
GlobalTransaction next
Definition twophase.c:152
FullTransactionId fxid
Definition twophase.c:165
ProcNumber locking_backend
Definition twophase.c:168
char gid[GIDSIZE]
Definition twophase.c:172
Definition proc.h:179
TransactionId xmin
Definition proc.h:242
LocalTransactionId lxid
Definition proc.h:231
PROCLOCK * waitProcLock
Definition proc.h:306
uint8 lwWaitMode
Definition proc.h:284
BackendType backendType
Definition proc.h:198
uint8 statusFlags
Definition proc.h:210
Oid databaseId
Definition proc.h:201
pg_atomic_uint64 waitStart
Definition proc.h:311
ProcNumber procNumber
Definition proc.h:226
int pid
Definition proc.h:197
struct PGPROC::@136 vxid
XidCacheStatus subxidStatus
Definition proc.h:247
LOCK * waitLock
Definition proc.h:304
TransactionId xid
Definition proc.h:237
struct XidCache subxids
Definition proc.h:249
int delayChkptFlags
Definition proc.h:260
dlist_node waitLink
Definition proc.h:305
dlist_head myProcLocks[NUM_LOCK_PARTITIONS]
Definition proc.h:321
Oid roleId
Definition proc.h:202
ProcWaitStatus waitStatus
Definition proc.h:314
Oid tempNamespaceId
Definition proc.h:204
uint8 lwWaiting
Definition proc.h:283
ReplOriginId origin
Definition origin.h:45
XLogRecPtr origin_lsn
Definition origin.h:46
TimestampTz origin_timestamp
Definition origin.h:47
ShmemRequestCallback request_fn
Definition shmem.h:133
struct StateFileChunk * next
Definition twophase.c:1008
FullTransactionId nextXid
Definition transam.h:220
TwoPhaseRmgrId rmid
Definition twophase.c:995
GlobalTransaction freeGXacts
Definition twophase.c:182
GlobalTransaction prepXacts[FLEXIBLE_ARRAY_MEMBER]
Definition twophase.c:188
GlobalTransaction array
Definition twophase.c:706
bool overflowed
Definition proc.h:50
uint8 count
Definition proc.h:48
TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]
Definition proc.h:55
__int64 st_size
Definition win32_port.h:263
TimestampTz prepared_at
Definition xact.h:359
int32 nabortrels
Definition xact.h:363
int32 ninvalmsgs
Definition xact.h:366
bool initfileinval
Definition xact.h:367
int32 ncommitstats
Definition xact.h:364
TimestampTz origin_timestamp
Definition xact.h:370
uint16 gidlen
Definition xact.h:368
uint32 total_len
Definition xact.h:356
int32 nabortstats
Definition xact.h:365
XLogRecPtr origin_lsn
Definition xact.h:369
uint32 magic
Definition xact.h:355
int32 ncommitrels
Definition xact.h:362
TransactionId xid
Definition xact.h:357
int32 nsubxacts
Definition xact.h:361
uint32 total_len
Definition twophase.c:1017
uint32 num_chunks
Definition twophase.c:1015
StateFileChunk * head
Definition twophase.c:1013
StateFileChunk * tail
Definition twophase.c:1014
uint32 bytes_free
Definition twophase.c:1016
void SubTransSetParent(TransactionId xid, TransactionId parent)
Definition subtrans.c:92
bool superuser_arg(Oid roleid)
Definition superuser.c:57
void SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
Definition syncrep.c:149
TransactionId TransactionIdLatest(TransactionId mainxid, int nxids, const TransactionId *xids)
Definition transam.c:281
bool TransactionIdDidCommit(TransactionId transactionId)
Definition transam.c:126
void TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids)
Definition transam.c:240
void TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids)
Definition transam.c:270
bool TransactionIdDidAbort(TransactionId transactionId)
Definition transam.c:188
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition transam.h:297
#define FullTransactionIdEquals(a, b)
Definition transam.h:50
#define InvalidTransactionId
Definition transam.h:31
static FullTransactionId FullTransactionIdFromAllowableAt(FullTransactionId nextFullXid, TransactionId xid)
Definition transam.h:441
#define EpochFromFullTransactionId(x)
Definition transam.h:47
static FullTransactionId FullTransactionIdFromU64(uint64 value)
Definition transam.h:81
#define FullTransactionIdFollowsOrEquals(a, b)
Definition transam.h:54
#define TransactionIdEquals(id1, id2)
Definition transam.h:43
#define XidFromFullTransactionId(x)
Definition transam.h:48
#define TransactionIdIsValid(xid)
Definition transam.h:41
#define FullTransactionIdIsValid(x)
Definition transam.h:55
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
TupleDesc CreateTemplateTupleDesc(int natts)
Definition tupdesc.c:165
void TupleDescFinalize(TupleDesc tupdesc)
Definition tupdesc.c:511
void TupleDescInitEntry(TupleDesc desc, AttrNumber attributeNumber, const char *attributeName, Oid oidtypeid, int32 typmod, int attdim)
Definition tupdesc.c:900
static char * ReadTwoPhaseFile(FullTransactionId fxid, bool missing_ok)
Definition twophase.c:1301
static void XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len)
Definition twophase.c:1418
ProcNumber TwoPhaseGetDummyProcNumber(FullTransactionId fxid, bool lock_held)
Definition twophase.c:914
TransactionId TwoPhaseGetOldestXidInCommit(void)
Definition twophase.c:2835
static void ProcessRecords(char *bufptr, FullTransactionId fxid, const TwoPhaseCallback callbacks[])
Definition twophase.c:1698
void TwoPhaseTransactionGid(Oid subid, TransactionId xid, char *gid_res, int szgid)
Definition twophase.c:2753
void RecoverPreparedTransactions(void)
Definition twophase.c:2089
static bool twophaseExitRegistered
Definition twophase.c:209
static void TwoPhaseShmemInit(void *arg)
Definition twophase.c:270
void restoreTwoPhaseData(void)
Definition twophase.c:1910
static GlobalTransaction TwoPhaseGetGXact(FullTransactionId fxid, bool lock_held)
Definition twophase.c:809
bool LookupGXact(const char *gid, XLogRecPtr prepare_end_lsn, TimestampTz origin_prepare_timestamp)
Definition twophase.c:2694
#define TWOPHASE_DIR
Definition twophase.c:115
GlobalTransaction MarkAsPreparing(FullTransactionId fxid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid)
Definition twophase.c:365
static void RecordTransactionAbortPrepared(TransactionId xid, int nchildren, TransactionId *children, int nrels, RelFileLocator *rels, int nstats, xl_xact_stats_item *stats, const char *gid)
Definition twophase.c:2438
void RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info, const void *data, uint32 len)
Definition twophase.c:1277
int max_prepared_xacts
Definition twophase.c:118
static FullTransactionId AdjustToFullTransactionId(TransactionId xid)
Definition twophase.c:949
static void RecordTransactionCommitPrepared(TransactionId xid, int nchildren, TransactionId *children, int nrels, RelFileLocator *rels, int nstats, xl_xact_stats_item *stats, int ninvalmsgs, SharedInvalidationMessage *invalmsgs, bool initfileinval, const char *gid)
Definition twophase.c:2319
static void RemoveGXact(GlobalTransaction gxact)
Definition twophase.c:636
void PrepareRedoAdd(FullTransactionId fxid, char *buf, XLogRecPtr start_lsn, XLogRecPtr end_lsn, ReplOriginId origin_id)
Definition twophase.c:2513
static void RemoveTwoPhaseFile(FullTransactionId fxid, bool giveWarning)
Definition twophase.c:1729
static GlobalTransaction MyLockedGxact
Definition twophase.c:207
static TwoPhaseStateData * TwoPhaseState
Definition twophase.c:191
void AtAbort_Twophase(void)
Definition twophase.c:310
static void save_state_data(const void *data, uint32 len)
Definition twophase.c:1030
#define TWOPHASE_MAGIC
Definition twophase.c:982
void FinishPreparedTransaction(const char *gid, bool isCommit)
Definition twophase.c:1503
TransactionId TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid, bool *have_more)
Definition twophase.c:862
static char * ProcessTwoPhaseBuffer(FullTransactionId fxid, XLogRecPtr prepare_start_lsn, bool fromdisk, bool setParent, bool setNextXid)
Definition twophase.c:2193
static void GXactLoadSubxactData(GlobalTransaction gxact, int nsubxacts, TransactionId *children)
Definition twophase.c:512
void PrepareRedoRemove(TransactionId xid, bool giveWarning)
Definition twophase.c:2670
Datum pg_prepared_xact(PG_FUNCTION_ARGS)
Definition twophase.c:719
void EndPrepare(GlobalTransaction gxact)
Definition twophase.c:1151
TransactionId PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
Definition twophase.c:1972
void StartPrepare(GlobalTransaction gxact)
Definition twophase.c:1058
static int GetPreparedTransactionList(GlobalTransaction *gxacts)
Definition twophase.c:674
void StandbyRecoverPreparedTransactions(void)
Definition twophase.c:2051
static void RecreateTwoPhaseFile(FullTransactionId fxid, void *content, int len)
Definition twophase.c:1748
static void AtProcExit_Twophase(int code, Datum arg)
Definition twophase.c:300
static void PrepareRedoRemoveFull(FullTransactionId fxid, bool giveWarning)
Definition twophase.c:2626
static int TwoPhaseFilePath(char *path, FullTransactionId fxid)
Definition twophase.c:956
static void MarkAsPrepared(GlobalTransaction gxact, bool lock_held)
Definition twophase.c:538
void PostPrepare_Twophase(void)
Definition twophase.c:350
bool LookupGXactBySubid(Oid subid)
Definition twophase.c:2803
PGPROC * TwoPhaseGetDummyProc(FullTransactionId fxid, bool lock_held)
Definition twophase.c:929
xl_xact_prepare TwoPhaseFileHeader
Definition twophase.c:984
static void TwoPhaseShmemRequest(void *arg)
Definition twophase.c:249
void CheckPointTwoPhase(XLogRecPtr redo_horizon)
Definition twophase.c:1828
bool StandbyTransactionIdIsPrepared(TransactionId xid)
Definition twophase.c:1473
static GlobalTransaction LockGXact(const char *gid, Oid user)
Definition twophase.c:560
const ShmemCallbacks TwoPhaseShmemCallbacks
Definition twophase.c:196
static void MarkAsPreparingGuts(GlobalTransaction gxact, FullTransactionId fxid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid)
Definition twophase.c:439
static bool IsTwoPhaseTransactionGidForSubid(Oid subid, char *gid)
Definition twophase.c:2771
static struct xllist records
struct GlobalTransactionData * GlobalTransaction
Definition twophase.h:31
const TwoPhaseCallback twophase_postcommit_callbacks[TWOPHASE_RM_MAX_ID+1]
const TwoPhaseCallback twophase_recover_callbacks[TWOPHASE_RM_MAX_ID+1]
const TwoPhaseCallback twophase_postabort_callbacks[TWOPHASE_RM_MAX_ID+1]
void(* TwoPhaseCallback)(FullTransactionId fxid, uint16 info, void *recdata, uint32 len)
#define TWOPHASE_RM_MAX_ID
uint8 TwoPhaseRmgrId
#define TWOPHASE_RM_END_ID
static Datum TimestampTzGetDatum(TimestampTz X)
Definition timestamp.h:52
FullTransactionId ReadNextFullTransactionId(void)
Definition varsup.c:283
void AdvanceNextFullTransactionIdPastXid(TransactionId xid)
Definition varsup.c:299
TransamVariablesData * TransamVariables
Definition varsup.c:37
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition wait_event.h:67
static void pgstat_report_wait_end(void)
Definition wait_event.h:83
const char * name
#define fstat
Definition win32_port.h:73
XLogRecPtr XactLogCommitRecord(TimestampTz commit_time, int nsubxacts, TransactionId *subxacts, int nrels, RelFileLocator *rels, int ndroppedstats, xl_xact_stats_item *droppedstats, int nmsgs, SharedInvalidationMessage *msgs, bool relcacheInval, int xactflags, TransactionId twophase_xid, const char *twophase_gid)
Definition xact.c:5865
int xactGetCommittedChildren(TransactionId **ptr)
Definition xact.c:5841
int MyXactFlags
Definition xact.c:138
XLogRecPtr XactLogAbortRecord(TimestampTz abort_time, int nsubxacts, TransactionId *subxacts, int nrels, RelFileLocator *rels, int ndroppedstats, xl_xact_stats_item *droppedstats, int xactflags, TransactionId twophase_xid, const char *twophase_gid)
Definition xact.c:6037
#define XLOG_XACT_PREPARE
Definition xact.h:171
#define XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK
Definition xact.h:109
#define XLOG_XACT_OPMASK
Definition xact.h:180
#define GIDSIZE
Definition xact.h:31
XLogRecPtr ProcLastRecPtr
Definition xlog.c:260
bool RecoveryInProgress(void)
Definition xlog.c:6830
XLogRecPtr XactLastRecEnd
Definition xlog.c:261
int wal_segment_size
Definition xlog.c:150
bool log_checkpoints
Definition xlog.c:136
void XLogFlush(XLogRecPtr record)
Definition xlog.c:2801
#define XLOG_INCLUDE_ORIGIN
Definition xlog.h:166
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47
uint16 ReplOriginId
Definition xlogdefs.h:69
uint64 XLogRecPtr
Definition xlogdefs.h:21
#define InvalidXLogRecPtr
Definition xlogdefs.h:28
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:482
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:372
void XLogSetRecordFlags(uint8 flags)
Definition xloginsert.c:464
void XLogBeginInsert(void)
Definition xloginsert.c:153
void XLogEnsureRecordSpace(int max_block_id, int ndatas)
Definition xloginsert.c:179
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition xlogreader.c:108
XLogRecord * XLogReadRecord(XLogReaderState *state, char **errormsg)
Definition xlogreader.c:391
void XLogReaderFree(XLogReaderState *state)
Definition xlogreader.c:163
void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr)
Definition xlogreader.c:233
#define XLogRecGetDataLen(decoder)
Definition xlogreader.h:416
#define XLogRecGetInfo(decoder)
Definition xlogreader.h:410
#define XLogRecGetRmid(decoder)
Definition xlogreader.h:411
#define XLogRecGetData(decoder)
Definition xlogreader.h:415
#define XL_ROUTINE(...)
Definition xlogreader.h:117
bool reachedConsistency
static XLogReaderState * xlogreader
void wal_segment_close(XLogReaderState *state)
Definition xlogutils.c:831
void wal_segment_open(XLogReaderState *state, XLogSegNo nextSegNo, TimeLineID *tli_p)
Definition xlogutils.c:806
bool InRecovery
Definition xlogutils.c:50
int read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *cur_page)
Definition xlogutils.c:845
#define InHotStandby
Definition xlogutils.h:60