PostgreSQL Source Code git master
Loading...
Searching...
No Matches
twophase.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * twophase.c
4 * Two-phase commit support functions.
5 *
6 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 * IDENTIFICATION
10 * src/backend/access/transam/twophase.c
11 *
12 * NOTES
13 * Each global transaction is associated with a global transaction
14 * identifier (GID). The client assigns a GID to a postgres
15 * transaction with the PREPARE TRANSACTION command.
16 *
17 * We keep all active global transactions in a shared memory array.
18 * When the PREPARE TRANSACTION command is issued, the GID is
19 * reserved for the transaction in the array. This is done before
20 * a WAL entry is made, because the reservation checks for duplicate
21 * GIDs and aborts the transaction if there already is a global
22 * transaction in prepared state with the same GID.
23 *
24 * A global transaction (gxact) also has a dummy PGPROC; this is what keeps
25 * the XID considered running by TransactionIdIsInProgress. It is also
26 * convenient as a PGPROC to hook the gxact's locks to.
27 *
28 * Information to recover prepared transactions in case of crash is
29 * now stored in WAL for the common case. In some cases there will be
30 * an extended period between preparing a GXACT and commit/abort, in
31 * which case we need to separately record prepared transaction data
32 * in permanent storage. This includes locking information, pending
33 * notifications etc. All that state information is written to the
34 * per-transaction state file in the pg_twophase directory.
35 * All prepared transactions will be written prior to shutdown.
36 *
37 * The life cycle of the state data is as follows:
38 *
39 * * On PREPARE TRANSACTION, the backend writes state data only to the WAL
40 * and stores a pointer to the start of the WAL record in
41 * gxact->prepare_start_lsn.
42 * * If COMMIT occurs before a checkpoint, the backend reads the data from
43 * WAL using prepare_start_lsn.
44 * * On checkpoint, the state data is copied to files in the pg_twophase
45 * directory and fsynced.
46 * * If COMMIT happens after a checkpoint, the backend reads the state data
47 * from the files.
48 *
49 * During replay and replication, TwoPhaseState also holds information
50 * about active prepared transactions that haven't been moved to disk yet.
51 *
52 * Replay of twophase records happens by the following rules:
53 *
54 * * At the beginning of recovery, pg_twophase is scanned once, filling
55 * TwoPhaseState with entries marked with gxact->inredo and
56 * gxact->ondisk. Two-phase file data older than the XID horizon of
57 * the redo position are discarded.
58 * * On PREPARE redo, the transaction is added to TwoPhaseState->prepXacts.
59 * gxact->inredo is set to true for such entries.
60 * * On Checkpoint we iterate through TwoPhaseState->prepXacts entries
61 * that have gxact->inredo set and are behind the redo_horizon. We
62 * save them to disk and then switch gxact->ondisk to true.
63 * * On COMMIT/ABORT we delete the entry from TwoPhaseState->prepXacts.
64 * If gxact->ondisk is true, the corresponding entry from the disk
65 * is additionally deleted.
66 * * RecoverPreparedTransactions(), StandbyRecoverPreparedTransactions()
67 * and PrescanPreparedTransactions() have been modified to go through
68 * gxact->inredo entries that have not made it to disk.
69 *
70 *-------------------------------------------------------------------------
71 */
72#include "postgres.h"
73
74#include <fcntl.h>
75#include <sys/stat.h>
76#include <time.h>
77#include <unistd.h>
78
79#include "access/commit_ts.h"
80#include "access/htup_details.h"
81#include "access/subtrans.h"
82#include "access/transam.h"
83#include "access/twophase.h"
85#include "access/xact.h"
86#include "access/xlog.h"
87#include "access/xloginsert.h"
88#include "access/xlogreader.h"
89#include "access/xlogrecovery.h"
90#include "access/xlogutils.h"
91#include "catalog/pg_type.h"
92#include "catalog/storage.h"
93#include "funcapi.h"
94#include "miscadmin.h"
95#include "pg_trace.h"
96#include "pgstat.h"
97#include "replication/origin.h"
98#include "replication/syncrep.h"
99#include "storage/fd.h"
100#include "storage/ipc.h"
101#include "storage/md.h"
102#include "storage/predicate.h"
103#include "storage/proc.h"
104#include "storage/procarray.h"
105#include "utils/builtins.h"
107#include "utils/memutils.h"
108#include "utils/timestamp.h"
109#include "utils/wait_event.h"
110
111/*
112 * Directory where Two-phase commit files reside within PGDATA
113 */
114#define TWOPHASE_DIR "pg_twophase"
115
116/* GUC variable, can't be changed after startup */
118
119/*
120 * This struct describes one global transaction that is in prepared state
121 * or attempting to become prepared.
122 *
123 * The lifecycle of a global transaction is:
124 *
125 * 1. After checking that the requested GID is not in use, set up an entry in
126 * the TwoPhaseState->prepXacts array with the correct GID and valid = false,
127 * and mark it as locked by my backend.
128 *
129 * 2. After successfully completing prepare, set valid = true and enter the
130 * referenced PGPROC into the global ProcArray.
131 *
132 * 3. To begin COMMIT PREPARED or ROLLBACK PREPARED, check that the entry is
133 * valid and not locked, then mark the entry as locked by storing my current
134 * proc number into locking_backend. This prevents concurrent attempts to
135 * commit or rollback the same prepared xact.
136 *
137 * 4. On completion of COMMIT PREPARED or ROLLBACK PREPARED, remove the entry
138 * from the ProcArray and the TwoPhaseState->prepXacts array and return it to
139 * the freelist.
140 *
141 * Note that if the preparing transaction fails between steps 1 and 2, the
142 * entry must be removed so that the GID and the GlobalTransaction struct
143 * can be reused. See AtAbort_Twophase().
144 *
145 * typedef struct GlobalTransactionData *GlobalTransaction appears in
146 * twophase.h
147 */
148
150{
151 GlobalTransaction next; /* list link for free list */
152 int pgprocno; /* ID of associated dummy PGPROC */
153 TimestampTz prepared_at; /* time of preparation */
154
155 /*
156 * Note that we need to keep track of two LSNs for each GXACT. We keep
157 * track of the start LSN because this is the address we must use to read
158 * state data back from WAL when committing a prepared GXACT. We keep
159 * track of the end LSN because that is the LSN we need to wait for prior
160 * to commit.
161 */
162 XLogRecPtr prepare_start_lsn; /* XLOG offset of prepare record start */
163 XLogRecPtr prepare_end_lsn; /* XLOG offset of prepare record end */
164 FullTransactionId fxid; /* The GXACT full xid */
165
166 Oid owner; /* ID of user that executed the xact */
167 ProcNumber locking_backend; /* backend currently working on the xact */
168 bool valid; /* true if PGPROC entry is in proc array */
169 bool ondisk; /* true if prepare state file is on disk */
170 bool inredo; /* true if entry was added via xlog_redo */
171 char gid[GIDSIZE]; /* The GID assigned to the prepared xact */
173
174/*
175 * Two Phase Commit shared state. Access to this struct is protected
176 * by TwoPhaseStateLock.
177 */
178typedef struct TwoPhaseStateData
179{
180 /* Head of linked list of free GlobalTransactionData structs */
182
183 /* Number of valid prepXacts entries. */
185
186 /* There are max_prepared_xacts items in this array */
189
191
192/*
193 * Global transaction entry currently locked by us, if any. Note that any
194 * access to the entry pointed to by this variable must be protected by
195 * TwoPhaseStateLock, though obviously the pointer itself doesn't need to be
196 * (since it's just local memory).
197 */
199
200static bool twophaseExitRegistered = false;
201
204 int nchildren,
205 TransactionId *children,
206 int nrels,
207 RelFileLocator *rels,
208 int nstats,
209 xl_xact_stats_item *stats,
210 int ninvalmsgs,
212 bool initfileinval,
213 const char *gid);
215 int nchildren,
216 TransactionId *children,
217 int nrels,
218 RelFileLocator *rels,
219 int nstats,
220 xl_xact_stats_item *stats,
221 const char *gid);
222static void ProcessRecords(char *bufptr, FullTransactionId fxid,
223 const TwoPhaseCallback callbacks[]);
225
226static void XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len);
228 XLogRecPtr prepare_start_lsn,
229 bool fromdisk, bool setParent, bool setNextXid);
231 const char *gid, TimestampTz prepared_at, Oid owner,
233static void RemoveTwoPhaseFile(FullTransactionId fxid, bool giveWarning);
234static void RecreateTwoPhaseFile(FullTransactionId fxid, void *content, int len);
235
236/*
237 * Initialization of shared memory
238 */
239Size
241{
242 Size size;
243
244 /* Need the fixed struct, the array of pointers, and the GTD structs */
245 size = offsetof(TwoPhaseStateData, prepXacts);
247 sizeof(GlobalTransaction)));
248 size = MAXALIGN(size);
250 sizeof(GlobalTransactionData)));
251
252 return size;
253}
254
255void
257{
258 bool found;
259
260 TwoPhaseState = ShmemInitStruct("Prepared Transaction Table",
262 &found);
264 {
266 int i;
267
268 Assert(!found);
271
272 /*
273 * Initialize the linked list of free GlobalTransactionData structs
274 */
276 ((char *) TwoPhaseState +
279 for (i = 0; i < max_prepared_xacts; i++)
280 {
281 /* insert into linked list */
284
285 /* associate it with a PGPROC assigned by InitProcGlobal */
287 }
288 }
289 else
290 Assert(found);
291}
292
293/*
294 * Exit hook to unlock the global transaction entry we're working on.
295 */
296static void
298{
299 /* same logic as abort */
301}
302
303/*
304 * Abort hook to unlock the global transaction entry we're working on.
305 */
306void
308{
309 if (MyLockedGxact == NULL)
310 return;
311
312 /*
313 * What to do with the locked global transaction entry? If we were in the
314 * process of preparing the transaction, but haven't written the WAL
315 * record and state file yet, the transaction must not be considered as
316 * prepared. Likewise, if we are in the process of finishing an
317 * already-prepared transaction, and fail after having already written the
318 * 2nd phase commit or rollback record to the WAL, the transaction should
319 * not be considered as prepared anymore. In those cases, just remove the
320 * entry from shared memory.
321 *
322 * Otherwise, the entry must be left in place so that the transaction can
323 * be finished later, so just unlock it.
324 *
325 * If we abort during prepare, after having written the WAL record, we
326 * might not have transferred all locks and other state to the prepared
327 * transaction yet. Likewise, if we abort during commit or rollback,
328 * after having written the WAL record, we might not have released all the
329 * resources held by the transaction yet. In those cases, the in-memory
330 * state can be wrong, but it's too late to back out.
331 */
333 if (!MyLockedGxact->valid)
335 else
338
340}
341
342/*
343 * This is called after we have finished transferring state to the prepared
344 * PGPROC entry.
345 */
346void
355
356
357/*
358 * MarkAsPreparing
359 * Reserve the GID for the given transaction.
360 */
362MarkAsPreparing(FullTransactionId fxid, const char *gid,
363 TimestampTz prepared_at, Oid owner, Oid databaseid)
364{
366 int i;
367
368 if (strlen(gid) >= GIDSIZE)
371 errmsg("transaction identifier \"%s\" is too long",
372 gid)));
373
374 /* fail immediately if feature is disabled */
375 if (max_prepared_xacts == 0)
378 errmsg("prepared transactions are disabled"),
379 errhint("Set \"max_prepared_transactions\" to a nonzero value.")));
380
381 /* on first call, register the exit hook */
383 {
386 }
387
389
390 /* Check for conflicting GID */
391 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
392 {
394 if (strcmp(gxact->gid, gid) == 0)
395 {
398 errmsg("transaction identifier \"%s\" is already in use",
399 gid)));
400 }
401 }
402
403 /* Get a free gxact from the freelist */
407 errmsg("maximum number of prepared transactions reached"),
408 errhint("Increase \"max_prepared_transactions\" (currently %d).",
412
413 MarkAsPreparingGuts(gxact, fxid, gid, prepared_at, owner, databaseid);
414
415 gxact->ondisk = false;
416
417 /* And insert it into the active array */
420
422
423 return gxact;
424}
425
426/*
427 * MarkAsPreparingGuts
428 *
429 * This uses a gxact struct and puts it into the active array.
430 * NOTE: this is also used when reloading a gxact after a crash; so avoid
431 * assuming that we can use very much backend context.
432 *
433 * Note: This function should be called with appropriate locks held.
434 */
435static void
437 const char *gid, TimestampTz prepared_at, Oid owner,
439{
440 PGPROC *proc;
441 int i;
443
445
446 Assert(gxact != NULL);
447 proc = GetPGProcByNumber(gxact->pgprocno);
448
449 /* Initialize the PGPROC entry */
450 MemSet(proc, 0, sizeof(PGPROC));
453 {
454 /* clone VXID, for TwoPhaseGetXidByVirtualXID() to find */
455 proc->vxid.lxid = MyProc->vxid.lxid;
457 }
458 else
459 {
461 /* GetLockConflicts() uses this to specify a wait on the XID */
462 proc->vxid.lxid = xid;
464 }
465 proc->xid = xid;
467 proc->delayChkptFlags = 0;
468 proc->statusFlags = 0;
469 proc->pid = 0;
470 proc->databaseId = databaseid;
471 proc->roleId = owner;
473 proc->backendType = B_INVALID;
475 proc->lwWaitMode = 0;
476 proc->waitLock = NULL;
478 proc->waitProcLock = NULL;
479 pg_atomic_init_u64(&proc->waitStart, 0);
480 for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
481 dlist_init(&proc->myProcLocks[i]);
482 /* subxid data must be filled later by GXactLoadSubxactData */
483 proc->subxidStatus.overflowed = false;
484 proc->subxidStatus.count = 0;
485
486 gxact->prepared_at = prepared_at;
487 gxact->fxid = fxid;
488 gxact->owner = owner;
489 gxact->locking_backend = MyProcNumber;
490 gxact->valid = false;
491 gxact->inredo = false;
492 strcpy(gxact->gid, gid);
493
494 /*
495 * Remember that we have this GlobalTransaction entry locked for us. If we
496 * abort after this, we must release it.
497 */
499}
500
501/*
502 * GXactLoadSubxactData
503 *
504 * If the transaction being persisted had any subtransactions, this must
505 * be called before MarkAsPrepared() to load information into the dummy
506 * PGPROC.
507 */
508static void
510 TransactionId *children)
511{
512 PGPROC *proc = GetPGProcByNumber(gxact->pgprocno);
513
514 /* We need no extra lock since the GXACT isn't valid yet */
515 if (nsubxacts > PGPROC_MAX_CACHED_SUBXIDS)
516 {
517 proc->subxidStatus.overflowed = true;
518 nsubxacts = PGPROC_MAX_CACHED_SUBXIDS;
519 }
520 if (nsubxacts > 0)
521 {
522 memcpy(proc->subxids.xids, children,
523 nsubxacts * sizeof(TransactionId));
524 proc->subxidStatus.count = nsubxacts;
525 }
526}
527
528/*
529 * MarkAsPrepared
530 * Mark the GXACT as fully valid, and enter it into the global ProcArray.
531 *
532 * lock_held indicates whether caller already holds TwoPhaseStateLock.
533 */
534static void
536{
537 /* Lock here may be overkill, but I'm not convinced of that ... */
538 if (!lock_held)
540 Assert(!gxact->valid);
541 gxact->valid = true;
542 if (!lock_held)
544
545 /*
546 * Put it into the global ProcArray so TransactionIdIsInProgress considers
547 * the XID as still running.
548 */
550}
551
552/*
553 * LockGXact
554 * Locate the prepared transaction and mark it busy for COMMIT or ROLLBACK.
555 */
557LockGXact(const char *gid, Oid user)
558{
559 int i;
560
561 /* on first call, register the exit hook */
563 {
566 }
567
569
570 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
571 {
573 PGPROC *proc = GetPGProcByNumber(gxact->pgprocno);
574
575 /* Ignore not-yet-valid GIDs */
576 if (!gxact->valid)
577 continue;
578 if (strcmp(gxact->gid, gid) != 0)
579 continue;
580
581 /* Found it, but has someone else got it locked? */
582 if (gxact->locking_backend != INVALID_PROC_NUMBER)
585 errmsg("prepared transaction with identifier \"%s\" is busy",
586 gid)));
587
588 if (user != gxact->owner && !superuser_arg(user))
591 errmsg("permission denied to finish prepared transaction"),
592 errhint("Must be superuser or the user that prepared the transaction.")));
593
594 /*
595 * Note: it probably would be possible to allow committing from
596 * another database; but at the moment NOTIFY is known not to work and
597 * there may be some other issues as well. Hence disallow until
598 * someone gets motivated to make it work.
599 */
600 if (MyDatabaseId != proc->databaseId)
603 errmsg("prepared transaction belongs to another database"),
604 errhint("Connect to the database where the transaction was prepared to finish it.")));
605
606 /* OK for me to lock it */
607 gxact->locking_backend = MyProcNumber;
609
611
612 return gxact;
613 }
614
616
619 errmsg("prepared transaction with identifier \"%s\" does not exist",
620 gid)));
621
622 /* NOTREACHED */
623 return NULL;
624}
625
626/*
627 * RemoveGXact
628 * Remove the prepared transaction from the shared memory array.
629 *
630 * NB: caller should have already removed it from ProcArray
631 */
632static void
634{
635 int i;
636
638
639 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
640 {
642 {
643 /* remove from the active array */
646
647 /* and put it back in the freelist */
650
651 return;
652 }
653 }
654
655 elog(ERROR, "failed to find %p in GlobalTransaction array", gxact);
656}
657
658/*
659 * Returns an array of all prepared transactions for the user-level
660 * function pg_prepared_xact.
661 *
662 * The returned array and all its elements are copies of internal data
663 * structures, to minimize the time we need to hold the TwoPhaseStateLock.
664 *
665 * WARNING -- we return even those transactions that are not fully prepared
666 * yet. The caller should filter them out if it doesn't want them.
667 *
668 * The returned array is palloc'd.
669 */
670static int
672{
673 GlobalTransaction array;
674 int num;
675 int i;
676
678
679 if (TwoPhaseState->numPrepXacts == 0)
680 {
682
683 *gxacts = NULL;
684 return 0;
685 }
686
689 *gxacts = array;
690 for (i = 0; i < num; i++)
691 memcpy(array + i, TwoPhaseState->prepXacts[i],
692 sizeof(GlobalTransactionData));
693
695
696 return num;
697}
698
699
700/* Working status for pg_prepared_xact */
707
708/*
709 * pg_prepared_xact
710 * Produce a view with one row per prepared transaction.
711 *
712 * This function is here so we don't have to export the
713 * GlobalTransactionData struct definition.
714 */
715Datum
717{
719 Working_State *status;
720
721 if (SRF_IS_FIRSTCALL())
722 {
723 TupleDesc tupdesc;
724 MemoryContext oldcontext;
725
726 /* create a function context for cross-call persistence */
728
729 /*
730 * Switch to memory context appropriate for multiple function calls
731 */
732 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
733
734 /* build tupdesc for result tuples */
735 /* this had better match pg_prepared_xacts view in system_views.sql */
736 tupdesc = CreateTemplateTupleDesc(5);
737 TupleDescInitEntry(tupdesc, (AttrNumber) 1, "transaction",
738 XIDOID, -1, 0);
739 TupleDescInitEntry(tupdesc, (AttrNumber) 2, "gid",
740 TEXTOID, -1, 0);
741 TupleDescInitEntry(tupdesc, (AttrNumber) 3, "prepared",
742 TIMESTAMPTZOID, -1, 0);
743 TupleDescInitEntry(tupdesc, (AttrNumber) 4, "ownerid",
744 OIDOID, -1, 0);
745 TupleDescInitEntry(tupdesc, (AttrNumber) 5, "dbid",
746 OIDOID, -1, 0);
747
748 TupleDescFinalize(tupdesc);
749 funcctx->tuple_desc = BlessTupleDesc(tupdesc);
750
751 /*
752 * Collect all the 2PC status information that we will format and send
753 * out as a result set.
754 */
756 funcctx->user_fctx = status;
757
758 status->ngxacts = GetPreparedTransactionList(&status->array);
759 status->currIdx = 0;
760
761 MemoryContextSwitchTo(oldcontext);
762 }
763
765 status = (Working_State *) funcctx->user_fctx;
766
767 while (status->array != NULL && status->currIdx < status->ngxacts)
768 {
769 GlobalTransaction gxact = &status->array[status->currIdx++];
770 PGPROC *proc = GetPGProcByNumber(gxact->pgprocno);
771 Datum values[5] = {0};
772 bool nulls[5] = {0};
773 HeapTuple tuple;
774 Datum result;
775
776 if (!gxact->valid)
777 continue;
778
779 /*
780 * Form tuple with appropriate data.
781 */
782
783 values[0] = TransactionIdGetDatum(proc->xid);
785 values[2] = TimestampTzGetDatum(gxact->prepared_at);
786 values[3] = ObjectIdGetDatum(gxact->owner);
788
789 tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
790 result = HeapTupleGetDatum(tuple);
791 SRF_RETURN_NEXT(funcctx, result);
792 }
793
795}
796
797/*
798 * TwoPhaseGetGXact
799 * Get the GlobalTransaction struct for a prepared transaction
800 * specified by XID
801 *
802 * If lock_held is set to true, TwoPhaseStateLock will not be taken, so the
803 * caller had better hold it.
804 */
807{
808 GlobalTransaction result = NULL;
809 int i;
810
813
815
816 /*
817 * During recovery, COMMIT PREPARED, or ROLLBACK PREPARED, we'll be called
818 * repeatedly for the same XID. We can save work with a simple cache.
819 */
821 return cached_gxact;
822
823 if (!lock_held)
825
826 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
827 {
829
830 if (FullTransactionIdEquals(gxact->fxid, fxid))
831 {
832 result = gxact;
833 break;
834 }
835 }
836
837 if (!lock_held)
839
840 if (result == NULL) /* should not happen */
841 elog(ERROR, "failed to find GlobalTransaction for xid %u",
843
844 cached_fxid = fxid;
845 cached_gxact = result;
846
847 return result;
848}
849
850/*
851 * TwoPhaseGetXidByVirtualXID
852 * Lookup VXID among xacts prepared since last startup.
853 *
854 * (This won't find recovered xacts.) If more than one matches, return any
855 * and set "have_more" to true. To witness multiple matches, a single
856 * proc number must consume 2^32 LXIDs, with no intervening database restart.
857 */
860 bool *have_more)
861{
862 int i;
864
867
868 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
869 {
871 PGPROC *proc;
873
874 if (!gxact->valid)
875 continue;
876 proc = GetPGProcByNumber(gxact->pgprocno);
879 {
880 /*
881 * Startup process sets proc->vxid.procNumber to
882 * INVALID_PROC_NUMBER.
883 */
884 Assert(!gxact->inredo);
885
886 if (result != InvalidTransactionId)
887 {
888 *have_more = true;
889 break;
890 }
891 result = XidFromFullTransactionId(gxact->fxid);
892 }
893 }
894
896
897 return result;
898}
899
900/*
901 * TwoPhaseGetDummyProcNumber
902 * Get the dummy proc number for prepared transaction
903 *
904 * Dummy proc numbers are similar to proc numbers of real backends. They
905 * start at FIRST_PREPARED_XACT_PROC_NUMBER, and are unique across all
906 * currently active real backends and prepared transactions. If lock_held is
907 * set to true, TwoPhaseStateLock will not be taken, so the caller had better
908 * hold it.
909 */
917
918/*
919 * TwoPhaseGetDummyProc
920 * Get the PGPROC that represents a prepared transaction
921 *
922 * If lock_held is set to true, TwoPhaseStateLock will not be taken, so the
923 * caller had better hold it.
924 */
925PGPROC *
932
933/************************************************************************/
934/* State file support */
935/************************************************************************/
936
937/*
938 * Compute the FullTransactionId for the given TransactionId.
939 *
940 * This is safe if the xid has not yet reached COMMIT PREPARED or ROLLBACK
941 * PREPARED. After those commands, concurrent vac_truncate_clog() may make
942 * the xid cease to qualify as allowable. XXX Not all callers limit their
943 * calls accordingly.
944 */
945static inline FullTransactionId
951
952static inline int
954{
955 return snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%08X%08X",
958}
959
960/*
961 * 2PC state file format:
962 *
963 * 1. TwoPhaseFileHeader
964 * 2. TransactionId[] (subtransactions)
965 * 3. RelFileLocator[] (files to be deleted at commit)
966 * 4. RelFileLocator[] (files to be deleted at abort)
967 * 5. SharedInvalidationMessage[] (inval messages to be sent at commit)
968 * 6. TwoPhaseRecordOnDisk
969 * 7. ...
970 * 8. TwoPhaseRecordOnDisk (end sentinel, rmid == TWOPHASE_RM_END_ID)
971 * 9. checksum (CRC-32C)
972 *
973 * Each segment except the final checksum is MAXALIGN'd.
974 */
975
976/*
977 * Header for a 2PC state file
978 */
979#define TWOPHASE_MAGIC 0x57F94534 /* format identifier */
980
982
983/*
984 * Header for each record in a state file
985 *
986 * NOTE: len counts only the rmgr data, not the TwoPhaseRecordOnDisk header.
987 * The rmgr data will be stored starting on a MAXALIGN boundary.
988 */
990{
991 uint32 len; /* length of rmgr data */
992 TwoPhaseRmgrId rmid; /* resource manager for this record */
993 uint16 info; /* flag bits for use by rmgr */
995
996/*
997 * During prepare, the state file is assembled in memory before writing it
998 * to WAL and the actual state file. We use a chain of StateFileChunk blocks
999 * for that.
1000 */
1007
1008static struct xllist
1009{
1010 StateFileChunk *head; /* first data block in the chain */
1011 StateFileChunk *tail; /* last block in chain */
1013 uint32 bytes_free; /* free bytes left in tail block */
1014 uint32 total_len; /* total data bytes in chain */
1016
1017
1018/*
1019 * Append a block of data to the records data structure.
1020 *
1021 * NB: each block is padded to a MAXALIGN multiple. This must be
1022 * accounted for when the file is later read!
1023 *
1024 * The data is copied, so the caller is free to modify it afterwards.
1025 */
1026static void
1028{
1030
1032 {
1035 records.tail->len = 0;
1036 records.tail->next = NULL;
1038
1039 records.bytes_free = Max(padlen, 512);
1041 }
1042
1044 records.tail->len += padlen;
1047}
1048
1049/*
1050 * Start preparing a state file.
1051 *
1052 * Initializes data structure and inserts the 2PC file header record.
1053 */
1054void
1056{
1057 PGPROC *proc = GetPGProcByNumber(gxact->pgprocno);
1060 TransactionId *children;
1063 xl_xact_stats_item *abortstats = NULL;
1066
1067 /* Initialize linked list */
1069 records.head->len = 0;
1070 records.head->next = NULL;
1071
1072 records.bytes_free = Max(sizeof(TwoPhaseFileHeader), 512);
1074
1076 records.num_chunks = 1;
1077
1078 records.total_len = 0;
1079
1080 /* Create header */
1081 hdr.magic = TWOPHASE_MAGIC;
1082 hdr.total_len = 0; /* EndPrepare will fill this in */
1083 hdr.xid = xid;
1084 hdr.database = proc->databaseId;
1085 hdr.prepared_at = gxact->prepared_at;
1086 hdr.owner = gxact->owner;
1087 hdr.nsubxacts = xactGetCommittedChildren(&children);
1090 hdr.ncommitstats =
1092 hdr.nabortstats =
1093 pgstat_get_transactional_drops(false, &abortstats);
1095 &hdr.initfileinval);
1096 hdr.gidlen = strlen(gxact->gid) + 1; /* Include '\0' */
1097 /* EndPrepare will fill the origin data, if necessary */
1099 hdr.origin_timestamp = 0;
1100
1101 save_state_data(&hdr, sizeof(TwoPhaseFileHeader));
1102 save_state_data(gxact->gid, hdr.gidlen);
1103
1104 /*
1105 * Add the additional info about subxacts, deletable files and cache
1106 * invalidation messages.
1107 */
1108 if (hdr.nsubxacts > 0)
1109 {
1110 save_state_data(children, hdr.nsubxacts * sizeof(TransactionId));
1111 /* While we have the child-xact data, stuff it in the gxact too */
1112 GXactLoadSubxactData(gxact, hdr.nsubxacts, children);
1113 }
1114 if (hdr.ncommitrels > 0)
1115 {
1118 }
1119 if (hdr.nabortrels > 0)
1120 {
1123 }
1124 if (hdr.ncommitstats > 0)
1125 {
1127 hdr.ncommitstats * sizeof(xl_xact_stats_item));
1129 }
1130 if (hdr.nabortstats > 0)
1131 {
1132 save_state_data(abortstats,
1133 hdr.nabortstats * sizeof(xl_xact_stats_item));
1134 pfree(abortstats);
1135 }
1136 if (hdr.ninvalmsgs > 0)
1137 {
1141 }
1142}
1143
1144/*
1145 * Finish preparing state data and writing it to WAL.
1146 */
1147void
1149{
1150 TwoPhaseFileHeader *hdr;
1151 StateFileChunk *record;
1152 bool replorigin;
1153
1154 /* Add the end sentinel to the list of 2PC records */
1156 NULL, 0);
1157
1158 /* Go back and fill in total_len in the file header record */
1160 Assert(hdr->magic == TWOPHASE_MAGIC);
1161 hdr->total_len = records.total_len + sizeof(pg_crc32c);
1162
1165
1166 if (replorigin)
1167 {
1170 }
1171
1172 /*
1173 * If the data size exceeds MaxAllocSize, we won't be able to read it in
1174 * ReadTwoPhaseFile. Check for that now, rather than fail in the case
1175 * where we write data to file and then re-read at commit time.
1176 */
1177 if (hdr->total_len > MaxAllocSize)
1178 ereport(ERROR,
1180 errmsg("two-phase state file maximum length exceeded")));
1181
1182 /*
1183 * Now writing 2PC state data to WAL. We let the WAL's CRC protection
1184 * cover us, so no need to calculate a separate CRC.
1185 *
1186 * We have to set DELAY_CHKPT_START here, too; otherwise a checkpoint
1187 * starting immediately after the WAL record is inserted could complete
1188 * without fsync'ing our state file. (This is essentially the same kind
1189 * of race condition as the COMMIT-to-clog-write case that
1190 * RecordTransactionCommit uses DELAY_CHKPT_IN_COMMIT for; see notes
1191 * there.) Note that DELAY_CHKPT_IN_COMMIT is used to find transactions in
1192 * the critical commit section. We need to know about such transactions
1193 * for conflict detection in logical replication. See
1194 * GetOldestActiveTransactionId(true, false) and its use.
1195 *
1196 * We save the PREPARE record's location in the gxact for later use by
1197 * CheckPointTwoPhase.
1198 */
1200
1202
1205
1207 for (record = records.head; record != NULL; record = record->next)
1208 XLogRegisterData(record->data, record->len);
1209
1211
1212 gxact->prepare_end_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE);
1213
1214 if (replorigin)
1215 {
1216 /* Move LSNs forward for this replication origin */
1218 gxact->prepare_end_lsn);
1219 }
1220
1221 XLogFlush(gxact->prepare_end_lsn);
1222
1223 /* If we crash now, we have prepared: WAL replay will fix things */
1224
1225 /* Store record's start location to read that later on Commit */
1226 gxact->prepare_start_lsn = ProcLastRecPtr;
1227
1228 /*
1229 * Mark the prepared transaction as valid. As soon as xact.c marks MyProc
1230 * as not running our XID (which it will do immediately after this
1231 * function returns), others can commit/rollback the xact.
1232 *
1233 * NB: a side effect of this is to make a dummy ProcArray entry for the
1234 * prepared XID. This must happen before we clear the XID from MyProc /
1235 * ProcGlobal->xids[], else there is a window where the XID is not running
1236 * according to TransactionIdIsInProgress, and onlookers would be entitled
1237 * to assume the xact crashed. Instead we have a window where the same
1238 * XID appears twice in ProcArray, which is OK.
1239 */
1240 MarkAsPrepared(gxact, false);
1241
1242 /*
1243 * Now we can mark ourselves as out of the commit critical section: a
1244 * checkpoint starting after this will certainly see the gxact as a
1245 * candidate for fsyncing.
1246 */
1248
1249 /*
1250 * Remember that we have this GlobalTransaction entry locked for us. If
1251 * we crash after this point, it's too late to abort, but we must unlock
1252 * it so that the prepared transaction can be committed or rolled back.
1253 */
1255
1257
1258 /*
1259 * Wait for synchronous replication, if required.
1260 *
1261 * Note that at this stage we have marked the prepare, but still show as
1262 * running in the procarray (twice!) and continue to hold locks.
1263 */
1264 SyncRepWaitForLSN(gxact->prepare_end_lsn, false);
1265
1267 records.num_chunks = 0;
1268}
1269
1270/*
1271 * Register a 2PC record to be written to state file.
1272 */
1273void
1275 const void *data, uint32 len)
1276{
1277 TwoPhaseRecordOnDisk record;
1278
1279 record.rmid = rmid;
1280 record.info = info;
1281 record.len = len;
1282 save_state_data(&record, sizeof(TwoPhaseRecordOnDisk));
1283 if (len > 0)
1285}
1286
1287
1288/*
1289 * Read and validate the state file for xid.
1290 *
1291 * If it looks OK (has a valid magic number and CRC), return the palloc'd
1292 * contents of the file; corrupted data is reported with an error. If the
1293 * file does not exist and missing_ok is true (meaning missing files can be
1294 * safely ignored), return NULL instead. This state can be reached when
1295 * doing recovery after discarding two-phase files from frozen epochs.
1296 */
1297static char *
1299{
1300 char path[MAXPGPATH];
1301 char *buf;
1302 TwoPhaseFileHeader *hdr;
1303 int fd;
1304 struct stat stat;
1307 file_crc;
1308 int r;
1309
1310 TwoPhaseFilePath(path, fxid);
1311
1313 if (fd < 0)
1314 {
1315 if (missing_ok && errno == ENOENT)
1316 return NULL;
1317
1318 ereport(ERROR,
1320 errmsg("could not open file \"%s\": %m", path)));
1321 }
1322
1323 /*
1324 * Check file length. We can determine a lower bound pretty easily. We
1325 * set an upper bound to avoid palloc() failure on a corrupt file, though
1326 * we can't guarantee that we won't get an out of memory error anyway,
1327 * even on a valid file.
1328 */
1329 if (fstat(fd, &stat))
1330 ereport(ERROR,
1332 errmsg("could not stat file \"%s\": %m", path)));
1333
1334 if (stat.st_size < (MAXALIGN(sizeof(TwoPhaseFileHeader)) +
1336 sizeof(pg_crc32c)) ||
1338 ereport(ERROR,
1340 errmsg_plural("incorrect size of file \"%s\": %lld byte",
1341 "incorrect size of file \"%s\": %lld bytes",
1342 (long long int) stat.st_size, path,
1343 (long long int) stat.st_size)));
1344
1345 crc_offset = stat.st_size - sizeof(pg_crc32c);
1347 ereport(ERROR,
1349 errmsg("incorrect alignment of CRC offset for file \"%s\"",
1350 path)));
1351
1352 /*
1353 * OK, slurp in the file.
1354 */
1355 buf = (char *) palloc(stat.st_size);
1356
1358 r = read(fd, buf, stat.st_size);
1359 if (r != stat.st_size)
1360 {
1361 if (r < 0)
1362 ereport(ERROR,
1364 errmsg("could not read file \"%s\": %m", path)));
1365 else
1366 ereport(ERROR,
1367 (errmsg("could not read file \"%s\": read %d of %lld",
1368 path, r, (long long int) stat.st_size)));
1369 }
1370
1372
1373 if (CloseTransientFile(fd) != 0)
1374 ereport(ERROR,
1376 errmsg("could not close file \"%s\": %m", path)));
1377
1378 hdr = (TwoPhaseFileHeader *) buf;
1379 if (hdr->magic != TWOPHASE_MAGIC)
1380 ereport(ERROR,
1382 errmsg("invalid magic number stored in file \"%s\"",
1383 path)));
1384
1385 if (hdr->total_len != stat.st_size)
1386 ereport(ERROR,
1388 errmsg("invalid size stored in file \"%s\"",
1389 path)));
1390
1394
1395 file_crc = *((pg_crc32c *) (buf + crc_offset));
1396
1398 ereport(ERROR,
1400 errmsg("calculated CRC checksum does not match value stored in file \"%s\"",
1401 path)));
1402
1403 return buf;
1404}
1405
1406
1407/*
1408 * Reads 2PC data from xlog. During checkpoint this data will be moved to
1409 * twophase files and ReadTwoPhaseFile should be used instead.
1410 *
1411 * Note clearly that this function can access WAL during normal operation,
1412 * similarly to the way WALSender or Logical Decoding would do.
 *
 * lsn is the location of the record to read; it is quoted in every error
 * message below. len may be NULL when the caller does not need the length
 * (see the "len != NULL" test).
 *
 * Raises ERROR when the WAL reader cannot be allocated, when the record
 * cannot be read, or when the record found is not the expected two-phase
 * state data.
1413 */
1414static void
1416{
1417 XLogRecord *record;
1419 char *errormsg;
1420
1422 XL_ROUTINE(.page_read = &read_local_xlog_page,
1423 .segment_open = &wal_segment_open,
1424 .segment_close = &wal_segment_close),
1425 NULL);
1426 if (!xlogreader)
1427 ereport(ERROR,
1429 errmsg("out of memory"),
1430 errdetail("Failed while allocating a WAL reading processor.")));
1431
1433 record = XLogReadRecord(xlogreader, &errormsg);
1434
1435 if (record == NULL)
1436 {
 /* Include the reader's own message when it supplied one */
1437 if (errormsg)
1438 ereport(ERROR,
1440 errmsg("could not read two-phase state from WAL at %X/%08X: %s",
1441 LSN_FORMAT_ARGS(lsn), errormsg)));
1442 else
1443 ereport(ERROR,
1445 errmsg("could not read two-phase state from WAL at %X/%08X",
1446 LSN_FORMAT_ARGS(lsn))));
1447 }
1448
1451 ereport(ERROR,
1453 errmsg("expected two-phase state data is not present in WAL at %X/%08X",
1454 LSN_FORMAT_ARGS(lsn))));
1455
1456 if (len != NULL)
1458
1461
1463}
1464
1465
1466/*
1467 * Confirms an xid is prepared, during recovery
 *
 * Returns false right away when max_prepared_xacts <= 0, and also when no
 * state file exists for the xid (the file is read with missing_ok = true).
 * Otherwise returns whether the xid recorded in the file header equals the
 * xid asked about.
1468 */
1469bool
1471{
1472 char *buf;
1473 TwoPhaseFileHeader *hdr;
1474 bool result;
1475 FullTransactionId fxid;
1476
1478
1479 if (max_prepared_xacts <= 0)
1480 return false; /* nothing to do */
1481
1482 /* Read and validate file */
1483 fxid = AdjustToFullTransactionId(xid);
1484 buf = ReadTwoPhaseFile(fxid, true);
1485 if (buf == NULL)
1486 return false;
1487
1488 /* Check header also */
1489 hdr = (TwoPhaseFileHeader *) buf;
1490 result = TransactionIdEquals(hdr->xid, xid);
 /* The file contents were only needed for the header comparison */
1491 pfree(buf);
1492
1493 return result;
1494}
1495
1496/*
1497 * FinishPreparedTransaction: execute COMMIT PREPARED or ROLLBACK PREPARED
 *
 * gid identifies the prepared transaction; isCommit selects the commit
 * (true) or rollback (false) variant of each step below. The gxact is
 * looked up and locked with LockGXact() under the current user id, and the
 * 2PC state is loaded from the state file or from WAL depending on
 * gxact->ondisk. The buffer holding that state is pfree'd before returning.
1498 */
1499void
1501{
1503 PGPROC *proc;
1504 FullTransactionId fxid;
1505 TransactionId xid;
1506 bool ondisk;
1507 char *buf;
1508 char *bufptr;
1509 TwoPhaseFileHeader *hdr;
1511 TransactionId *children;
1515 int ndelrels;
1517 xl_xact_stats_item *abortstats;
1519
1520 /*
1521 * Validate the GID, and lock the GXACT to ensure that two backends do not
1522 * try to commit the same GID at once.
1523 */
1524 gxact = LockGXact(gid, GetUserId());
1525 proc = GetPGProcByNumber(gxact->pgprocno);
1526 fxid = gxact->fxid;
1527 xid = XidFromFullTransactionId(fxid);
1528
1529 /*
1530 * Read and validate 2PC state data. State data will typically be stored
1531 * in WAL files if the LSN is after the last checkpoint record, or moved
1532 * to disk if for some reason they have lived for a long time.
1533 */
1534 if (gxact->ondisk)
1535 buf = ReadTwoPhaseFile(fxid, false);
1536 else
1537 XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, NULL);
1538
1539
1540 /*
1541 * Disassemble the header area
 *
 * Each section follows the previous one at a MAXALIGN'd offset, in this
 * order: header, GID, subxact XIDs, commit rels, abort rels, commit
 * stats, abort stats, invalidation messages. After the last advance,
 * bufptr points past the invalidation messages.
1542 */
1543 hdr = (TwoPhaseFileHeader *) buf;
1544 Assert(TransactionIdEquals(hdr->xid, xid));
1545 bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
1546 bufptr += MAXALIGN(hdr->gidlen);
1547 children = (TransactionId *) bufptr;
1548 bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
1549 commitrels = (RelFileLocator *) bufptr;
1550 bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileLocator));
1551 abortrels = (RelFileLocator *) bufptr;
1552 bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileLocator));
1553 commitstats = (xl_xact_stats_item *) bufptr;
1554 bufptr += MAXALIGN(hdr->ncommitstats * sizeof(xl_xact_stats_item));
1555 abortstats = (xl_xact_stats_item *) bufptr;
1556 bufptr += MAXALIGN(hdr->nabortstats * sizeof(xl_xact_stats_item));
1558 bufptr += MAXALIGN(hdr->ninvalmsgs * sizeof(SharedInvalidationMessage));
1559
1560 /* compute latestXid among all children */
1561 latestXid = TransactionIdLatest(xid, hdr->nsubxacts, children);
1562
1563 /* Prevent cancel/die interrupt while cleaning up */
1565
1566 /*
1567 * The order of operations here is critical: make the XLOG entry for
1568 * commit or abort, then mark the transaction committed or aborted in
1569 * pg_xact, then remove its PGPROC from the global ProcArray (which means
1570 * TransactionIdIsInProgress will stop saying the prepared xact is in
1571 * progress), then run the post-commit or post-abort callbacks. The
1572 * callbacks will release the locks the transaction held.
1573 */
1574 if (isCommit)
1576 hdr->nsubxacts, children,
1577 hdr->ncommitrels, commitrels,
1578 hdr->ncommitstats,
1580 hdr->ninvalmsgs, invalmsgs,
1581 hdr->initfileinval, gid);
1582 else
1584 hdr->nsubxacts, children,
1585 hdr->nabortrels, abortrels,
1586 hdr->nabortstats,
1587 abortstats,
1588 gid);
1589
1591
1592 /*
1593 * In case we fail while running the callbacks, mark the gxact invalid so
1594 * no one else will try to commit/rollback, and so it will be recycled if
1595 * we fail after this point. It is still locked by our backend so it
1596 * won't go away yet.
1597 *
1598 * (We assume it's safe to do this without taking TwoPhaseStateLock.)
1599 */
1600 gxact->valid = false;
1601
1602 /*
1603 * We have to remove any files that were supposed to be dropped. For
1604 * consistency with the regular xact.c code paths, must do this before
1605 * releasing locks, so do it before running the callbacks.
1606 *
1607 * NB: this code knows that we couldn't be dropping any temp rels ...
1608 */
1609 if (isCommit)
1610 {
1612 ndelrels = hdr->ncommitrels;
1613 }
1614 else
1615 {
1617 ndelrels = hdr->nabortrels;
1618 }
1619
1620 /* Make sure files supposed to be dropped are dropped */
1622
1623 if (isCommit)
1625 else
1626 pgstat_execute_transactional_drops(hdr->nabortstats, abortstats, false);
1627
1628 /*
1629 * Handle cache invalidation messages.
1630 *
1631 * Relcache init file invalidation requires processing both before and
1632 * after we send the SI messages, only when committing. See
1633 * AtEOXact_Inval().
1634 */
1635 if (isCommit)
1636 {
1637 if (hdr->initfileinval)
1640 if (hdr->initfileinval)
1642 }
1643
1644 /*
1645 * Acquire the two-phase lock. We want to work on the two-phase callbacks
1646 * while holding it to avoid potential conflicts with other transactions
1647 * attempting to use the same GID, so the lock is released once the shared
1648 * memory state is cleared.
1649 */
1651
1652 /* And now do the callbacks */
1653 if (isCommit)
1655 else
1657
1659
1660 /*
1661 * Read this value while holding the two-phase lock, as the on-disk 2PC
1662 * file is physically removed after the lock is released.
1663 */
1664 ondisk = gxact->ondisk;
1665
1666 /* Clear shared memory state */
1668
1669 /*
1670 * Release the lock as all callbacks are called and shared memory cleanup
1671 * is done.
1672 */
1674
1675 /* Count the prepared xact as committed or aborted */
1676 AtEOXact_PgStat(isCommit, false);
1677
1678 /*
1679 * And now we can clean up any files we may have left.
1680 */
1681 if (ondisk)
1682 RemoveTwoPhaseFile(fxid, true);
1683
1685
1687
1688 pfree(buf);
1689}
1690
1691/*
1692 * Scan 2PC state data in memory and call the indicated callbacks for each 2PC record.
 *
 * bufptr points at a sequence of TwoPhaseRecordOnDisk headers, each followed
 * by its payload; both are stepped over at MAXALIGN'd sizes. The scan stops
 * at the record whose rmid is TWOPHASE_RM_END_ID. callbacks is indexed by
 * rmid; a NULL entry means records of that rmid are skipped.
1693 */
1694static void
1696 const TwoPhaseCallback callbacks[])
1697{
1698 for (;;)
1699 {
1700 TwoPhaseRecordOnDisk *record = (TwoPhaseRecordOnDisk *) bufptr;
1701
1702 Assert(record->rmid <= TWOPHASE_RM_MAX_ID);
1703 if (record->rmid == TWOPHASE_RM_END_ID)
1704 break;
1705
 /* Step over the record header; bufptr now points at the payload */
1706 bufptr += MAXALIGN(sizeof(TwoPhaseRecordOnDisk));
1707
1708 if (callbacks[record->rmid] != NULL)
1709 callbacks[record->rmid] (fxid, record->info, bufptr, record->len);
1710
 /* Step over the payload to reach the next record header */
1711 bufptr += MAXALIGN(record->len);
1712 }
1713}
1714
1715/*
1716 * Remove the 2PC file.
1717 *
1718 * If giveWarning is false, do not complain about file-not-present;
1719 * this is an expected case during WAL replay.
 * Any other unlink() failure is reported regardless of giveWarning.
1720 *
1721 * This routine is used at early stages at recovery where future and
1722 * past orphaned files are checked, hence the FullTransactionId to build
1723 * a complete file name fit for the removal.
1724 */
1725static void
1727{
1728 char path[MAXPGPATH];
1729
1730 TwoPhaseFilePath(path, fxid);
1731 if (unlink(path))
 /* Stay quiet only for a missing file when giveWarning is false */
1732 if (errno != ENOENT || giveWarning)
1735 errmsg("could not remove file \"%s\": %m", path)));
1736}
1737
1738/*
1739 * Recreates a state file. This is used in WAL replay and during
1740 * checkpoint creation.
1741 *
1742 * Note: content and len don't include CRC.
 *
 * The CRC is computed over the len bytes of content and written to the file
 * right after them; the file is then fsync'd and closed. A failure in any
 * of the open, write, fsync or close steps raises ERROR.
1743 */
1744static void
1746{
1747 char path[MAXPGPATH];
1749 int fd;
1750
1751 /* Recompute CRC */
1753 COMP_CRC32C(statefile_crc, content, len);
1755
1756 TwoPhaseFilePath(path, fxid);
1757
1758 fd = OpenTransientFile(path,
1760 if (fd < 0)
1761 ereport(ERROR,
1763 errmsg("could not recreate file \"%s\": %m", path)));
1764
1765 /* Write content and CRC */
 /* errno is cleared first so a short write that leaves it unset is detectable */
1766 errno = 0;
1768 if (write(fd, content, len) != len)
1769 {
1770 /* if write didn't set errno, assume problem is no disk space */
1771 if (errno == 0)
1772 errno = ENOSPC;
1773 ereport(ERROR,
1775 errmsg("could not write file \"%s\": %m", path)));
1776 }
1777 if (write(fd, &statefile_crc, sizeof(pg_crc32c)) != sizeof(pg_crc32c))
1778 {
1779 /* if write didn't set errno, assume problem is no disk space */
1780 if (errno == 0)
1781 errno = ENOSPC;
1782 ereport(ERROR,
1784 errmsg("could not write file \"%s\": %m", path)));
1785 }
1787
1788 /*
1789 * We must fsync the file because the end-of-replay checkpoint will not do
1790 * so, there being no GXACT in shared memory yet to tell it to.
1791 */
1793 if (pg_fsync(fd) != 0)
1794 ereport(ERROR,
1796 errmsg("could not fsync file \"%s\": %m", path)));
1798
1799 if (CloseTransientFile(fd) != 0)
1800 ereport(ERROR,
1802 errmsg("could not close file \"%s\": %m", path)));
1803}
1804
1805/*
1806 * CheckPointTwoPhase -- handle 2PC component of checkpointing.
1807 *
1808 * We must fsync the state file of any GXACT that is valid or has been
1809 * generated during redo and has a PREPARE LSN <= the checkpoint's redo
1810 * horizon. (If the gxact isn't valid yet, has not been generated in
1811 * redo, or has a later LSN, this checkpoint is not responsible for
1812 * fsyncing it.)
1813 *
1814 * This is deliberately run as late as possible in the checkpoint sequence,
1815 * because GXACTs ordinarily have short lifespans, and so it is quite
1816 * possible that GXACTs that were valid at checkpoint start will no longer
1817 * exist if we wait a little bit. With typical checkpoint settings this
1818 * will be about 3 minutes for an online checkpoint, so as a result we
1819 * expect that there will be no GXACTs that need to be copied to disk.
1820 *
1821 * If a GXACT remains valid across multiple checkpoints, it will already
1822 * be on disk so we don't bother to repeat that write.
 *
 * Does nothing when max_prepared_xacts <= 0.
1823 */
1824void
1826{
1827 int i;
1828 int serialized_xacts = 0;
1829
1830 if (max_prepared_xacts <= 0)
1831 return; /* nothing to do */
1832
1834
1835 /*
1836 * We are expecting there to be zero GXACTs that need to be copied to
1837 * disk, so we perform all I/O while holding TwoPhaseStateLock for
1838 * simplicity. This prevents any new xacts from preparing while this
1839 * occurs, which shouldn't be a problem since the presence of long-lived
1840 * prepared xacts indicates the transaction manager isn't active.
1841 *
1842 * It's also possible to move I/O out of the lock, but then on every error
1843 * we would have to check whether somebody committed our transaction in a
1844 * different backend. Let's leave this optimization for the future, in case
1845 * somebody finds that this place causes a bottleneck.
1846 *
1847 * Note that it isn't possible for there to be a GXACT with a
1848 * prepare_end_lsn set prior to the last checkpoint yet is marked invalid,
1849 * because of the efforts with delayChkptFlags.
1850 */
1852 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
1853 {
1854 /*
1855 * Note that we are using gxact not PGPROC so this works in recovery
1856 * also
1857 */
1859
1860 if ((gxact->valid || gxact->inredo) &&
1861 !gxact->ondisk &&
1862 gxact->prepare_end_lsn <= redo_horizon)
1863 {
1864 char *buf;
1865 int len;
1866
1867 XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, &len);
 /* From here on the entry is tracked as on disk, with no WAL pointers */
1869 gxact->ondisk = true;
1870 gxact->prepare_start_lsn = InvalidXLogRecPtr;
1871 gxact->prepare_end_lsn = InvalidXLogRecPtr;
1872 pfree(buf);
1874 }
1875 }
1877
1878 /*
1879 * Flush unconditionally the parent directory to make any information
1880 * durable on disk. Two-phase files could have been removed and those
1881 * removals need to be made persistent as well as any files newly created
1882 * previously since the last checkpoint.
1883 */
1885
1887
1889 ereport(LOG,
1890 (errmsg_plural("%u two-phase state file was written "
1891 "for a long-running prepared transaction",
1892 "%u two-phase state files were written "
1893 "for long-running prepared transactions",
1896}
1897
1898/*
1899 * restoreTwoPhaseData
1900 *
1901 * Scan pg_twophase and fill TwoPhaseState depending on the on-disk data.
1902 * This is called once at the beginning of recovery, saving any extra
1903 * lookups in the future. Two-phase files that are newer than the
1904 * minimum XID horizon are discarded on the way.
 *
 * Only directory entries whose name is exactly 16 characters drawn from
 * "0123456789ABCDEF" are considered; the name is parsed as a base-16
 * 64-bit value to obtain the FullTransactionId.
1905 */
1906void
1908{
1909 DIR *cldir;
1910 struct dirent *clde;
1911
1914 while ((clde = ReadDir(cldir, TWOPHASE_DIR)) != NULL)
1915 {
 /* Skip anything that does not look like a state file name */
1916 if (strlen(clde->d_name) == 16 &&
1917 strspn(clde->d_name, "0123456789ABCDEF") == 16)
1918 {
1919 FullTransactionId fxid;
1920 char *buf;
1921
1922 fxid = FullTransactionIdFromU64(strtou64(clde->d_name, NULL, 16));
1923
1925 true, false, false);
 /* No buffer came back for this file, so there is nothing to register */
1926 if (buf == NULL)
1927 continue;
1928
1931 }
1932 }
1934 FreeDir(cldir);
1935}
1936
1937/*
1938 * PrescanPreparedTransactions
1939 *
1940 * Scan the shared memory entries of TwoPhaseState and determine the range
1941 * of valid XIDs present. This is run during database startup, after we
1942 * have completed reading WAL. TransamVariables->nextXid has been set to
1943 * one more than the highest XID for which evidence exists in WAL.
1944 *
1945 * We throw away any prepared xacts with main XID beyond nextXid --- if any
1946 * are present, it suggests that the DBA has done a PITR recovery to an
1947 * earlier point in time without cleaning out pg_twophase. We dare not
1948 * try to recover such prepared xacts since they likely depend on database
1949 * state that doesn't exist now.
1950 *
1951 * However, we will advance nextXid beyond any subxact XIDs belonging to
1952 * valid prepared xacts. We need to do this since subxact commit doesn't
1953 * write a WAL entry, and so there might be no evidence in WAL of those
1954 * subxact XIDs.
1955 *
1956 * On corrupted two-phase files, fail immediately. Keeping broken entries
1957 * around and letting replay continue would harm the system; a new backup
1958 * should be rolled in instead.
1959 *
1960 * Our other responsibility is to determine and return the oldest valid XID
1961 * among the prepared xacts (if none, return TransamVariables->nextXid).
1962 * This is needed to synchronize pg_subtrans startup properly.
1963 *
1964 * If xids_p and nxids_p are not NULL, pointer to a palloc'd array of all
1965 * top-level xids is stored in *xids_p. The number of entries in the array
1966 * is returned in *nxids_p.
 * Only xids_p is tested before both outputs are written, so the two must be
 * passed together. *xids_p is left NULL when no entry was collected.
1967 */
1970{
1973 TransactionId result = origNextXid;
1974 TransactionId *xids = NULL;
1975 int nxids = 0;
1976 int allocsize = 0;
1977 int i;
1978
1980 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
1981 {
1982 TransactionId xid;
1983 char *buf;
1985
1986 Assert(gxact->inredo);
1987
1989 gxact->prepare_start_lsn,
1990 gxact->ondisk, false, true);
1991
1992 if (buf == NULL)
1993 continue;
1994
1995 /*
1996 * OK, we think this file is valid. Incorporate xid into the
1997 * running-minimum result.
1998 */
1999 xid = XidFromFullTransactionId(gxact->fxid);
2000 if (TransactionIdPrecedes(xid, result))
2001 result = xid;
2002
2003 if (xids_p)
2004 {
 /* Grow the array on demand: 10 entries at first, then doubling */
2005 if (nxids == allocsize)
2006 {
2007 if (nxids == 0)
2008 {
2009 allocsize = 10;
2010 xids = palloc(allocsize * sizeof(TransactionId));
2011 }
2012 else
2013 {
2014 allocsize = allocsize * 2;
2015 xids = repalloc(xids, allocsize * sizeof(TransactionId));
2016 }
2017 }
2018 xids[nxids++] = xid;
2019 }
2020
2021 pfree(buf);
2022 }
2024
2025 if (xids_p)
2026 {
2027 *xids_p = xids;
2028 *nxids_p = nxids;
2029 }
2030
2031 return result;
2032}
2033
2034/*
2035 * StandbyRecoverPreparedTransactions
2036 *
2037 * Scan the shared memory entries of TwoPhaseState and setup all the required
2038 * information to allow standby queries to treat prepared transactions as still
2039 * active.
2040 *
2041 * This is never called at the end of recovery - we use
2042 * RecoverPreparedTransactions() at that point.
2043 *
2044 * This updates pg_subtrans, so that any subtransactions will be correctly
2045 * seen as in-progress in snapshots taken during recovery.
 *
 * Every entry is expected to have been added during redo (see the Assert).
 * The buffer obtained for each entry is not used here and is freed at once.
 *
 * NOTE(review): the call producing buf is not shown in this listing; from
 * its trailing arguments it is presumably ProcessTwoPhaseBuffer() invoked
 * with setParent = true and setNextXid = false -- confirm.
2046 */
2047void
2049{
2050 int i;
2051
2053 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
2054 {
2055 char *buf;
2057
2058 Assert(gxact->inredo);
2059
2061 gxact->prepare_start_lsn,
2062 gxact->ondisk, true, false);
2063 if (buf != NULL)
2064 pfree(buf);
2065 }
2067}
2068
2069/*
2070 * RecoverPreparedTransactions
2071 *
2072 * Scan the shared memory entries of TwoPhaseState and reload the state for
2073 * each prepared transaction (reacquire locks, etc).
2074 *
2075 * This is run at the end of recovery, but before we allow backends to write
2076 * WAL.
2077 *
2078 * At the end of recovery the way we take snapshots will change. We now need
2079 * to mark all running transactions with their full SubTransSetParent() info
2080 * to allow normal snapshots to work correctly if snapshots overflow.
2081 * We do this here because by definition prepared transactions are the only
2082 * type of write transaction still running, so this is necessary and
2083 * complete.
 *
 * Entries for which no state buffer is obtained are skipped. For the
 * others, the inredo flag is cleared once the GXACT has been re-marked as
 * preparing, and the buffer is freed at the end of the iteration.
2084 */
2085void
2087{
2088 int i;
2089
2091 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
2092 {
2093 char *buf;
2095 FullTransactionId fxid = gxact->fxid;
2096 char *bufptr;
2097 TwoPhaseFileHeader *hdr;
2098 TransactionId *subxids;
2099 const char *gid;
2100
2101 /*
2102 * Reconstruct subtrans state for the transaction --- needed because
2103 * pg_subtrans is not preserved over a restart. Note that we are
2104 * linking all the subtransactions directly to the top-level XID;
2105 * there may originally have been a more complex hierarchy, but
2106 * there's no need to restore that exactly. It's possible that
2107 * SubTransSetParent has been set before, if the prepared transaction
2108 * generated xid assignment records.
2109 */
2111 gxact->prepare_start_lsn,
2112 gxact->ondisk, true, false);
2113 if (buf == NULL)
2114 continue;
2115
2116 ereport(LOG,
2117 (errmsg("recovering prepared transaction %u of epoch %u from shared memory",
2120
 /*
 * Walk the MAXALIGN'd sections that follow the header: the GID and the
 * subxact XIDs are picked up, the remaining sections are only stepped
 * over.
 */
2121 hdr = (TwoPhaseFileHeader *) buf;
2124 bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
2125 gid = (const char *) bufptr;
2126 bufptr += MAXALIGN(hdr->gidlen);
2127 subxids = (TransactionId *) bufptr;
2128 bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
2129 bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileLocator));
2130 bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileLocator));
2131 bufptr += MAXALIGN(hdr->ncommitstats * sizeof(xl_xact_stats_item));
2132 bufptr += MAXALIGN(hdr->nabortstats * sizeof(xl_xact_stats_item));
2133 bufptr += MAXALIGN(hdr->ninvalmsgs * sizeof(SharedInvalidationMessage));
2134
2135 /*
2136 * Recreate its GXACT and dummy PGPROC. But, check whether it was
2137 * added in redo and already has a shmem entry for it.
2138 */
2139 MarkAsPreparingGuts(gxact, gxact->fxid, gid,
2140 hdr->prepared_at,
2141 hdr->owner, hdr->database);
2142
2143 /* recovered, so reset the flag for entries generated by redo */
2144 gxact->inredo = false;
2145
2146 GXactLoadSubxactData(gxact, hdr->nsubxacts, subxids);
2147 MarkAsPrepared(gxact, true);
2148
2150
2151 /*
2152 * Recover other state (notably locks) using resource managers.
2153 */
2155
2156 /*
2157 * Release locks held by the standby process after we process each
2158 * prepared transaction. As a result, we don't need too many
2159 * additional locks at any one time.
2160 */
2161 if (InHotStandby)
2162 StandbyReleaseLockTree(hdr->xid, hdr->nsubxacts, subxids);
2163
2164 /*
2165 * We're done with recovering this transaction. Clear MyLockedGxact,
2166 * like we do in PrepareTransaction() during normal operation.
2167 */
2169
2170 pfree(buf);
2171
2173 }
2174
2176}
2177
2178/*
2179 * ProcessTwoPhaseBuffer
2180 *
2181 * Given a FullTransactionId, read it either from disk or read it directly
2182 * via shmem xlog record pointer using the provided "prepare_start_lsn".
2183 *
2184 * If setParent is true, set up subtransaction parent linkages.
2185 *
2186 * If setNextXid is true, set TransamVariables->nextXid to the newest
2187 * value scanned.
 *
 * fromdisk selects the source: the state file when true, WAL at
 * prepare_start_lsn (which must then be valid) when false.
 *
 * Returns the state data buffer, or NULL when the entry was discarded as
 * stale ("already processed") or as too new; in those cases the state file
 * or the shared memory entry, respectively, is removed first. A header
 * that fails the consistency check below raises ERROR.
2188 */
2189static char *
2191 XLogRecPtr prepare_start_lsn,
2192 bool fromdisk,
2193 bool setParent, bool setNextXid)
2194{
2196 TransactionId *subxids;
2197 char *buf;
2198 TwoPhaseFileHeader *hdr;
2199 int i;
2200
2202
2203 if (!fromdisk)
2204 Assert(XLogRecPtrIsValid(prepare_start_lsn));
2205
2206 /* Already processed? */
2209 {
2210 if (fromdisk)
2211 {
2213 (errmsg("removing stale two-phase state file for transaction %u of epoch %u",
2216 RemoveTwoPhaseFile(fxid, true);
2217 }
2218 else
2219 {
2221 (errmsg("removing stale two-phase state from memory for transaction %u of epoch %u",
2224 PrepareRedoRemoveFull(fxid, true);
2225 }
2226 return NULL;
2227 }
2228
2229 /* Reject XID if too new */
2230 if (FullTransactionIdFollowsOrEquals(fxid, nextXid))
2231 {
2232 if (fromdisk)
2233 {
2235 (errmsg("removing future two-phase state file for transaction %u of epoch %u",
2238 RemoveTwoPhaseFile(fxid, true);
2239 }
2240 else
2241 {
2243 (errmsg("removing future two-phase state from memory for transaction %u of epoch %u",
2246 PrepareRedoRemoveFull(fxid, true);
2247 }
2248 return NULL;
2249 }
2250
2251 if (fromdisk)
2252 {
2253 /* Read and validate file */
2254 buf = ReadTwoPhaseFile(fxid, false);
2255 }
2256 else
2257 {
2258 /* Read xlog data */
2259 XlogReadTwoPhaseData(prepare_start_lsn, &buf, NULL);
2260 }
2261
2262 /* Deconstruct header */
2263 hdr = (TwoPhaseFileHeader *) buf;
2265 {
2266 if (fromdisk)
2267 ereport(ERROR,
2269 errmsg("corrupted two-phase state file for transaction %u of epoch %u",
2272 else
2273 ereport(ERROR,
2275 errmsg("corrupted two-phase state in memory for transaction %u of epoch %u",
2278 }
2279
2280 /*
2281 * Examine subtransaction XIDs ... they should all follow main XID, and
2282 * they may force us to advance nextXid.
 *
 * The subxact array sits after the MAXALIGN'd header and MAXALIGN'd GID.
2283 */
2284 subxids = (TransactionId *) (buf +
2285 MAXALIGN(sizeof(TwoPhaseFileHeader)) +
2286 MAXALIGN(hdr->gidlen));
2287 for (i = 0; i < hdr->nsubxacts; i++)
2288 {
2289 TransactionId subxid = subxids[i];
2290
2292
2293 /* update nextXid if needed */
2294 if (setNextXid)
2296
2297 if (setParent)
2299 }
2300
2301 return buf;
2302}
2303
2304
2305/*
2306 * RecordTransactionCommitPrepared
2307 *
2308 * This is basically the same as RecordTransactionCommit (q.v. if you change
2309 * this function): in particular, we must set DELAY_CHKPT_IN_COMMIT to avoid a
2310 * race condition.
2311 *
2312 * We know the transaction made at least one XLOG entry (its PREPARE),
2313 * so it is never possible to optimize out the commit record.
 *
 * The children, rels, stats and invalmsgs arrays (with their counts),
 * initfileinval and gid are handed to the call that emits the commit
 * record; xid and the children are then marked committed in pg_xact with
 * TransactionIdCommitTree().
2314 */
2315static void
2317 int nchildren,
2318 TransactionId *children,
2319 int nrels,
2320 RelFileLocator *rels,
2321 int nstats,
2322 xl_xact_stats_item *stats,
2323 int ninvalmsgs,
2325 bool initfileinval,
2326 const char *gid)
2327{
2330 bool replorigin;
2331
2332 /*
2333 * Are we using the replication origins feature? Or, in other words, are
2334 * we replaying remote actions?
2335 */
2338
2339 /* Load the injection point before entering the critical section */
2340 INJECTION_POINT_LOAD("commit-after-delay-checkpoint");
2341
2343
2344 /* See notes in RecordTransactionCommit */
2347
2348 INJECTION_POINT_CACHED("commit-after-delay-checkpoint", NULL);
2349
2350 /*
2351 * Ensures the DELAY_CHKPT_IN_COMMIT flag write is globally visible before
2352 * commit time is written.
2353 */
2355
2356 /*
2357 * Note it is important to set committs value after marking ourselves as
2358 * in the commit critical section (DELAY_CHKPT_IN_COMMIT). This is because
2359 * we want to ensure all transactions that have acquired commit timestamp
2360 * are finished before we allow the logical replication client to advance
2361 * its xid which is used to hold back dead rows for conflict detection.
2362 * See comments atop worker.c.
2363 */
2365
2366 /*
2367 * Emit the XLOG commit record. Note that we mark 2PC commits as
2368 * potentially having AccessExclusiveLocks since we don't know whether or
2369 * not they do.
2370 */
2372 nchildren, children, nrels, rels,
2373 nstats, stats,
2374 ninvalmsgs, invalmsgs,
2375 initfileinval,
2377 xid, gid);
2378
2379
2380 if (replorigin)
2381 /* Move LSNs forward for this replication origin */
2384
2385 /*
2386 * Record commit timestamp. The value comes from plain commit timestamp
2387 * if replorigin is not enabled, or replorigin already set a value for us
2388 * in replorigin_xact_state.origin_timestamp otherwise.
2389 *
2390 * We don't need to WAL-log anything here, as the commit record written
2391 * above already contains the data.
2392 */
2395
2399
2400 /*
2401 * We don't currently try to sleep before flush here ... nor is there any
2402 * support for async commit of a prepared xact (the very idea is probably
2403 * a contradiction)
2404 */
2405
2406 /* Flush XLOG to disk */
2408
2409 /* Mark the transaction committed in pg_xact */
2410 TransactionIdCommitTree(xid, nchildren, children);
2411
2412 /* Checkpoint can proceed now */
2414
2416
2417 /*
2418 * Wait for synchronous replication, if required.
2419 *
2420 * Note that at this stage we have marked clog, but still show as running
2421 * in the procarray and continue to hold locks.
2422 */
2424}
2425
2426/*
2427 * RecordTransactionAbortPrepared
2428 *
2429 * This is basically the same as RecordTransactionAbort.
2430 *
2431 * We know the transaction made at least one XLOG entry (its PREPARE),
2432 * so it is never possible to optimize out the abort record.
 *
 * PANICs if pg_xact already shows xid as committed. Otherwise xid and its
 * children are marked aborted with TransactionIdAbortTree(), and the
 * function waits for synchronous replication of the record before
 * returning.
2433 */
2434static void
2436 int nchildren,
2437 TransactionId *children,
2438 int nrels,
2439 RelFileLocator *rels,
2440 int nstats,
2441 xl_xact_stats_item *stats,
2442 const char *gid)
2443{
2445 bool replorigin;
2446
2447 /*
2448 * Are we using the replication origins feature? Or, in other words, are
2449 * we replaying remote actions?
2450 */
2453
2454 /*
2455 * Catch the scenario where we aborted partway through
2456 * RecordTransactionCommitPrepared ...
2457 */
2458 if (TransactionIdDidCommit(xid))
2459 elog(PANIC, "cannot abort transaction %u, it was already committed",
2460 xid);
2461
2463
2464 /*
2465 * Emit the XLOG abort record. Note that we mark 2PC aborts as
2466 * potentially having AccessExclusiveLocks since we don't know whether or
2467 * not they do.
2468 */
2470 nchildren, children,
2471 nrels, rels,
2472 nstats, stats,
2474 xid, gid);
2475
2476 if (replorigin)
2477 /* Move LSNs forward for this replication origin */
2480
2481 /* Always flush, since we're about to remove the 2PC state file */
2483
2484 /*
2485 * Mark the transaction aborted in clog. This is not absolutely necessary
2486 * but we may as well do it while we are here.
2487 */
2488 TransactionIdAbortTree(xid, nchildren, children);
2489
2491
2492 /*
2493 * Wait for synchronous replication, if required.
2494 *
2495 * Note that at this stage we have marked clog, but still show as running
2496 * in the procarray and continue to hold locks.
2497 */
2498 SyncRepWaitForLSN(recptr, false);
2499}
2500
2501/*
2502 * PrepareRedoAdd
2503 *
2504 * Store pointers to the start/end of the WAL record along with the xid in
2505 * a gxact entry in shared memory TwoPhaseState structure. If caller
2506 * specifies InvalidXLogRecPtr as WAL location to fetch the two-phase
2507 * data, the entry is marked as located on disk.
 *
 * When start_lsn is valid and a state file already exists for the
 * transaction, the record is skipped and no entry is added. Raises ERROR
 * when no free gxact is available. When origin_id is not
 * InvalidReplOriginId, replication origin progress is advanced from the
 * header's origin_lsn.
2508 */
2509void
2511 XLogRecPtr start_lsn, XLogRecPtr end_lsn,
2512 ReplOriginId origin_id)
2513{
2515 char *bufptr;
2516 const char *gid;
2518
2521
2522 if (!FullTransactionIdIsValid(fxid))
2523 {
2526 hdr->xid);
2527 }
2528
 /* The GID string is stored right after the MAXALIGN'd header */
2529 bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
2530 gid = (const char *) bufptr;
2531
2532 /*
2533 * Reserve the GID for the given transaction in the redo code path.
2534 *
2535 * This creates a gxact struct and puts it into the active array.
2536 *
2537 * In redo, this struct is mainly used to track PREPARE/COMMIT entries in
2538 * shared memory. Hence, we only fill up the bare minimum contents here.
2539 * The gxact also gets marked with gxact->inredo set to true to indicate
2540 * that it got added in the redo phase
2541 */
2542
2543 /*
2544 * In the event of a crash while a checkpoint was running, it may be
2545 * possible that some two-phase data found its way to disk while its
2546 * corresponding record needs to be replayed in the follow-up recovery. As
2547 * the 2PC data was on disk, it has already been restored at the beginning
2548 * of recovery with restoreTwoPhaseData(), so skip this record to avoid
2549 * duplicates in TwoPhaseState. If a consistent state has been reached,
2550 * the record is added to TwoPhaseState and it should have no
2551 * corresponding file in pg_twophase.
2552 */
2553 if (XLogRecPtrIsValid(start_lsn))
2554 {
2555 char path[MAXPGPATH];
2556
2558 TwoPhaseFilePath(path, fxid);
2559
2560 if (access(path, F_OK) == 0)
2561 {
2563 (errmsg("could not recover two-phase state file for transaction %u",
2564 hdr->xid),
2565 errdetail("Two-phase state file has been found in WAL record %X/%08X, but this transaction has already been restored from disk.",
2566 LSN_FORMAT_ARGS(start_lsn))));
2567 return;
2568 }
2569
 /* access() failed: only "no such file" is an acceptable reason */
2570 if (errno != ENOENT)
2571 ereport(ERROR,
2573 errmsg("could not access file \"%s\": %m", path)));
2574 }
2575
2576 /* Get a free gxact from the freelist */
2578 ereport(ERROR,
2580 errmsg("maximum number of prepared transactions reached"),
2581 errhint("Increase \"max_prepared_transactions\" (currently %d).",
2585
2587 gxact->prepare_start_lsn = start_lsn;
2588 gxact->prepare_end_lsn = end_lsn;
2589 gxact->fxid = fxid;
2590 gxact->owner = hdr->owner;
2591 gxact->locking_backend = INVALID_PROC_NUMBER;
2592 gxact->valid = false;
2593 gxact->ondisk = !XLogRecPtrIsValid(start_lsn);
2594 gxact->inredo = true; /* yes, added in redo */
 /* NOTE(review): unbounded copy; presumably the GID length was validated when the record was written -- confirm */
2595 strcpy(gxact->gid, gid);
2596
2597 /* And insert it into the active array */
2600
2601 if (origin_id != InvalidReplOriginId)
2602 {
2603 /* recover apply progress */
2604 replorigin_advance(origin_id, hdr->origin_lsn, end_lsn,
2605 false /* backward */ , false /* WAL */ );
2606 }
2607
2608 elog(DEBUG2, "added 2PC data in shared memory for transaction %u of epoch %u",
2611}
2612
2613/*
2614 * PrepareRedoRemoveFull
2615 *
2616 * Remove the corresponding gxact entry from TwoPhaseState. Also remove
2617 * the 2PC file if a prepared transaction was saved via an earlier checkpoint.
2618 *
2619 * Caller must hold TwoPhaseStateLock in exclusive mode, because TwoPhaseState
2620 * is updated.
2621 */
2622static void
2624{
2626 int i;
2627 bool found = false;
2628
2631
2632 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
2633 {
2635
2636 if (FullTransactionIdEquals(gxact->fxid, fxid))
2637 {
2638 Assert(gxact->inredo);
2639 found = true;
2640 break;
2641 }
2642 }
2643
2644 /*
2645 * Just leave if there is nothing to remove; this is expected during WAL replay.
2646 */
2647 if (!found)
2648 return;
2649
2650 /*
2651 * And now we can clean up any files we may have left.
2652 */
2653 elog(DEBUG2, "removing 2PC data for transaction %u of epoch %u ",
2656
2657 if (gxact->ondisk)
2659
2661}
2662
2663/*
2664 * Wrapper of PrepareRedoRemoveFull(), for TransactionIds.
2665 */
2666void
2674
2675/*
2676 * LookupGXact
2677 * Check whether a prepared transaction with the given GID, origin LSN and
2678 * origin timestamp exists.
2679 *
2680 * Note that we always compare with the LSN where prepare ends because that is
2681 * what is stored as origin_lsn in the 2PC file.
2682 *
2683 * This function is primarily used to check whether a prepared transaction
2684 * received from the upstream (remote node) already exists. Checking only the
2685 * GID is not sufficient, because a different prepared xact with the same GID
2686 * can exist on the same node. We therefore also match the origin_lsn and
2687 * origin_timestamp of the prepared xact, to avoid mistaking a prepared xact
2688 * that originated from a different node for the one we are looking for.
2689 */
2690bool
2691LookupGXact(const char *gid, XLogRecPtr prepare_end_lsn,
2693{
2694 int i;
2695 bool found = false;
2696
2698 for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
2699 {
2701
2702 /* Ignore not-yet-valid GIDs. */
2703 if (gxact->valid && strcmp(gxact->gid, gid) == 0)
2704 {
2705 char *buf;
2706 TwoPhaseFileHeader *hdr;
2707
2708 /*
2709 * We are not expecting collisions of GXACTs (same gid) between
2710 * publisher and subscribers, so we perform all I/O while holding
2711 * TwoPhaseStateLock for simplicity.
2712 *
2713 * To move the I/O out of the lock, we need to ensure that no
2714 * other backend commits the prepared xact in the meantime. We can
2715 * do this optimization if we encounter many collisions in GID
2716 * between publisher and subscriber.
2717 */
2718 if (gxact->ondisk)
2719 buf = ReadTwoPhaseFile(gxact->fxid, false);
2720 else
2721 {
2722 Assert(gxact->prepare_start_lsn);
2723 XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, NULL);
2724 }
2725
2726 hdr = (TwoPhaseFileHeader *) buf;
2727
2728 if (hdr->origin_lsn == prepare_end_lsn &&
2730 {
2731 found = true;
2732 pfree(buf);
2733 break;
2734 }
2735
2736 pfree(buf);
2737 }
2738 }
2740 return found;
2741}
2742
2743/*
2744 * TwoPhaseTransactionGid
2745 * Form the prepared transaction GID for two_phase transactions.
2746 *
2747 * Return the GID in the supplied buffer gid_res, which is szgid bytes long.
2748 */
2749void
2751{
2752 Assert(OidIsValid(subid));
2753
2754 if (!TransactionIdIsValid(xid))
2755 ereport(ERROR,
2757 errmsg_internal("invalid two-phase transaction ID")));
2758
2759 snprintf(gid_res, szgid, "pg_gid_%u_%u", subid, xid);
2760}
2761
2762/*
2763 * IsTwoPhaseTransactionGidForSubid
2764 * Check whether the given GID (as formed by TwoPhaseTransactionGid) is
2765 * for the specified 'subid'.
2766 */
2767static bool
2769{
2770 int ret;
2773 char gid_tmp[GIDSIZE];
2774
2775 /* Extract the subid and xid from the given GID */
2776 ret = sscanf(gid, "pg_gid_%u_%u", &subid_from_gid, &xid_from_gid);
2777
2778 /*
2779 * Check that the given GID has expected format, and at least the subid
2780 * matches.
2781 */
2782 if (ret != 2 || subid != subid_from_gid)
2783 return false;
2784
2785 /*
2786 * Reconstruct a temporary GID based on the subid and xid extracted from
2787 * the given GID and check whether the temporary GID and the given GID
2788 * match.
2789 */
2791
2792 return strcmp(gid, gid_tmp) == 0;
2793}
2794
2795/*
2796 * LookupGXactBySubid
2797 * Check if the prepared transaction done by apply worker exists.
2798 */
2799bool
2801{
2802 bool found = false;
2803
2805 for (int i = 0; i < TwoPhaseState->numPrepXacts; i++)
2806 {
2808
2809 /* Ignore not-yet-valid GIDs. */
2810 if (gxact->valid &&
2812 {
2813 found = true;
2814 break;
2815 }
2816 }
2818
2819 return found;
2820}
2821
2822/*
2823 * TwoPhaseGetOldestXidInCommit
2824 * Return the oldest transaction ID from prepared transactions that are
2825 * currently in the commit critical section.
2826 *
2827 * This function only considers transactions in the currently connected
2828 * database. If no matching transactions are found, it returns
2829 * InvalidTransactionId.
2830 */
2833{
2834 TransactionId oldestRunningXid = InvalidTransactionId;
2835
2837
2838 for (int i = 0; i < TwoPhaseState->numPrepXacts; i++)
2839 {
2842 TransactionId xid;
2843
2844 if (!gxact->valid)
2845 continue;
2846
2847 if (gxact->locking_backend == INVALID_PROC_NUMBER)
2848 continue;
2849
2850 /*
2851 * Get the backend that is handling the transaction. It's safe to
2852 * access this backend while holding TwoPhaseStateLock, as the backend
2853 * can only be destroyed after either removing or unlocking the
2854 * current global transaction, both of which require an exclusive
2855 * TwoPhaseStateLock.
2856 */
2857 commitproc = GetPGProcByNumber(gxact->locking_backend);
2858
2859 if (MyDatabaseId != commitproc->databaseId)
2860 continue;
2861
2862 if ((commitproc->delayChkptFlags & DELAY_CHKPT_IN_COMMIT) == 0)
2863 continue;
2864
2865 xid = XidFromFullTransactionId(gxact->fxid);
2866
2867 if (!TransactionIdIsValid(oldestRunningXid) ||
2868 TransactionIdPrecedes(xid, oldestRunningXid))
2869 oldestRunningXid = xid;
2870 }
2871
2873
2874 return oldestRunningXid;
2875}
#define pg_write_barrier()
Definition atomics.h:155
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:453
int16 AttrNumber
Definition attnum.h:21
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1636
static Datum values[MAXATTR]
Definition bootstrap.c:188
#define CStringGetTextDatum(s)
Definition builtins.h:98
#define MAXALIGN(LEN)
Definition c.h:880
#define Max(x, y)
Definition c.h:1069
#define Assert(condition)
Definition c.h:927
#define PG_BINARY
Definition c.h:1358
#define FLEXIBLE_ARRAY_MEMBER
Definition c.h:534
uint16_t uint16
Definition c.h:599
uint32_t uint32
Definition c.h:600
#define MemSet(start, val, len)
Definition c.h:1091
uint32 TransactionId
Definition c.h:720
#define OidIsValid(objectId)
Definition c.h:842
size_t Size
Definition c.h:673
void TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids, TransactionId *subxids, TimestampTz timestamp, ReplOriginId nodeid)
Definition commit_ts.c:140
int64 TimestampTz
Definition timestamp.h:39
Datum arg
Definition elog.c:1322
int errcode_for_file_access(void)
Definition elog.c:897
int errcode(int sqlerrcode)
Definition elog.c:874
#define LOG
Definition elog.h:31
int errhint(const char *fmt,...) pg_attribute_printf(1
int errdetail(const char *fmt,...) pg_attribute_printf(1
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define WARNING
Definition elog.h:36
int int int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...) pg_attribute_printf(1
#define DEBUG2
Definition elog.h:29
#define PANIC
Definition elog.h:42
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
TupleDesc BlessTupleDesc(TupleDesc tupdesc)
int FreeDir(DIR *dir)
Definition fd.c:3009
int CloseTransientFile(int fd)
Definition fd.c:2855
void fsync_fname(const char *fname, bool isdir)
Definition fd.c:757
DIR * AllocateDir(const char *dirname)
Definition fd.c:2891
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition fd.c:2957
int pg_fsync(int fd)
Definition fd.c:390
int OpenTransientFile(const char *fileName, int fileFlags)
Definition fd.c:2678
#define ERRCODE_PROTOCOL_VIOLATION
Definition fe-connect.c:96
#define palloc_object(type)
Definition fe_memutils.h:74
#define MaxAllocSize
Definition fe_memutils.h:22
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_object(type)
Definition fe_memutils.h:75
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
#define SRF_IS_FIRSTCALL()
Definition funcapi.h:304
#define SRF_PERCALL_SETUP()
Definition funcapi.h:308
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition funcapi.h:310
#define SRF_FIRSTCALL_INIT()
Definition funcapi.h:306
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition funcapi.h:230
#define SRF_RETURN_DONE(_funcctx)
Definition funcapi.h:328
ProcNumber MyProcNumber
Definition globals.c:90
bool IsUnderPostmaster
Definition globals.c:120
bool IsPostmasterEnvironment
Definition globals.c:119
Oid MyDatabaseId
Definition globals.c:94
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition heaptuple.c:1036
static void dlist_init(dlist_head *head)
Definition ilist.h:314
static void dlist_node_init(dlist_node *node)
Definition ilist.h:325
#define INJECTION_POINT_CACHED(name, arg)
#define INJECTION_POINT_LOAD(name)
#define write(a, b, c)
Definition win32.h:14
#define read(a, b, c)
Definition win32.h:13
int xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs, bool *RelcacheInitFileInval)
Definition inval.c:1012
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition ipc.c:344
int i
Definition isn.c:77
#define VirtualTransactionIdIsValid(vxid)
Definition lock.h:69
#define GET_VXID_FROM_PGPROC(vxid_dst, proc)
Definition lock.h:79
#define LocalTransactionIdIsValid(lxid)
Definition lock.h:68
#define VirtualTransactionIdEquals(vxid1, vxid2)
Definition lock.h:73
bool LWLockHeldByMe(LWLock *lock)
Definition lwlock.c:1912
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1177
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1956
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1794
@ LW_WS_NOT_WAITING
Definition lwlock.h:30
#define NUM_LOCK_PARTITIONS
Definition lwlock.h:95
@ LW_SHARED
Definition lwlock.h:113
@ LW_EXCLUSIVE
Definition lwlock.h:112
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
void DropRelationFiles(RelFileLocator *delrels, int ndelrels, bool isRedo)
Definition md.c:1612
#define RESUME_INTERRUPTS()
Definition miscadmin.h:136
#define AmStartupProcess()
Definition miscadmin.h:390
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define HOLD_INTERRUPTS()
Definition miscadmin.h:134
@ B_INVALID
Definition miscadmin.h:339
#define END_CRIT_SECTION()
Definition miscadmin.h:152
Oid GetUserId(void)
Definition miscinit.c:470
static char * errmsg
ReplOriginXactState replorigin_xact_state
Definition origin.c:167
void replorigin_advance(ReplOriginId node, XLogRecPtr remote_commit, XLogRecPtr local_commit, bool go_backward, bool wal_log)
Definition origin.c:919
void replorigin_session_advance(XLogRecPtr remote_commit, XLogRecPtr local_commit)
Definition origin.c:1311
#define DoNotReplicateId
Definition origin.h:34
#define InvalidReplOriginId
Definition origin.h:33
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
#define ERRCODE_DATA_CORRUPTED
#define MAXPGPATH
uint32 pg_crc32c
Definition pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition pg_crc32c.h:153
#define EQ_CRC32C(c1, c2)
Definition pg_crc32c.h:42
#define INIT_CRC32C(crc)
Definition pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition pg_crc32c.h:158
const void size_t len
const void * data
static char * user
Definition pg_regress.c:119
static char buf[DEFAULT_XLOG_SEG_SIZE]
void pgstat_execute_transactional_drops(int ndrops, struct xl_xact_stats_item *items, bool is_redo)
void AtEOXact_PgStat(bool isCommit, bool parallel)
Definition pgstat_xact.c:40
int pgstat_get_transactional_drops(bool isCommit, xl_xact_stats_item **items)
#define snprintf
Definition port.h:260
static Datum TransactionIdGetDatum(TransactionId X)
Definition postgres.h:292
static Datum ObjectIdGetDatum(Oid X)
Definition postgres.h:252
uint64_t Datum
Definition postgres.h:70
#define InvalidOid
unsigned int Oid
void PredicateLockTwoPhaseFinish(FullTransactionId fxid, bool isCommit)
Definition predicate.c:4891
static int fd(const char *x, int i)
static int fb(int x)
short access
#define DELAY_CHKPT_IN_COMMIT
Definition proc.h:138
#define GetPGProcByNumber(n)
Definition proc.h:501
#define PGPROC_MAX_CACHED_SUBXIDS
Definition proc.h:40
#define GetNumberFromPGProc(proc)
Definition proc.h:502
#define DELAY_CHKPT_START
Definition proc.h:136
@ PROC_WAIT_STATUS_OK
Definition proc.h:142
void ProcArrayAdd(PGPROC *proc)
Definition procarray.c:472
void ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
Definition procarray.c:569
#define INVALID_PROC_NUMBER
Definition procnumber.h:26
int ProcNumber
Definition procnumber.h:24
void RelationCacheInitFilePostInvalidate(void)
Definition relcache.c:6900
void RelationCacheInitFilePreInvalidate(void)
Definition relcache.c:6875
Size add_size(Size s1, Size s2)
Definition shmem.c:485
Size mul_size(Size s1, Size s2)
Definition shmem.c:500
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition shmem.c:381
void SendSharedInvalidMessages(const SharedInvalidationMessage *msgs, int n)
Definition sinval.c:47
PGPROC * MyProc
Definition proc.c:68
PGPROC * PreparedXactProcs
Definition proc.c:73
void StandbyReleaseLockTree(TransactionId xid, int nsubxids, TransactionId *subxids)
Definition standby.c:1093
int smgrGetPendingDeletes(bool forCommit, RelFileLocator **ptr)
Definition storage.c:893
#define ERRCODE_DUPLICATE_OBJECT
Definition streamutil.c:30
Definition dirent.c:26
TimestampTz prepared_at
Definition twophase.c:153
XLogRecPtr prepare_start_lsn
Definition twophase.c:162
XLogRecPtr prepare_end_lsn
Definition twophase.c:163
GlobalTransaction next
Definition twophase.c:151
FullTransactionId fxid
Definition twophase.c:164
ProcNumber locking_backend
Definition twophase.c:167
char gid[GIDSIZE]
Definition twophase.c:171
Definition proc.h:176
TransactionId xmin
Definition proc.h:239
LocalTransactionId lxid
Definition proc.h:228
PROCLOCK * waitProcLock
Definition proc.h:303
uint8 lwWaitMode
Definition proc.h:281
BackendType backendType
Definition proc.h:195
uint8 statusFlags
Definition proc.h:207
Oid databaseId
Definition proc.h:198
struct PGPROC::@133 vxid
pg_atomic_uint64 waitStart
Definition proc.h:308
ProcNumber procNumber
Definition proc.h:223
int pid
Definition proc.h:194
XidCacheStatus subxidStatus
Definition proc.h:244
LOCK * waitLock
Definition proc.h:301
TransactionId xid
Definition proc.h:234
struct XidCache subxids
Definition proc.h:246
int delayChkptFlags
Definition proc.h:257
dlist_node waitLink
Definition proc.h:302
dlist_head myProcLocks[NUM_LOCK_PARTITIONS]
Definition proc.h:318
Oid roleId
Definition proc.h:199
ProcWaitStatus waitStatus
Definition proc.h:311
Oid tempNamespaceId
Definition proc.h:201
uint8 lwWaiting
Definition proc.h:280
ReplOriginId origin
Definition origin.h:45
XLogRecPtr origin_lsn
Definition origin.h:46
TimestampTz origin_timestamp
Definition origin.h:47
struct StateFileChunk * next
Definition twophase.c:1005
FullTransactionId nextXid
Definition transam.h:220
TwoPhaseRmgrId rmid
Definition twophase.c:992
GlobalTransaction freeGXacts
Definition twophase.c:181
GlobalTransaction prepXacts[FLEXIBLE_ARRAY_MEMBER]
Definition twophase.c:187
GlobalTransaction array
Definition twophase.c:703
bool overflowed
Definition proc.h:47
uint8 count
Definition proc.h:45
TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]
Definition proc.h:52
__int64 st_size
Definition win32_port.h:263
TimestampTz prepared_at
Definition xact.h:359
int32 nabortrels
Definition xact.h:363
int32 ninvalmsgs
Definition xact.h:366
bool initfileinval
Definition xact.h:367
int32 ncommitstats
Definition xact.h:364
TimestampTz origin_timestamp
Definition xact.h:370
uint16 gidlen
Definition xact.h:368
uint32 total_len
Definition xact.h:356
int32 nabortstats
Definition xact.h:365
XLogRecPtr origin_lsn
Definition xact.h:369
uint32 magic
Definition xact.h:355
int32 ncommitrels
Definition xact.h:362
TransactionId xid
Definition xact.h:357
int32 nsubxacts
Definition xact.h:361
uint32 total_len
Definition twophase.c:1014
uint32 num_chunks
Definition twophase.c:1012
StateFileChunk * head
Definition twophase.c:1010
StateFileChunk * tail
Definition twophase.c:1011
uint32 bytes_free
Definition twophase.c:1013
void SubTransSetParent(TransactionId xid, TransactionId parent)
Definition subtrans.c:85
bool superuser_arg(Oid roleid)
Definition superuser.c:57
void SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
Definition syncrep.c:149
TransactionId TransactionIdLatest(TransactionId mainxid, int nxids, const TransactionId *xids)
Definition transam.c:281
bool TransactionIdDidCommit(TransactionId transactionId)
Definition transam.c:126
void TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids)
Definition transam.c:240
void TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids)
Definition transam.c:270
bool TransactionIdDidAbort(TransactionId transactionId)
Definition transam.c:188
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition transam.h:297
#define FullTransactionIdEquals(a, b)
Definition transam.h:50
#define InvalidTransactionId
Definition transam.h:31
static FullTransactionId FullTransactionIdFromAllowableAt(FullTransactionId nextFullXid, TransactionId xid)
Definition transam.h:443
#define EpochFromFullTransactionId(x)
Definition transam.h:47
static FullTransactionId FullTransactionIdFromU64(uint64 value)
Definition transam.h:81
#define FullTransactionIdFollowsOrEquals(a, b)
Definition transam.h:54
#define TransactionIdEquals(id1, id2)
Definition transam.h:43
#define XidFromFullTransactionId(x)
Definition transam.h:48
#define TransactionIdIsValid(xid)
Definition transam.h:41
#define FullTransactionIdIsValid(x)
Definition transam.h:55
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
TupleDesc CreateTemplateTupleDesc(int natts)
Definition tupdesc.c:165
void TupleDescFinalize(TupleDesc tupdesc)
Definition tupdesc.c:508
void TupleDescInitEntry(TupleDesc desc, AttrNumber attributeNumber, const char *attributeName, Oid oidtypeid, int32 typmod, int attdim)
Definition tupdesc.c:887
static char * ReadTwoPhaseFile(FullTransactionId fxid, bool missing_ok)
Definition twophase.c:1298
static void XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len)
Definition twophase.c:1415
ProcNumber TwoPhaseGetDummyProcNumber(FullTransactionId fxid, bool lock_held)
Definition twophase.c:911
TransactionId TwoPhaseGetOldestXidInCommit(void)
Definition twophase.c:2832
static void ProcessRecords(char *bufptr, FullTransactionId fxid, const TwoPhaseCallback callbacks[])
Definition twophase.c:1695
void TwoPhaseTransactionGid(Oid subid, TransactionId xid, char *gid_res, int szgid)
Definition twophase.c:2750
void RecoverPreparedTransactions(void)
Definition twophase.c:2086
static bool twophaseExitRegistered
Definition twophase.c:200
void restoreTwoPhaseData(void)
Definition twophase.c:1907
static GlobalTransaction TwoPhaseGetGXact(FullTransactionId fxid, bool lock_held)
Definition twophase.c:806
bool LookupGXact(const char *gid, XLogRecPtr prepare_end_lsn, TimestampTz origin_prepare_timestamp)
Definition twophase.c:2691
Size TwoPhaseShmemSize(void)
Definition twophase.c:240
#define TWOPHASE_DIR
Definition twophase.c:114
GlobalTransaction MarkAsPreparing(FullTransactionId fxid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid)
Definition twophase.c:362
static void RecordTransactionAbortPrepared(TransactionId xid, int nchildren, TransactionId *children, int nrels, RelFileLocator *rels, int nstats, xl_xact_stats_item *stats, const char *gid)
Definition twophase.c:2435
void RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info, const void *data, uint32 len)
Definition twophase.c:1274
int max_prepared_xacts
Definition twophase.c:117
static FullTransactionId AdjustToFullTransactionId(TransactionId xid)
Definition twophase.c:946
static void RecordTransactionCommitPrepared(TransactionId xid, int nchildren, TransactionId *children, int nrels, RelFileLocator *rels, int nstats, xl_xact_stats_item *stats, int ninvalmsgs, SharedInvalidationMessage *invalmsgs, bool initfileinval, const char *gid)
Definition twophase.c:2316
static void RemoveGXact(GlobalTransaction gxact)
Definition twophase.c:633
void PrepareRedoAdd(FullTransactionId fxid, char *buf, XLogRecPtr start_lsn, XLogRecPtr end_lsn, ReplOriginId origin_id)
Definition twophase.c:2510
static void RemoveTwoPhaseFile(FullTransactionId fxid, bool giveWarning)
Definition twophase.c:1726
static GlobalTransaction MyLockedGxact
Definition twophase.c:198
static TwoPhaseStateData * TwoPhaseState
Definition twophase.c:190
void AtAbort_Twophase(void)
Definition twophase.c:307
static void save_state_data(const void *data, uint32 len)
Definition twophase.c:1027
#define TWOPHASE_MAGIC
Definition twophase.c:979
void FinishPreparedTransaction(const char *gid, bool isCommit)
Definition twophase.c:1500
TransactionId TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid, bool *have_more)
Definition twophase.c:859
static char * ProcessTwoPhaseBuffer(FullTransactionId fxid, XLogRecPtr prepare_start_lsn, bool fromdisk, bool setParent, bool setNextXid)
Definition twophase.c:2190
static void GXactLoadSubxactData(GlobalTransaction gxact, int nsubxacts, TransactionId *children)
Definition twophase.c:509
void PrepareRedoRemove(TransactionId xid, bool giveWarning)
Definition twophase.c:2667
Datum pg_prepared_xact(PG_FUNCTION_ARGS)
Definition twophase.c:716
void EndPrepare(GlobalTransaction gxact)
Definition twophase.c:1148
TransactionId PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
Definition twophase.c:1969
void StartPrepare(GlobalTransaction gxact)
Definition twophase.c:1055
static int GetPreparedTransactionList(GlobalTransaction *gxacts)
Definition twophase.c:671
void TwoPhaseShmemInit(void)
Definition twophase.c:256
void StandbyRecoverPreparedTransactions(void)
Definition twophase.c:2048
static void RecreateTwoPhaseFile(FullTransactionId fxid, void *content, int len)
Definition twophase.c:1745
static void AtProcExit_Twophase(int code, Datum arg)
Definition twophase.c:297
static void PrepareRedoRemoveFull(FullTransactionId fxid, bool giveWarning)
Definition twophase.c:2623
static int TwoPhaseFilePath(char *path, FullTransactionId fxid)
Definition twophase.c:953
static void MarkAsPrepared(GlobalTransaction gxact, bool lock_held)
Definition twophase.c:535
void PostPrepare_Twophase(void)
Definition twophase.c:347
bool LookupGXactBySubid(Oid subid)
Definition twophase.c:2800
PGPROC * TwoPhaseGetDummyProc(FullTransactionId fxid, bool lock_held)
Definition twophase.c:926
xl_xact_prepare TwoPhaseFileHeader
Definition twophase.c:981
void CheckPointTwoPhase(XLogRecPtr redo_horizon)
Definition twophase.c:1825
bool StandbyTransactionIdIsPrepared(TransactionId xid)
Definition twophase.c:1470
static GlobalTransaction LockGXact(const char *gid, Oid user)
Definition twophase.c:557
static void MarkAsPreparingGuts(GlobalTransaction gxact, FullTransactionId fxid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid)
Definition twophase.c:436
static bool IsTwoPhaseTransactionGidForSubid(Oid subid, char *gid)
Definition twophase.c:2768
static struct xllist records
struct GlobalTransactionData * GlobalTransaction
Definition twophase.h:26
const TwoPhaseCallback twophase_postcommit_callbacks[TWOPHASE_RM_MAX_ID+1]
const TwoPhaseCallback twophase_recover_callbacks[TWOPHASE_RM_MAX_ID+1]
const TwoPhaseCallback twophase_postabort_callbacks[TWOPHASE_RM_MAX_ID+1]
void(* TwoPhaseCallback)(FullTransactionId fxid, uint16 info, void *recdata, uint32 len)
#define TWOPHASE_RM_MAX_ID
uint8 TwoPhaseRmgrId
#define TWOPHASE_RM_END_ID
static Datum TimestampTzGetDatum(TimestampTz X)
Definition timestamp.h:52
FullTransactionId ReadNextFullTransactionId(void)
Definition varsup.c:288
void AdvanceNextFullTransactionIdPastXid(TransactionId xid)
Definition varsup.c:304
TransamVariablesData * TransamVariables
Definition varsup.c:34
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition wait_event.h:69
static void pgstat_report_wait_end(void)
Definition wait_event.h:85
#define fstat
Definition win32_port.h:73
XLogRecPtr XactLogCommitRecord(TimestampTz commit_time, int nsubxacts, TransactionId *subxacts, int nrels, RelFileLocator *rels, int ndroppedstats, xl_xact_stats_item *droppedstats, int nmsgs, SharedInvalidationMessage *msgs, bool relcacheInval, int xactflags, TransactionId twophase_xid, const char *twophase_gid)
Definition xact.c:5837
int xactGetCommittedChildren(TransactionId **ptr)
Definition xact.c:5813
int MyXactFlags
Definition xact.c:138
XLogRecPtr XactLogAbortRecord(TimestampTz abort_time, int nsubxacts, TransactionId *subxacts, int nrels, RelFileLocator *rels, int ndroppedstats, xl_xact_stats_item *droppedstats, int xactflags, TransactionId twophase_xid, const char *twophase_gid)
Definition xact.c:6009
#define XLOG_XACT_PREPARE
Definition xact.h:171
#define XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK
Definition xact.h:109
#define XLOG_XACT_OPMASK
Definition xact.h:180
#define GIDSIZE
Definition xact.h:31
XLogRecPtr ProcLastRecPtr
Definition xlog.c:257
bool RecoveryInProgress(void)
Definition xlog.c:6444
XLogRecPtr XactLastRecEnd
Definition xlog.c:258
int wal_segment_size
Definition xlog.c:147
bool log_checkpoints
Definition xlog.c:133
void XLogFlush(XLogRecPtr record)
Definition xlog.c:2767
#define XLOG_INCLUDE_ORIGIN
Definition xlog.h:165
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47
uint16 ReplOriginId
Definition xlogdefs.h:69
uint64 XLogRecPtr
Definition xlogdefs.h:21
#define InvalidXLogRecPtr
Definition xlogdefs.h:28
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:479
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:369
void XLogSetRecordFlags(uint8 flags)
Definition xloginsert.c:461
void XLogBeginInsert(void)
Definition xloginsert.c:153
void XLogEnsureRecordSpace(int max_block_id, int ndatas)
Definition xloginsert.c:179
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition xlogreader.c:108
XLogRecord * XLogReadRecord(XLogReaderState *state, char **errormsg)
Definition xlogreader.c:391
void XLogReaderFree(XLogReaderState *state)
Definition xlogreader.c:163
void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr)
Definition xlogreader.c:233
#define XLogRecGetDataLen(decoder)
Definition xlogreader.h:415
#define XLogRecGetInfo(decoder)
Definition xlogreader.h:409
#define XLogRecGetRmid(decoder)
Definition xlogreader.h:410
#define XLogRecGetData(decoder)
Definition xlogreader.h:414
#define XL_ROUTINE(...)
Definition xlogreader.h:117
bool reachedConsistency
static XLogReaderState * xlogreader
void wal_segment_close(XLogReaderState *state)
Definition xlogutils.c:831
void wal_segment_open(XLogReaderState *state, XLogSegNo nextSegNo, TimeLineID *tli_p)
Definition xlogutils.c:806
bool InRecovery
Definition xlogutils.c:50
int read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *cur_page)
Definition xlogutils.c:845
#define InHotStandby
Definition xlogutils.h:60