PostgreSQL Source Code git master
Loading...
Searching...
No Matches
multixact.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * multixact.c
4 * PostgreSQL multi-transaction-log manager
5 *
6 * The pg_multixact manager is a pg_xact-like manager that stores an array of
7 * MultiXactMember for each MultiXactId. It is a fundamental part of the
8 * shared-row-lock implementation. Each MultiXactMember is comprised of a
9 * TransactionId and a set of flag bits. The name is a bit historical:
10 * originally, a MultiXactId consisted of more than one TransactionId (except
11 * in rare corner cases), hence "multi". Nowadays, however, it's perfectly
12 * legitimate to have MultiXactIds that only include a single Xid.
13 *
14 * The meaning of the flag bits is opaque to this module, but they are mostly
15 * used in heapam.c to identify lock modes that each of the member transactions
16 * is holding on any given tuple. This module just contains support to store
17 * and retrieve the arrays.
18 *
19 * We use two SLRU areas, one for storing the offsets at which the data
20 * starts for each MultiXactId in the other one. This trick allows us to
21 * store variable length arrays of TransactionIds. (We could alternatively
22 * use one area containing counts and TransactionIds, with valid MultiXactId
23 * values pointing at slots containing counts; but that way seems less robust
24 * since it would get completely confused if someone inquired about a bogus
25 * MultiXactId that pointed to an intermediate slot containing an XID.)
26 *
27 * XLOG interactions: this module generates a record whenever a new OFFSETs or
28 * MEMBERs page is initialized to zeroes, as well as an
29 * XLOG_MULTIXACT_CREATE_ID record whenever a new MultiXactId is defined.
30 * This module ignores the WAL rule "write xlog before data," because it
31 * suffices that actions recording a MultiXactId in a heap xmax do follow that
32 * rule. The only way for the MXID to be referenced from any data page is for
33 * heap_lock_tuple() or heap_update() to have put it there, and each generates
34 * an XLOG record that must follow ours. The normal LSN interlock between the
35 * data page and that XLOG record will ensure that our XLOG record reaches
36 * disk first. If the SLRU members/offsets data reaches disk sooner than the
37 * XLOG records, we do not care; after recovery, no xmax will refer to it. On
38 * the flip side, to ensure that all referenced entries _do_ reach disk, this
39 * module's XLOG records completely rebuild the data entered since the last
40 * checkpoint. We flush and sync all dirty OFFSETs and MEMBERs pages to disk
41 * before each checkpoint is considered complete.
42 *
43 * Like clog.c, and unlike subtrans.c, we have to preserve state across
44 * crashes and ensure that MXID and offset numbering increases monotonically
45 * across a crash. We do this in the same way as it's done for transaction
46 * IDs: the WAL record is guaranteed to contain evidence of every MXID we
47 * could need to worry about, and we just make sure that at the end of
48 * replay, the next-MXID and next-offset counters are at least as large as
49 * anything we saw during replay.
50 *
51 * We are able to remove segments no longer necessary by carefully tracking
52 * each table's used values: during vacuum, any multixact older than a certain
53 * value is removed; the cutoff value is stored in pg_class. The minimum value
54 * across all tables in each database is stored in pg_database, and the global
55 * minimum across all databases is part of pg_control and is kept in shared
56 * memory. Whenever that minimum is advanced, the SLRUs are truncated.
57 *
58 * When new multixactid values are to be created, care is taken that the
59 * counter does not fall within the wraparound horizon considering the global
60 * minimum value.
61 *
62 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
63 * Portions Copyright (c) 1994, Regents of the University of California
64 *
65 * src/backend/access/transam/multixact.c
66 *
67 *-------------------------------------------------------------------------
68 */
69#include "postgres.h"
70
71#include "access/multixact.h"
73#include "access/slru.h"
74#include "access/twophase.h"
76#include "access/xlog.h"
77#include "access/xloginsert.h"
78#include "access/xlogutils.h"
79#include "miscadmin.h"
80#include "pg_trace.h"
81#include "pgstat.h"
83#include "storage/pmsignal.h"
84#include "storage/proc.h"
85#include "storage/procarray.h"
86#include "storage/subsystems.h"
87#include "utils/guc_hooks.h"
89#include "utils/lsyscache.h"
90#include "utils/memutils.h"
91
92
93/*
94 * Thresholds used to keep members disk usage in check when multixids have a
95 * lot of members. When MULTIXACT_MEMBER_LOW_THRESHOLD is reached, vacuum
96 * starts freezing multixids more aggressively, even if the normal multixid
97 * age limits haven't been reached yet.
98 */
99#define MULTIXACT_MEMBER_LOW_THRESHOLD UINT64CONST(2000000000)
100#define MULTIXACT_MEMBER_HIGH_THRESHOLD UINT64CONST(4000000000)
101
/*
 * Return the MultiXactId that follows "multi", wrapping from MaxMultiXactId
 * around to FirstMultiXactId instead of simply adding one.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * not visible in this listing; the call "next = NextMultiXactId(multi)" in
 * RecordNewMultiXact suggests this is NextMultiXactId -- confirm.
 */
102static inline MultiXactId
104{
105 return multi == MaxMultiXactId ? FirstMultiXactId : multi + 1;
106}
107
/*
 * Return the MultiXactId that precedes "multi", wrapping from
 * FirstMultiXactId around to MaxMultiXactId instead of simply subtracting
 * one.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * not visible in this listing; presumably this is the "previous" counterpart
 * of the wrap-aware successor helper defined just before it -- confirm the
 * actual name against the original file.
 */
108static inline MultiXactId
110{
111 return multi == FirstMultiXactId ? MaxMultiXactId : multi - 1;
112}
113
114/*
115 * Links to shared-memory data structures for MultiXact control
116 */
118static int MultiXactOffsetIoErrorDetail(const void *opaque_data);
120static int MultiXactMemberIoErrorDetail(const void *opaque_data);
121
124
125#define MultiXactOffsetCtl (&MultiXactOffsetSlruDesc)
126#define MultiXactMemberCtl (&MultiXactMemberSlruDesc)
127
128/*
129 * MultiXact state shared across all backends. All this state is protected
130 * by MultiXactGenLock. (We also use SLRU bank's lock of MultiXactOffset and
131 * MultiXactMember to guard accesses to the two sets of SLRU buffers. For
132 * concurrency's sake, we avoid holding more than one of these locks at a
133 * time.)
134 */
135typedef struct MultiXactStateData
136{
137 /* next-to-be-assigned MultiXactId */
139
140 /* next-to-be-assigned offset */
142
143 /* Have we completed multixact startup? */
145
146 /*
147 * Oldest multixact that is still potentially referenced by a relation.
148 * Anything older than this should not be consulted. These values are
149 * updated by vacuum.
150 */
153
154 /*
155 * Oldest multixact offset that is potentially referenced by a multixact
156 * referenced by a relation.
157 */
159
160 /* support for anti-wraparound measures */
165
166 /*
167 * Per-backend data starts here. We have two arrays stored in the area
168 * immediately following the MultiXactStateData struct:
169 *
170 * OldestMemberMXactId[k] is the oldest MultiXactId each backend's current
171 * transaction(s) could possibly be a member of, or InvalidMultiXactId
172 * when the backend has no live transaction that could possibly be a
173 * member of a MultiXact. Each backend sets its entry to the current
174 * nextMXact counter just before first acquiring a shared lock in a given
175 * transaction, and clears it at transaction end. (This works because only
176 * during or after acquiring a shared lock could an XID possibly become a
177 * member of a MultiXact, and that MultiXact would have to be created
178 * during or after the lock acquisition.)
179 *
180 * In the OldestMemberMXactId array, there's a slot for all normal
181 * backends (0..MaxBackends-1) followed by a slot for max_prepared_xacts
182 * prepared transactions.
183 *
184 * OldestVisibleMXactId[k] is the oldest MultiXactId each backend's
185 * current transaction(s) think is potentially live, or InvalidMultiXactId
186 * when not in a transaction or not in a transaction that's paid any
187 * attention to MultiXacts yet. This is computed when first needed in a
188 * given transaction, and cleared at transaction end. We can compute it
189 * as the minimum of the valid OldestMemberMXactId[] entries at the time
190 * we compute it (using nextMXact if none are valid). Each backend is
191 * required not to attempt to access any SLRU data for MultiXactIds older
192 * than its own OldestVisibleMXactId[] setting; this is necessary because
193 * the relevant SLRU data can be concurrently truncated away.
194 *
195 * In the OldestVisibleMXactId array, there's a slot for all normal
196 * backends (0..MaxBackends-1) only. No slots for prepared transactions.
197 *
198 * The oldest valid value among all of the OldestMemberMXactId[] and
199 * OldestVisibleMXactId[] entries is considered by vacuum as the earliest
200 * possible value still having any live member transaction -- OldestMxact.
201 * Any value older than that is typically removed from tuple headers, or
202 * "frozen" via being replaced with a new xmax. VACUUM can sometimes even
203 * remove an individual MultiXact xmax whose value is >= its OldestMxact
204 * cutoff, though typically only when no individual member XID is still
205 * running. See FreezeMultiXactId for full details.
206 *
207 * Whenever VACUUM advances relminmxid, then either its OldestMxact cutoff
208 * or the oldest extant Multi remaining in the table is used as the new
209 * pg_class.relminmxid value (whichever is earlier). The minimum of all
210 * relminmxid values in each database is stored in pg_database.datminmxid.
211 * In turn, the minimum of all of those values is stored in pg_control.
212 * This is used as the truncation point for pg_multixact when unneeded
213 * segments get removed by vac_truncate_clog() during vacuuming.
214 */
217
218/*
219 * Sizes of OldestMemberMXactId and OldestVisibleMXactId arrays.
220 */
221#define NumMemberSlots (MaxBackends + max_prepared_xacts)
222#define NumVisibleSlots MaxBackends
223
224/* Pointers to the state data in shared memory */
228
229static void MultiXactShmemRequest(void *arg);
230static void MultiXactShmemInit(void *arg);
231static void MultiXactShmemAttach(void *arg);
232
238
239static inline MultiXactId *
241{
242 /*
243 * The first MaxBackends entries in the OldestMemberMXactId array are
244 * reserved for regular backends. MyProcNumber should index into one of
245 * them.
246 */
249}
250
251static inline MultiXactId *
253{
255
258
259 /*
260 * The first MaxBackends entries in the OldestMemberMXactId array are
261 * reserved for regular backends. Prepared xacts come after them.
262 */
265}
266
267static inline MultiXactId *
273
274/*
275 * Definitions for the backend-local MultiXactId cache.
276 *
277 * We use this cache to store known MultiXacts, so we don't need to go to
278 * SLRU areas every time.
279 *
280 * The cache lasts for the duration of a single transaction, the rationale
281 * for this being that most entries will contain our own TransactionId and
282 * so they will be uninteresting by the time our next transaction starts.
283 * (XXX not clear that this is correct --- other members of the MultiXact
284 * could hang around longer than we did. However, it's not clear what a
285 * better policy for flushing old cache entries would be.) FIXME actually
286 * this is plain wrong now that multixacts may contain update Xids.
287 *
288 * We allocate the cache entries in a memory context that is deleted at
289 * transaction end, so we don't need to do retail freeing of entries.
290 */
298
299#define MAX_CACHE_ENTRIES 256
302
/*
 * Debug-only logging wrappers. When MULTIXACT_DEBUG is defined, debug_elogN
 * forwards its N arguments unchanged to elog(). Otherwise every debug_elogN
 * expands to nothing, so the call and its argument expressions vanish from
 * the compiled code.
 */
303#ifdef MULTIXACT_DEBUG
304#define debug_elog2(a,b) elog(a,b)
305#define debug_elog3(a,b,c) elog(a,b,c)
306#define debug_elog4(a,b,c,d) elog(a,b,c,d)
307#define debug_elog5(a,b,c,d,e) elog(a,b,c,d,e)
308#define debug_elog6(a,b,c,d,e,f) elog(a,b,c,d,e,f)
309#else
310#define debug_elog2(a,b)
311#define debug_elog3(a,b,c)
312#define debug_elog4(a,b,c,d)
313#define debug_elog5(a,b,c,d,e)
314#define debug_elog6(a,b,c,d,e,f)
315#endif
316
317/* internal MultiXactId management */
318static void MultiXactIdSetOldestVisible(void);
319static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
320 int nmembers, MultiXactMember *members);
321static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset);
322
323/* MultiXact cache management */
324static int mxactMemberComparator(const void *arg1, const void *arg2);
325static MultiXactId mXactCacheGetBySet(int nmembers, MultiXactMember *members);
326static int mXactCacheGetById(MultiXactId multi, MultiXactMember **members);
327static void mXactCachePut(MultiXactId multi, int nmembers,
328 MultiXactMember *members);
329
330/* management of SLRU infrastructure */
331
332/* opaque_data type for MultiXactMemberIoErrorDetail */
338
339static void ExtendMultiXactOffset(MultiXactId multi);
340static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
341static void SetOldestOffset(void);
343static void WriteMTruncateXlogRec(Oid oldestMultiDB,
346
347
348/*
349 * MultiXactIdCreate
350 * Construct a MultiXactId representing two TransactionIds.
351 *
352 * The two XIDs must be different, or be requesting different statuses.
353 *
354 * NB - we don't worry about our local MultiXactId cache here, because that
355 * is handled by the lower-level routines.
 *
 * xid1/status1 become members[0] and xid2/status2 become members[1] of a
 * two-element member array; the value returned is newMulti.
 *
 * NOTE(review): the signature, the declaration of newMulti and the
 * statement that assigns it are not visible in this listing. Presumably
 * newMulti comes from MultiXactIdCreateFromMembers(2, members) -- confirm.
356 */
360{
362 MultiXactMember members[2];
363
366
368
369 /* MultiXactIdSetOldestMember() must have been called already. */
371
372 /*
373 * Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs
374 * are still running. In typical usage, xid2 will be our own XID and the
375 * caller just did a check on xid1, so it'd be wasted effort.
376 */
377
378 members[0].xid = xid1;
379 members[0].status = status1;
380 members[1].xid = xid2;
381 members[1].status = status2;
382
384
 /* Logged only when MULTIXACT_DEBUG is defined; otherwise this is a no-op. */
385 debug_elog3(DEBUG2, "Create: %s",
386 mxid_to_string(newMulti, 2, members));
387
388 return newMulti;
389}
390
391/*
392 * MultiXactIdExpand
393 * Add a TransactionId to a pre-existing MultiXactId.
394 *
395 * If the TransactionId is already a member of the passed MultiXactId with the
396 * same status, just return it as-is.
397 *
398 * Note that we do NOT actually modify the membership of a pre-existing
399 * MultiXactId; instead we create a new one. This is necessary to avoid
400 * a race condition against code trying to wait for one MultiXactId to finish;
401 * see notes in heapam.c.
402 *
403 * NB - we don't worry about our local MultiXactId cache here, because that
404 * is handled by the lower-level routines.
405 *
406 * Note: It is critical that MultiXactIds that come from an old cluster (i.e.
407 * one upgraded by pg_upgrade from a cluster older than this feature) are not
408 * passed in.
 *
 * Result, by case (in the order the code tests them):
 *  - GetMultiXactIdMembers() returns a negative count: newMulti is returned,
 *    described below as a singleton holding only xid/status.
 *  - xid is already a member with the same status: "multi" itself is
 *    returned and the fetched member array is freed.
 *  - otherwise: newMulti is returned, built from the members that are still
 *    of interest plus xid/status.
 *
 * NOTE(review): the signature, the declarations of newMulti/newMembers and
 * the statements that assign newMulti are not visible in this listing;
 * presumably they call MultiXactIdCreateFromMembers() -- confirm.
409 */
412{
414 MultiXactMember *members;
416 int nmembers;
417 int i;
418 int j;
419
422
423 /* MultiXactIdSetOldestMember() must have been called already. */
425
426 debug_elog5(DEBUG2, "Expand: received multi %u, xid %u status %s",
427 multi, xid, mxstatus_to_string(status));
428
429 /*
430 * Note: we don't allow for old multis here. The reason is that the only
431 * caller of this function does a check that the multixact is no longer
432 * running.
433 */
434 nmembers = GetMultiXactIdMembers(multi, &members, false, false);
435
436 if (nmembers < 0)
437 {
438 MultiXactMember member;
439
440 /*
441 * The MultiXactId is obsolete. This can only happen if all the
442 * MultiXactId members stop running between the caller checking and
443 * passing it to us. It would be better to return that fact to the
444 * caller, but it would complicate the API and it's unlikely to happen
445 * too often, so just deal with it by creating a singleton MultiXact.
446 */
447 member.xid = xid;
448 member.status = status;
450
451 debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u",
452 multi, newMulti);
453 return newMulti;
454 }
455
456 /*
457 * If the TransactionId is already a member of the MultiXactId with the
458 * same status, just return the existing MultiXactId.
459 */
460 for (i = 0; i < nmembers; i++)
461 {
462 if (TransactionIdEquals(members[i].xid, xid) &&
463 (members[i].status == status))
464 {
465 debug_elog4(DEBUG2, "Expand: %u is already a member of %u",
466 xid, multi);
467 pfree(members);
468 return multi;
469 }
470 }
471
472 /*
473 * Determine which of the members of the MultiXactId are still of
474 * interest. This is any running transaction, and also any transaction
475 * that grabbed something stronger than just a lock and was committed. (An
476 * update that aborted is of no interest here; and having more than one
477 * update Xid in a multixact would cause errors elsewhere.)
478 *
479 * Removing dead members is not just an optimization: freezing of tuples
480 * whose Xmax are multis depends on this behavior.
481 *
482 * Note we have the same race condition here as above: j could be 0 at the
483 * end of the loop.
484 */
 /* nmembers + 1: room for every existing member plus the one being added. */
485 newMembers = palloc_array(MultiXactMember, nmembers + 1);
486
 /* i walks the old array; j counts the entries kept in newMembers. */
487 for (i = 0, j = 0; i < nmembers; i++)
488 {
489 if (TransactionIdIsInProgress(members[i].xid) ||
490 (ISUPDATE_from_mxstatus(members[i].status) &&
491 TransactionIdDidCommit(members[i].xid)))
492 {
493 newMembers[j].xid = members[i].xid;
494 newMembers[j++].status = members[i].status;
495 }
496 }
497
 /* Append the new member; after this j is the final member count. */
498 newMembers[j].xid = xid;
499 newMembers[j++].status = status;
501
502 pfree(members);
504
505 debug_elog3(DEBUG2, "Expand: returning new multi %u", newMulti);
506
507 return newMulti;
508}
509
510/*
511 * MultiXactIdIsRunning
512 * Returns whether a MultiXactId is "running".
513 *
514 * We return true if at least one member of the given MultiXactId is still
515 * running. Note that a "false" result is certain not to change,
516 * because it is not legal to add members to an existing MultiXactId.
517 *
518 * Caller is expected to have verified that the multixact does not come from
519 * a pg_upgraded share-locked tuple.
 *
 * "multi" is the MultiXactId to test; "isLockOnly" is passed through
 * unchanged as the last argument of GetMultiXactIdMembers().
 *
 * Returns false when GetMultiXactIdMembers() reports nmembers <= 0, true as
 * soon as one member is the current transaction or is in progress, and
 * false otherwise. The member array is freed on every return path after
 * the no-members check.
 *
 * NOTE(review): the line carrying the function name and parameter list is
 * not visible in this listing.
520 */
521bool
523{
524 MultiXactMember *members;
525 int nmembers;
526 int i;
527
528 debug_elog3(DEBUG2, "IsRunning %u?", multi);
529
530 /*
531 * "false" here means we assume our callers have checked that the given
532 * multi cannot possibly come from a pg_upgraded database.
533 */
534 nmembers = GetMultiXactIdMembers(multi, &members, false, isLockOnly);
535
536 if (nmembers <= 0)
537 {
538 debug_elog2(DEBUG2, "IsRunning: no members");
539 return false;
540 }
541
542 /*
543 * Checking for myself is cheap compared to looking in shared memory;
544 * return true if any live subtransaction of the current top-level
545 * transaction is a member.
546 *
547 * This is not needed for correctness, it's just a fast path.
548 */
549 for (i = 0; i < nmembers; i++)
550 {
551 if (TransactionIdIsCurrentTransactionId(members[i].xid))
552 {
553 debug_elog3(DEBUG2, "IsRunning: I (%d) am running!", i);
554 pfree(members);
555 return true;
556 }
557 }
558
559 /*
560 * This could be made faster by having another entry point in procarray.c,
561 * walking the PGPROC array only once for all the members. But in most
562 * cases nmembers should be small enough that it doesn't much matter.
563 */
564 for (i = 0; i < nmembers; i++)
565 {
566 if (TransactionIdIsInProgress(members[i].xid))
567 {
568 debug_elog4(DEBUG2, "IsRunning: member %d (%u) is running",
569 i, members[i].xid);
570 pfree(members);
571 return true;
572 }
573 }
574
 /* No member matched either check: the multixact is not running. */
575 pfree(members);
576
577 debug_elog3(DEBUG2, "IsRunning: %u is not running", multi);
578
579 return false;
580}
581
582/*
583 * MultiXactIdSetOldestMember
584 * Save the oldest MultiXactId this transaction could be a member of.
585 *
586 * We set the OldestMemberMXactId for a given transaction the first time it's
587 * going to do some operation that might require a MultiXactId (tuple lock,
588 * update or delete). We need to do this even if we end up using a
589 * TransactionId instead of a MultiXactId, because there is a chance that
590 * another transaction would add our XID to a MultiXactId.
591 *
592 * The value to set is the next-to-be-assigned MultiXactId, so this is meant to
593 * be called just before doing any such possibly-MultiXactId-able operation.
 *
 * Returns nothing; the visible effect is a store of the shared nextMXact
 * counter into the slot returned by MyOldestMemberMXactIdSlot().
 *
 * NOTE(review): the function-name line, the condition guarding the inner
 * block and the lock acquire/release calls described by the comment below
 * are not visible in this listing. Presumably the guard skips the work
 * when the slot is already set -- confirm against the original file.
594 */
595void
597{
599 {
600 MultiXactId nextMXact;
601
602 /*
603 * You might think we don't need to acquire a lock here, since
604 * fetching and storing of TransactionIds is probably atomic, but in
605 * fact we do: suppose we pick up nextMXact and then lose the CPU for
606 * a long time. Someone else could advance nextMXact, and then
607 * another someone else could compute an OldestVisibleMXactId that
608 * would be after the value we are going to store when we get control
609 * back. Which would be wrong.
610 *
611 * Note that a shared lock is sufficient, because it's enough to stop
612 * someone from advancing nextMXact; and nobody else could be trying
613 * to write to our OldestMember entry, only reading (and we assume
614 * storing it is atomic.)
615 */
617
 /* Read the shared counter, then publish it in this backend's own slot. */
618 nextMXact = MultiXactState->nextMXact;
619
620 *MyOldestMemberMXactIdSlot() = nextMXact;
621
623
624 debug_elog4(DEBUG2, "MultiXact: setting OldestMember[%d] = %u",
625 MyProcNumber, nextMXact);
626 }
627}
628
629/*
630 * MultiXactIdSetOldestVisible
631 * Save the oldest MultiXactId this transaction considers possibly live.
632 *
633 * We set the OldestVisibleMXactId for a given transaction the first time
634 * it's going to inspect any MultiXactId. Once we have set this, we are
635 * guaranteed that SLRU data for MultiXactIds >= our own OldestVisibleMXactId
636 * won't be truncated away.
637 *
638 * The value to set is the oldest of nextMXact and all the valid per-backend
639 * OldestMemberMXactId[] entries. Because of the locking we do, we can be
640 * certain that no subsequent call to MultiXactIdSetOldestMember can set
641 * an OldestMemberMXactId[] entry older than what we compute here. Therefore
642 * there is no live transaction, now or later, that can be a member of any
643 * MultiXactId older than the OldestVisibleMXactId we compute here.
644 */
645static void
673
674/*
675 * ReadNextMultiXactId
676 * Return the next MultiXactId to be assigned, but don't allocate it
 *
 * The value handed back is the local "mxid".
 *
 * NOTE(review): the signature and the statements that lock, read the
 * counter into mxid and unlock are not visible in this listing; presumably
 * mxid is copied from MultiXactState->nextMXact -- confirm.
677 */
680{
681 MultiXactId mxid;
682
683 /* XXX we could presumably do this without a lock. */
687
688 return mxid;
689}
690
691/*
692 * ReadMultiXactIdRange
693 * Get the range of IDs that may still be referenced by a relation.
694 */
695void
703
704
705/*
706 * MultiXactIdCreateFromMembers
707 * Make a new MultiXactId from the specified set of members
708 *
709 * Make XLOG, SLRU and cache entries for a new MultiXactId, recording the
710 * given TransactionIds as members. Returns the newly created MultiXactId.
711 *
712 * NB: the passed members[] array will be sorted in-place.
 *
 * Steps, in the order the code performs them:
 *  1. Look the member set up in the backend-local cache and return the
 *     cached MultiXactId on a hit; nothing else is done in that case.
 *  2. Raise elog(ERROR) if more than one member has an updating status.
 *  3. Obtain the new MultiXactId and its starting member offset from
 *     GetNewMultiXactId(), which also starts a critical section.
 *  4. Fill in the WAL record (mid, moff, nmembers) and register the member
 *     array as record data.
 *  5. Write the offsets/members data with RecordNewMultiXact().
 *  6. Add the new MultiXactId to the backend-local cache.
 *
 * NOTE(review): the signature, the declaration of xlrec, the remaining
 * XLOG calls and the end of the critical section are not visible in this
 * listing.
713 */
716{
717 MultiXactId multi;
718 MultiXactOffset offset;
720
721 debug_elog3(DEBUG2, "Create: %s",
722 mxid_to_string(InvalidMultiXactId, nmembers, members));
723
724 /*
725 * See if the same set of members already exists in our cache; if so, just
726 * re-use that MultiXactId. (Note: it might seem that looking in our
727 * cache is insufficient, and we ought to search disk to see if a
728 * duplicate definition already exists. But since we only ever create
729 * MultiXacts containing our own XID, in most cases any such MultiXacts
730 * were in fact created by us, and so will be in our cache. There are
731 * corner cases where someone else added us to a MultiXact without our
732 * knowledge, but it's not worth checking for.)
733 */
734 multi = mXactCacheGetBySet(nmembers, members);
735 if (MultiXactIdIsValid(multi))
736 {
737 debug_elog2(DEBUG2, "Create: in cache!");
738 return multi;
739 }
740
741 /* Verify that there is at most one update Xid among the given members. */
742 {
743 int i;
744 bool has_update = false;
745
746 for (i = 0; i < nmembers; i++)
747 {
748 if (ISUPDATE_from_mxstatus(members[i].status))
749 {
 /* A second updating member is an error; zero or one is accepted. */
750 if (has_update)
751 elog(ERROR, "new multixact has more than one updating member: %s",
752 mxid_to_string(InvalidMultiXactId, nmembers, members));
753 has_update = true;
754 }
755 }
756 }
757
758 /* Load the injection point before entering the critical section */
759 INJECTION_POINT_LOAD("multixact-create-from-members");
760
761 /*
762 * Assign the MXID and offsets range to use, and make sure there is space
763 * in the OFFSETs and MEMBERs files. NB: this routine does
764 * START_CRIT_SECTION().
765 *
766 * Note: unlike MultiXactIdCreate and MultiXactIdExpand, we do not check
767 * that we've called MultiXactIdSetOldestMember here. This is because
768 * this routine is used in some places to create new MultiXactIds of which
769 * the current backend is not a member, notably during freezing of multis
770 * in vacuum. During vacuum, in particular, it would be unacceptable to
771 * keep OldestMulti set, in case it runs for long.
772 */
773 multi = GetNewMultiXactId(nmembers, &offset);
774
775 INJECTION_POINT_CACHED("multixact-create-from-members", NULL);
776
777 /* Make an XLOG entry describing the new MXID. */
778 xlrec.mid = multi;
779 xlrec.moff = offset;
780 xlrec.nmembers = nmembers;
781
782 /*
783 * XXX Note: there's a lot of padding space in MultiXactMember. We could
784 * find a more compact representation of this Xlog record -- perhaps all
785 * the status flags in one XLogRecData, then all the xids in another one?
786 * Not clear that it's worth the trouble though.
787 */
790 XLogRegisterData(members, nmembers * sizeof(MultiXactMember));
791
793
794 /* Now enter the information into the OFFSETs and MEMBERs logs */
795 RecordNewMultiXact(multi, offset, nmembers, members);
796
797 /* Done with critical section */
799
800 /* Store the new MultiXactId in the local cache, too */
801 mXactCachePut(multi, nmembers, members);
802
803 debug_elog2(DEBUG2, "Create: all done");
804
805 return multi;
806}
807
808/*
809 * RecordNewMultiXact
810 * Write info about a new multixact into the offsets and members files
811 *
812 * This is broken out of MultiXactIdCreateFromMembers so that xlog replay can
813 * use it.
 *
 * The work is done in two phases:
 *  1. Offsets: store "offset" in the entry for "multi" and
 *     offset + nmembers (bumped to 1 if that sum is 0) in the entry for the
 *     following multixid. Each entry is written only if it does not already
 *     hold that value, and the page is marked dirty when it is written.
 *  2. Members: for each of the nmembers members, starting at "offset" and
 *     advancing by one per member, store the member's xid and merge its
 *     status into the flags word, marking the page dirty each time.
 *
 * NOTE(review): part of the signature, several local declarations (next,
 * next_pageno, offptr, next_offptr, prev_pageno, prevlock, memberptr,
 * flagsptr, flagsval), the computation of entryno/flagsoff/memberoff/bshift,
 * most lock acquire/release calls and the member-page reads are not visible
 * in this listing.
814 */
815static void
817 int nmembers, MultiXactMember *members)
818{
819 int64 pageno;
821 int entryno;
822 int slotno;
826 int next_entryno;
828 MultiXactOffset next_offset;
829 LWLock *lock;
831
832 /* position of this multixid in the offsets SLRU area */
833 pageno = MultiXactIdToOffsetPage(multi);
835
836 /* position of the next multixid */
837 next = NextMultiXactId(multi);
840
841 /*
842 * Set the starting offset of this multixid's members.
843 *
844 * In the common case, it was already set by the previous
845 * RecordNewMultiXact call, as this was the next multixid of the previous
846 * multixid. But if multiple backends are generating multixids
847 * concurrently, we might race ahead and get called before the previous
848 * multixid.
849 */
852
853 slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, &multi);
854 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
855 offptr += entryno;
856
857 if (*offptr != offset)
858 {
859 /* should already be set to the correct value, or not at all */
860 Assert(*offptr == 0);
861 *offptr = offset;
862 MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
863 }
864
865 /*
866 * Set the next multixid's offset to the end of this multixid's members.
867 */
868 if (next_pageno == pageno)
869 {
 /* Same page: the next multixid's entry is the adjacent array element. */
870 next_offptr = offptr + 1;
871 }
872 else
873 {
874 /* must be the first entry on the page */
876
877 /* Swap the lock for a lock on the next page */
878 LWLockRelease(lock);
881
883 next_offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
885 }
886
887 /* Like in GetNewMultiXactId(), skip over offset 0 */
888 next_offset = offset + nmembers;
889 if (next_offset == 0)
890 next_offset = 1;
891 if (*next_offptr != next_offset)
892 {
893 /* should already be set to the correct value, or not at all */
894 Assert(*next_offptr == 0);
895 *next_offptr = next_offset;
896 MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
897 }
898
899 /* Release MultiXactOffset SLRU lock. */
900 LWLockRelease(lock);
901
 /*
  * Start with a page number that is expected to match no real member page,
  * so the first loop iteration goes through the page-change branch below.
  */
902 prev_pageno = -1;
903
904 for (int i = 0; i < nmembers; i++, offset++)
905 {
909 int bshift;
910 int flagsoff;
911 int memberoff;
912
913 Assert(members[i].status <= MultiXactStatusUpdate);
914
915 pageno = MXOffsetToMemberPage(offset);
919
920 if (pageno != prev_pageno)
921 {
923
924 /*
925 * The MultiXactMember SLRU page has changed, so check whether the
926 * new page falls into a different SLRU bank; if it does, release
927 * the old bank's lock and acquire the lock on the new bank.
928 */
930 if (lock != prevlock)
931 {
932 if (prevlock != NULL)
934
936 prevlock = lock;
937 }
940 prev_pageno = pageno;
941 }
942
944 (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
945
946 *memberptr = members[i].xid;
947
948 flagsptr = (uint32 *)
949 (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
950
 /*
  * Clear this member's MXACT_MEMBER_BITS_PER_XACT-wide field at bit
  * position bshift, then OR the member's status into that same field.
  */
952 flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
953 flagsval |= (members[i].status << bshift);
955
956 MultiXactMemberCtl->shared->page_dirty[slotno] = true;
957 }
958
959 if (prevlock != NULL)
961}
962
963/*
964 * GetNewMultiXactId
965 * Get the next MultiXactId.
966 *
967 * Also, reserve the needed amount of space in the "members" area. The
968 * starting offset of the reserved space is returned in *offset.
969 *
970 * This may generate XLOG records for expansion of the offsets and/or members
971 * files. Unfortunately, we have to do that while holding MultiXactGenLock
972 * to avoid race conditions --- the XLOG record for zeroing a page must appear
973 * before any backend can possibly try to store data in that page!
974 *
975 * We start a critical section before advancing the shared counters. The
976 * caller must end the critical section after writing SLRU data.
977 */
978static MultiXactId
979GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
980{
982 MultiXactOffset nextOffset;
983
984 debug_elog3(DEBUG2, "GetNew: for %d xids", nmembers);
985
986 /* safety check, we should never get this far in a HS standby */
987 if (RecoveryInProgress())
988 elog(ERROR, "cannot assign MultiXactIds during recovery");
989
991
992 /* Assign the MXID */
994
995 /*----------
996 * Check to see if it's safe to assign another MultiXactId. This protects
997 * against catastrophic data loss due to multixact wraparound. The basic
998 * rules are:
999 *
1000 * If we're past multiVacLimit or the safe threshold for member storage
1001 * space, or we don't know what the safe threshold for member storage is,
1002 * start trying to force autovacuum cycles.
1003 * If we're past multiWarnLimit, start issuing warnings.
1004 * If we're past multiStopLimit, refuse to create new MultiXactIds.
1005 *
1006 * Note these are pretty much the same protections as in GetNewTransactionId.
1007 *----------
1008 */
1010 {
1011 /*
1012 * For safety's sake, we release MultiXactGenLock while sending
1013 * signals, warnings, etc. This is not so much because we care about
1014 * preserving concurrency in this situation, as to avoid any
1015 * possibility of deadlock while doing get_database_name(). First,
1016 * copy all the shared values we'll need in this path.
1017 */
1018 MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit;
1019 MultiXactId multiStopLimit = MultiXactState->multiStopLimit;
1020 MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit;
1022
1024
1025 if (IsUnderPostmaster &&
1026 !MultiXactIdPrecedes(result, multiStopLimit))
1027 {
1029
1030 /*
1031 * Immediately kick autovacuum into action as we're already in
1032 * ERROR territory.
1033 */
1035
1036 /* complain even if that DB has disappeared */
1037 if (oldest_datname)
1038 ereport(ERROR,
1040 errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database \"%s\"",
1042 errhint("Execute a database-wide VACUUM in that database.\n"
1043 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1044 else
1045 ereport(ERROR,
1047 errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database with OID %u",
1049 errhint("Execute a database-wide VACUUM in that database.\n"
1050 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1051 }
1052
1053 /*
1054 * To avoid swamping the postmaster with signals, we issue the autovac
1055 * request only once per 64K multis generated. This still gives
1056 * plenty of chances before we get into real trouble.
1057 */
1058 if (IsUnderPostmaster && ((result % 65536) == 0 || result == FirstMultiXactId))
1060
1061 if (!MultiXactIdPrecedes(result, multiWarnLimit))
1062 {
1064
1065 /* complain even if that DB has disappeared */
1066 if (oldest_datname)
1068 (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
1069 "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
1070 multiWrapLimit - result,
1072 multiWrapLimit - result),
1073 errdetail("Approximately %.2f%% of MultiXactIds are available for use.",
1074 (double) (multiWrapLimit - result) / (MaxMultiXactId / 2) * 100),
1075 errhint("Execute a database-wide VACUUM in that database.\n"
1076 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1077 else
1079 (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
1080 "database with OID %u must be vacuumed before %u more MultiXactIds are used",
1081 multiWrapLimit - result,
1083 multiWrapLimit - result),
1084 errdetail("Approximately %.2f%% of MultiXactIds are available for use.",
1085 (double) (multiWrapLimit - result) / (MaxMultiXactId / 2) * 100),
1086 errhint("Execute a database-wide VACUUM in that database.\n"
1087 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1088 }
1089
1090 /* Re-acquire lock and start over */
1093 }
1094
1095 /*
1096 * Make sure there is room for the next MXID in the file. Assigning this
1097 * MXID sets the next MXID's offset already.
1098 */
1100
1101 /*
1102 * Reserve the members space, similarly to above.
1103 */
1104 nextOffset = MultiXactState->nextOffset;
1105
1106 /*
1107 * Offsets are 64-bit integers and will never wrap around. Firstly, it
1108 * would take an unrealistic amount of time and resources to consume 2^64
1109 * offsets. Secondly, multixid creation is WAL-logged, so you would run
1110 * out of LSNs before reaching offset wraparound. Nevertheless, check for
1111 * wraparound as a sanity check.
1112 */
1113 if (nextOffset + nmembers < nextOffset)
1114 ereport(ERROR,
1116 errmsg("MultiXact members would wrap around")));
1117 *offset = nextOffset;
1118
1119 ExtendMultiXactMember(nextOffset, nmembers);
1120
1121 /*
1122 * Critical section from here until caller has written the data into the
1123 * just-reserved SLRU space; we don't want to error out with a partly
1124 * written MultiXact structure. (In particular, failing to write our
1125 * start offset after advancing nextMXact would effectively corrupt the
1126 * previous MultiXact.)
1127 */
1129
1130 /*
1131 * Advance counters. As in GetNewTransactionId(), this must not happen
1132 * until after file extension has succeeded!
1133 */
1135 MultiXactState->nextOffset += nmembers;
1136
1138
1139 debug_elog4(DEBUG2, "GetNew: returning %u offset %" PRIu64,
1140 result, *offset);
1141 return result;
1142}
1143
1144/*
1145 * GetMultiXactIdMembers
1146 * Return the set of MultiXactMembers that make up a MultiXactId
1147 *
1148 * Return value is the number of members found, with *members set to a newly
1149 * palloc'ed array that the caller must free when done; or -1 if there are
1150 * none, in which case *members is set to NULL.
1151 *
1152 * from_pgupgrade must be passed as true if and only if the multixact
1153 * corresponds to a value from a tuple that was locked in a 9.2-or-older
1154 * installation and later pg_upgrade'd (that is, the infomask is
1155 * HEAP_LOCKED_UPGRADED). In this case, we know for certain that no members
1156 * can still be running, so we return -1 just like for an empty multixact
1157 * without any further checking. It would be wrong to try to resolve such a
1158 * multixact: either the multixact is within the current valid multixact
1159 * range, in which case the returned result would be bogus, or outside that
1160 * range, in which case an error would be raised.
1161 *
1162 * In all other cases, the passed multixact must be within the known valid
1163 * range, that is, greater than or equal to oldestMultiXactId, and less than
1164 * nextMXact. Otherwise, an error is raised.
1165 *
1166 * isLockOnly must be set to true if caller is certain that the given multi
1167 * is used only to lock tuples; can be false without loss of correctness,
1168 * but passing true means we can return quickly without checking for
1169 * old updates.
1170 */
1171int
1173 bool from_pgupgrade, bool isLockOnly)
1174{
1175 int64 pageno;
1177 int entryno;
1178 int slotno;
1180 MultiXactOffset offset;
1182 int length;
1184 MultiXactId nextMXact;
1185 MultiXactMember *ptr;
1186 LWLock *lock;
1187
1188 debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
1189
1190 if (!MultiXactIdIsValid(multi) || from_pgupgrade)
1191 {
1192 *members = NULL;
1193 return -1;
1194 }
1195
1196 /* See if the MultiXactId is in the local cache */
1197 length = mXactCacheGetById(multi, members);
1198 if (length >= 0)
1199 {
1200 debug_elog3(DEBUG2, "GetMembers: found %s in the cache",
1201 mxid_to_string(multi, length, *members));
1202 return length;
1203 }
1204
1205 /* Set our OldestVisibleMXactId[] entry if we didn't already */
1207
1208 /*
1209 * If we know the multi is used only for locking and not for updates, then
1210 * we can skip checking if the value is older than our oldest visible
1211 * multi. It cannot possibly still be running.
1212 */
1213 if (isLockOnly &&
1215 {
1216 debug_elog2(DEBUG2, "GetMembers: a locker-only multi is too old");
1217 *members = NULL;
1218 return -1;
1219 }
1220
1221 /*
1222 * We check known limits on MultiXact before resorting to the SLRU area.
1223 *
1224 * An ID older than MultiXactState->oldestMultiXactId cannot possibly be
1225 * useful; it has already been removed, or will be removed shortly, by
1226 * truncation. If one is passed, an error is raised.
1227 *
1228 * Also, an ID >= nextMXact shouldn't ever be seen here; if it is seen, it
1229 * implies undetected ID wraparound has occurred. This raises a hard
1230 * error.
1231 *
1232 * Shared lock is enough here since we aren't modifying any global state.
1233 * Acquire it just long enough to grab the current counter values.
1234 */
1236
1238 nextMXact = MultiXactState->nextMXact;
1239
1241
1242 if (MultiXactIdPrecedes(multi, oldestMXact))
1243 ereport(ERROR,
1245 errmsg("MultiXactId %u does no longer exist -- apparent wraparound",
1246 multi)));
1247
1248 if (!MultiXactIdPrecedes(multi, nextMXact))
1249 ereport(ERROR,
1251 errmsg("MultiXactId %u has not been created yet -- apparent wraparound",
1252 multi)));
1253
1254 /*
1255 * Find out the offset at which we need to start reading MultiXactMembers
1256 * and the number of members in the multixact. We determine the latter as
1257 * the difference between this multixact's starting offset and the next
1258 * one's.
1259 */
1260 pageno = MultiXactIdToOffsetPage(multi);
1262
1263 /* Acquire the bank lock for the page we need. */
1266
1267 /* read this multi's offset */
1268 slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, &multi);
1269 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1270 offptr += entryno;
1271 offset = *offptr;
1272
1273 if (offset == 0)
1274 ereport(ERROR,
1276 errmsg("MultiXact %u has invalid offset", multi)));
1277
1278 /* read next multi's offset */
1279 {
1281
1282 /* handle wraparound if needed */
1283 tmpMXact = NextMultiXactId(multi);
1284
1285 prev_pageno = pageno;
1286
1289
1290 if (pageno != prev_pageno)
1291 {
1292 LWLock *newlock;
1293
1294 /*
1295 * Since we're going to access a different SLRU page, if this page
1296 * falls under a different bank, release the old bank's lock and
1297 * acquire the lock of the new bank.
1298 */
1300 if (newlock != lock)
1301 {
1302 LWLockRelease(lock);
1304 lock = newlock;
1305 }
1307 }
1308
1309 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1310 offptr += entryno;
1312 }
1313
1314 LWLockRelease(lock);
1315 lock = NULL;
1316
1317 /* Sanity check the next offset */
1318 if (nextMXOffset == 0)
1319 ereport(ERROR,
1321 errmsg("MultiXact %u has invalid next offset", multi)));
1322 if (nextMXOffset == offset)
1323 ereport(ERROR,
1325 errmsg("MultiXact %u with offset (%" PRIu64 ") has zero members",
1326 multi, offset)));
1327 if (nextMXOffset < offset)
1328 ereport(ERROR,
1330 errmsg("MultiXact %u has offset (%" PRIu64 ") greater than its next offset (%" PRIu64 ")",
1331 multi, offset, nextMXOffset)));
1332 if (nextMXOffset - offset > INT32_MAX)
1333 ereport(ERROR,
1335 errmsg("MultiXact %u has too many members (%" PRIu64 ")",
1336 multi, nextMXOffset - offset)));
1337 length = nextMXOffset - offset;
1338
1339 /* read the members */
1340 ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
1341 prev_pageno = -1;
1342 for (int i = 0; i < length; i++, offset++)
1343 {
1346 int flagsoff;
1347 int bshift;
1348 int memberoff;
1349
1350 pageno = MXOffsetToMemberPage(offset);
1352
1353 if (pageno != prev_pageno)
1354 {
1356 LWLock *newlock;
1357
1358 /*
1359 * Since we're going to access a different SLRU page, if this page
1360 * falls under a different bank, release the old bank's lock and
1361 * acquire the lock of the new bank.
1362 */
1364 if (newlock != lock)
1365 {
1366 if (lock)
1367 LWLockRelease(lock);
1369 lock = newlock;
1370 }
1373 prev_pageno = pageno;
1374 }
1375
1377 (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
1379
1382 flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
1383
1384 ptr[i].xid = *xactptr;
1386 }
1387
1388 LWLockRelease(lock);
1389
1390 /*
1391 * Copy the result into the local cache.
1392 */
1393 mXactCachePut(multi, length, ptr);
1394
1395 debug_elog3(DEBUG2, "GetMembers: no cache for %s",
1396 mxid_to_string(multi, length, ptr));
1397 *members = ptr;
1398 return length;
1399}
1400
1401/*
1402 * mxactMemberComparator
1403 * qsort comparison function for MultiXactMember
1404 *
1405 * We can't use wraparound comparison for XIDs because that does not respect
1406 * the triangle inequality! Any old sort order will do.
1407 */
1408static int
1409mxactMemberComparator(const void *arg1, const void *arg2)
1410{
1413
1414 if (member1.xid > member2.xid)
1415 return 1;
1416 if (member1.xid < member2.xid)
1417 return -1;
1418 if (member1.status > member2.status)
1419 return 1;
1420 if (member1.status < member2.status)
1421 return -1;
1422 return 0;
1423}
1424
1425/*
1426 * mXactCacheGetBySet
1427 * returns a MultiXactId from the cache based on the set of
1428 * TransactionIds that compose it, or InvalidMultiXactId if
1429 * none matches.
1430 *
1431 * This is helpful, for example, if two transactions want to lock a huge
1432 * table. By using the cache, the second will use the same MultiXactId
1433 * for the majority of tuples, thus keeping MultiXactId usage low (saving
1434 * both I/O and wraparound issues).
1435 *
1436 * NB: the passed members array will be sorted in-place.
1437 */
1438static MultiXactId
1440{
1441 dlist_iter iter;
1442
1443 debug_elog3(DEBUG2, "CacheGet: looking for %s",
1444 mxid_to_string(InvalidMultiXactId, nmembers, members));
1445
1446 /* sort the array so comparison is easy */
1447 qsort(members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
1448
1450 {
1452 iter.cur);
1453
1454 if (entry->nmembers != nmembers)
1455 continue;
1456
1457 /*
1458 * We assume the cache entries are sorted, and that the unused bits in
1459 * "status" are zeroed.
1460 */
1461 if (memcmp(members, entry->members, nmembers * sizeof(MultiXactMember)) == 0)
1462 {
1463 debug_elog3(DEBUG2, "CacheGet: found %u", entry->multi);
1465 return entry->multi;
1466 }
1467 }
1468
1469 debug_elog2(DEBUG2, "CacheGet: not found :-(");
1470 return InvalidMultiXactId;
1471}
1472
1473/*
1474 * mXactCacheGetById
1475 * returns the composing MultiXactMember set from the cache for a
1476 * given MultiXactId, if present.
1477 *
1478 * If successful, *members is set to the address of a palloc'd copy of the
1479 * MultiXactMember set. Return value is number of members, or -1 on failure.
1480 */
1481static int
1483{
1484 dlist_iter iter;
1485
1486 debug_elog3(DEBUG2, "CacheGet: looking for %u", multi);
1487
1489 {
1491 iter.cur);
1492
1493 if (entry->multi == multi)
1494 {
1495 MultiXactMember *ptr;
1496 Size size;
1497
1498 size = sizeof(MultiXactMember) * entry->nmembers;
1499 ptr = (MultiXactMember *) palloc(size);
1500
1501 memcpy(ptr, entry->members, size);
1502
1503 debug_elog3(DEBUG2, "CacheGet: found %s",
1504 mxid_to_string(multi,
1505 entry->nmembers,
1506 entry->members));
1507
1508 /*
1509 * Note we modify the list while not using a modifiable iterator.
1510 * This is acceptable only because we exit the iteration
1511 * immediately afterwards.
1512 */
1514
1515 *members = ptr;
1516 return entry->nmembers;
1517 }
1518 }
1519
1520 debug_elog2(DEBUG2, "CacheGet: not found");
1521 return -1;
1522}
1523
1524/*
1525 * mXactCachePut
1526 * Add a new MultiXactId and its composing set into the local cache.
1527 */
1528static void
1529mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members)
1530{
1531 mXactCacheEnt *entry;
1532
1533 debug_elog3(DEBUG2, "CachePut: storing %s",
1534 mxid_to_string(multi, nmembers, members));
1535
1536 if (MXactContext == NULL)
1537 {
1538 /* The cache only lives as long as the current transaction */
1539 debug_elog2(DEBUG2, "CachePut: initializing memory context");
1541 "MultiXact cache context",
1543 }
1544
1545 entry = (mXactCacheEnt *)
1547 offsetof(mXactCacheEnt, members) +
1548 nmembers * sizeof(MultiXactMember));
1549
1550 entry->multi = multi;
1551 entry->nmembers = nmembers;
1552 memcpy(entry->members, members, nmembers * sizeof(MultiXactMember));
1553
1554 /* mXactCacheGetBySet assumes the entries are sorted, so sort them */
1555 qsort(entry->members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
1556
1557 dclist_push_head(&MXactCache, &entry->node);
1559 {
1560 dlist_node *node;
1561
1564
1565 entry = dclist_container(mXactCacheEnt, node, node);
1566 debug_elog3(DEBUG2, "CachePut: pruning cached multi %u",
1567 entry->multi);
1568
1569 pfree(entry);
1570 }
1571}
1572
1573char *
1575{
1576 switch (status)
1577 {
1579 return "keysh";
1581 return "sh";
1583 return "fornokeyupd";
1585 return "forupd";
1587 return "nokeyupd";
1589 return "upd";
1590 default:
1591 elog(ERROR, "unrecognized multixact status %d", status);
1592 return "";
1593 }
1594}
1595
1596char *
1597mxid_to_string(MultiXactId multi, int nmembers, MultiXactMember *members)
1598{
1599 static char *str = NULL;
1601 int i;
1602
1603 if (str != NULL)
1604 pfree(str);
1605
1607
1608 appendStringInfo(&buf, "%u %d[%u (%s)", multi, nmembers, members[0].xid,
1609 mxstatus_to_string(members[0].status));
1610
1611 for (i = 1; i < nmembers; i++)
1612 appendStringInfo(&buf, ", %u (%s)", members[i].xid,
1613 mxstatus_to_string(members[i].status));
1614
1617 pfree(buf.data);
1618 return str;
1619}
1620
1621/*
1622 * AtEOXact_MultiXact
1623 * Handle transaction end for MultiXact
1624 *
1625 * This is called at top transaction commit or abort (we don't care which).
1626 */
1627void
1629{
1630 /*
1631 * Reset our OldestMemberMXactId and OldestVisibleMXactId values, both of
1632 * which should only be valid while within a transaction.
1633 *
1634 * We assume that storing a MultiXactId is atomic and so we need not take
1635 * MultiXactGenLock to do this.
1636 */
1639
1640 /*
1641 * Discard the local MultiXactId cache. Since MXactContext was created as
1642 * a child of TopTransactionContext, we needn't delete it explicitly.
1643 */
1646}
1647
1648/*
1649 * AtPrepare_MultiXact
1650 * Save multixact state at 2PC transaction prepare
1651 *
1652 * In this phase, we only store our OldestMemberMXactId value in the two-phase
1653 * state file.
1654 */
1655void
1664
1665/*
1666 * PostPrepare_MultiXact
1667 * Clean up after successful PREPARE TRANSACTION
1668 */
1669void
1671{
1673
1674 /*
1675 * Transfer our OldestMemberMXactId value to the slot reserved for the
1676 * prepared transaction.
1677 */
1680 {
1682
1683 /*
1684 * Even though storing MultiXactId is atomic, acquire lock to make
1685 * sure others see both changes, not just the reset of the slot of the
1686 * current backend. Using a volatile pointer might suffice, but this
1687 * isn't a hot spot.
1688 */
1690
1693
1695 }
1696
1697 /*
1698 * We don't need to transfer OldestVisibleMXactId value, because the
1699 * transaction is not going to be looking at any more multixacts once it's
1700 * prepared.
1701 *
1702 * We assume that storing a MultiXactId is atomic and so we need not take
1703 * MultiXactGenLock to do this.
1704 */
1706
1707 /*
1708 * Discard the local MultiXactId cache like in AtEOXact_MultiXact.
1709 */
1712}
1713
1714/*
1715 * multixact_twophase_recover
1716 * Recover the state of a prepared transaction at startup
1717 */
1718void
1720 void *recdata, uint32 len)
1721{
1724
1725 /*
1726 * Get the oldest member XID from the state file record, and set it in the
1727 * OldestMemberMXactId slot reserved for this prepared transaction.
1728 */
1729 Assert(len == sizeof(MultiXactId));
1731
1733}
1734
1735/*
1736 * multixact_twophase_postcommit
1737 * Similar to AtEOXact_MultiXact but for COMMIT PREPARED
1738 */
1739void
1749
1750/*
1751 * multixact_twophase_postabort
1752 * This is actually just the same as the COMMIT case.
1753 */
1754void
1760
1761
1762/*
1763 * Register shared memory needs for MultiXact.
1764 */
1765static void
1767{
1768 Size size;
1769
1770 /*
1771 * Calculate the size of the MultiXactState struct, and the two
1772 * per-backend MultiXactId arrays. They are carved out of the same
1773 * allocation.
1774 */
1775 size = offsetof(MultiXactStateData, perBackendXactIds);
1776 size = add_size(size,
1778 size = add_size(size,
1780 ShmemRequestStruct(.name = "Shared MultiXact State",
1781 .size = size,
1782 .ptr = (void **) &MultiXactState,
1783 );
1784
1786 .name = "multixact_offset",
1787 .Dir = "pg_multixact/offsets",
1788 .long_segment_names = false,
1789
1790 .nslots = multixact_offset_buffers,
1791
1792 .sync_handler = SYNC_HANDLER_MULTIXACT_OFFSET,
1793 .PagePrecedes = MultiXactOffsetPagePrecedes,
1794 .errdetail_for_io_error = MultiXactOffsetIoErrorDetail,
1795
1796 .buffer_tranche_id = LWTRANCHE_MULTIXACTOFFSET_BUFFER,
1797 .bank_tranche_id = LWTRANCHE_MULTIXACTOFFSET_SLRU,
1798 );
1799
1801 .name = "multixact_member",
1802 .Dir = "pg_multixact/members",
1803 .long_segment_names = true,
1804
1805 .nslots = multixact_member_buffers,
1806
1807 .sync_handler = SYNC_HANDLER_MULTIXACT_MEMBER,
1808 .PagePrecedes = MultiXactMemberPagePrecedes,
1809 .errdetail_for_io_error = MultiXactMemberIoErrorDetail,
1810
1811 .buffer_tranche_id = LWTRANCHE_MULTIXACTMEMBER_BUFFER,
1812 .bank_tranche_id = LWTRANCHE_MULTIXACTMEMBER_SLRU,
1813 );
1814}
1815
1816static void
1818{
1820
1821 /*
1822 * members SLRU doesn't call SimpleLruTruncate() or meet criteria for unit
1823 * tests
1824 */
1825
1826 /* Set up array pointers */
1829}
1830
1831static void
1838
1839/*
1840 * GUC check_hook for multixact_offset_buffers
1841 */
1842bool
1844{
1845 return check_slru_buffers("multixact_offset_buffers", newval);
1846}
1847
1848/*
1849 * GUC check_hook for multixact_member_buffers
1850 */
1851bool
1853{
1854 return check_slru_buffers("multixact_member_buffers", newval);
1855}
1856
1857/*
1858 * This function must be called ONCE on system install. It creates the initial
1859 * MultiXact segments. (The MultiXacts directories are assumed to have been
1860 * created by initdb, and MultiXactShmemInit must have been called already.)
1861 */
1862void
1864{
1865 /* Zero the initial pages and flush them to disk */
1868}
1869
1870/*
1871 * This must be called ONCE during postmaster or standalone-backend startup.
1872 *
1873 * StartupXLOG has already established nextMXact/nextOffset by calling
1874 * MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact, and the oldestMulti
1875 * info from pg_control and/or MultiXactAdvanceOldest, but we haven't yet
1876 * replayed WAL.
1877 */
1878void
1880{
1883 int64 pageno;
1884
1885 /*
1886 * Initialize offset's idea of the latest page number.
1887 */
1888 pageno = MultiXactIdToOffsetPage(multi);
1889 pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
1890 pageno);
1891
1892 /*
1893 * Initialize member's idea of the latest page number.
1894 */
1895 pageno = MXOffsetToMemberPage(offset);
1896 pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
1897 pageno);
1898}
1899
1900/*
1901 * This must be called ONCE at the end of startup/recovery.
1902 */
1903void
1905{
1906 MultiXactId nextMXact;
1907 MultiXactOffset offset;
1910 int64 pageno;
1911 int entryno;
1912 int flagsoff;
1913
1915 nextMXact = MultiXactState->nextMXact;
1916 offset = MultiXactState->nextOffset;
1920
1921 /* Clean up offsets state */
1922
1923 /*
1924 * (Re-)Initialize our idea of the latest page number for offsets.
1925 */
1926 pageno = MultiXactIdToOffsetPage(nextMXact);
1927 pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
1928 pageno);
1929
1930 /*
1931 * Set the offset of nextMXact on the offsets page. This is normally done
1932 * in RecordNewMultiXact() of the previous multixact, but let's be sure
1933 * the next page exists, if the nextMXact was reset with pg_resetwal for
1934 * example.
1935 *
1936 * Zero out the remainder of the page. See notes in TrimCLOG() for
1937 * background. Unlike CLOG, some WAL record covers every pg_multixact
1938 * SLRU mutation. Since, also unlike CLOG, we ignore the WAL rule "write
1939 * xlog before data," nextMXact successors may carry obsolete, nonzero
1940 * offset values.
1941 */
1942 entryno = MultiXactIdToOffsetEntry(nextMXact);
1943 {
1944 int slotno;
1947
1949 if (entryno == 0 || nextMXact == FirstMultiXactId)
1951 else
1952 slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, &nextMXact);
1953 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1954 offptr += entryno;
1955
1956 *offptr = offset;
1957 if (entryno != 0 && (entryno + 1) * sizeof(MultiXactOffset) != BLCKSZ)
1958 MemSet(offptr + 1, 0, BLCKSZ - (entryno + 1) * sizeof(MultiXactOffset));
1959
1960 MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
1961 LWLockRelease(lock);
1962 }
1963
1964 /*
1965 * And the same for members.
1966 *
1967 * (Re-)Initialize our idea of the latest page number for members.
1968 */
1969 pageno = MXOffsetToMemberPage(offset);
1970 pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
1971 pageno);
1972
1973 /*
1974 * Zero out the remainder of the current members page. See notes in
1975 * TrimCLOG() for motivation.
1976 */
1978 if (flagsoff != 0)
1979 {
1981 int slotno;
1983 int memberoff;
1985
1989 xidptr = (TransactionId *)
1990 (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
1991
1993
1994 /*
1995 * Note: we don't need to zero out the flag bits in the remaining
1996 * members of the current group, because they are always reset before
1997 * writing.
1998 */
1999
2000 MultiXactMemberCtl->shared->page_dirty[slotno] = true;
2001 LWLockRelease(lock);
2002 }
2003
2004 /* signal that we're officially up */
2008
2009 /* Now compute how far away the next multixid wraparound is. */
2011}
2012
2013/*
2014 * Get the MultiXact data to save in a checkpoint record
2015 */
2016void
2018 MultiXactId *nextMulti,
2019 MultiXactOffset *nextMultiOffset,
2020 MultiXactId *oldestMulti,
2021 Oid *oldestMultiDB)
2022{
2024 *nextMulti = MultiXactState->nextMXact;
2025 *nextMultiOffset = MultiXactState->nextOffset;
2026 *oldestMulti = MultiXactState->oldestMultiXactId;
2027 *oldestMultiDB = MultiXactState->oldestMultiXactDB;
2029
2031 "MultiXact: checkpoint is nextMulti %u, nextOffset %" PRIu64 ", oldestMulti %u in DB %u",
2032 *nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB);
2033}
2034
2035/*
2036 * Perform a checkpoint --- either during shutdown, or on-the-fly
2037 */
2038void
2040{
2042
2043 /*
2044 * Write dirty MultiXact pages to disk. This may result in sync requests
2045 * queued for later handling by ProcessSyncRequests(), as part of the
2046 * checkpoint.
2047 */
2050
2052}
2053
2054/*
2055 * Set the next-to-be-assigned MultiXactId and offset
2056 *
2057 * This is used when we can determine the correct next ID/offset exactly
2058 * from a checkpoint record. Although this is only called during bootstrap
2059 * and XLog replay, we take the lock in case any hot-standby backends are
2060 * examining the values.
2061 */
2062void
2064 MultiXactOffset nextMultiOffset)
2065{
2066 Assert(MultiXactIdIsValid(nextMulti));
2067 debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %" PRIu64,
2068 nextMulti, nextMultiOffset);
2069
2071 MultiXactState->nextMXact = nextMulti;
2072 MultiXactState->nextOffset = nextMultiOffset;
2074}
2075
2076/*
2077 * Determine the last safe MultiXactId to allocate given the currently oldest
2078 * datminmxid (ie, the oldest MultiXactId that might exist in any database
2079 * of our cluster), and the OID of the (or a) database with that value.
2080 *
2081 * This also updates MultiXactState->oldestOffset, by looking up the offset of
2082 * MultiXactState->oldestMultiXactId.
2083 */
2084void
2086{
2087 MultiXactId multiVacLimit;
2088 MultiXactId multiWarnLimit;
2089 MultiXactId multiStopLimit;
2090 MultiXactId multiWrapLimit;
2092
2094
2095 /*
2096 * We pretend that a wrap will happen halfway through the multixact ID
2097 * space, but that's not really true, because multixacts wrap differently
2098 * from transaction IDs.
2099 */
2100 multiWrapLimit = oldest_datminmxid + (MaxMultiXactId >> 1);
2101 if (multiWrapLimit < FirstMultiXactId)
2102 multiWrapLimit += FirstMultiXactId;
2103
2104 /*
2105 * We'll refuse to continue assigning MultiXactIds once we get within 3M
2106 * multis of data loss. See SetTransactionIdLimit.
2107 */
2108 multiStopLimit = multiWrapLimit - 3000000;
2109 if (multiStopLimit < FirstMultiXactId)
2110 multiStopLimit -= FirstMultiXactId;
2111
2112 /*
2113 * We'll start complaining loudly when we get within 100M multis of data
2114 * loss. This is kind of arbitrary, but if you let your gas gauge get
2115 * down to 5% of full, would you be looking for the next gas station? We
2116 * need to be fairly liberal about this number because there are lots of
2117 * scenarios where most transactions are done by automatic clients that
2118 * won't pay attention to warnings. (No, we're not gonna make this
2119 * configurable. If you know enough to configure it, you know enough to
2120 * not get in this kind of trouble in the first place.)
2121 */
2122 multiWarnLimit = multiWrapLimit - 100000000;
2123 if (multiWarnLimit < FirstMultiXactId)
2124 multiWarnLimit -= FirstMultiXactId;
2125
2126 /*
2127 * We'll start trying to force autovacuums when oldest_datminmxid gets to
2128 * be more than autovacuum_multixact_freeze_max_age mxids old.
2129 *
2130 * Note: autovacuum_multixact_freeze_max_age is a PGC_POSTMASTER parameter
2131 * so that we don't have to worry about dealing with on-the-fly changes in
2132 * its value. See SetTransactionIdLimit.
2133 */
2135 if (multiVacLimit < FirstMultiXactId)
2136 multiVacLimit += FirstMultiXactId;
2137
2138 /* Grab lock for just long enough to set the new limit values */
2142 MultiXactState->multiVacLimit = multiVacLimit;
2143 MultiXactState->multiWarnLimit = multiWarnLimit;
2144 MultiXactState->multiStopLimit = multiStopLimit;
2145 MultiXactState->multiWrapLimit = multiWrapLimit;
2148
2149 /* Log the info */
2151 (errmsg_internal("MultiXactId wrap limit is %u, limited by database with OID %u",
2152 multiWrapLimit, oldest_datoid)));
2153
2154 /*
2155 * Computing the actual limits is only possible once the data directory is
2156 * in a consistent state. There's no need to compute the limits while
2157 * still replaying WAL - no decisions about new multis are made even
2158 * though multixact creations might be replayed. So we'll only do further
2159 * checks after TrimMultiXact() has been called.
2160 */
2162 return;
2163
2165
2166 /*
2167 * Offsets are 64 bits wide and never wrap around, so we don't need to
2168 * consider them for emergency autovacuum purposes. But now that we're in
2169 * a consistent state, determine MultiXactState->oldestOffset. It will be
2170 * used to adjust the freezing cutoff, to keep the offsets disk usage in
2171 * check.
2172 */
2174
2175 /*
2176 * If past the autovacuum force point, immediately signal an autovac
2177 * request. The reason for this is that autovac only processes one
2178 * database per invocation. Once it's finished cleaning up the oldest
2179 * database, it'll call here, and we'll signal the postmaster to start
2180 * another iteration immediately if there are still any old databases.
2181 */
2182 if (MultiXactIdPrecedes(multiVacLimit, curMulti) && IsUnderPostmaster)
2184
2185 /* Give an immediate warning if past the wrap warn point */
2186 if (MultiXactIdPrecedes(multiWarnLimit, curMulti))
2187 {
2188 char *oldest_datname;
2189
2190 /*
2191 * We can be called when not inside a transaction, for example during
2192 * StartupXLOG(). In such a case we cannot do database access, so we
2193 * must just report the oldest DB's OID.
2194 *
2195 * Note: it's also possible that get_database_name fails and returns
2196 * NULL, for example because the database just got dropped. We'll
2197 * still warn, even though the warning might now be unnecessary.
2198 */
2199 if (IsTransactionState())
2201 else
2203
2204 if (oldest_datname)
2206 (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
2207 "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
2208 multiWrapLimit - curMulti,
2210 multiWrapLimit - curMulti),
2211 errdetail("Approximately %.2f%% of MultiXactIds are available for use.",
2212 (double) (multiWrapLimit - curMulti) / (MaxMultiXactId / 2) * 100),
2213 errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
2214 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
2215 else
2217 (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
2218 "database with OID %u must be vacuumed before %u more MultiXactIds are used",
2219 multiWrapLimit - curMulti,
2221 multiWrapLimit - curMulti),
2222 errdetail("Approximately %.2f%% of MultiXactIds are available for use.",
2223 (double) (multiWrapLimit - curMulti) / (MaxMultiXactId / 2) * 100),
2224 errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
2225 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
2226 }
2227}
2228
2229/*
2230 * Ensure the next-to-be-assigned MultiXactId is at least minMulti,
2231 * and similarly nextOffset is at least minMultiOffset.
2232 *
2233 * This is used when we can determine minimum safe values from an XLog
2234 * record (either an on-line checkpoint or an mxact creation log entry).
2235 * Although this is only called during XLog replay, we take the lock in case
2236 * any hot-standby backends are examining the values.
2237 */
2238void
2258
2259/*
2260 * Update our oldestMultiXactId value, but only if it's more recent than what
2261 * we had.
2262 *
2263 * This may only be called during WAL replay.
2264 */
2265void
2266MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
2267{
2269
2271 SetMultiXactIdLimit(oldestMulti, oldestMultiDB);
2272}
2273
2274/*
2275 * Make sure that MultiXactOffset has room for a newly-allocated MultiXactId.
2276 *
2277 * NB: this is called while holding MultiXactGenLock. We want it to be very
2278 * fast most of the time; even when it's not so fast, no actual I/O need
2279 * happen unless we're forced to write out a dirty log or xlog page to make
2280 * room in shared memory.
2281 */
2282static void
2284{
2285 int64 pageno;
2286 LWLock *lock;
2287
2288 /*
2289 * No work except at first MultiXactId of a page. But beware: just after
2290 * wraparound, the first MultiXactId of page zero is FirstMultiXactId.
2291 */
2292 if (MultiXactIdToOffsetEntry(multi) != 0 &&
2293 multi != FirstMultiXactId)
2294 return;
2295
2296 pageno = MultiXactIdToOffsetPage(multi);
2298
2300
2301 /* Zero the page and make a WAL entry about it */
2304 pageno);
2305
2306 LWLockRelease(lock);
2307}
2308
2309/*
2310 * Make sure that MultiXactMember has room for the members of a newly-
2311 * allocated MultiXactId.
2312 *
2313 * Like the above routine, this is called while holding MultiXactGenLock;
2314 * same comments apply.
2315 */
2316static void
2318{
2319 /*
2320 * It's possible that the members span more than one page of the members
2321 * file, so we loop to ensure we consider each page. The coding is not
2322 * optimal if the members span several pages, but that seems unusual
2323 * enough to not worry much about.
2324 */
2325 while (nmembers > 0)
2326 {
2327 int flagsoff;
2328 int flagsbit;
2330
2331 /*
2332 * Only zero when at first entry of a page.
2333 */
2336 if (flagsoff == 0 && flagsbit == 0)
2337 {
2338 int64 pageno;
2339 LWLock *lock;
2340
2341 pageno = MXOffsetToMemberPage(offset);
2343
2345
2346 /* Zero the page and make a WAL entry about it */
2350
2351 LWLockRelease(lock);
2352 }
2353
2354 /* Compute the number of items till end of current page. */
2356
2357 /*
2358 * Advance to next page. OK if nmembers goes negative.
2359 */
2360 nmembers -= difference;
2361 offset += difference;
2362 }
2363}
2364
2365/*
2366 * GetOldestMultiXactId
2367 *
2368 * Return the oldest MultiXactId that's possibly still seen as live by
2369 * any running transaction. Older ones might still exist on disk, but they no
2370 * longer have any running member transaction.
2371 *
2372 * It's not safe to truncate MultiXact SLRU segments on the value returned by
2373 * this function; however, it can be set as the new relminmxid for any table
2374 * that VACUUM knows has no remaining MXIDs < the same value. It is only safe
2375 * to truncate SLRUs when no table can possibly still have a referencing MXID.
2376 */
2379{
2381
2382 /*
2383 * This is the oldest valid value among all the OldestMemberMXactId[] and
2384 * OldestVisibleMXactId[] entries, or nextMXact if none are valid.
2385 */
2388 for (int i = 0; i < NumMemberSlots; i++)
2389 {
2391
2396 }
2397 for (int i = 0; i < NumVisibleSlots; i++)
2398 {
2400
2405 }
2406
2408
2409 return oldestMXact;
2410}
2411
2412/*
2413 * Calculate the oldest member offset and install it in MultiXactState, where
2414 * it can be used to adjust multixid freezing cutoffs.
2415 */
2416static void
2418{
2419 MultiXactId oldestMultiXactId;
2420 MultiXactId nextMXact;
2421 MultiXactOffset oldestOffset = 0; /* placate compiler */
2422 MultiXactOffset nextOffset;
2423 bool oldestOffsetKnown = false;
2424
2425 /*
2426 * NB: Have to prevent concurrent truncation, we might otherwise try to
2427 * lookup an oldestMulti that's concurrently getting truncated away.
2428 */
2430
2431 /* Read relevant fields from shared memory. */
2433 oldestMultiXactId = MultiXactState->oldestMultiXactId;
2434 nextMXact = MultiXactState->nextMXact;
2435 nextOffset = MultiXactState->nextOffset;
2438
2439 /*
2440 * Determine the offset of the oldest multixact. Normally, we can read
2441 * the offset from the multixact itself, but there's an important special
2442 * case: if there are no multixacts in existence at all, oldestMXact
2443 * obviously can't point to one. It will instead point to the multixact
2444 * ID that will be assigned the next time one is needed.
2445 */
2446 if (oldestMultiXactId == nextMXact)
2447 {
2448 /*
2449 * When the next multixact gets created, it will be stored at the next
2450 * offset.
2451 */
2452 oldestOffset = nextOffset;
2453 oldestOffsetKnown = true;
2454 }
2455 else
2456 {
2457 /*
2458 * Look up the offset at which the oldest existing multixact's members
2459 * are stored. If we cannot find it, be careful not to fail, and
2460 * leave oldestOffset unchanged. oldestOffset is initialized to zero
2461 * at system startup, which prevents truncating members until a proper
2462 * value is calculated.
2463 *
2464 * (We had bugs in early releases of PostgreSQL 9.3.X and 9.4.X where
2465 * the supposedly-earliest multixact might not really exist. Those
2466 * should be long gone by now, so this should not fail, but let's
2467 * still be defensive.)
2468 */
2470 find_multixact_start(oldestMultiXactId, &oldestOffset);
2471
2474 (errmsg_internal("oldest MultiXactId member is at offset %" PRIu64,
2475 oldestOffset)));
2476 else
2477 ereport(LOG,
2478 (errmsg("MultiXact member truncation is disabled because oldest checkpointed MultiXact %u does not exist on disk",
2479 oldestMultiXactId)));
2480 }
2481
2483
2484 /* Install the computed value */
2486 {
2488 MultiXactState->oldestOffset = oldestOffset;
2490 }
2491}
2492
2493/*
2494 * Find the starting offset of the given MultiXactId.
2495 *
2496 * Returns false if the file containing the multi does not exist on disk.
2497 * Otherwise, returns true and sets *result to the starting member offset.
2498 *
2499 * This function does not prevent concurrent truncation, so if that's
2500 * required, the caller has to protect against that.
2501 */
2502static bool
2504{
2505 MultiXactOffset offset;
2506 int64 pageno;
2507 int entryno;
2508 int slotno;
2510
2512
2513 pageno = MultiXactIdToOffsetPage(multi);
2515
2516 /*
2517 * Write out dirty data, so PhysicalPageExists can work correctly.
2518 */
2521
2523 return false;
2524
2525 /* lock is acquired by SimpleLruReadPage_ReadOnly */
2527 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
2528 offptr += entryno;
2529 offset = *offptr;
2531
2532 *result = offset;
2533 return true;
2534}
2535
2536/*
2537 * GetMultiXactInfo
2538 *
2539 * Returns information about the current MultiXact state, as of:
2540 * multixacts: Number of MultiXacts (nextMultiXactId - oldestMultiXactId)
2541 * nextOffset: Next-to-be-assigned offset
2542 * oldestMultiXactId: Oldest MultiXact ID still in use
2543 * oldestOffset: Oldest offset still in use
2544 */
2545void
2547 MultiXactId *oldestMultiXactId, MultiXactOffset *oldestOffset)
2548{
2550
2552 *nextOffset = MultiXactState->nextOffset;
2553 *oldestMultiXactId = MultiXactState->oldestMultiXactId;
2555 *oldestOffset = MultiXactState->oldestOffset;
2557
2558 *multixacts = nextMultiXactId - *oldestMultiXactId;
2559}
2560
2561/*
2562 * Multixact members can be removed once the multixacts that refer to them
2563 * are older than every datminmxid. autovacuum_multixact_freeze_max_age and
2564 * vacuum_multixact_freeze_table_age work together to make sure we never have
2565 * too many multixacts; we hope that, at least under normal circumstances,
2566 * this will also be sufficient to keep us from using too many offsets.
2567 * However, if the average multixact has many members, we might accumulate a
2568 * large amount of members, consuming disk space, while still using few enough
2569 * multixids that the multixid limits fail to trigger relminmxid advancement
2570 * by VACUUM.
2571 *
2572 * To prevent that, if the members space usage exceeds a threshold
2573 * (MULTIXACT_MEMBER_LOW_THRESHOLD), we effectively reduce
2574 * autovacuum_multixact_freeze_max_age to a value just less than the number of
2575 * multixacts in use. We hope that this will quickly trigger autovacuuming on
2576 * the table or tables with the oldest relminmxid, thus allowing datminmxid
2577 * values to advance and removing some members.
2578 *
2579 * As the amount of the member space in use grows, we become more aggressive
2580 * in clamping this value. That not only causes autovacuum to ramp up, but
2581 * also makes any manual vacuums the user issues more aggressive. This
2582 * happens because vacuum_get_cutoffs() will clamp the freeze table and the
2583 * minimum freeze age cutoffs based on the effective
2584 * autovacuum_multixact_freeze_max_age this function returns. At the extreme,
2585 * when the members usage reaches MULTIXACT_MEMBER_HIGH_THRESHOLD, we clamp
2586 * freeze_max_age to zero, and every vacuum of any table will freeze every
2587 * multixact.
2588 */
2589int
2591{
2594 double fraction;
2595 int result;
2596 MultiXactId oldestMultiXactId;
2597 MultiXactOffset oldestOffset;
2598 MultiXactOffset nextOffset;
2599 uint64 members;
2600
2601 /* Read the current offsets and multixact usage. */
2602 GetMultiXactInfo(&multixacts, &nextOffset, &oldestMultiXactId, &oldestOffset);
2603 members = nextOffset - oldestOffset;
2604
2605 /* If member space utilization is low, no special action is required. */
2606 if (members <= MULTIXACT_MEMBER_LOW_THRESHOLD)
2608
2609 /*
2610 * Compute a target for relminmxid advancement. The number of multixacts
2611 * we try to eliminate from the system is based on how far we are past
2612 * MULTIXACT_MEMBER_LOW_THRESHOLD.
2613 *
2614 * The way this formula works is that when members is exactly at the low
2615 * threshold, fraction = 0.0, and we set freeze_max_age equal to
2616 * mxid_age(oldestMultiXactId). As members grows further, towards the
2617 * high threshold, fraction grows linearly from 0.0 to 1.0, and the result
2618 * shrinks from mxid_age(oldestMultiXactId) to 0. Beyond the high
2619 * threshold, fraction > 1.0 and the result is clamped to 0.
2620 */
2623
2624 /* fraction could be > 1.0, but lowest possible freeze age is zero */
2625 if (fraction >= 1.0)
2626 return 0;
2627
2630
2631 /*
2632 * Clamp to autovacuum_multixact_freeze_max_age, so that we never make
2633 * autovacuum less aggressive than it would otherwise be.
2634 */
2636}
2637
2638
2639/*
2640 * Delete members segments older than newOldestOffset
2641 */
2642static void
2648
2649/*
2650 * Delete offsets segments older than newOldestMulti
2651 */
2652static void
2654{
2655 /*
2656 * We step back one multixact to avoid passing a cutoff page that hasn't
2657 * been created yet in the rare case that oldestMulti would be the first
2658 * item on a page and oldestMulti == nextMulti. In that case, if we
2659 * didn't subtract one, we'd trigger SimpleLruTruncate's wraparound
2660 * detection.
2661 */
2664}
2665
2666/*
2667 * Remove all MultiXactOffset and MultiXactMember segments before the oldest
2668 * ones still of interest.
2669 *
2670 * This is only called on a primary as part of vacuum (via
2671 * vac_truncate_clog()). During recovery truncation is done by replaying
2672 * truncation WAL records logged here.
2673 *
2674 * newOldestMulti is the oldest currently required multixact, newOldestMultiDB
2675 * is one of the databases preventing newOldestMulti from increasing.
2676 */
2677void
2679{
2680 MultiXactId oldestMulti;
2681 MultiXactId nextMulti;
2683 MultiXactOffset nextOffset;
2684
2688
2689 /*
2690 * We can only allow one truncation to happen at once. Otherwise parts of
2691 * members might vanish while we're doing lookups or similar. There's no
2692 * need to have an interlock with creating new multis or such, since those
2693 * are constrained by the limits (which only grow, never shrink).
2694 */
2696
2698 nextMulti = MultiXactState->nextMXact;
2699 nextOffset = MultiXactState->nextOffset;
2700 oldestMulti = MultiXactState->oldestMultiXactId;
2702
2703 /*
2704 * Make sure to only attempt truncation if there are values to truncate
2705 * away. In normal processing values shouldn't go backwards, but there are
2706 * some corner cases (due to bugs) where that's possible.
2707 */
2709 {
2711 return;
2712 }
2713
2714 /*
2715 * Compute up to where to truncate MultiXactMember. Lookup the
2716 * corresponding member offset for newOldestMulti for that.
2717 */
2718 if (newOldestMulti == nextMulti)
2719 {
2720 /* there are NO MultiXacts */
2721 newOldestOffset = nextOffset;
2722 }
2724 {
2725 ereport(LOG,
2726 (errmsg("cannot truncate up to MultiXact %u because it does not exist on disk, skipping truncation",
2727 newOldestMulti)));
2729 return;
2730 }
2731
2732 /*
2733 * On crash, MultiXactIdCreateFromMembers() can leave behind multixids
2734 * that were not yet written out and hence have zero offset on disk. If
2735 * such a multixid becomes oldestMulti, we won't be able to look up its
2736 * offset. That should be rare, so we don't try to do anything smart about
2737 * it. Just skip the truncation, and hope that by the next truncation
2738 * attempt, oldestMulti has advanced to a valid multixid.
2739 */
2740 if (newOldestOffset == 0)
2741 {
2742 ereport(LOG,
2743 (errmsg("cannot truncate up to MultiXact %u because it has invalid offset, skipping truncation",
2744 newOldestMulti)));
2746 return;
2747 }
2748
2749 elog(DEBUG1, "performing multixact truncation: "
2750 "oldestMulti %u (offsets segment %" PRIx64 "), "
2751 "oldestOffset %" PRIu64 " (members segment %" PRIx64 ")",
2756
2757 /*
2758 * Do truncation, and the WAL logging of the truncation, in a critical
2759 * section. That way offsets/members cannot get out of sync anymore, i.e.
2760 * once consistent the newOldestMulti will always exist in members, even
2761 * if we crashed at the wrong moment.
2762 */
2764
2765 /*
2766 * Prevent checkpoints from being scheduled concurrently. This is critical
2767 * because otherwise a truncation record might not be replayed after a
2768 * crash/basebackup, even though the state of the data directory would
2769 * require it.
2770 */
2773
2774 /* WAL log truncation */
2776
2777 /*
2778 * Update in-memory limits before performing the truncation, while inside
2779 * the critical section: Have to do it before truncation, to prevent
2780 * concurrent lookups of those values. Has to be inside the critical
2781 * section as otherwise a future call to this function would error out,
2782 * while looking up the oldest member in offsets, if our caller crashes
2783 * before updating the limits.
2784 */
2790
2791 /* First truncate members */
2793
2794 /* Then offsets */
2796
2798
2801}
2802
2803/*
2804 * Decide whether a MultiXactOffset page number is "older" for truncation
2805 * purposes. Analogous to CLOGPagePrecedes().
2806 *
2807 * Offsetting the values is optional, because MultiXactIdPrecedes() has
2808 * translational symmetry.
2809 */
2810static bool
2825
2826/*
2827 * Decide whether a MultiXactMember page number is "older" for truncation
2828 * purposes. There is no "invalid offset number" and members never wrap
2829 * around, so use the numbers verbatim.
2830 */
2831static bool
2836
2837static int
2839{
2841
2842 return errdetail("Could not access offset of multixact %u.", multixid);
2843}
2844
2845static int
2847{
2849
2850 if (MultiXactIdIsValid(context->multi))
2851 return errdetail("Could not access member of multixact %u at offset %" PRIu64 ".",
2852 context->multi, context->offset);
2853 else
2854 return errdetail("Could not access multixact member at offset %" PRIu64 ".",
2855 context->offset);
2856}
2857
2858/*
2859 * Decide which of two MultiXactIds is earlier.
2860 *
2861 * XXX do we need to do something special for InvalidMultiXactId?
2862 * (Doesn't look like it.)
2863 */
2864bool
2866{
2867 int32 diff = (int32) (multi1 - multi2);
2868
2869 return (diff < 0);
2870}
2871
2872/*
2873 * MultiXactIdPrecedesOrEquals -- is multi1 logically <= multi2?
2874 *
2875 * XXX do we need to do something special for InvalidMultiXactId?
2876 * (Doesn't look like it.)
2877 */
2878bool
2880{
2881 int32 diff = (int32) (multi1 - multi2);
2882
2883 return (diff <= 0);
2884}
2885
2886
2887/*
2888 * Write a TRUNCATE xlog record
2889 *
2890 * We must flush the xlog record to disk before returning --- see notes in
2891 * TruncateCLOG().
2892 */
2893static void
2895 MultiXactId oldestMulti,
2896 MultiXactOffset oldestOffset)
2897{
2900
2901 xlrec.oldestMultiDB = oldestMultiDB;
2902 xlrec.oldestMulti = oldestMulti;
2903 xlrec.oldestOffset = oldestOffset;
2904
2909}
2910
2911/*
2912 * MULTIXACT resource manager's routines
2913 */
2914void
2916{
2917 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
2918
2919 /* Backup blocks are not used in multixact records */
2921
2922 if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
2923 {
2924 int64 pageno;
2925
2926 memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
2928 }
2929 else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
2930 {
2931 int64 pageno;
2932
2933 memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
2935 }
2936 else if (info == XLOG_MULTIXACT_CREATE_ID)
2937 {
2941 int i;
2942
2943 /* Store the data back into the SLRU files */
2944 RecordNewMultiXact(xlrec->mid, xlrec->moff, xlrec->nmembers,
2945 xlrec->members);
2946
2947 /* Make sure nextMXact/nextOffset are beyond what this record has */
2949 xlrec->moff + xlrec->nmembers);
2950
2951 /*
2952 * Make sure nextXid is beyond any XID mentioned in the record. This
2953 * should be unnecessary, since any XID found here ought to have other
2954 * evidence in the XLOG, but let's be safe.
2955 */
2956 max_xid = XLogRecGetXid(record);
2957 for (i = 0; i < xlrec->nmembers; i++)
2958 {
2959 if (TransactionIdPrecedes(max_xid, xlrec->members[i].xid))
2960 max_xid = xlrec->members[i].xid;
2961 }
2962
2964 }
2965 else if (info == XLOG_MULTIXACT_TRUNCATE_ID)
2966 {
2968
2969 memcpy(&xlrec, XLogRecGetData(record),
2971
2972 elog(DEBUG1, "replaying multixact truncation: "
2973 "oldestMulti %u (offsets segment %" PRIx64 "), "
2974 "oldestOffset %" PRIu64 " (members segment %" PRIx64 ")",
2975 xlrec.oldestMulti,
2976 MultiXactIdToOffsetSegment(xlrec.oldestMulti),
2977 xlrec.oldestOffset,
2978 MXOffsetToMemberSegment(xlrec.oldestOffset));
2979
2980 /* should not be required, but more than cheap enough */
2982
2983 /*
2984 * Advance the horizon values, so they're current at the end of
2985 * recovery.
2986 */
2987 SetMultiXactIdLimit(xlrec.oldestMulti, xlrec.oldestMultiDB);
2988
2989 PerformMembersTruncation(xlrec.oldestOffset);
2990 PerformOffsetsTruncation(xlrec.oldestMulti);
2991
2993 }
2994 else
2995 elog(PANIC, "multixact_redo: unknown op code %u", info);
2996}
2997
2998/*
2999 * Entrypoint for sync.c to sync offsets files.
3000 */
3001int
3002multixactoffsetssyncfiletag(const FileTag *ftag, char *path)
3003{
3004 return SlruSyncFileTag(MultiXactOffsetCtl, ftag, path);
3005}
3006
3007/*
3008 * Entrypoint for sync.c to sync members files.
3009 */
3010int
3011multixactmemberssyncfiletag(const FileTag *ftag, char *path)
3012{
3013 return SlruSyncFileTag(MultiXactMemberCtl, ftag, path);
3014}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:485
int autovacuum_multixact_freeze_max_age
Definition autovacuum.c:136
static int32 next
Definition blutils.c:225
#define Min(x, y)
Definition c.h:1091
uint8_t uint8
Definition c.h:622
#define Assert(condition)
Definition c.h:943
int64_t int64
Definition c.h:621
TransactionId MultiXactId
Definition c.h:746
#define FLEXIBLE_ARRAY_MEMBER
Definition c.h:558
uint64 MultiXactOffset
Definition c.h:748
int32_t int32
Definition c.h:620
uint64_t uint64
Definition c.h:625
uint16_t uint16
Definition c.h:623
uint32_t uint32
Definition c.h:624
#define MemSet(start, val, len)
Definition c.h:1107
uint32 TransactionId
Definition c.h:736
size_t Size
Definition c.h:689
uint32 result
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
Datum arg
Definition elog.c:1322
int errcode(int sqlerrcode)
Definition elog.c:874
#define LOG
Definition elog.h:32
int errhint(const char *fmt,...) pg_attribute_printf(1
int errdetail(const char *fmt,...) pg_attribute_printf(1
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define WARNING
Definition elog.h:37
int int int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...) pg_attribute_printf(1
#define DEBUG2
Definition elog.h:30
#define PANIC
Definition elog.h:44
#define DEBUG1
Definition elog.h:31
#define ERROR
Definition elog.h:40
#define elog(elevel,...)
Definition elog.h:228
#define ereport(elevel,...)
Definition elog.h:152
#define palloc_array(type, count)
Definition fe_memutils.h:76
Datum difference(PG_FUNCTION_ARGS)
int multixact_offset_buffers
Definition globals.c:166
ProcNumber MyProcNumber
Definition globals.c:92
bool IsUnderPostmaster
Definition globals.c:122
int multixact_member_buffers
Definition globals.c:165
int MaxBackends
Definition globals.c:149
#define newval
GucSource
Definition guc.h:112
const char * str
#define dclist_container(type, membername, ptr)
Definition ilist.h:947
static uint32 dclist_count(const dclist_head *head)
Definition ilist.h:932
static void dclist_move_head(dclist_head *head, dlist_node *node)
Definition ilist.h:808
static dlist_node * dclist_tail_node(dclist_head *head)
Definition ilist.h:920
static void dclist_delete_from(dclist_head *head, dlist_node *node)
Definition ilist.h:763
#define DCLIST_STATIC_INIT(name)
Definition ilist.h:282
static void dclist_push_head(dclist_head *head, dlist_node *node)
Definition ilist.h:693
static void dclist_init(dclist_head *head)
Definition ilist.h:671
#define dclist_foreach(iter, lhead)
Definition ilist.h:970
#define INJECTION_POINT_CACHED(name, arg)
#define INJECTION_POINT_LOAD(name)
int j
Definition isn.c:78
int i
Definition isn.c:77
char * get_database_name(Oid dbid)
Definition lsyscache.c:1312
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1150
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1767
@ LW_SHARED
Definition lwlock.h:105
@ LW_EXCLUSIVE
Definition lwlock.h:104
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition mcxt.c:1768
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition mcxt.c:1232
MemoryContext TopTransactionContext
Definition mcxt.c:171
void pfree(void *pointer)
Definition mcxt.c:1616
MemoryContext TopMemoryContext
Definition mcxt.c:166
void * palloc(Size size)
Definition mcxt.c:1387
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_SMALL_SIZES
Definition memutils.h:170
#define START_CRIT_SECTION()
Definition miscadmin.h:152
#define END_CRIT_SECTION()
Definition miscadmin.h:154
#define NumMemberSlots
Definition multixact.c:221
static MultiXactId PreviousMultiXactId(MultiXactId multi)
Definition multixact.c:109
static SlruDesc MultiXactMemberSlruDesc
Definition multixact.c:123
static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2)
Definition multixact.c:2832
static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
Definition multixact.c:979
static int mXactCacheGetById(MultiXactId multi, MultiXactMember **members)
Definition multixact.c:1482
MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status)
Definition multixact.c:411
static void MultiXactShmemInit(void *arg)
Definition multixact.c:1817
static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
Definition multixact.c:2317
void ReadMultiXactIdRange(MultiXactId *oldest, MultiXactId *next)
Definition multixact.c:696
static MultiXactId * MyOldestVisibleMXactIdSlot(void)
Definition multixact.c:268
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2865
char * mxstatus_to_string(MultiXactStatus status)
Definition multixact.c:1574
void multixact_redo(XLogReaderState *record)
Definition multixact.c:2915
static void PerformOffsetsTruncation(MultiXactId newOldestMulti)
Definition multixact.c:2653
void multixact_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len)
Definition multixact.c:1740
#define debug_elog5(a, b, c, d, e)
Definition multixact.c:313
static void MultiXactIdSetOldestVisible(void)
Definition multixact.c:646
int multixactoffsetssyncfiletag(const FileTag *ftag, char *path)
Definition multixact.c:3002
static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result)
Definition multixact.c:2503
void PostPrepare_MultiXact(FullTransactionId fxid)
Definition multixact.c:1670
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition multixact.c:2063
#define MultiXactMemberCtl
Definition multixact.c:126
static void WriteMTruncateXlogRec(Oid oldestMultiDB, MultiXactId endTruncOff, MultiXactOffset endTruncMemb)
Definition multixact.c:2894
static SlruDesc MultiXactOffsetSlruDesc
Definition multixact.c:122
void AtPrepare_MultiXact(void)
Definition multixact.c:1656
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2879
void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
Definition multixact.c:2266
static void mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members)
Definition multixact.c:1529
void GetMultiXactInfo(uint32 *multixacts, MultiXactOffset *nextOffset, MultiXactId *oldestMultiXactId, MultiXactOffset *oldestOffset)
Definition multixact.c:2546
bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
Definition multixact.c:522
static void MultiXactShmemRequest(void *arg)
Definition multixact.c:1766
void MultiXactIdSetOldestMember(void)
Definition multixact.c:596
#define MULTIXACT_MEMBER_LOW_THRESHOLD
Definition multixact.c:99
static MemoryContext MXactContext
Definition multixact.c:301
static MultiXactId * OldestVisibleMXactId
Definition multixact.c:227
static int mxactMemberComparator(const void *arg1, const void *arg2)
Definition multixact.c:1409
static void ExtendMultiXactOffset(MultiXactId multi)
Definition multixact.c:2283
#define MultiXactOffsetCtl
Definition multixact.c:125
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition multixact.c:2017
static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, int nmembers, MultiXactMember *members)
Definition multixact.c:816
int multixactmemberssyncfiletag(const FileTag *ftag, char *path)
Definition multixact.c:3011
#define MAX_CACHE_ENTRIES
Definition multixact.c:299
static MultiXactId NextMultiXactId(MultiXactId multi)
Definition multixact.c:103
MultiXactId GetOldestMultiXactId(void)
Definition multixact.c:2378
void CheckPointMultiXact(void)
Definition multixact.c:2039
static int MultiXactOffsetIoErrorDetail(const void *opaque_data)
Definition multixact.c:2838
static MultiXactId * PreparedXactOldestMemberMXactIdSlot(ProcNumber procno)
Definition multixact.c:252
MultiXactId MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
Definition multixact.c:715
static void MultiXactShmemAttach(void *arg)
Definition multixact.c:1832
static MultiXactId mXactCacheGetBySet(int nmembers, MultiXactMember *members)
Definition multixact.c:1439
static dclist_head MXactCache
Definition multixact.c:300
void TrimMultiXact(void)
Definition multixact.c:1904
#define debug_elog3(a, b, c)
Definition multixact.c:311
char * mxid_to_string(MultiXactId multi, int nmembers, MultiXactMember *members)
Definition multixact.c:1597
#define debug_elog4(a, b, c, d)
Definition multixact.c:312
void multixact_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len)
Definition multixact.c:1755
static void PerformMembersTruncation(MultiXactOffset newOldestOffset)
Definition multixact.c:2643
static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2)
Definition multixact.c:2811
static MultiXactId * MyOldestMemberMXactIdSlot(void)
Definition multixact.c:240
int MultiXactMemberFreezeThreshold(void)
Definition multixact.c:2590
static void SetOldestOffset(void)
Definition multixact.c:2417
void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset)
Definition multixact.c:2239
static MultiXactId * OldestMemberMXactId
Definition multixact.c:226
static MultiXactStateData * MultiXactState
Definition multixact.c:225
#define NumVisibleSlots
Definition multixact.c:222
MultiXactId ReadNextMultiXactId(void)
Definition multixact.c:679
void BootStrapMultiXact(void)
Definition multixact.c:1863
#define debug_elog6(a, b, c, d, e, f)
Definition multixact.c:314
void multixact_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len)
Definition multixact.c:1719
static int MultiXactMemberIoErrorDetail(const void *opaque_data)
Definition multixact.c:2846
MultiXactId MultiXactIdCreate(TransactionId xid1, MultiXactStatus status1, TransactionId xid2, MultiXactStatus status2)
Definition multixact.c:358
void TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
Definition multixact.c:2678
bool check_multixact_offset_buffers(int *newval, void **extra, GucSource source)
Definition multixact.c:1843
bool check_multixact_member_buffers(int *newval, void **extra, GucSource source)
Definition multixact.c:1852
const ShmemCallbacks MultiXactShmemCallbacks
Definition multixact.c:233
void AtEOXact_MultiXact(void)
Definition multixact.c:1628
#define MULTIXACT_MEMBER_HIGH_THRESHOLD
Definition multixact.c:100
#define debug_elog2(a, b)
Definition multixact.c:310
void StartupMultiXact(void)
Definition multixact.c:1879
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
Definition multixact.c:2085
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition multixact.c:1172
#define MultiXactIdIsValid(multi)
Definition multixact.h:29
#define XLOG_MULTIXACT_ZERO_MEM_PAGE
Definition multixact.h:68
#define XLOG_MULTIXACT_ZERO_OFF_PAGE
Definition multixact.h:67
#define FirstMultiXactId
Definition multixact.h:26
MultiXactStatus
Definition multixact.h:37
@ MultiXactStatusForShare
Definition multixact.h:39
@ MultiXactStatusForNoKeyUpdate
Definition multixact.h:40
@ MultiXactStatusNoKeyUpdate
Definition multixact.h:43
@ MultiXactStatusUpdate
Definition multixact.h:45
@ MultiXactStatusForUpdate
Definition multixact.h:41
@ MultiXactStatusForKeyShare
Definition multixact.h:38
#define ISUPDATE_from_mxstatus(status)
Definition multixact.h:51
#define InvalidMultiXactId
Definition multixact.h:25
#define XLOG_MULTIXACT_TRUNCATE_ID
Definition multixact.h:70
#define SizeOfMultiXactCreate
Definition multixact.h:80
#define SizeOfMultiXactTruncate
Definition multixact.h:93
#define XLOG_MULTIXACT_CREATE_ID
Definition multixact.h:69
#define MaxMultiXactId
Definition multixact.h:27
static int64 MultiXactIdToOffsetSegment(MultiXactId multi)
static int64 MXOffsetToMemberSegment(MultiXactOffset offset)
#define MXACT_MEMBER_BITS_PER_XACT
static int MXOffsetToFlagsBitShift(MultiXactOffset32 offset)
#define MXACT_MEMBER_XACT_BITMASK
static int64 MXOffsetToMemberPage(MultiXactOffset32 offset)
#define MULTIXACT_OFFSETS_PER_PAGE
static int MXOffsetToMemberOffset(MultiXactOffset32 offset)
static int MultiXactIdToOffsetEntry(MultiXactId multi)
static int64 MultiXactIdToOffsetPage(MultiXactId multi)
#define MULTIXACT_MEMBERS_PER_PAGE
static int MXOffsetToFlagsOffset(MultiXactOffset32 offset)
static char * errmsg
#define ERRCODE_DATA_CORRUPTED
const void size_t len
static rewind_source * source
Definition pg_rewind.c:89
static char buf[DEFAULT_XLOG_SEG_SIZE]
void SendPostmasterSignal(PMSignalReason reason)
Definition pmsignal.c:164
@ PMSIGNAL_START_AUTOVAC_LAUNCHER
Definition pmsignal.h:39
#define qsort(a, b, c, d)
Definition port.h:495
unsigned int Oid
static int fb(int x)
#define FIRST_PREPARED_XACT_PROC_NUMBER
Definition proc.h:529
#define DELAY_CHKPT_START
Definition proc.h:139
bool TransactionIdIsInProgress(TransactionId xid)
Definition procarray.c:1393
int ProcNumber
Definition procnumber.h:24
Size add_size(Size s1, Size s2)
Definition shmem.c:1048
Size mul_size(Size s1, Size s2)
Definition shmem.c:1063
#define ShmemRequestStruct(...)
Definition shmem.h:176
int SimpleLruReadPage_ReadOnly(SlruDesc *ctl, int64 pageno, const void *opaque_data)
Definition slru.c:654
void SimpleLruTruncate(SlruDesc *ctl, int64 cutoffPage)
Definition slru.c:1458
void SimpleLruZeroAndWritePage(SlruDesc *ctl, int64 pageno)
Definition slru.c:466
int SimpleLruZeroPage(SlruDesc *ctl, int64 pageno)
Definition slru.c:397
bool SimpleLruDoesPhysicalPageExist(SlruDesc *ctl, int64 pageno)
Definition slru.c:795
void SimpleLruWriteAll(SlruDesc *ctl, bool allow_redirtied)
Definition slru.c:1372
int SimpleLruReadPage(SlruDesc *ctl, int64 pageno, bool write_ok, const void *opaque_data)
Definition slru.c:550
bool check_slru_buffers(const char *name, int *newval)
Definition slru.c:377
int SlruSyncFileTag(SlruDesc *ctl, const FileTag *ftag, char *path)
Definition slru.c:1884
#define SlruPagePrecedesUnitTests(ctl, per_page)
Definition slru.h:233
#define SimpleLruRequest(...)
Definition slru.h:218
static LWLock * SimpleLruGetBankLock(SlruDesc *ctl, int64 pageno)
Definition slru.h:207
PGPROC * MyProc
Definition proc.c:71
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendStringInfoChar(StringInfo str, char ch)
Definition stringinfo.c:242
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
Definition sync.h:51
TransactionId xid
Definition multixact.h:57
MultiXactStatus status
Definition multixact.h:58
MultiXactId multiWrapLimit
Definition multixact.c:164
MultiXactId multiStopLimit
Definition multixact.c:163
MultiXactId multiWarnLimit
Definition multixact.c:162
MultiXactId multiVacLimit
Definition multixact.c:161
MultiXactOffset nextOffset
Definition multixact.c:141
MultiXactId nextMXact
Definition multixact.c:138
MultiXactId oldestMultiXactId
Definition multixact.c:151
MultiXactId perBackendXactIds[FLEXIBLE_ARRAY_MEMBER]
Definition multixact.c:215
MultiXactOffset oldestOffset
Definition multixact.c:158
int delayChkptFlags
Definition proc.h:260
ShmemRequestCallback request_fn
Definition shmem.h:133
dlist_node * cur
Definition ilist.h:179
MultiXactId multi
Definition multixact.c:293
dlist_node node
Definition multixact.c:295
MultiXactMember members[FLEXIBLE_ARRAY_MEMBER]
Definition multixact.c:296
@ SYNC_HANDLER_MULTIXACT_MEMBER
Definition sync.h:41
@ SYNC_HANDLER_MULTIXACT_OFFSET
Definition sync.h:40
bool TransactionIdDidCommit(TransactionId transactionId)
Definition transam.c:126
#define TransactionIdEquals(id1, id2)
Definition transam.h:43
#define TransactionIdIsValid(xid)
Definition transam.h:41
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
ProcNumber TwoPhaseGetDummyProcNumber(FullTransactionId fxid, bool lock_held)
Definition twophase.c:914
void RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info, const void *data, uint32 len)
Definition twophase.c:1277
#define TWOPHASE_RM_MULTIXACT_ID
void AdvanceNextFullTransactionIdPastXid(TransactionId xid)
Definition varsup.c:299
const char * name
bool IsTransactionState(void)
Definition xact.c:389
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition xact.c:943
bool RecoveryInProgress(void)
Definition xlog.c:6830
void XLogFlush(XLogRecPtr record)
Definition xlog.c:2801
uint64 XLogRecPtr
Definition xlogdefs.h:21
XLogRecPtr XLogSimpleInsertInt64(RmgrId rmid, uint8 info, int64 value)
Definition xloginsert.c:547
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:482
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:372
void XLogBeginInsert(void)
Definition xloginsert.c:153
#define XLogRecGetInfo(decoder)
Definition xlogreader.h:410
#define XLogRecGetData(decoder)
Definition xlogreader.h:415
#define XLogRecGetXid(decoder)
Definition xlogreader.h:412
#define XLogRecHasAnyBlockRefs(decoder)
Definition xlogreader.h:417
bool InRecovery
Definition xlogutils.c:50