PostgreSQL Source Code git master
Loading...
Searching...
No Matches
multixact.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * multixact.c
4 * PostgreSQL multi-transaction-log manager
5 *
6 * The pg_multixact manager is a pg_xact-like manager that stores an array of
7 * MultiXactMember for each MultiXactId. It is a fundamental part of the
8 * shared-row-lock implementation. Each MultiXactMember is comprised of a
9 * TransactionId and a set of flag bits. The name is a bit historical:
10 * originally, a MultiXactId consisted of more than one TransactionId (except
11 * in rare corner cases), hence "multi". Nowadays, however, it's perfectly
12 * legitimate to have MultiXactIds that only include a single Xid.
13 *
14 * The meaning of the flag bits is opaque to this module, but they are mostly
15 * used in heapam.c to identify lock modes that each of the member transactions
16 * is holding on any given tuple. This module just contains support to store
17 * and retrieve the arrays.
18 *
19 * We use two SLRU areas, one for storing the offsets at which the data
20 * starts for each MultiXactId in the other one. This trick allows us to
21 * store variable length arrays of TransactionIds. (We could alternatively
22 * use one area containing counts and TransactionIds, with valid MultiXactId
23 * values pointing at slots containing counts; but that way seems less robust
24 * since it would get completely confused if someone inquired about a bogus
25 * MultiXactId that pointed to an intermediate slot containing an XID.)
26 *
27 * XLOG interactions: this module generates a record whenever a new OFFSETs or
28 * MEMBERs page is initialized to zeroes, as well as an
29 * XLOG_MULTIXACT_CREATE_ID record whenever a new MultiXactId is defined.
30 * This module ignores the WAL rule "write xlog before data," because it
31 * suffices that actions recording a MultiXactId in a heap xmax do follow that
32 * rule. The only way for the MXID to be referenced from any data page is for
33 * heap_lock_tuple() or heap_update() to have put it there, and each generates
34 * an XLOG record that must follow ours. The normal LSN interlock between the
35 * data page and that XLOG record will ensure that our XLOG record reaches
36 * disk first. If the SLRU members/offsets data reaches disk sooner than the
37 * XLOG records, we do not care; after recovery, no xmax will refer to it. On
38 * the flip side, to ensure that all referenced entries _do_ reach disk, this
39 * module's XLOG records completely rebuild the data entered since the last
40 * checkpoint. We flush and sync all dirty OFFSETs and MEMBERs pages to disk
41 * before each checkpoint is considered complete.
42 *
43 * Like clog.c, and unlike subtrans.c, we have to preserve state across
44 * crashes and ensure that MXID and offset numbering increases monotonically
45 * across a crash. We do this in the same way as it's done for transaction
46 * IDs: the WAL record is guaranteed to contain evidence of every MXID we
47 * could need to worry about, and we just make sure that at the end of
48 * replay, the next-MXID and next-offset counters are at least as large as
49 * anything we saw during replay.
50 *
51 * We are able to remove segments no longer necessary by carefully tracking
52 * each table's used values: during vacuum, any multixact older than a certain
53 * value is removed; the cutoff value is stored in pg_class. The minimum value
54 * across all tables in each database is stored in pg_database, and the global
55 * minimum across all databases is part of pg_control and is kept in shared
56 * memory. Whenever that minimum is advanced, the SLRUs are truncated.
57 *
58 * When new multixactid values are to be created, care is taken that the
59 * counter does not fall within the wraparound horizon considering the global
60 * minimum value.
61 *
62 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
63 * Portions Copyright (c) 1994, Regents of the University of California
64 *
65 * src/backend/access/transam/multixact.c
66 *
67 *-------------------------------------------------------------------------
68 */
69#include "postgres.h"
70
71#include "access/multixact.h"
73#include "access/slru.h"
74#include "access/twophase.h"
76#include "access/xlog.h"
77#include "access/xloginsert.h"
78#include "access/xlogutils.h"
79#include "miscadmin.h"
80#include "pg_trace.h"
81#include "pgstat.h"
83#include "storage/pmsignal.h"
84#include "storage/proc.h"
85#include "storage/procarray.h"
86#include "utils/guc_hooks.h"
88#include "utils/lsyscache.h"
89#include "utils/memutils.h"
90
91
92/*
93 * Thresholds used to keep members disk usage in check when multixids have a
94 * lot of members. When MULTIXACT_MEMBER_LOW_THRESHOLD is reached, vacuum
95 * starts freezing multixids more aggressively, even if the normal multixid
96 * age limits haven't been reached yet.
97 */
98#define MULTIXACT_MEMBER_LOW_THRESHOLD UINT64CONST(2000000000)
99#define MULTIXACT_MEMBER_HIGH_THRESHOLD UINT64CONST(4000000000)
100
101static inline MultiXactId
103{
104 return multi == MaxMultiXactId ? FirstMultiXactId : multi + 1;
105}
106
107static inline MultiXactId
109{
110 return multi == FirstMultiXactId ? MaxMultiXactId : multi - 1;
111}
112
113/*
114 * Links to shared-memory data structures for MultiXact control
115 */
118
119#define MultiXactOffsetCtl (&MultiXactOffsetCtlData)
120#define MultiXactMemberCtl (&MultiXactMemberCtlData)
121
122/*
123 * MultiXact state shared across all backends. All this state is protected
124 * by MultiXactGenLock. (We also use SLRU bank's lock of MultiXactOffset and
125 * MultiXactMember to guard accesses to the two sets of SLRU buffers. For
126 * concurrency's sake, we avoid holding more than one of these locks at a
127 * time.)
128 */
129typedef struct MultiXactStateData
130{
131 /* next-to-be-assigned MultiXactId */
133
134 /* next-to-be-assigned offset */
136
137 /* Have we completed multixact startup? */
139
140 /*
141 * Oldest multixact that is still potentially referenced by a relation.
142 * Anything older than this should not be consulted. These values are
143 * updated by vacuum.
144 */
147
148 /*
149 * Oldest multixact offset that is potentially referenced by a multixact
150 * referenced by a relation.
151 */
153
154 /* support for anti-wraparound measures */
159
160 /*
161 * Per-backend data starts here. We have two arrays stored in the area
162 * immediately following the MultiXactStateData struct:
163 *
164 * OldestMemberMXactId[k] is the oldest MultiXactId each backend's current
165 * transaction(s) could possibly be a member of, or InvalidMultiXactId
166 * when the backend has no live transaction that could possibly be a
167 * member of a MultiXact. Each backend sets its entry to the current
168 * nextMXact counter just before first acquiring a shared lock in a given
169 * transaction, and clears it at transaction end. (This works because only
170 * during or after acquiring a shared lock could an XID possibly become a
171 * member of a MultiXact, and that MultiXact would have to be created
172 * during or after the lock acquisition.)
173 *
174 * In the OldestMemberMXactId array, there's one slot for each normal
175 * backend (0..MaxBackends-1), followed by one slot for each of the
176 * max_prepared_xacts prepared transactions.
177 *
178 * OldestVisibleMXactId[k] is the oldest MultiXactId each backend's
179 * current transaction(s) think is potentially live, or InvalidMultiXactId
180 * when not in a transaction or not in a transaction that's paid any
181 * attention to MultiXacts yet. This is computed when first needed in a
182 * given transaction, and cleared at transaction end. We can compute it
183 * as the minimum of the valid OldestMemberMXactId[] entries at the time
184 * we compute it (using nextMXact if none are valid). Each backend is
185 * required not to attempt to access any SLRU data for MultiXactIds older
186 * than its own OldestVisibleMXactId[] setting; this is necessary because
187 * the relevant SLRU data can be concurrently truncated away.
188 *
189 * In the OldestVisibleMXactId array, there's one slot for each normal
190 * backend (0..MaxBackends-1) only. No slots for prepared transactions.
191 *
192 * The oldest valid value among all of the OldestMemberMXactId[] and
193 * OldestVisibleMXactId[] entries is considered by vacuum as the earliest
194 * possible value still having any live member transaction -- OldestMxact.
195 * Any value older than that is typically removed from tuple headers, or
196 * "frozen" via being replaced with a new xmax. VACUUM can sometimes even
197 * remove an individual MultiXact xmax whose value is >= its OldestMxact
198 * cutoff, though typically only when no individual member XID is still
199 * running. See FreezeMultiXactId for full details.
200 *
201 * Whenever VACUUM advances relminmxid, then either its OldestMxact cutoff
202 * or the oldest extant Multi remaining in the table is used as the new
203 * pg_class.relminmxid value (whichever is earlier). The minimum of all
204 * relminmxid values in each database is stored in pg_database.datminmxid.
205 * In turn, the minimum of all of those values is stored in pg_control.
206 * This is used as the truncation point for pg_multixact when unneeded
207 * segments get removed by vac_truncate_clog() during vacuuming.
208 */
211
212/*
213 * Sizes of OldestMemberMXactId and OldestVisibleMXactId arrays.
214 */
215#define NumMemberSlots (MaxBackends + max_prepared_xacts)
216#define NumVisibleSlots MaxBackends
217
218/* Pointers to the state data in shared memory */
222
223
224static inline MultiXactId *
226{
227 /*
228 * The first MaxBackends entries in the OldestMemberMXactId array are
229 * reserved for regular backends. MyProcNumber should index into one of
230 * them.
231 */
234}
235
236static inline MultiXactId *
238{
240
243
244 /*
245 * The first MaxBackends entries in the OldestMemberMXactId array are
246 * reserved for regular backends. Prepared xacts come after them.
247 */
250}
251
252static inline MultiXactId *
258
259/*
260 * Definitions for the backend-local MultiXactId cache.
261 *
262 * We use this cache to store known MultiXacts, so we don't need to go to
263 * SLRU areas every time.
264 *
265 * The cache lasts for the duration of a single transaction, the rationale
266 * for this being that most entries will contain our own TransactionId and
267 * so they will be uninteresting by the time our next transaction starts.
268 * (XXX not clear that this is correct --- other members of the MultiXact
269 * could hang around longer than we did. However, it's not clear what a
270 * better policy for flushing old cache entries would be.) FIXME actually
271 * this is plain wrong now that multixacts may contain update Xids.
272 *
273 * We allocate the cache entries in a memory context that is deleted at
274 * transaction end, so we don't need to do retail freeing of entries.
275 */
283
284#define MAX_CACHE_ENTRIES 256
287
/*
 * Debug-tracing helpers.  When MULTIXACT_DEBUG is defined, debug_elogN
 * forwards its N arguments unchanged to elog().  Otherwise each macro
 * expands to nothing, so neither the call nor its arguments appear in the
 * compiled code.
 */
288#ifdef MULTIXACT_DEBUG
289#define debug_elog2(a,b) elog(a,b)
290#define debug_elog3(a,b,c) elog(a,b,c)
291#define debug_elog4(a,b,c,d) elog(a,b,c,d)
292#define debug_elog5(a,b,c,d,e) elog(a,b,c,d,e)
293#define debug_elog6(a,b,c,d,e,f) elog(a,b,c,d,e,f)
294#else
295#define debug_elog2(a,b)
296#define debug_elog3(a,b,c)
297#define debug_elog4(a,b,c,d)
298#define debug_elog5(a,b,c,d,e)
299#define debug_elog6(a,b,c,d,e,f)
300#endif
301
302/* internal MultiXactId management */
303static void MultiXactIdSetOldestVisible(void);
304static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
305 int nmembers, MultiXactMember *members);
306static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset);
307
308/* MultiXact cache management */
309static int mxactMemberComparator(const void *arg1, const void *arg2);
310static MultiXactId mXactCacheGetBySet(int nmembers, MultiXactMember *members);
311static int mXactCacheGetById(MultiXactId multi, MultiXactMember **members);
312static void mXactCachePut(MultiXactId multi, int nmembers,
313 MultiXactMember *members);
314
315/* management of SLRU infrastructure */
316
317/* opaque_data type for MultiXactMemberIoErrorDetail */
323
326static int MultiXactOffsetIoErrorDetail(const void *opaque_data);
327static int MultiXactMemberIoErrorDetail(const void *opaque_data);
328static void ExtendMultiXactOffset(MultiXactId multi);
329static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
330static void SetOldestOffset(void);
331static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
332static void WriteMTruncateXlogRec(Oid oldestMultiDB,
335
336
337/*
338 * MultiXactIdCreate
339 * Construct a MultiXactId representing two TransactionIds.
340 *
341 * The two XIDs must be different, or be requesting different statuses.
342 *
343 * NB - we don't worry about our local MultiXactId cache here, because that
344 * is handled by the lower-level routines.
345 */
349{
351 MultiXactMember members[2];
352
355
357
358 /* MultiXactIdSetOldestMember() must have been called already. */
360
361 /*
362 * Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs
363 * are still running. In typical usage, xid2 will be our own XID and the
364 * caller just did a check on xid1, so it'd be wasted effort.
365 */
366
367 members[0].xid = xid1;
368 members[0].status = status1;
369 members[1].xid = xid2;
370 members[1].status = status2;
371
373
374 debug_elog3(DEBUG2, "Create: %s",
375 mxid_to_string(newMulti, 2, members));
376
377 return newMulti;
378}
379
380/*
381 * MultiXactIdExpand
382 * Add a TransactionId to a pre-existing MultiXactId.
383 *
384 * If the TransactionId is already a member of the passed MultiXactId with the
385 * same status, just return it as-is.
386 *
387 * Note that we do NOT actually modify the membership of a pre-existing
388 * MultiXactId; instead we create a new one. This is necessary to avoid
389 * a race condition against code trying to wait for one MultiXactId to finish;
390 * see notes in heapam.c.
391 *
392 * NB - we don't worry about our local MultiXactId cache here, because that
393 * is handled by the lower-level routines.
394 *
395 * Note: It is critical that MultiXactIds that come from an old cluster (i.e.
396 * one upgraded by pg_upgrade from a cluster older than this feature) are not
397 * passed in.
398 */
401{
403 MultiXactMember *members;
405 int nmembers;
406 int i;
407 int j;
408
411
412 /* MultiXactIdSetOldestMember() must have been called already. */
414
415 debug_elog5(DEBUG2, "Expand: received multi %u, xid %u status %s",
416 multi, xid, mxstatus_to_string(status));
417
418 /*
419 * Note: we don't allow for old multis here. The reason is that the only
420 * caller of this function does a check that the multixact is no longer
421 * running.
422 */
423 nmembers = GetMultiXactIdMembers(multi, &members, false, false);
424
425 if (nmembers < 0)
426 {
427 MultiXactMember member;
428
429 /*
430 * The MultiXactId is obsolete. This can only happen if all the
431 * MultiXactId members stop running between the caller checking and
432 * passing it to us. It would be better to return that fact to the
433 * caller, but it would complicate the API and it's unlikely to happen
434 * too often, so just deal with it by creating a singleton MultiXact.
435 */
436 member.xid = xid;
437 member.status = status;
439
440 debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u",
441 multi, newMulti);
442 return newMulti;
443 }
444
445 /*
446 * If the TransactionId is already a member of the MultiXactId with the
447 * same status, just return the existing MultiXactId.
448 */
449 for (i = 0; i < nmembers; i++)
450 {
451 if (TransactionIdEquals(members[i].xid, xid) &&
452 (members[i].status == status))
453 {
454 debug_elog4(DEBUG2, "Expand: %u is already a member of %u",
455 xid, multi);
456 pfree(members);
457 return multi;
458 }
459 }
460
461 /*
462 * Determine which of the members of the MultiXactId are still of
463 * interest. This is any running transaction, and also any transaction
464 * that grabbed something stronger than just a lock and was committed. (An
465 * update that aborted is of no interest here; and having more than one
466 * update Xid in a multixact would cause errors elsewhere.)
467 *
468 * Removing dead members is not just an optimization: freezing of tuples
469 * whose Xmax are multis depends on this behavior.
470 *
471 * Note we have the same race condition here as above: j could be 0 at the
472 * end of the loop.
473 */
474 newMembers = palloc_array(MultiXactMember, nmembers + 1);
475
476 for (i = 0, j = 0; i < nmembers; i++)
477 {
478 if (TransactionIdIsInProgress(members[i].xid) ||
479 (ISUPDATE_from_mxstatus(members[i].status) &&
480 TransactionIdDidCommit(members[i].xid)))
481 {
482 newMembers[j].xid = members[i].xid;
483 newMembers[j++].status = members[i].status;
484 }
485 }
486
487 newMembers[j].xid = xid;
488 newMembers[j++].status = status;
490
491 pfree(members);
493
494 debug_elog3(DEBUG2, "Expand: returning new multi %u", newMulti);
495
496 return newMulti;
497}
498
499/*
500 * MultiXactIdIsRunning
501 * Returns whether a MultiXactId is "running".
502 *
503 * We return true if at least one member of the given MultiXactId is still
504 * running. Note that a "false" result is certain not to change,
505 * because it is not legal to add members to an existing MultiXactId.
506 *
507 * Caller is expected to have verified that the multixact does not come from
508 * a pg_upgraded share-locked tuple.
509 */
510bool
512{
513 MultiXactMember *members;
514 int nmembers;
515 int i;
516
517 debug_elog3(DEBUG2, "IsRunning %u?", multi);
518
519 /*
520 * "false" here means we assume our callers have checked that the given
521 * multi cannot possibly come from a pg_upgraded database.
522 */
523 nmembers = GetMultiXactIdMembers(multi, &members, false, isLockOnly);
524
525 if (nmembers <= 0)
526 {
527 debug_elog2(DEBUG2, "IsRunning: no members");
528 return false;
529 }
530
531 /*
532 * Checking for myself is cheap compared to looking in shared memory;
533 * return true if any live subtransaction of the current top-level
534 * transaction is a member.
535 *
536 * This is not needed for correctness, it's just a fast path.
537 */
538 for (i = 0; i < nmembers; i++)
539 {
540 if (TransactionIdIsCurrentTransactionId(members[i].xid))
541 {
542 debug_elog3(DEBUG2, "IsRunning: I (%d) am running!", i);
543 pfree(members);
544 return true;
545 }
546 }
547
548 /*
549 * This could be made faster by having another entry point in procarray.c,
550 * walking the PGPROC array only once for all the members. But in most
551 * cases nmembers should be small enough that it doesn't much matter.
552 */
553 for (i = 0; i < nmembers; i++)
554 {
555 if (TransactionIdIsInProgress(members[i].xid))
556 {
557 debug_elog4(DEBUG2, "IsRunning: member %d (%u) is running",
558 i, members[i].xid);
559 pfree(members);
560 return true;
561 }
562 }
563
564 pfree(members);
565
566 debug_elog3(DEBUG2, "IsRunning: %u is not running", multi);
567
568 return false;
569}
570
571/*
572 * MultiXactIdSetOldestMember
573 * Save the oldest MultiXactId this transaction could be a member of.
574 *
575 * We set the OldestMemberMXactId for a given transaction the first time it's
576 * going to do some operation that might require a MultiXactId (tuple lock,
577 * update or delete). We need to do this even if we end up using a
578 * TransactionId instead of a MultiXactId, because there is a chance that
579 * another transaction would add our XID to a MultiXactId.
580 *
581 * The value to set is the next-to-be-assigned MultiXactId, so this is meant to
582 * be called just before doing any such possibly-MultiXactId-able operation.
583 */
584void
586{
588 {
589 MultiXactId nextMXact;
590
591 /*
592 * You might think we don't need to acquire a lock here, since
593 * fetching and storing of TransactionIds is probably atomic, but in
594 * fact we do: suppose we pick up nextMXact and then lose the CPU for
595 * a long time. Someone else could advance nextMXact, and then
596 * another someone else could compute an OldestVisibleMXactId that
597 * would be after the value we are going to store when we get control
598 * back. Which would be wrong.
599 *
600 * Note that a shared lock is sufficient, because it's enough to stop
601 * someone from advancing nextMXact; and nobody else could be trying
602 * to write to our OldestMember entry, only reading (and we assume
603 * storing it is atomic.)
604 */
606
607 nextMXact = MultiXactState->nextMXact;
608
609 *MyOldestMemberMXactIdSlot() = nextMXact;
610
612
613 debug_elog4(DEBUG2, "MultiXact: setting OldestMember[%d] = %u",
614 MyProcNumber, nextMXact);
615 }
616}
617
618/*
619 * MultiXactIdSetOldestVisible
620 * Save the oldest MultiXactId this transaction considers possibly live.
621 *
622 * We set the OldestVisibleMXactId for a given transaction the first time
623 * it's going to inspect any MultiXactId. Once we have set this, we are
624 * guaranteed that SLRU data for MultiXactIds >= our own OldestVisibleMXactId
625 * won't be truncated away.
626 *
627 * The value to set is the oldest of nextMXact and all the valid per-backend
628 * OldestMemberMXactId[] entries. Because of the locking we do, we can be
629 * certain that no subsequent call to MultiXactIdSetOldestMember can set
630 * an OldestMemberMXactId[] entry older than what we compute here. Therefore
631 * there is no live transaction, now or later, that can be a member of any
632 * MultiXactId older than the OldestVisibleMXactId we compute here.
633 */
634static void
662
663/*
664 * ReadNextMultiXactId
665 * Return the next MultiXactId to be assigned, but don't allocate it
666 */
669{
671
672 /* XXX we could presumably do this without a lock. */
676
677 return mxid;
678}
679
680/*
681 * ReadMultiXactIdRange
682 * Get the range of IDs that may still be referenced by a relation.
683 */
684void
692
693
694/*
695 * MultiXactIdCreateFromMembers
696 * Make a new MultiXactId from the specified set of members
697 *
698 * Make XLOG, SLRU and cache entries for a new MultiXactId, recording the
699 * given TransactionIds as members. Returns the newly created MultiXactId.
700 *
701 * NB: the passed members[] array will be sorted in-place.
702 */
705{
706 MultiXactId multi;
707 MultiXactOffset offset;
709
710 debug_elog3(DEBUG2, "Create: %s",
711 mxid_to_string(InvalidMultiXactId, nmembers, members));
712
713 /*
714 * See if the same set of members already exists in our cache; if so, just
715 * re-use that MultiXactId. (Note: it might seem that looking in our
716 * cache is insufficient, and we ought to search disk to see if a
717 * duplicate definition already exists. But since we only ever create
718 * MultiXacts containing our own XID, in most cases any such MultiXacts
719 * were in fact created by us, and so will be in our cache. There are
720 * corner cases where someone else added us to a MultiXact without our
721 * knowledge, but it's not worth checking for.)
722 */
723 multi = mXactCacheGetBySet(nmembers, members);
724 if (MultiXactIdIsValid(multi))
725 {
726 debug_elog2(DEBUG2, "Create: in cache!");
727 return multi;
728 }
729
730 /* Verify that there is at most one update Xid among the given members. */
731 {
732 int i;
733 bool has_update = false;
734
735 for (i = 0; i < nmembers; i++)
736 {
737 if (ISUPDATE_from_mxstatus(members[i].status))
738 {
739 if (has_update)
740 elog(ERROR, "new multixact has more than one updating member: %s",
741 mxid_to_string(InvalidMultiXactId, nmembers, members));
742 has_update = true;
743 }
744 }
745 }
746
747 /* Load the injection point before entering the critical section */
748 INJECTION_POINT_LOAD("multixact-create-from-members");
749
750 /*
751 * Assign the MXID and offsets range to use, and make sure there is space
752 * in the OFFSETs and MEMBERs files. NB: this routine does
753 * START_CRIT_SECTION().
754 *
755 * Note: unlike MultiXactIdCreate and MultiXactIdExpand, we do not check
756 * that we've called MultiXactIdSetOldestMember here. This is because
757 * this routine is used in some places to create new MultiXactIds of which
758 * the current backend is not a member, notably during freezing of multis
759 * in vacuum. During vacuum, in particular, it would be unacceptable to
760 * keep OldestMulti set, in case it runs for long.
761 */
762 multi = GetNewMultiXactId(nmembers, &offset);
763
764 INJECTION_POINT_CACHED("multixact-create-from-members", NULL);
765
766 /* Make an XLOG entry describing the new MXID. */
767 xlrec.mid = multi;
768 xlrec.moff = offset;
769 xlrec.nmembers = nmembers;
770
771 /*
772 * XXX Note: there's a lot of padding space in MultiXactMember. We could
773 * find a more compact representation of this Xlog record -- perhaps all
774 * the status flags in one XLogRecData, then all the xids in another one?
775 * Not clear that it's worth the trouble though.
776 */
779 XLogRegisterData(members, nmembers * sizeof(MultiXactMember));
780
782
783 /* Now enter the information into the OFFSETs and MEMBERs logs */
784 RecordNewMultiXact(multi, offset, nmembers, members);
785
786 /* Done with critical section */
788
789 /* Store the new MultiXactId in the local cache, too */
790 mXactCachePut(multi, nmembers, members);
791
792 debug_elog2(DEBUG2, "Create: all done");
793
794 return multi;
795}
796
797/*
798 * RecordNewMultiXact
799 * Write info about a new multixact into the offsets and members files
800 *
801 * This is broken out of MultiXactIdCreateFromMembers so that xlog replay can
802 * use it.
803 */
804static void
806 int nmembers, MultiXactMember *members)
807{
808 int64 pageno;
810 int entryno;
811 int slotno;
815 int next_entryno;
817 MultiXactOffset next_offset;
818 LWLock *lock;
820
821 /* position of this multixid in the offsets SLRU area */
822 pageno = MultiXactIdToOffsetPage(multi);
824
825 /* position of the next multixid */
826 next = NextMultiXactId(multi);
829
830 /*
831 * Set the starting offset of this multixid's members.
832 *
833 * In the common case, it was already set by the previous
834 * RecordNewMultiXact call, as this was the next multixid of the previous
835 * multixid. But if multiple backends are generating multixids
836 * concurrently, we might race ahead and get called before the previous
837 * multixid.
838 */
841
842 slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, &multi);
843 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
844 offptr += entryno;
845
846 if (*offptr != offset)
847 {
848 /* should already be set to the correct value, or not at all */
849 Assert(*offptr == 0);
850 *offptr = offset;
851 MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
852 }
853
854 /*
855 * Set the next multixid's offset to the end of this multixid's members.
856 */
857 if (next_pageno == pageno)
858 {
859 next_offptr = offptr + 1;
860 }
861 else
862 {
863 /* must be the first entry on the page */
865
866 /* Swap the lock for a lock on the next page */
867 LWLockRelease(lock);
870
872 next_offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
874 }
875
876 /* Like in GetNewMultiXactId(), skip over offset 0 */
877 next_offset = offset + nmembers;
878 if (next_offset == 0)
879 next_offset = 1;
880 if (*next_offptr != next_offset)
881 {
882 /* should already be set to the correct value, or not at all */
883 Assert(*next_offptr == 0);
884 *next_offptr = next_offset;
885 MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
886 }
887
888 /* Release MultiXactOffset SLRU lock. */
889 LWLockRelease(lock);
890
891 prev_pageno = -1;
892
893 for (int i = 0; i < nmembers; i++, offset++)
894 {
898 int bshift;
899 int flagsoff;
900 int memberoff;
901
902 Assert(members[i].status <= MultiXactStatusUpdate);
903
904 pageno = MXOffsetToMemberPage(offset);
908
909 if (pageno != prev_pageno)
910 {
912
913 /*
914 * The MultiXactMember SLRU page has changed, so check whether the
915 * new page falls into a different SLRU bank; if it does, release
916 * the old bank's lock and acquire the lock on the new bank.
917 */
919 if (lock != prevlock)
920 {
921 if (prevlock != NULL)
923
925 prevlock = lock;
926 }
929 prev_pageno = pageno;
930 }
931
933 (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
934
935 *memberptr = members[i].xid;
936
937 flagsptr = (uint32 *)
938 (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
939
941 flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
942 flagsval |= (members[i].status << bshift);
944
945 MultiXactMemberCtl->shared->page_dirty[slotno] = true;
946 }
947
948 if (prevlock != NULL)
950}
951
952/*
953 * GetNewMultiXactId
954 * Get the next MultiXactId.
955 *
956 * Also, reserve the needed amount of space in the "members" area. The
957 * starting offset of the reserved space is returned in *offset.
958 *
959 * This may generate XLOG records for expansion of the offsets and/or members
960 * files. Unfortunately, we have to do that while holding MultiXactGenLock
961 * to avoid race conditions --- the XLOG record for zeroing a page must appear
962 * before any backend can possibly try to store data in that page!
963 *
964 * We start a critical section before advancing the shared counters. The
965 * caller must end the critical section after writing SLRU data.
966 */
967static MultiXactId
968GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
969{
970 MultiXactId result;
971 MultiXactOffset nextOffset;
972
973 debug_elog3(DEBUG2, "GetNew: for %d xids", nmembers);
974
975 /* safety check, we should never get this far in a HS standby */
976 if (RecoveryInProgress())
977 elog(ERROR, "cannot assign MultiXactIds during recovery");
978
980
981 /* Assign the MXID */
982 result = MultiXactState->nextMXact;
983
984 /*----------
985 * Check to see if it's safe to assign another MultiXactId. This protects
986 * against catastrophic data loss due to multixact wraparound. The basic
987 * rules are:
988 *
989 * If we're past multiVacLimit or the safe threshold for member storage
990 * space, or we don't know what the safe threshold for member storage is,
991 * start trying to force autovacuum cycles.
992 * If we're past multiWarnLimit, start issuing warnings.
993 * If we're past multiStopLimit, refuse to create new MultiXactIds.
994 *
995 * Note these are pretty much the same protections as in GetNewTransactionId.
996 *----------
997 */
999 {
1000 /*
1001 * For safety's sake, we release MultiXactGenLock while sending
1002 * signals, warnings, etc. This is not so much because we care about
1003 * preserving concurrency in this situation, as to avoid any
1004 * possibility of deadlock while doing get_database_name(). First,
1005 * copy all the shared values we'll need in this path.
1006 */
1007 MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit;
1008 MultiXactId multiStopLimit = MultiXactState->multiStopLimit;
1009 MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit;
1011
1013
1014 if (IsUnderPostmaster &&
1015 !MultiXactIdPrecedes(result, multiStopLimit))
1016 {
1018
1019 /*
1020 * Immediately kick autovacuum into action as we're already in
1021 * ERROR territory.
1022 */
1024
1025 /* complain even if that DB has disappeared */
1026 if (oldest_datname)
1027 ereport(ERROR,
1029 errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database \"%s\"",
1031 errhint("Execute a database-wide VACUUM in that database.\n"
1032 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1033 else
1034 ereport(ERROR,
1036 errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database with OID %u",
1038 errhint("Execute a database-wide VACUUM in that database.\n"
1039 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1040 }
1041
1042 /*
1043 * To avoid swamping the postmaster with signals, we issue the autovac
1044 * request only once per 64K multis generated. This still gives
1045 * plenty of chances before we get into real trouble.
1046 */
1047 if (IsUnderPostmaster && ((result % 65536) == 0 || result == FirstMultiXactId))
1049
1050 if (!MultiXactIdPrecedes(result, multiWarnLimit))
1051 {
1053
1054 /* complain even if that DB has disappeared */
1055 if (oldest_datname)
1057 (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
1058 "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
1059 multiWrapLimit - result,
1061 multiWrapLimit - result),
1062 errdetail("Approximately %.2f%% of MultiXactIds are available for use.",
1063 (double) (multiWrapLimit - result) / (MaxMultiXactId / 2) * 100),
1064 errhint("Execute a database-wide VACUUM in that database.\n"
1065 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1066 else
1068 (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
1069 "database with OID %u must be vacuumed before %u more MultiXactIds are used",
1070 multiWrapLimit - result,
1072 multiWrapLimit - result),
1073 errdetail("Approximately %.2f%% of MultiXactIds are available for use.",
1074 (double) (multiWrapLimit - result) / (MaxMultiXactId / 2) * 100),
1075 errhint("Execute a database-wide VACUUM in that database.\n"
1076 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1077 }
1078
1079 /* Re-acquire lock and start over */
1081 result = MultiXactState->nextMXact;
1082 }
1083
1084 /*
1085 * Make sure there is room for the next MXID in the file. Assigning this
1086 * MXID sets the next MXID's offset already.
1087 */
1089
1090 /*
1091 * Reserve the members space, similarly to above.
1092 */
1093 nextOffset = MultiXactState->nextOffset;
1094
1095 /*
1096 * Offsets are 64-bit integers and will never wrap around. Firstly, it
1097 * would take an unrealistic amount of time and resources to consume 2^64
1098 * offsets. Secondly, multixid creation is WAL-logged, so you would run
1099 * out of LSNs before reaching offset wraparound. Nevertheless, check for
1100 * wraparound as a sanity check.
1101 */
1102 if (nextOffset + nmembers < nextOffset)
1103 ereport(ERROR,
1105 errmsg("MultiXact members would wrap around")));
1106 *offset = nextOffset;
1107
1108 ExtendMultiXactMember(nextOffset, nmembers);
1109
1110 /*
1111 * Critical section from here until caller has written the data into the
1112 * just-reserved SLRU space; we don't want to error out with a partly
1113 * written MultiXact structure. (In particular, failing to write our
1114 * start offset after advancing nextMXact would effectively corrupt the
1115 * previous MultiXact.)
1116 */
1118
1119 /*
1120 * Advance counters. As in GetNewTransactionId(), this must not happen
1121 * until after file extension has succeeded!
1122 */
1124 MultiXactState->nextOffset += nmembers;
1125
1127
1128 debug_elog4(DEBUG2, "GetNew: returning %u offset %" PRIu64,
1129 result, *offset);
1130 return result;
1131}
1132
1133/*
1134 * GetMultiXactIdMembers
1135 * Return the set of MultiXactMembers that make up a MultiXactId
1136 *
1137 * Return value is the number of members found, or -1 if there are none,
1138 * and *members is set to a newly palloc'ed array of members. It's the
1139 * caller's responsibility to free it when done with it.
1140 *
1141 * from_pgupgrade must be passed as true if and only if the multixact
1142 * corresponds to a value from a tuple that was locked in a 9.2-or-older
1143 * installation and later pg_upgrade'd (that is, the infomask is
1144 * HEAP_LOCKED_UPGRADED). In this case, we know for certain that no members
1145 * can still be running, so we return -1 just like for an empty multixact
1146 * without any further checking. It would be wrong to try to resolve such a
1147 * multixact: either the multixact is within the current valid multixact
1148 * range, in which case the returned result would be bogus, or outside that
1149 * range, in which case an error would be raised.
1150 *
1151 * In all other cases, the passed multixact must be within the known valid
1152 * range, that is, greater than or equal to oldestMultiXactId, and less than
1153 * nextMXact. Otherwise, an error is raised.
1154 *
1155 * isLockOnly must be set to true if caller is certain that the given multi
1156 * is used only to lock tuples; can be false without loss of correctness,
1157 * but passing true means we can return quickly without checking for
1158 * old updates.
1159 */
1160int
1162 bool from_pgupgrade, bool isLockOnly)
1163{
1164 int64 pageno;
1166 int entryno;
1167 int slotno;
1169 MultiXactOffset offset;
1171 int length;
1173 MultiXactId nextMXact;
1174 MultiXactMember *ptr;
1175 LWLock *lock;
1176
1177 debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
1178
1179 if (!MultiXactIdIsValid(multi) || from_pgupgrade)
1180 {
1181 *members = NULL;
1182 return -1;
1183 }
1184
1185 /* See if the MultiXactId is in the local cache */
1186 length = mXactCacheGetById(multi, members);
1187 if (length >= 0)
1188 {
1189 debug_elog3(DEBUG2, "GetMembers: found %s in the cache",
1190 mxid_to_string(multi, length, *members));
1191 return length;
1192 }
1193
1194 /* Set our OldestVisibleMXactId[] entry if we didn't already */
1196
1197 /*
1198 * If we know the multi is used only for locking and not for updates, then
1199 * we can skip checking if the value is older than our oldest visible
1200 * multi. It cannot possibly still be running.
1201 */
1202 if (isLockOnly &&
1204 {
1205 debug_elog2(DEBUG2, "GetMembers: a locker-only multi is too old");
1206 *members = NULL;
1207 return -1;
1208 }
1209
1210 /*
1211 * We check known limits on MultiXact before resorting to the SLRU area.
1212 *
1213 * An ID older than MultiXactState->oldestMultiXactId cannot possibly be
1214 * useful; it has already been removed, or will be removed shortly, by
1215 * truncation. If one is passed, an error is raised.
1216 *
1217 * Also, an ID >= nextMXact shouldn't ever be seen here; if it is seen, it
1218 * implies undetected ID wraparound has occurred. This raises a hard
1219 * error.
1220 *
1221 * Shared lock is enough here since we aren't modifying any global state.
1222 * Acquire it just long enough to grab the current counter values.
1223 */
1225
1227 nextMXact = MultiXactState->nextMXact;
1228
1230
1231 if (MultiXactIdPrecedes(multi, oldestMXact))
1232 ereport(ERROR,
1234 errmsg("MultiXactId %u does no longer exist -- apparent wraparound",
1235 multi)));
1236
1237 if (!MultiXactIdPrecedes(multi, nextMXact))
1238 ereport(ERROR,
1240 errmsg("MultiXactId %u has not been created yet -- apparent wraparound",
1241 multi)));
1242
1243 /*
1244 * Find out the offset at which we need to start reading MultiXactMembers
1245 * and the number of members in the multixact. We determine the latter as
1246 * the difference between this multixact's starting offset and the next
1247 * one's.
1248 */
1249 pageno = MultiXactIdToOffsetPage(multi);
1251
1252 /* Acquire the bank lock for the page we need. */
1255
1256 /* read this multi's offset */
1257 slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, &multi);
1258 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1259 offptr += entryno;
1260 offset = *offptr;
1261
1262 if (offset == 0)
1263 ereport(ERROR,
1265 errmsg("MultiXact %u has invalid offset", multi)));
1266
1267 /* read next multi's offset */
1268 {
1270
1271 /* handle wraparound if needed */
1272 tmpMXact = NextMultiXactId(multi);
1273
1274 prev_pageno = pageno;
1275
1278
1279 if (pageno != prev_pageno)
1280 {
1281 LWLock *newlock;
1282
1283 /*
1284 * Since we're going to access a different SLRU page, if this page
1285 * falls under a different bank, release the old bank's lock and
1286 * acquire the lock of the new bank.
1287 */
1289 if (newlock != lock)
1290 {
1291 LWLockRelease(lock);
1293 lock = newlock;
1294 }
1296 }
1297
1298 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1299 offptr += entryno;
1301 }
1302
1303 LWLockRelease(lock);
1304 lock = NULL;
1305
1306 /* Sanity check the next offset */
1307 if (nextMXOffset == 0)
1308 ereport(ERROR,
1310 errmsg("MultiXact %u has invalid next offset", multi)));
1311 if (nextMXOffset == offset)
1312 ereport(ERROR,
1314 errmsg("MultiXact %u with offset (%" PRIu64 ") has zero members",
1315 multi, offset)));
1316 if (nextMXOffset < offset)
1317 ereport(ERROR,
1319 errmsg("MultiXact %u has offset (%" PRIu64 ") greater than its next offset (%" PRIu64 ")",
1320 multi, offset, nextMXOffset)));
1321 if (nextMXOffset - offset > INT32_MAX)
1322 ereport(ERROR,
1324 errmsg("MultiXact %u has too many members (%" PRIu64 ")",
1325 multi, nextMXOffset - offset)));
1326 length = nextMXOffset - offset;
1327
1328 /* read the members */
1329 ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
1330 prev_pageno = -1;
1331 for (int i = 0; i < length; i++, offset++)
1332 {
1335 int flagsoff;
1336 int bshift;
1337 int memberoff;
1338
1339 pageno = MXOffsetToMemberPage(offset);
1341
1342 if (pageno != prev_pageno)
1343 {
1345 LWLock *newlock;
1346
1347 /*
1348 * Since we're going to access a different SLRU page, if this page
1349 * falls under a different bank, release the old bank's lock and
1350 * acquire the lock of the new bank.
1351 */
1353 if (newlock != lock)
1354 {
1355 if (lock)
1356 LWLockRelease(lock);
1358 lock = newlock;
1359 }
1362 prev_pageno = pageno;
1363 }
1364
1366 (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
1368
1371 flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
1372
1373 ptr[i].xid = *xactptr;
1375 }
1376
1377 LWLockRelease(lock);
1378
1379 /*
1380 * Copy the result into the local cache.
1381 */
1382 mXactCachePut(multi, length, ptr);
1383
1384 debug_elog3(DEBUG2, "GetMembers: no cache for %s",
1385 mxid_to_string(multi, length, ptr));
1386 *members = ptr;
1387 return length;
1388}
1389
1390/*
1391 * mxactMemberComparator
1392 * qsort comparison function for MultiXactMember
1393 *
1394 * We can't use wraparound comparison for XIDs because that does not respect
1395 * the triangle inequality! Any old sort order will do.
1396 */
1397static int
1398mxactMemberComparator(const void *arg1, const void *arg2)
1399{
1402
     /* Primary sort key: xid, compared with the plain > and < operators. */
1403 if (member1.xid > member2.xid)
1404 return 1;
1405 if (member1.xid < member2.xid)
1406 return -1;
     /* Same xid: fall back to status so that ties are broken consistently. */
1407 if (member1.status > member2.status)
1408 return 1;
1409 if (member1.status < member2.status)
1410 return -1;
     /* Both xid and status match: the two members compare as equal. */
1411 return 0;
1412}
1413
1414/*
1415 * mXactCacheGetBySet
1416 * returns a MultiXactId from the cache based on the set of
1417 * TransactionIds that compose it, or InvalidMultiXactId if
1418 * none matches.
1419 *
1420 * This is helpful, for example, if two transactions want to lock a huge
1421 * table. By using the cache, the second will use the same MultiXactId
1422 * for the majority of tuples, thus keeping MultiXactId usage low (saving
1423 * both I/O and wraparound issues).
1424 *
1425 * NB: the passed members array will be sorted in-place.
1426 */
1427static MultiXactId
1429{
1430 dlist_iter iter;
1431
1432 debug_elog3(DEBUG2, "CacheGet: looking for %s",
1433 mxid_to_string(InvalidMultiXactId, nmembers, members));
1434
1435 /* sort the array so comparison is easy */
1436 qsort(members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
1437
1439 {
1441 iter.cur);
1442
1443 if (entry->nmembers != nmembers)
1444 continue;
1445
1446 /*
1447 * We assume the cache entries are sorted, and that the unused bits in
1448 * "status" are zeroed.
1449 */
1450 if (memcmp(members, entry->members, nmembers * sizeof(MultiXactMember)) == 0)
1451 {
1452 debug_elog3(DEBUG2, "CacheGet: found %u", entry->multi);
1454 return entry->multi;
1455 }
1456 }
1457
1458 debug_elog2(DEBUG2, "CacheGet: not found :-(");
1459 return InvalidMultiXactId;
1460}
1461
1462/*
1463 * mXactCacheGetById
1464 * returns the composing MultiXactMember set from the cache for a
1465 * given MultiXactId, if present.
1466 *
1467 * If successful, *members is set to the address of a palloc'd copy of the
1468 * MultiXactMember set. Return value is number of members, or -1 on failure.
1469 */
1470static int
1472{
1473 dlist_iter iter;
1474
1475 debug_elog3(DEBUG2, "CacheGet: looking for %u", multi);
1476
1478 {
1480 iter.cur);
1481
1482 if (entry->multi == multi)
1483 {
1484 MultiXactMember *ptr;
1485 Size size;
1486
1487 size = sizeof(MultiXactMember) * entry->nmembers;
1488 ptr = (MultiXactMember *) palloc(size);
1489
1490 memcpy(ptr, entry->members, size);
1491
1492 debug_elog3(DEBUG2, "CacheGet: found %s",
1493 mxid_to_string(multi,
1494 entry->nmembers,
1495 entry->members));
1496
1497 /*
1498 * Note we modify the list while not using a modifiable iterator.
1499 * This is acceptable only because we exit the iteration
1500 * immediately afterwards.
1501 */
1503
1504 *members = ptr;
1505 return entry->nmembers;
1506 }
1507 }
1508
1509 debug_elog2(DEBUG2, "CacheGet: not found");
1510 return -1;
1511}
1512
1513/*
1514 * mXactCachePut
1515 * Add a new MultiXactId and its composing set into the local cache.
1516 */
1517static void
1518mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members)
1519{
1520 mXactCacheEnt *entry;
1521
1522 debug_elog3(DEBUG2, "CachePut: storing %s",
1523 mxid_to_string(multi, nmembers, members));
1524
1525 if (MXactContext == NULL)
1526 {
1527 /* The cache only lives as long as the current transaction */
1528 debug_elog2(DEBUG2, "CachePut: initializing memory context");
1530 "MultiXact cache context",
1532 }
1533
     /*
      * The entry is sized as the fixed part of mXactCacheEnt (everything up
      * to the members field) plus room for nmembers trailing members.
      */
1534 entry = (mXactCacheEnt *)
1536 offsetof(mXactCacheEnt, members) +
1537 nmembers * sizeof(MultiXactMember));
1538
1539 entry->multi = multi;
1540 entry->nmembers = nmembers;
     /*
      * Copy the passed array into the entry. The sort below operates on this
      * cached copy, so the array passed in by the caller keeps its order.
      */
1541 memcpy(entry->members, members, nmembers * sizeof(MultiXactMember));
1542
1543 /* mXactCacheGetBySet assumes the entries are sorted, so sort them */
1544 qsort(entry->members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
1545
     /* New entries are inserted at the head of the cache list. */
1546 dclist_push_head(&MXactCache, &entry->node);
1548 {
1549 dlist_node *node;
1550
1553
     /*
      * Note that entry is reassigned here: from this point on it refers to
      * the element being pruned, not to the one that was just inserted.
      */
1554 entry = dclist_container(mXactCacheEnt, node, node);
1555 debug_elog3(DEBUG2, "CachePut: pruning cached multi %u",
1556 entry->multi);
1557
1558 pfree(entry);
1559 }
1560}
1561
1562char *
1564{
1565 switch (status)
1566 {
1568 return "keysh";
1570 return "sh";
1572 return "fornokeyupd";
1574 return "forupd";
1576 return "nokeyupd";
1578 return "upd";
1579 default:
1580 elog(ERROR, "unrecognized multixact status %d", status);
1581 return "";
1582 }
1583}
1584
/*
 * mxid_to_string
 *		Format a multixact and its members as text.
 *
 * The text starts with the multi and the member count, followed by the xid
 * of each member and the name that mxstatus_to_string() returns for its
 * status.
 *
 * members[0] is read unconditionally, so callers must pass at least one
 * member.
 *
 * The returned pointer is also remembered in a function-local static and is
 * released with pfree() at the start of the next call, so the string is only
 * valid until mxid_to_string() is called again.
 */
1585char *
1586mxid_to_string(MultiXactId multi, int nmembers, MultiXactMember *members)
1587{
1588 static char *str = NULL;
1590 int i;
1591
     /* Release the string handed out by the previous call, if any. */
1592 if (str != NULL)
1593 pfree(str);
1594
1596
     /* The first member is written without a leading separator. */
1597 appendStringInfo(&buf, "%u %d[%u (%s)", multi, nmembers, members[0].xid,
1598 mxstatus_to_string(members[0].status));
1599
     /* Remaining members are each preceded by a comma and a space. */
1600 for (i = 1; i < nmembers; i++)
1601 appendStringInfo(&buf, ", %u (%s)", members[i].xid,
1602 mxstatus_to_string(members[i].status));
1603
     /* The working buffer is released here; str is what gets returned. */
1606 pfree(buf.data);
1607 return str;
1608}
1609
1610/*
1611 * AtEOXact_MultiXact
1612 * Handle transaction end for MultiXact
1613 *
1614 * This is called at top transaction commit or abort (we don't care which).
1615 */
1616void
1618{
1619 /*
1620 * Reset our OldestMemberMXactId and OldestVisibleMXactId values, both of
1621 * which should only be valid while within a transaction.
1622 *
1623 * We assume that storing a MultiXactId is atomic and so we need not take
1624 * MultiXactGenLock to do this.
1625 */
1628
1629 /*
1630 * Discard the local MultiXactId cache. Since MXactContext was created as
1631 * a child of TopTransactionContext, we needn't delete it explicitly.
1632 */
1635}
1636
1637/*
1638 * AtPrepare_MultiXact
1639 * Save multixact state at 2PC transaction prepare
1640 *
1641 * In this phase, we only store our OldestMemberMXactId value in the two-phase
1642 * state file.
1643 */
1644void
1653
1654/*
1655 * PostPrepare_MultiXact
1656 * Clean up after successful PREPARE TRANSACTION
1657 */
1658void
1660{
1662
1663 /*
1664 * Transfer our OldestMemberMXactId value to the slot reserved for the
1665 * prepared transaction.
1666 */
1669 {
1671
1672 /*
1673 * Even though storing MultiXactId is atomic, acquire lock to make
1674 * sure others see both changes, not just the reset of the slot of the
1675 * current backend. Using a volatile pointer might suffice, but this
1676 * isn't a hot spot.
1677 */
1679
1682
1684 }
1685
1686 /*
1687 * We don't need to transfer OldestVisibleMXactId value, because the
1688 * transaction is not going to be looking at any more multixacts once it's
1689 * prepared.
1690 *
1691 * We assume that storing a MultiXactId is atomic and so we need not take
1692 * MultiXactGenLock to do this.
1693 */
1695
1696 /*
1697 * Discard the local MultiXactId cache like in AtEOXact_MultiXact.
1698 */
1701}
1702
1703/*
1704 * multixact_twophase_recover
1705 * Recover the state of a prepared transaction at startup
1706 */
1707void
1709 void *recdata, uint32 len)
1710{
1713
1714 /*
1715 * Get the oldest member MultiXactId from the state file record, and set it in the
1716 * OldestMemberMXactId slot reserved for this prepared transaction.
1717 */
1718 Assert(len == sizeof(MultiXactId));
1720
1722}
1723
1724/*
1725 * multixact_twophase_postcommit
1726 * Similar to AtEOXact_MultiXact but for COMMIT PREPARED
1727 */
1728void
1738
1739/*
1740 * multixact_twophase_postabort
1741 * This is actually just the same as the COMMIT case.
1742 */
1743void
1749
1750/*
1751 * Initialization of shared memory for MultiXact.
1752 *
1753 * MultiXactSharedStateShmemSize() calculates the size of the MultiXactState
1754 * struct, and the two per-backend MultiXactId arrays. They are carved out of
1755 * the same allocation. MultiXactShmemSize() additionally includes the memory
1756 * needed for the two SLRU areas.
1757 */
1758static Size
1760{
1761 Size size;
1762
1763 size = offsetof(MultiXactStateData, perBackendXactIds);
1764 size = add_size(size,
1766 size = add_size(size,
1768 return size;
1769}
1770
1771Size
1773{
1774 Size size;
1775
1779
1780 return size;
1781}
1782
1783void
1785{
1786 bool found;
1787
1788 debug_elog2(DEBUG2, "Shared Memory Init for MultiXact");
1789
1792 MultiXactOffsetCtl->errdetail_for_io_error = MultiXactOffsetIoErrorDetail;
1793 MultiXactMemberCtl->errdetail_for_io_error = MultiXactMemberIoErrorDetail;
1794
1796 "multixact_offset", multixact_offset_buffers, 0,
1797 "pg_multixact/offsets", LWTRANCHE_MULTIXACTOFFSET_BUFFER,
1800 false);
1803 "multixact_member", multixact_member_buffers, 0,
1804 "pg_multixact/members", LWTRANCHE_MULTIXACTMEMBER_BUFFER,
1807 true);
1808 /* doesn't call SimpleLruTruncate() or meet criteria for unit tests */
1809
1810 /* Initialize our shared state struct */
1811 MultiXactState = ShmemInitStruct("Shared MultiXact State",
1813 &found);
1814 if (!IsUnderPostmaster)
1815 {
1816 Assert(!found);
1817
1818 /* Make sure we zero out the per-backend state */
1820 }
1821 else
1822 Assert(found);
1823
1824 /*
1825 * Set up array pointers.
1826 */
1829}
1830
1831/*
1832 * GUC check_hook for multixact_offset_buffers
1833 */
1834bool
1836{
1837 return check_slru_buffers("multixact_offset_buffers", newval);
1838}
1839
1840/*
1841 * GUC check_hook for multixact_member_buffers
1842 */
1843bool
1845{
1846 return check_slru_buffers("multixact_member_buffers", newval);
1847}
1848
1849/*
1850 * This func must be called ONCE on system install. It creates the initial
1851 * MultiXact segments. (The MultiXacts directories are assumed to have been
1852 * created by initdb, and MultiXactShmemInit must have been called already.)
1853 */
1854void
1856{
1857 /* Zero the initial pages and flush them to disk */
1860}
1861
1862/*
1863 * This must be called ONCE during postmaster or standalone-backend startup.
1864 *
1865 * StartupXLOG has already established nextMXact/nextOffset by calling
1866 * MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact, and the oldestMulti
1867 * info from pg_control and/or MultiXactAdvanceOldest, but we haven't yet
1868 * replayed WAL.
1869 */
1870void
1872{
1875 int64 pageno;
1876
1877 /*
1878 * Initialize offset's idea of the latest page number.
1879 */
1880 pageno = MultiXactIdToOffsetPage(multi);
1881 pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
1882 pageno);
1883
1884 /*
1885 * Initialize member's idea of the latest page number.
1886 */
1887 pageno = MXOffsetToMemberPage(offset);
1888 pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
1889 pageno);
1890}
1891
1892/*
1893 * This must be called ONCE at the end of startup/recovery.
1894 */
1895void
1897{
1898 MultiXactId nextMXact;
1899 MultiXactOffset offset;
1902 int64 pageno;
1903 int entryno;
1904 int flagsoff;
1905
1907 nextMXact = MultiXactState->nextMXact;
1908 offset = MultiXactState->nextOffset;
1912
1913 /* Clean up offsets state */
1914
1915 /*
1916 * (Re-)Initialize our idea of the latest page number for offsets.
1917 */
1918 pageno = MultiXactIdToOffsetPage(nextMXact);
1919 pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
1920 pageno);
1921
1922 /*
1923 * Set the offset of nextMXact on the offsets page. This is normally done
1924 * in RecordNewMultiXact() of the previous multixact, but let's be sure
1925 * the next page exists, if the nextMXact was reset with pg_resetwal for
1926 * example.
1927 *
1928 * Zero out the remainder of the page. See notes in TrimCLOG() for
1929 * background. Unlike CLOG, some WAL record covers every pg_multixact
1930 * SLRU mutation. Since, also unlike CLOG, we ignore the WAL rule "write
1931 * xlog before data," nextMXact successors may carry obsolete, nonzero
1932 * offset values.
1933 */
1934 entryno = MultiXactIdToOffsetEntry(nextMXact);
1935 {
1936 int slotno;
1939
1941 if (entryno == 0 || nextMXact == FirstMultiXactId)
1943 else
1944 slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, &nextMXact);
1945 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1946 offptr += entryno;
1947
1948 *offptr = offset;
1949 if (entryno != 0 && (entryno + 1) * sizeof(MultiXactOffset) != BLCKSZ)
1950 MemSet(offptr + 1, 0, BLCKSZ - (entryno + 1) * sizeof(MultiXactOffset));
1951
1952 MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
1953 LWLockRelease(lock);
1954 }
1955
1956 /*
1957 * And the same for members.
1958 *
1959 * (Re-)Initialize our idea of the latest page number for members.
1960 */
1961 pageno = MXOffsetToMemberPage(offset);
1962 pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
1963 pageno);
1964
1965 /*
1966 * Zero out the remainder of the current members page. See notes in
1967 * TrimCLOG() for motivation.
1968 */
1970 if (flagsoff != 0)
1971 {
1973 int slotno;
1975 int memberoff;
1977
1981 xidptr = (TransactionId *)
1982 (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
1983
1985
1986 /*
1987 * Note: we don't need to zero out the flag bits in the remaining
1988 * members of the current group, because they are always reset before
1989 * writing.
1990 */
1991
1992 MultiXactMemberCtl->shared->page_dirty[slotno] = true;
1993 LWLockRelease(lock);
1994 }
1995
1996 /* signal that we're officially up */
2000
2001 /* Now compute how far away the next multixid wraparound is. */
2003}
2004
2005/*
2006 * Get the MultiXact data to save in a checkpoint record
2007 */
2008void
2010 MultiXactId *nextMulti,
2011 MultiXactOffset *nextMultiOffset,
2012 MultiXactId *oldestMulti,
2013 Oid *oldestMultiDB)
2014{
2016 *nextMulti = MultiXactState->nextMXact;
2017 *nextMultiOffset = MultiXactState->nextOffset;
2018 *oldestMulti = MultiXactState->oldestMultiXactId;
2019 *oldestMultiDB = MultiXactState->oldestMultiXactDB;
2021
2023 "MultiXact: checkpoint is nextMulti %u, nextOffset %" PRIu64 ", oldestMulti %u in DB %u",
2024 *nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB);
2025}
2026
2027/*
2028 * Perform a checkpoint --- either during shutdown, or on-the-fly
2029 */
2030void
2032{
2034
2035 /*
2036 * Write dirty MultiXact pages to disk. This may result in sync requests
2037 * queued for later handling by ProcessSyncRequests(), as part of the
2038 * checkpoint.
2039 */
2042
2044}
2045
2046/*
2047 * Set the next-to-be-assigned MultiXactId and offset
2048 *
2049 * This is used when we can determine the correct next ID/offset exactly
2050 * from a checkpoint record. Although this is only called during bootstrap
2051 * and XLog replay, we take the lock in case any hot-standby backends are
2052 * examining the values.
2053 */
2054void
2056 MultiXactOffset nextMultiOffset)
2057{
2058 Assert(MultiXactIdIsValid(nextMulti));
2059 debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %" PRIu64,
2060 nextMulti, nextMultiOffset);
2061
2063 MultiXactState->nextMXact = nextMulti;
2064 MultiXactState->nextOffset = nextMultiOffset;
2066}
2067
2068/*
2069 * Determine the last safe MultiXactId to allocate given the currently oldest
2070 * datminmxid (ie, the oldest MultiXactId that might exist in any database
2071 * of our cluster), and the OID of the (or a) database with that value.
2072 *
2073 * This also updates MultiXactState->oldestOffset, by looking up the offset of
2074 * MultiXactState->oldestMultiXactId.
2075 */
2076void
2078{
2079 MultiXactId multiVacLimit;
2080 MultiXactId multiWarnLimit;
2081 MultiXactId multiStopLimit;
2082 MultiXactId multiWrapLimit;
2084
2086
2087 /*
2088 * We pretend that a wrap will happen halfway through the multixact ID
2089 * space, but that's not really true, because multixacts wrap differently
2090 * from transaction IDs.
2091 */
2092 multiWrapLimit = oldest_datminmxid + (MaxMultiXactId >> 1);
2093 if (multiWrapLimit < FirstMultiXactId)
2094 multiWrapLimit += FirstMultiXactId;
2095
2096 /*
2097 * We'll refuse to continue assigning MultiXactIds once we get within 3M
2098 * multis of data loss. See SetTransactionIdLimit.
2099 */
2100 multiStopLimit = multiWrapLimit - 3000000;
2101 if (multiStopLimit < FirstMultiXactId)
2102 multiStopLimit -= FirstMultiXactId;
2103
2104 /*
2105 * We'll start complaining loudly when we get within 100M multis of data
2106 * loss. This is kind of arbitrary, but if you let your gas gauge get
2107 * down to 5% of full, would you be looking for the next gas station? We
2108 * need to be fairly liberal about this number because there are lots of
2109 * scenarios where most transactions are done by automatic clients that
2110 * won't pay attention to warnings. (No, we're not gonna make this
2111 * configurable. If you know enough to configure it, you know enough to
2112 * not get in this kind of trouble in the first place.)
2113 */
2114 multiWarnLimit = multiWrapLimit - 100000000;
2115 if (multiWarnLimit < FirstMultiXactId)
2116 multiWarnLimit -= FirstMultiXactId;
2117
2118 /*
2119 * We'll start trying to force autovacuums when oldest_datminmxid gets to
2120 * be more than autovacuum_multixact_freeze_max_age mxids old.
2121 *
2122 * Note: autovacuum_multixact_freeze_max_age is a PGC_POSTMASTER parameter
2123 * so that we don't have to worry about dealing with on-the-fly changes in
2124 * its value. See SetTransactionIdLimit.
2125 */
2127 if (multiVacLimit < FirstMultiXactId)
2128 multiVacLimit += FirstMultiXactId;
2129
2130 /* Grab lock for just long enough to set the new limit values */
2134 MultiXactState->multiVacLimit = multiVacLimit;
2135 MultiXactState->multiWarnLimit = multiWarnLimit;
2136 MultiXactState->multiStopLimit = multiStopLimit;
2137 MultiXactState->multiWrapLimit = multiWrapLimit;
2140
2141 /* Log the info */
2143 (errmsg_internal("MultiXactId wrap limit is %u, limited by database with OID %u",
2144 multiWrapLimit, oldest_datoid)));
2145
2146 /*
2147 * Computing the actual limits is only possible once the data directory is
2148 * in a consistent state. There's no need to compute the limits while
2149 * still replaying WAL - no decisions about new multis are made even
2150 * though multixact creations might be replayed. So we'll only do further
2151 * checks after TrimMultiXact() has been called.
2152 */
2154 return;
2155
2157
2158 /*
2159 * Offsets are 64 bits wide and never wrap around, so we don't need to
2160 * consider them for emergency autovacuum purposes. But now that we're in
2161 * a consistent state, determine MultiXactState->oldestOffset. It will be
2162 * used to adjust the freezing cutoff, to keep the offsets disk usage in
2163 * check.
2164 */
2166
2167 /*
2168 * If past the autovacuum force point, immediately signal an autovac
2169 * request. The reason for this is that autovac only processes one
2170 * database per invocation. Once it's finished cleaning up the oldest
2171 * database, it'll call here, and we'll signal the postmaster to start
2172 * another iteration immediately if there are still any old databases.
2173 */
2174 if (MultiXactIdPrecedes(multiVacLimit, curMulti) && IsUnderPostmaster)
2176
2177 /* Give an immediate warning if past the wrap warn point */
2178 if (MultiXactIdPrecedes(multiWarnLimit, curMulti))
2179 {
2180 char *oldest_datname;
2181
2182 /*
2183 * We can be called when not inside a transaction, for example during
2184 * StartupXLOG(). In such a case we cannot do database access, so we
2185 * must just report the oldest DB's OID.
2186 *
2187 * Note: it's also possible that get_database_name fails and returns
2188 * NULL, for example because the database just got dropped. We'll
2189 * still warn, even though the warning might now be unnecessary.
2190 */
2191 if (IsTransactionState())
2193 else
2195
2196 if (oldest_datname)
2198 (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
2199 "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
2200 multiWrapLimit - curMulti,
2202 multiWrapLimit - curMulti),
2203 errdetail("Approximately %.2f%% of MultiXactIds are available for use.",
2204 (double) (multiWrapLimit - curMulti) / (MaxMultiXactId / 2) * 100),
2205 errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
2206 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
2207 else
2209 (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
2210 "database with OID %u must be vacuumed before %u more MultiXactIds are used",
2211 multiWrapLimit - curMulti,
2213 multiWrapLimit - curMulti),
2214 errdetail("Approximately %.2f%% of MultiXactIds are available for use.",
2215 (double) (multiWrapLimit - curMulti) / (MaxMultiXactId / 2) * 100),
2216 errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
2217 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
2218 }
2219}
2220
2221/*
2222 * Ensure the next-to-be-assigned MultiXactId is at least minMulti,
2223 * and similarly nextOffset is at least minMultiOffset.
2224 *
2225 * This is used when we can determine minimum safe values from an XLog
2226 * record (either an on-line checkpoint or an mxact creation log entry).
2227 * Although this is only called during XLog replay, we take the lock in case
2228 * any hot-standby backends are examining the values.
2229 */
2230void
2250
2251/*
2252 * Update our oldestMultiXactId value, but only if it's more recent than what
2253 * we had.
2254 *
2255 * This may only be called during WAL replay.
2256 */
2257void
2258MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
2259{
2261
     /*
      * Pass the new oldest multi and the OID of its database on to
      * SetMultiXactIdLimit().
      */
2263 SetMultiXactIdLimit(oldestMulti, oldestMultiDB);
2264}
2265
2266/*
2267 * Make sure that MultiXactOffset has room for a newly-allocated MultiXactId.
2268 *
2269 * NB: this is called while holding MultiXactGenLock. We want it to be very
2270 * fast most of the time; even when it's not so fast, no actual I/O need
2271 * happen unless we're forced to write out a dirty log or xlog page to make
2272 * room in shared memory.
2273 */
2274static void
2276{
2277 int64 pageno;
2278 LWLock *lock;
2279
2280 /*
2281 * No work except at first MultiXactId of a page. But beware: just after
2282 * wraparound, the first MultiXactId of page zero is FirstMultiXactId.
2283 */
2284 if (MultiXactIdToOffsetEntry(multi) != 0 &&
2285 multi != FirstMultiXactId)
2286 return;
2287
2288 pageno = MultiXactIdToOffsetPage(multi);
2290
2292
2293 /* Zero the page and make a WAL entry about it */
2296 pageno);
2297
2298 LWLockRelease(lock);
2299}
2300
2301/*
2302 * Make sure that MultiXactMember has room for the members of a newly-
2303 * allocated MultiXactId.
2304 *
2305 * Like the above routine, this is called while holding MultiXactGenLock;
2306 * same comments apply.
2307 */
2308static void
2310{
2311 /*
2312 * It's possible that the members span more than one page of the members
2313 * file, so we loop to ensure we consider each page. The coding is not
2314 * optimal if the members span several pages, but that seems unusual
2315 * enough to not worry much about.
2316 */
2317 while (nmembers > 0)
2318 {
2319 int flagsoff;
2320 int flagsbit;
2322
2323 /*
2324 * Only zero when at first entry of a page.
2325 */
2328 if (flagsoff == 0 && flagsbit == 0)
2329 {
2330 int64 pageno;
2331 LWLock *lock;
2332
2333 pageno = MXOffsetToMemberPage(offset);
2335
2337
2338 /* Zero the page and make a WAL entry about it */
2342
2343 LWLockRelease(lock);
2344 }
2345
2346 /* Compute the number of items till end of current page. */
2348
2349 /*
2350 * Advance to next page. OK if nmembers goes negative.
2351 */
2352 nmembers -= difference;
2353 offset += difference;
2354 }
2355}
2356
2357/*
2358 * GetOldestMultiXactId
2359 *
2360 * Return the oldest MultiXactId that's possibly still seen as live by
2361 * any running transaction. Older ones might still exist on disk, but they no
2362 * longer have any running member transaction.
2363 *
2364 * It's not safe to truncate MultiXact SLRU segments on the value returned by
2365 * this function; however, it can be set as the new relminmxid for any table
2366 * that VACUUM knows has no remaining MXIDs < the same value. It is only safe
2367 * to truncate SLRUs when no table can possibly still have a referencing MXID.
2368 */
2371{
2373
2374 /*
2375 * This is the oldest valid value among all the OldestMemberMXactId[] and
2376 * OldestVisibleMXactId[] entries, or nextMXact if none are valid.
2377 */
2380 for (int i = 0; i < NumMemberSlots; i++)
2381 {
2383
2388 }
2389 for (int i = 0; i < NumVisibleSlots; i++)
2390 {
2392
2397 }
2398
2400
2401 return oldestMXact;
2402}
2403
2404/*
2405 * Calculate the oldest member offset and install it in MultiXactState, where
2406 * it can be used to adjust multixid freezing cutoffs.
2407 */
2408static void
2410{
2411 MultiXactId oldestMultiXactId;
2412 MultiXactId nextMXact;
2413 MultiXactOffset oldestOffset = 0; /* placate compiler */
2414 MultiXactOffset nextOffset;
2415 bool oldestOffsetKnown = false;
2416
2417 /*
2418 * NB: Have to prevent concurrent truncation; otherwise we might try to
2419 * look up an oldestMulti that's concurrently getting truncated away.
2420 */
2422
2423 /* Read relevant fields from shared memory. */
2425 oldestMultiXactId = MultiXactState->oldestMultiXactId;
2426 nextMXact = MultiXactState->nextMXact;
2427 nextOffset = MultiXactState->nextOffset;
2430
2431 /*
2432 * Determine the offset of the oldest multixact. Normally, we can read
2433 * the offset from the multixact itself, but there's an important special
2434 * case: if there are no multixacts in existence at all, oldestMXact
2435 * obviously can't point to one. It will instead point to the multixact
2436 * ID that will be assigned the next time one is needed.
2437 */
2438 if (oldestMultiXactId == nextMXact)
2439 {
2440 /*
2441 * When the next multixact gets created, it will be stored at the next
2442 * offset.
2443 */
2444 oldestOffset = nextOffset;
2445 oldestOffsetKnown = true;
2446 }
2447 else
2448 {
2449 /*
2450 * Look up the offset at which the oldest existing multixact's members
2451 * are stored. If we cannot find it, be careful not to fail, and
2452 * leave oldestOffset unchanged. oldestOffset is initialized to zero
2453 * at system startup, which prevents truncating members until a proper
2454 * value is calculated.
2455 *
2456 * (We had bugs in early releases of PostgreSQL 9.3.X and 9.4.X where
2457 * the supposedly-earliest multixact might not really exist. Those
2458 * should be long gone by now, so this should not fail, but let's
2459 * still be defensive.)
2460 */
2462 find_multixact_start(oldestMultiXactId, &oldestOffset);
2463
2466 (errmsg_internal("oldest MultiXactId member is at offset %" PRIu64,
2467 oldestOffset)));
2468 else
2469 ereport(LOG,
2470 (errmsg("MultiXact member truncation is disabled because oldest checkpointed MultiXact %u does not exist on disk",
2471 oldestMultiXactId)));
2472 }
2473
2475
2476 /* Install the computed value */
2478 {
2480 MultiXactState->oldestOffset = oldestOffset;
2482 }
2483}
2484
2485/*
2486 * Find the starting offset of the given MultiXactId.
2487 *
2488 * Returns false if the file containing the multi does not exist on disk.
2489 * Otherwise, returns true and sets *result to the starting member offset.
2490 *
2491 * This function does not prevent concurrent truncation, so if that's
2492 * required, the caller has to protect against that.
2493 */
2494static bool
2496{
2497 MultiXactOffset offset;
2498 int64 pageno;
2499 int entryno;
2500 int slotno;
2502
2504
2505 pageno = MultiXactIdToOffsetPage(multi);
2507
2508 /*
2509 * Write out dirty data, so PhysicalPageExists can work correctly.
2510 */
2513
2515 return false;
2516
2517 /* lock is acquired by SimpleLruReadPage_ReadOnly */
2519 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
2520 offptr += entryno;
2521 offset = *offptr;
2523
2524 *result = offset;
2525 return true;
2526}
2527
2528/*
2529 * GetMultiXactInfo
2530 *
2531 * Returns information about the current MultiXact state, as of:
2532 * multixacts: Number of MultiXacts (nextMultiXactId - oldestMultiXactId)
2533 * nextOffset: Next-to-be-assigned offset
2534 * oldestMultiXactId: Oldest MultiXact ID still in use
2535 * oldestOffset: Oldest offset still in use
2536 */
2537void
2539 MultiXactId *oldestMultiXactId, MultiXactOffset *oldestOffset)
2540{
2542
2544 *nextOffset = MultiXactState->nextOffset;
2545 *oldestMultiXactId = MultiXactState->oldestMultiXactId;
2547 *oldestOffset = MultiXactState->oldestOffset;
2549
2550 *multixacts = nextMultiXactId - *oldestMultiXactId;
2551}
2552
2553/*
2554 * Multixact members can be removed once the multixacts that refer to them
2555 * are older than every datminmxid. autovacuum_multixact_freeze_max_age and
2556 * vacuum_multixact_freeze_table_age work together to make sure we never have
2557 * too many multixacts; we hope that, at least under normal circumstances,
2558 * this will also be sufficient to keep us from using too many offsets.
2559 * However, if the average multixact has many members, we might accumulate a
2560 * large number of members, consuming disk space, while still using few enough
2561 * multixids that the multixid limits fail to trigger relminmxid advancement
2562 * by VACUUM.
2563 *
2564 * To prevent that, if the members space usage exceeds a threshold
2565 * (MULTIXACT_MEMBER_LOW_THRESHOLD), we effectively reduce
2566 * autovacuum_multixact_freeze_max_age to a value just less than the number of
2567 * multixacts in use. We hope that this will quickly trigger autovacuuming on
2568 * the table or tables with the oldest relminmxid, thus allowing datminmxid
2569 * values to advance and removing some members.
2570 *
2571 * As the amount of the member space in use grows, we become more aggressive
2572 * in clamping this value. That not only causes autovacuum to ramp up, but
2573 * also makes any manual vacuums the user issues more aggressive. This
2574 * happens because vacuum_get_cutoffs() will clamp the freeze table and the
2575 * minimum freeze age cutoffs based on the effective
2576 * autovacuum_multixact_freeze_max_age this function returns. At the extreme,
2577 * when the members usage reaches MULTIXACT_MEMBER_HIGH_THRESHOLD, we clamp
2578 * freeze_max_age to zero, and every vacuum of any table will freeze every
2579 * multixact.
2580 */
2581int
2583{
2586 double fraction;
2587 int result;
2588 MultiXactId oldestMultiXactId;
2589 MultiXactOffset oldestOffset;
2590 MultiXactOffset nextOffset;
2591 uint64 members;
2592
2593 /* Read the current offsets and multixact usage. */
2594 GetMultiXactInfo(&multixacts, &nextOffset, &oldestMultiXactId, &oldestOffset);
2595 members = nextOffset - oldestOffset;
2596
2597 /* If member space utilization is low, no special action is required. */
2598 if (members <= MULTIXACT_MEMBER_LOW_THRESHOLD)
2600
2601 /*
2602 * Compute a target for relminmxid advancement. The number of multixacts
2603 * we try to eliminate from the system is based on how far we are past
2604 * MULTIXACT_MEMBER_LOW_THRESHOLD.
2605 *
2606 * The way this formula works is that when members is exactly at the low
2607 * threshold, fraction = 0.0, and we set freeze_max_age equal to
2608 * mxid_age(oldestMultiXactId). As members grows further, towards the
2609 * high threshold, fraction grows linearly from 0.0 to 1.0, and the result
2610 * shrinks from mxid_age(oldestMultiXactId) to 0. Beyond the high
2611 * threshold, fraction > 1.0 and the result is clamped to 0.
2612 */
2615
2616 /* fraction could be > 1.0, but lowest possible freeze age is zero */
2617 if (fraction >= 1.0)
2618 return 0;
2619
2621 result = multixacts - victim_multixacts;
2622
2623 /*
2624 * Clamp to autovacuum_multixact_freeze_max_age, so that we never make
2625 * autovacuum less aggressive than it would otherwise be.
2626 */
2628}
2629
2630
2631/*
2632 * Delete members segments older than newOldestOffset
2633 */
2634static void
2640
2641/*
2642 * Delete offsets segments older than newOldestMulti
2643 */
2644static void
2646{
2647 /*
2648 * We step back one multixact to avoid passing a cutoff page that hasn't
2649 * been created yet in the rare case that oldestMulti would be the first
2650 * item on a page and oldestMulti == nextMulti. In that case, if we
2651 * didn't subtract one, we'd trigger SimpleLruTruncate's wraparound
2652 * detection.
2653 */
2656}
2657
2658/*
2659 * Remove all MultiXactOffset and MultiXactMember segments before the oldest
2660 * ones still of interest.
2661 *
2662 * This is only called on a primary as part of vacuum (via
2663 * vac_truncate_clog()). During recovery truncation is done by replaying
2664 * truncation WAL records logged here.
2665 *
2666 * newOldestMulti is the oldest currently required multixact, newOldestMultiDB
2667 * is one of the databases preventing newOldestMulti from increasing.
2668 */
2669void
2671{
2672 MultiXactId oldestMulti;
2673 MultiXactId nextMulti;
2675 MultiXactOffset nextOffset;
2676
2680
2681 /*
2682 * We can only allow one truncation to happen at once. Otherwise parts of
2683 * members might vanish while we're doing lookups or similar. There's no
2684 * need to have an interlock with creating new multis or such, since those
2685 * are constrained by the limits (which only grow, never shrink).
2686 */
2688
2690 nextMulti = MultiXactState->nextMXact;
2691 nextOffset = MultiXactState->nextOffset;
2692 oldestMulti = MultiXactState->oldestMultiXactId;
2694
2695 /*
2696 * Make sure to only attempt truncation if there are values to truncate
2697 * away. In normal processing values shouldn't go backwards, but there
2698 * are some corner cases (due to bugs) where that's possible.
2699 */
2701 {
2703 return;
2704 }
2705
2706 /*
2707 * Compute up to where to truncate MultiXactMember. Look up the
2708 * corresponding member offset for newOldestMulti for that.
2709 */
2710 if (newOldestMulti == nextMulti)
2711 {
2712 /* there are NO MultiXacts */
2713 newOldestOffset = nextOffset;
2714 }
2716 {
2717 ereport(LOG,
2718 (errmsg("cannot truncate up to MultiXact %u because it does not exist on disk, skipping truncation",
2719 newOldestMulti)));
2721 return;
2722 }
2723
2724 /*
2725 * On crash, MultiXactIdCreateFromMembers() can leave behind multixids
2726 * that were not yet written out and hence have zero offset on disk. If
2727 * such a multixid becomes oldestMulti, we won't be able to look up its
2728 * offset. That should be rare, so we don't try to do anything smart about
2729 * it. Just skip the truncation, and hope that by the next truncation
2730 * attempt, oldestMulti has advanced to a valid multixid.
2731 */
2732 if (newOldestOffset == 0)
2733 {
2734 ereport(LOG,
2735 (errmsg("cannot truncate up to MultiXact %u because it has invalid offset, skipping truncation",
2736 newOldestMulti)));
2738 return;
2739 }
2740
2741 elog(DEBUG1, "performing multixact truncation: "
2742 "oldestMulti %u (offsets segment %" PRIx64 "), "
2743 "oldestOffset %" PRIu64 " (members segment %" PRIx64 ")",
2748
2749 /*
2750 * Do truncation, and the WAL logging of the truncation, in a critical
2751 * section. That way offsets/members cannot get out of sync anymore, i.e.
2752 * once consistent the newOldestMulti will always exist in members, even
2753 * if we crashed in the wrong moment.
2754 */
2756
2757 /*
2758 * Prevent checkpoints from being scheduled concurrently. This is critical
2759 * because otherwise a truncation record might not be replayed after a
2760 * crash/basebackup, even though the state of the data directory would
2761 * require it.
2762 */
2765
2766 /* WAL log truncation */
2768
2769 /*
2770 * Update in-memory limits before performing the truncation, while inside
2771 * the critical section: Have to do it before truncation, to prevent
2772 * concurrent lookups of those values. Has to be inside the critical
2773 * section as otherwise a future call to this function would error out,
2774 * while looking up the oldest member in offsets, if our caller crashes
2775 * before updating the limits.
2776 */
2782
2783 /* First truncate members */
2785
2786 /* Then offsets */
2788
2790
2793}
2794
2795/*
2796 * Decide whether a MultiXactOffset page number is "older" for truncation
2797 * purposes. Analogous to CLOGPagePrecedes().
2798 *
2799 * Offsetting the values is optional, because MultiXactIdPrecedes() has
2800 * translational symmetry.
2801 */
2802static bool
2817
2818/*
2819 * Decide whether a MultiXactMember page number is "older" for truncation
2820 * purposes. There is no "invalid offset number" and members never wrap
2821 * around, so use the numbers verbatim.
2822 */
2823static bool
2828
2829static int
2831{
2833
2834 return errdetail("Could not access offset of multixact %u.", multixid);
2835}
2836
2837static int
2839{
2841
2842 if (MultiXactIdIsValid(context->multi))
2843 return errdetail("Could not access member of multixact %u at offset %" PRIu64 ".",
2844 context->multi, context->offset);
2845 else
2846 return errdetail("Could not access multixact member at offset %" PRIu64 ".",
2847 context->offset);
2848}
2849
2850/*
2851 * Decide which of two MultiXactIds is earlier: true if multi1 precedes multi2.
2852 *
2853 * XXX do we need to do something special for InvalidMultiXactId?
2854 * (Doesn't look like it.)
2855 */
2856bool
2858{
2859 int32 diff = (int32) (multi1 - multi2); /* difference reinterpreted as a signed 32-bit value */
2860
2861 return (diff < 0); /* negative signed difference means multi1 is the earlier one */
2862}
2863
2864/*
2865 * MultiXactIdPrecedesOrEquals -- is multi1 logically <= multi2?
2866 *
2867 * XXX do we need to do something special for InvalidMultiXactId?
2868 * (Doesn't look like it.)
2869 */
2870bool
2872{
2873 int32 diff = (int32) (multi1 - multi2); /* difference reinterpreted as a signed 32-bit value */
2874
2875 return (diff <= 0); /* zero (equal) or negative (multi1 earlier) both qualify */
2876}
2877
2878
2879/*
2880 * Write a TRUNCATE xlog record
2881 *
2882 * We must flush the xlog record to disk before returning --- see notes in
2883 * TruncateCLOG().
2884 */
2885static void
2887 MultiXactId oldestMulti,
2888 MultiXactOffset oldestOffset)
2889{
2892
2893 xlrec.oldestMultiDB = oldestMultiDB;
2894 xlrec.oldestMulti = oldestMulti;
2895 xlrec.oldestOffset = oldestOffset;
2896
2901}
2902
2903/*
2904 * MULTIXACT resource manager's routines
2905 */
2906void
2908{
2909 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
2910
2911 /* Backup blocks are not used in multixact records */
2913
2914 if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
2915 {
2916 int64 pageno;
2917
2918 memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
2920 }
2921 else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
2922 {
2923 int64 pageno;
2924
2925 memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
2927 }
2928 else if (info == XLOG_MULTIXACT_CREATE_ID)
2929 {
2933 int i;
2934
2935 /* Store the data back into the SLRU files */
2936 RecordNewMultiXact(xlrec->mid, xlrec->moff, xlrec->nmembers,
2937 xlrec->members);
2938
2939 /* Make sure nextMXact/nextOffset are beyond what this record has */
2941 xlrec->moff + xlrec->nmembers);
2942
2943 /*
2944 * Make sure nextXid is beyond any XID mentioned in the record. This
2945 * should be unnecessary, since any XID found here ought to have other
2946 * evidence in the XLOG, but let's be safe.
2947 */
2948 max_xid = XLogRecGetXid(record);
2949 for (i = 0; i < xlrec->nmembers; i++)
2950 {
2951 if (TransactionIdPrecedes(max_xid, xlrec->members[i].xid))
2952 max_xid = xlrec->members[i].xid;
2953 }
2954
2956 }
2957 else if (info == XLOG_MULTIXACT_TRUNCATE_ID)
2958 {
2960 int64 pageno;
2961
2962 memcpy(&xlrec, XLogRecGetData(record),
2964
2965 elog(DEBUG1, "replaying multixact truncation: "
2966 "oldestMulti %u (offsets segment %" PRIx64 "), "
2967 "oldestOffset %" PRIu64 " (members segment %" PRIx64 ")",
2968 xlrec.oldestMulti,
2969 MultiXactIdToOffsetSegment(xlrec.oldestMulti),
2970 xlrec.oldestOffset,
2971 MXOffsetToMemberSegment(xlrec.oldestOffset));
2972
2973 /* should not be required, but more than cheap enough */
2975
2976 /*
2977 * Advance the horizon values, so they're current at the end of
2978 * recovery.
2979 */
2980 SetMultiXactIdLimit(xlrec.oldestMulti, xlrec.oldestMultiDB);
2981
2982 PerformMembersTruncation(xlrec.oldestOffset);
2983
2984 /*
2985 * During XLOG replay, latest_page_number isn't necessarily set up
2986 * yet; insert a suitable value to bypass the sanity test in
2987 * SimpleLruTruncate.
2988 */
2989 pageno = MultiXactIdToOffsetPage(xlrec.oldestMulti);
2990 pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
2991 pageno);
2992 PerformOffsetsTruncation(xlrec.oldestMulti);
2993
2995 }
2996 else
2997 elog(PANIC, "multixact_redo: unknown op code %u", info);
2998}
2999
3000/*
3001 * Entrypoint for sync.c to sync offsets files; returns SlruSyncFileTag()'s result.
3002 */
3003int
3004multixactoffsetssyncfiletag(const FileTag *ftag, char *path)
3005{
3006 return SlruSyncFileTag(MultiXactOffsetCtl, ftag, path); /* delegate to the offsets SLRU */
3007}
3008
3009/*
3010 * Entrypoint for sync.c to sync members files; returns SlruSyncFileTag()'s result.
3011 */
3012int
3013multixactmemberssyncfiletag(const FileTag *ftag, char *path)
3014{
3015 return SlruSyncFileTag(MultiXactMemberCtl, ftag, path); /* delegate to the members SLRU */
3016}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:485
int autovacuum_multixact_freeze_max_age
Definition autovacuum.c:132
static int32 next
Definition blutils.c:225
#define Min(x, y)
Definition c.h:1093
uint8_t uint8
Definition c.h:616
#define Assert(condition)
Definition c.h:945
int64_t int64
Definition c.h:615
TransactionId MultiXactId
Definition c.h:748
#define FLEXIBLE_ARRAY_MEMBER
Definition c.h:552
uint64 MultiXactOffset
Definition c.h:750
int32_t int32
Definition c.h:614
uint64_t uint64
Definition c.h:619
uint16_t uint16
Definition c.h:617
uint32_t uint32
Definition c.h:618
#define MemSet(start, val, len)
Definition c.h:1109
uint32 TransactionId
Definition c.h:738
size_t Size
Definition c.h:691
int errcode(int sqlerrcode)
Definition elog.c:874
#define LOG
Definition elog.h:31
int errhint(const char *fmt,...) pg_attribute_printf(1
int errdetail(const char *fmt,...) pg_attribute_printf(1
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define WARNING
Definition elog.h:36
int int int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...) pg_attribute_printf(1
#define DEBUG2
Definition elog.h:29
#define PANIC
Definition elog.h:42
#define DEBUG1
Definition elog.h:30
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
#define palloc_array(type, count)
Definition fe_memutils.h:76
Datum difference(PG_FUNCTION_ARGS)
int multixact_offset_buffers
Definition globals.c:163
ProcNumber MyProcNumber
Definition globals.c:90
bool IsUnderPostmaster
Definition globals.c:120
int multixact_member_buffers
Definition globals.c:162
int MaxBackends
Definition globals.c:146
#define newval
GucSource
Definition guc.h:112
const char * str
#define dclist_container(type, membername, ptr)
Definition ilist.h:947
static uint32 dclist_count(const dclist_head *head)
Definition ilist.h:932
static void dclist_move_head(dclist_head *head, dlist_node *node)
Definition ilist.h:808
static dlist_node * dclist_tail_node(dclist_head *head)
Definition ilist.h:920
static void dclist_delete_from(dclist_head *head, dlist_node *node)
Definition ilist.h:763
#define DCLIST_STATIC_INIT(name)
Definition ilist.h:282
static void dclist_push_head(dclist_head *head, dlist_node *node)
Definition ilist.h:693
static void dclist_init(dclist_head *head)
Definition ilist.h:671
#define dclist_foreach(iter, lhead)
Definition ilist.h:970
#define INJECTION_POINT_CACHED(name, arg)
#define INJECTION_POINT_LOAD(name)
int j
Definition isn.c:78
int i
Definition isn.c:77
char * get_database_name(Oid dbid)
Definition lsyscache.c:1312
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1177
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1794
@ LW_SHARED
Definition lwlock.h:113
@ LW_EXCLUSIVE
Definition lwlock.h:112
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition mcxt.c:1768
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition mcxt.c:1232
MemoryContext TopTransactionContext
Definition mcxt.c:171
void pfree(void *pointer)
Definition mcxt.c:1616
MemoryContext TopMemoryContext
Definition mcxt.c:166
void * palloc(Size size)
Definition mcxt.c:1387
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_SMALL_SIZES
Definition memutils.h:170
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define END_CRIT_SECTION()
Definition miscadmin.h:152
#define NumMemberSlots
Definition multixact.c:215
static MultiXactId PreviousMultiXactId(MultiXactId multi)
Definition multixact.c:108
static SlruCtlData MultiXactOffsetCtlData
Definition multixact.c:116
void MultiXactShmemInit(void)
Definition multixact.c:1784
static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2)
Definition multixact.c:2824
static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
Definition multixact.c:968
static int mXactCacheGetById(MultiXactId multi, MultiXactMember **members)
Definition multixact.c:1471
MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status)
Definition multixact.c:400
static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
Definition multixact.c:2309
void ReadMultiXactIdRange(MultiXactId *oldest, MultiXactId *next)
Definition multixact.c:685
static MultiXactId * MyOldestVisibleMXactIdSlot(void)
Definition multixact.c:253
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2857
char * mxstatus_to_string(MultiXactStatus status)
Definition multixact.c:1563
void multixact_redo(XLogReaderState *record)
Definition multixact.c:2907
static void PerformOffsetsTruncation(MultiXactId newOldestMulti)
Definition multixact.c:2645
void multixact_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len)
Definition multixact.c:1729
#define debug_elog5(a, b, c, d, e)
Definition multixact.c:298
static void MultiXactIdSetOldestVisible(void)
Definition multixact.c:635
int multixactoffsetssyncfiletag(const FileTag *ftag, char *path)
Definition multixact.c:3004
static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result)
Definition multixact.c:2495
void PostPrepare_MultiXact(FullTransactionId fxid)
Definition multixact.c:1659
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition multixact.c:2055
#define MultiXactMemberCtl
Definition multixact.c:120
static void WriteMTruncateXlogRec(Oid oldestMultiDB, MultiXactId endTruncOff, MultiXactOffset endTruncMemb)
Definition multixact.c:2886
void AtPrepare_MultiXact(void)
Definition multixact.c:1645
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2871
void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
Definition multixact.c:2258
static void mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members)
Definition multixact.c:1518
void GetMultiXactInfo(uint32 *multixacts, MultiXactOffset *nextOffset, MultiXactId *oldestMultiXactId, MultiXactOffset *oldestOffset)
Definition multixact.c:2538
bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
Definition multixact.c:511
void MultiXactIdSetOldestMember(void)
Definition multixact.c:585
#define MULTIXACT_MEMBER_LOW_THRESHOLD
Definition multixact.c:98
static MemoryContext MXactContext
Definition multixact.c:286
static MultiXactId * OldestVisibleMXactId
Definition multixact.c:221
static int mxactMemberComparator(const void *arg1, const void *arg2)
Definition multixact.c:1398
static void ExtendMultiXactOffset(MultiXactId multi)
Definition multixact.c:2275
Size MultiXactShmemSize(void)
Definition multixact.c:1772
#define MultiXactOffsetCtl
Definition multixact.c:119
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition multixact.c:2009
static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, int nmembers, MultiXactMember *members)
Definition multixact.c:805
int multixactmemberssyncfiletag(const FileTag *ftag, char *path)
Definition multixact.c:3013
#define MAX_CACHE_ENTRIES
Definition multixact.c:284
static MultiXactId NextMultiXactId(MultiXactId multi)
Definition multixact.c:102
MultiXactId GetOldestMultiXactId(void)
Definition multixact.c:2370
void CheckPointMultiXact(void)
Definition multixact.c:2031
static int MultiXactOffsetIoErrorDetail(const void *opaque_data)
Definition multixact.c:2830
static MultiXactId * PreparedXactOldestMemberMXactIdSlot(ProcNumber procno)
Definition multixact.c:237
MultiXactId MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
Definition multixact.c:704
static Size MultiXactSharedStateShmemSize(void)
Definition multixact.c:1759
static MultiXactId mXactCacheGetBySet(int nmembers, MultiXactMember *members)
Definition multixact.c:1428
static dclist_head MXactCache
Definition multixact.c:285
void TrimMultiXact(void)
Definition multixact.c:1896
#define debug_elog3(a, b, c)
Definition multixact.c:296
char * mxid_to_string(MultiXactId multi, int nmembers, MultiXactMember *members)
Definition multixact.c:1586
#define debug_elog4(a, b, c, d)
Definition multixact.c:297
void multixact_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len)
Definition multixact.c:1744
static void PerformMembersTruncation(MultiXactOffset newOldestOffset)
Definition multixact.c:2635
static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2)
Definition multixact.c:2803
static MultiXactId * MyOldestMemberMXactIdSlot(void)
Definition multixact.c:225
int MultiXactMemberFreezeThreshold(void)
Definition multixact.c:2582
static void SetOldestOffset(void)
Definition multixact.c:2409
void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset)
Definition multixact.c:2231
static MultiXactId * OldestMemberMXactId
Definition multixact.c:220
static MultiXactStateData * MultiXactState
Definition multixact.c:219
#define NumVisibleSlots
Definition multixact.c:216
MultiXactId ReadNextMultiXactId(void)
Definition multixact.c:668
void BootStrapMultiXact(void)
Definition multixact.c:1855
#define debug_elog6(a, b, c, d, e, f)
Definition multixact.c:299
void multixact_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len)
Definition multixact.c:1708
static int MultiXactMemberIoErrorDetail(const void *opaque_data)
Definition multixact.c:2838
MultiXactId MultiXactIdCreate(TransactionId xid1, MultiXactStatus status1, TransactionId xid2, MultiXactStatus status2)
Definition multixact.c:347
void TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
Definition multixact.c:2670
bool check_multixact_offset_buffers(int *newval, void **extra, GucSource source)
Definition multixact.c:1835
bool check_multixact_member_buffers(int *newval, void **extra, GucSource source)
Definition multixact.c:1844
void AtEOXact_MultiXact(void)
Definition multixact.c:1617
#define MULTIXACT_MEMBER_HIGH_THRESHOLD
Definition multixact.c:99
static SlruCtlData MultiXactMemberCtlData
Definition multixact.c:117
#define debug_elog2(a, b)
Definition multixact.c:295
void StartupMultiXact(void)
Definition multixact.c:1871
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
Definition multixact.c:2077
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition multixact.c:1161
#define MultiXactIdIsValid(multi)
Definition multixact.h:29
#define XLOG_MULTIXACT_ZERO_MEM_PAGE
Definition multixact.h:68
#define XLOG_MULTIXACT_ZERO_OFF_PAGE
Definition multixact.h:67
#define FirstMultiXactId
Definition multixact.h:26
MultiXactStatus
Definition multixact.h:37
@ MultiXactStatusForShare
Definition multixact.h:39
@ MultiXactStatusForNoKeyUpdate
Definition multixact.h:40
@ MultiXactStatusNoKeyUpdate
Definition multixact.h:43
@ MultiXactStatusUpdate
Definition multixact.h:45
@ MultiXactStatusForUpdate
Definition multixact.h:41
@ MultiXactStatusForKeyShare
Definition multixact.h:38
#define ISUPDATE_from_mxstatus(status)
Definition multixact.h:51
#define InvalidMultiXactId
Definition multixact.h:25
#define XLOG_MULTIXACT_TRUNCATE_ID
Definition multixact.h:70
#define SizeOfMultiXactCreate
Definition multixact.h:80
#define SizeOfMultiXactTruncate
Definition multixact.h:93
#define XLOG_MULTIXACT_CREATE_ID
Definition multixact.h:69
#define MaxMultiXactId
Definition multixact.h:27
static int64 MultiXactIdToOffsetSegment(MultiXactId multi)
static int64 MXOffsetToMemberSegment(MultiXactOffset offset)
#define MXACT_MEMBER_BITS_PER_XACT
static int MXOffsetToFlagsBitShift(MultiXactOffset32 offset)
#define MXACT_MEMBER_XACT_BITMASK
static int64 MXOffsetToMemberPage(MultiXactOffset32 offset)
#define MULTIXACT_OFFSETS_PER_PAGE
static int MXOffsetToMemberOffset(MultiXactOffset32 offset)
static int MultiXactIdToOffsetEntry(MultiXactId multi)
static int64 MultiXactIdToOffsetPage(MultiXactId multi)
#define MULTIXACT_MEMBERS_PER_PAGE
static int MXOffsetToFlagsOffset(MultiXactOffset32 offset)
static char * errmsg
#define ERRCODE_DATA_CORRUPTED
const void size_t len
static rewind_source * source
Definition pg_rewind.c:89
static char buf[DEFAULT_XLOG_SEG_SIZE]
void SendPostmasterSignal(PMSignalReason reason)
Definition pmsignal.c:165
@ PMSIGNAL_START_AUTOVAC_LAUNCHER
Definition pmsignal.h:39
#define qsort(a, b, c, d)
Definition port.h:495
unsigned int Oid
static int fb(int x)
#define FIRST_PREPARED_XACT_PROC_NUMBER
Definition proc.h:526
#define DELAY_CHKPT_START
Definition proc.h:136
bool TransactionIdIsInProgress(TransactionId xid)
Definition procarray.c:1401
int ProcNumber
Definition procnumber.h:24
Size add_size(Size s1, Size s2)
Definition shmem.c:485
Size mul_size(Size s1, Size s2)
Definition shmem.c:500
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition shmem.c:381
void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, const char *subdir, int buffer_tranche_id, int bank_tranche_id, SyncRequestHandler sync_handler, bool long_segment_names)
Definition slru.c:254
void SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied)
Definition slru.c:1355
int SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok, const void *opaque_data)
Definition slru.c:533
bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int64 pageno)
Definition slru.c:778
int SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path)
Definition slru.c:1864
int SimpleLruZeroPage(SlruCtl ctl, int64 pageno)
Definition slru.c:380
void SimpleLruZeroAndWritePage(SlruCtl ctl, int64 pageno)
Definition slru.c:449
void SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage)
Definition slru.c:1441
int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno, const void *opaque_data)
Definition slru.c:637
Size SimpleLruShmemSize(int nslots, int nlsns)
Definition slru.c:200
bool check_slru_buffers(const char *name, int *newval)
Definition slru.c:360
static LWLock * SimpleLruGetBankLock(SlruCtl ctl, int64 pageno)
Definition slru.h:171
#define SlruPagePrecedesUnitTests(ctl, per_page)
Definition slru.h:196
PGPROC * MyProc
Definition proc.c:68
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendStringInfoChar(StringInfo str, char ch)
Definition stringinfo.c:242
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
Definition sync.h:51
TransactionId xid
Definition multixact.h:57
MultiXactStatus status
Definition multixact.h:58
MultiXactId multiWrapLimit
Definition multixact.c:158
MultiXactId multiStopLimit
Definition multixact.c:157
MultiXactId multiWarnLimit
Definition multixact.c:156
MultiXactId multiVacLimit
Definition multixact.c:155
MultiXactOffset nextOffset
Definition multixact.c:135
MultiXactId nextMXact
Definition multixact.c:132
MultiXactId oldestMultiXactId
Definition multixact.c:145
MultiXactId perBackendXactIds[FLEXIBLE_ARRAY_MEMBER]
Definition multixact.c:209
MultiXactOffset oldestOffset
Definition multixact.c:152
int delayChkptFlags
Definition proc.h:257
dlist_node * cur
Definition ilist.h:179
MultiXactId multi
Definition multixact.c:278
dlist_node node
Definition multixact.c:280
MultiXactMember members[FLEXIBLE_ARRAY_MEMBER]
Definition multixact.c:281
@ SYNC_HANDLER_MULTIXACT_MEMBER
Definition sync.h:41
@ SYNC_HANDLER_MULTIXACT_OFFSET
Definition sync.h:40
bool TransactionIdDidCommit(TransactionId transactionId)
Definition transam.c:126
#define TransactionIdEquals(id1, id2)
Definition transam.h:43
#define TransactionIdIsValid(xid)
Definition transam.h:41
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
ProcNumber TwoPhaseGetDummyProcNumber(FullTransactionId fxid, bool lock_held)
Definition twophase.c:911
void RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info, const void *data, uint32 len)
Definition twophase.c:1274
#define TWOPHASE_RM_MULTIXACT_ID
void AdvanceNextFullTransactionIdPastXid(TransactionId xid)
Definition varsup.c:308
bool IsTransactionState(void)
Definition xact.c:389
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition xact.c:943
bool RecoveryInProgress(void)
Definition xlog.c:6444
void XLogFlush(XLogRecPtr record)
Definition xlog.c:2767
uint64 XLogRecPtr
Definition xlogdefs.h:21
XLogRecPtr XLogSimpleInsertInt64(RmgrId rmid, uint8 info, int64 value)
Definition xloginsert.c:544
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:479
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:369
void XLogBeginInsert(void)
Definition xloginsert.c:153
#define XLogRecGetInfo(decoder)
Definition xlogreader.h:409
#define XLogRecGetData(decoder)
Definition xlogreader.h:414
#define XLogRecGetXid(decoder)
Definition xlogreader.h:411
#define XLogRecHasAnyBlockRefs(decoder)
Definition xlogreader.h:416
bool InRecovery
Definition xlogutils.c:50