PostgreSQL Source Code git master
Loading...
Searching...
No Matches
multixact.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * multixact.c
4 * PostgreSQL multi-transaction-log manager
5 *
6 * The pg_multixact manager is a pg_xact-like manager that stores an array of
7 * MultiXactMember for each MultiXactId. It is a fundamental part of the
8 * shared-row-lock implementation. Each MultiXactMember is comprised of a
9 * TransactionId and a set of flag bits. The name is a bit historical:
10 * originally, a MultiXactId consisted of more than one TransactionId (except
11 * in rare corner cases), hence "multi". Nowadays, however, it's perfectly
12 * legitimate to have MultiXactIds that only include a single Xid.
13 *
14 * The meaning of the flag bits is opaque to this module, but they are mostly
15 * used in heapam.c to identify lock modes that each of the member transactions
16 * is holding on any given tuple. This module just contains support to store
17 * and retrieve the arrays.
18 *
19 * We use two SLRU areas, one for storing the offsets at which the data
20 * starts for each MultiXactId in the other one. This trick allows us to
21 * store variable length arrays of TransactionIds. (We could alternatively
22 * use one area containing counts and TransactionIds, with valid MultiXactId
23 * values pointing at slots containing counts; but that way seems less robust
24 * since it would get completely confused if someone inquired about a bogus
25 * MultiXactId that pointed to an intermediate slot containing an XID.)
26 *
27 * XLOG interactions: this module generates a record whenever a new OFFSETs or
28 * MEMBERs page is initialized to zeroes, as well as an
29 * XLOG_MULTIXACT_CREATE_ID record whenever a new MultiXactId is defined.
30 * This module ignores the WAL rule "write xlog before data," because it
31 * suffices that actions recording a MultiXactId in a heap xmax do follow that
32 * rule. The only way for the MXID to be referenced from any data page is for
33 * heap_lock_tuple() or heap_update() to have put it there, and each generates
34 * an XLOG record that must follow ours. The normal LSN interlock between the
35 * data page and that XLOG record will ensure that our XLOG record reaches
36 * disk first. If the SLRU members/offsets data reaches disk sooner than the
37 * XLOG records, we do not care; after recovery, no xmax will refer to it. On
38 * the flip side, to ensure that all referenced entries _do_ reach disk, this
39 * module's XLOG records completely rebuild the data entered since the last
40 * checkpoint. We flush and sync all dirty OFFSETs and MEMBERs pages to disk
41 * before each checkpoint is considered complete.
42 *
43 * Like clog.c, and unlike subtrans.c, we have to preserve state across
44 * crashes and ensure that MXID and offset numbering increases monotonically
45 * across a crash. We do this in the same way as it's done for transaction
46 * IDs: the WAL record is guaranteed to contain evidence of every MXID we
47 * could need to worry about, and we just make sure that at the end of
48 * replay, the next-MXID and next-offset counters are at least as large as
49 * anything we saw during replay.
50 *
51 * We are able to remove segments no longer necessary by carefully tracking
52 * each table's used values: during vacuum, any multixact older than a certain
53 * value is removed; the cutoff value is stored in pg_class. The minimum value
54 * across all tables in each database is stored in pg_database, and the global
55 * minimum across all databases is part of pg_control and is kept in shared
56 * memory. Whenever that minimum is advanced, the SLRUs are truncated.
57 *
58 * When new multixactid values are to be created, care is taken that the
59 * counter does not fall within the wraparound horizon considering the global
60 * minimum value.
61 *
62 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
63 * Portions Copyright (c) 1994, Regents of the University of California
64 *
65 * src/backend/access/transam/multixact.c
66 *
67 *-------------------------------------------------------------------------
68 */
69#include "postgres.h"
70
71#include "access/multixact.h"
73#include "access/slru.h"
74#include "access/twophase.h"
76#include "access/xlog.h"
77#include "access/xloginsert.h"
78#include "access/xlogutils.h"
79#include "miscadmin.h"
80#include "pg_trace.h"
81#include "pgstat.h"
83#include "storage/pmsignal.h"
84#include "storage/proc.h"
85#include "storage/procarray.h"
86#include "utils/guc_hooks.h"
88#include "utils/lsyscache.h"
89#include "utils/memutils.h"
90
91
92/*
93 * Thresholds used to keep members disk usage in check when multixids have a
94 * lot of members. When MULTIXACT_MEMBER_LOW_THRESHOLD is reached, vacuum
95 * starts freezing multixids more aggressively, even if the normal multixid
96 * age limits haven't been reached yet.
97 */
98#define MULTIXACT_MEMBER_LOW_THRESHOLD UINT64CONST(2000000000)
99#define MULTIXACT_MEMBER_HIGH_THRESHOLD UINT64CONST(4000000000)
100
101static inline MultiXactId
103{
104 return multi == MaxMultiXactId ? FirstMultiXactId : multi + 1;
105}
106
107static inline MultiXactId
109{
110 return multi == FirstMultiXactId ? MaxMultiXactId : multi - 1;
111}
112
113/*
114 * Links to shared-memory data structures for MultiXact control
115 */
118
119#define MultiXactOffsetCtl (&MultiXactOffsetCtlData)
120#define MultiXactMemberCtl (&MultiXactMemberCtlData)
121
122/*
123 * MultiXact state shared across all backends. All this state is protected
124 * by MultiXactGenLock. (We also use SLRU bank's lock of MultiXactOffset and
125 * MultiXactMember to guard accesses to the two sets of SLRU buffers. For
126 * concurrency's sake, we avoid holding more than one of these locks at a
127 * time.)
128 */
129typedef struct MultiXactStateData
130{
131 /* next-to-be-assigned MultiXactId */
133
134 /* next-to-be-assigned offset */
136
137 /* Have we completed multixact startup? */
139
140 /*
141 * Oldest multixact that is still potentially referenced by a relation.
142 * Anything older than this should not be consulted. These values are
143 * updated by vacuum.
144 */
147
148 /*
149 * Oldest multixact offset that is potentially referenced by a multixact
150 * referenced by a relation.
151 */
153
154 /* support for anti-wraparound measures */
159
160 /*
161 * Per-backend data starts here. We have two arrays stored in the area
162 * immediately following the MultiXactStateData struct. Each is indexed by
163 * ProcNumber.
164 *
165 * In both arrays, there is one slot for each normal backend
166 * (0..MaxBackends-1), followed by one slot for each of the
167 * max_prepared_xacts prepared transactions.
168 *
169 * OldestMemberMXactId[k] is the oldest MultiXactId each backend's current
170 * transaction(s) could possibly be a member of, or InvalidMultiXactId
171 * when the backend has no live transaction that could possibly be a
172 * member of a MultiXact. Each backend sets its entry to the current
173 * nextMXact counter just before first acquiring a shared lock in a given
174 * transaction, and clears it at transaction end. (This works because only
175 * during or after acquiring a shared lock could an XID possibly become a
176 * member of a MultiXact, and that MultiXact would have to be created
177 * during or after the lock acquisition.)
178 *
179 * OldestVisibleMXactId[k] is the oldest MultiXactId each backend's
180 * current transaction(s) think is potentially live, or InvalidMultiXactId
181 * when not in a transaction or not in a transaction that's paid any
182 * attention to MultiXacts yet. This is computed when first needed in a
183 * given transaction, and cleared at transaction end. We can compute it
184 * as the minimum of the valid OldestMemberMXactId[] entries at the time
185 * we compute it (using nextMXact if none are valid). Each backend is
186 * required not to attempt to access any SLRU data for MultiXactIds older
187 * than its own OldestVisibleMXactId[] setting; this is necessary because
188 * the relevant SLRU data can be concurrently truncated away.
189 *
190 * The oldest valid value among all of the OldestMemberMXactId[] and
191 * OldestVisibleMXactId[] entries is considered by vacuum as the earliest
192 * possible value still having any live member transaction -- OldestMxact.
193 * Any value older than that is typically removed from tuple headers, or
194 * "frozen" via being replaced with a new xmax. VACUUM can sometimes even
195 * remove an individual MultiXact xmax whose value is >= its OldestMxact
196 * cutoff, though typically only when no individual member XID is still
197 * running. See FreezeMultiXactId for full details.
198 *
199 * Whenever VACUUM advances relminmxid, then either its OldestMxact cutoff
200 * or the oldest extant Multi remaining in the table is used as the new
201 * pg_class.relminmxid value (whichever is earlier). The minimum of all
202 * relminmxid values in each database is stored in pg_database.datminmxid.
203 * In turn, the minimum of all of those values is stored in pg_control.
204 * This is used as the truncation point for pg_multixact when unneeded
205 * segments get removed by vac_truncate_clog() during vacuuming.
206 */
209
210/*
211 * Size of OldestMemberMXactId and OldestVisibleMXactId arrays.
212 */
213#define MaxOldestSlot (MaxBackends + max_prepared_xacts)
214
215/* Pointers to the state data in shared memory */
219
220
221/*
222 * Definitions for the backend-local MultiXactId cache.
223 *
224 * We use this cache to store known MultiXacts, so we don't need to go to
225 * SLRU areas every time.
226 *
227 * The cache lasts for the duration of a single transaction, the rationale
228 * for this being that most entries will contain our own TransactionId and
229 * so they will be uninteresting by the time our next transaction starts.
230 * (XXX not clear that this is correct --- other members of the MultiXact
231 * could hang around longer than we did. However, it's not clear what a
232 * better policy for flushing old cache entries would be.) FIXME actually
233 * this is plain wrong now that multixacts may contain update Xids.
234 *
235 * We allocate the cache entries in a memory context that is deleted at
236 * transaction end, so we don't need to do retail freeing of entries.
237 */
245
246#define MAX_CACHE_ENTRIES 256
249
/*
 * Optional debug logging. When MULTIXACT_DEBUG is defined, each
 * debug_elogN macro forwards its N arguments unchanged to elog();
 * otherwise every macro expands to nothing, so the call and its
 * arguments vanish from the build entirely.
 */
250#ifdef MULTIXACT_DEBUG
251#define debug_elog2(a,b) elog(a,b)
252#define debug_elog3(a,b,c) elog(a,b,c)
253#define debug_elog4(a,b,c,d) elog(a,b,c,d)
254#define debug_elog5(a,b,c,d,e) elog(a,b,c,d,e)
255#define debug_elog6(a,b,c,d,e,f) elog(a,b,c,d,e,f)
256#else
257#define debug_elog2(a,b)
258#define debug_elog3(a,b,c)
259#define debug_elog4(a,b,c,d)
260#define debug_elog5(a,b,c,d,e)
261#define debug_elog6(a,b,c,d,e,f)
262#endif
263
264/* internal MultiXactId management */
265static void MultiXactIdSetOldestVisible(void);
266static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
267 int nmembers, MultiXactMember *members);
268static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset);
269
270/* MultiXact cache management */
271static int mxactMemberComparator(const void *arg1, const void *arg2);
272static MultiXactId mXactCacheGetBySet(int nmembers, MultiXactMember *members);
273static int mXactCacheGetById(MultiXactId multi, MultiXactMember **members);
274static void mXactCachePut(MultiXactId multi, int nmembers,
275 MultiXactMember *members);
276
277/* management of SLRU infrastructure */
280static void ExtendMultiXactOffset(MultiXactId multi);
281static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
282static void SetOldestOffset(void);
283static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
284static void WriteMTruncateXlogRec(Oid oldestMultiDB,
287
288
289/*
290 * MultiXactIdCreate
291 * Construct a MultiXactId representing two TransactionIds.
292 *
293 * The two XIDs must be different, or be requesting different statuses.
294 *
295 * NB - we don't worry about our local MultiXactId cache here, because that
296 * is handled by the lower-level routines.
297 */
301{
303 MultiXactMember members[2];
304
307
309
310 /* MultiXactIdSetOldestMember() must have been called already. */
312
313 /*
314 * Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs
315 * are still running. In typical usage, xid2 will be our own XID and the
316 * caller just did a check on xid1, so it'd be wasted effort.
317 */
318
319 members[0].xid = xid1;
320 members[0].status = status1;
321 members[1].xid = xid2;
322 members[1].status = status2;
323
325
326 debug_elog3(DEBUG2, "Create: %s",
327 mxid_to_string(newMulti, 2, members));
328
329 return newMulti;
330}
331
332/*
333 * MultiXactIdExpand
334 * Add a TransactionId to a pre-existing MultiXactId.
335 *
336 * If the TransactionId is already a member of the passed MultiXactId with the
337 * same status, just return it as-is.
338 *
339 * Note that we do NOT actually modify the membership of a pre-existing
340 * MultiXactId; instead we create a new one. This is necessary to avoid
341 * a race condition against code trying to wait for one MultiXactId to finish;
342 * see notes in heapam.c.
343 *
344 * NB - we don't worry about our local MultiXactId cache here, because that
345 * is handled by the lower-level routines.
346 *
347 * Note: It is critical that MultiXactIds that come from an old cluster (i.e.
348 * one upgraded by pg_upgrade from a cluster older than this feature) are not
349 * passed in.
350 */
353{
355 MultiXactMember *members;
357 int nmembers;
358 int i;
359 int j;
360
363
364 /* MultiXactIdSetOldestMember() must have been called already. */
366
367 debug_elog5(DEBUG2, "Expand: received multi %u, xid %u status %s",
368 multi, xid, mxstatus_to_string(status));
369
370 /*
371 * Note: we don't allow for old multis here. The reason is that the only
372 * caller of this function does a check that the multixact is no longer
373 * running.
374 */
375 nmembers = GetMultiXactIdMembers(multi, &members, false, false);
376
377 if (nmembers < 0)
378 {
379 MultiXactMember member;
380
381 /*
382 * The MultiXactId is obsolete. This can only happen if all the
383 * MultiXactId members stop running between the caller checking and
384 * passing it to us. It would be better to return that fact to the
385 * caller, but it would complicate the API and it's unlikely to happen
386 * too often, so just deal with it by creating a singleton MultiXact.
387 */
388 member.xid = xid;
389 member.status = status;
391
392 debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u",
393 multi, newMulti);
394 return newMulti;
395 }
396
397 /*
398 * If the TransactionId is already a member of the MultiXactId with the
399 * same status, just return the existing MultiXactId.
400 */
401 for (i = 0; i < nmembers; i++)
402 {
403 if (TransactionIdEquals(members[i].xid, xid) &&
404 (members[i].status == status))
405 {
406 debug_elog4(DEBUG2, "Expand: %u is already a member of %u",
407 xid, multi);
408 pfree(members);
409 return multi;
410 }
411 }
412
413 /*
414 * Determine which of the members of the MultiXactId are still of
415 * interest. This is any running transaction, and also any transaction
416 * that grabbed something stronger than just a lock and was committed. (An
417 * update that aborted is of no interest here; and having more than one
418 * update Xid in a multixact would cause errors elsewhere.)
419 *
420 * Removing dead members is not just an optimization: freezing of tuples
421 * whose Xmax are multis depends on this behavior.
422 *
423 * Note we have the same race condition here as above: j could be 0 at the
424 * end of the loop.
425 */
426 newMembers = palloc_array(MultiXactMember, nmembers + 1);
427
428 for (i = 0, j = 0; i < nmembers; i++)
429 {
430 if (TransactionIdIsInProgress(members[i].xid) ||
431 (ISUPDATE_from_mxstatus(members[i].status) &&
432 TransactionIdDidCommit(members[i].xid)))
433 {
434 newMembers[j].xid = members[i].xid;
435 newMembers[j++].status = members[i].status;
436 }
437 }
438
439 newMembers[j].xid = xid;
440 newMembers[j++].status = status;
442
443 pfree(members);
445
446 debug_elog3(DEBUG2, "Expand: returning new multi %u", newMulti);
447
448 return newMulti;
449}
450
451/*
452 * MultiXactIdIsRunning
453 * Returns whether a MultiXactId is "running".
454 *
455 * We return true if at least one member of the given MultiXactId is still
456 * running. Note that a "false" result is certain not to change,
457 * because it is not legal to add members to an existing MultiXactId.
458 *
459 * Caller is expected to have verified that the multixact does not come from
460 * a pg_upgraded share-locked tuple.
461 */
462bool
464{
465 MultiXactMember *members;
466 int nmembers;
467 int i;
468
469 debug_elog3(DEBUG2, "IsRunning %u?", multi);
470
471 /*
472 * "false" here means we assume our callers have checked that the given
473 * multi cannot possibly come from a pg_upgraded database.
474 */
475 nmembers = GetMultiXactIdMembers(multi, &members, false, isLockOnly);
476
477 if (nmembers <= 0)
478 {
479 debug_elog2(DEBUG2, "IsRunning: no members");
480 return false;
481 }
482
483 /*
484 * Checking for myself is cheap compared to looking in shared memory;
485 * return true if any live subtransaction of the current top-level
486 * transaction is a member.
487 *
488 * This is not needed for correctness, it's just a fast path.
489 */
490 for (i = 0; i < nmembers; i++)
491 {
492 if (TransactionIdIsCurrentTransactionId(members[i].xid))
493 {
494 debug_elog3(DEBUG2, "IsRunning: I (%d) am running!", i);
495 pfree(members);
496 return true;
497 }
498 }
499
500 /*
501 * This could be made faster by having another entry point in procarray.c,
502 * walking the PGPROC array only once for all the members. But in most
503 * cases nmembers should be small enough that it doesn't much matter.
504 */
505 for (i = 0; i < nmembers; i++)
506 {
507 if (TransactionIdIsInProgress(members[i].xid))
508 {
509 debug_elog4(DEBUG2, "IsRunning: member %d (%u) is running",
510 i, members[i].xid);
511 pfree(members);
512 return true;
513 }
514 }
515
516 pfree(members);
517
518 debug_elog3(DEBUG2, "IsRunning: %u is not running", multi);
519
520 return false;
521}
522
523/*
524 * MultiXactIdSetOldestMember
525 * Save the oldest MultiXactId this transaction could be a member of.
526 *
527 * We set the OldestMemberMXactId for a given transaction the first time it's
528 * going to do some operation that might require a MultiXactId (tuple lock,
529 * update or delete). We need to do this even if we end up using a
530 * TransactionId instead of a MultiXactId, because there is a chance that
531 * another transaction would add our XID to a MultiXactId.
532 *
533 * The value to set is the next-to-be-assigned MultiXactId, so this is meant to
534 * be called just before doing any such possibly-MultiXactId-able operation.
535 */
536void
538{
540 {
541 MultiXactId nextMXact;
542
543 /*
544 * You might think we don't need to acquire a lock here, since
545 * fetching and storing of TransactionIds is probably atomic, but in
546 * fact we do: suppose we pick up nextMXact and then lose the CPU for
547 * a long time. Someone else could advance nextMXact, and then
548 * another someone else could compute an OldestVisibleMXactId that
549 * would be after the value we are going to store when we get control
550 * back. Which would be wrong.
551 *
552 * Note that a shared lock is sufficient, because it's enough to stop
553 * someone from advancing nextMXact; and nobody else could be trying
554 * to write to our OldestMember entry, only reading (and we assume
555 * storing it is atomic).
556 */
558
559 nextMXact = MultiXactState->nextMXact;
560
562
564
565 debug_elog4(DEBUG2, "MultiXact: setting OldestMember[%d] = %u",
566 MyProcNumber, nextMXact);
567 }
568}
569
570/*
571 * MultiXactIdSetOldestVisible
572 * Save the oldest MultiXactId this transaction considers possibly live.
573 *
574 * We set the OldestVisibleMXactId for a given transaction the first time
575 * it's going to inspect any MultiXactId. Once we have set this, we are
576 * guaranteed that SLRU data for MultiXactIds >= our own OldestVisibleMXactId
577 * won't be truncated away.
578 *
579 * The value to set is the oldest of nextMXact and all the valid per-backend
580 * OldestMemberMXactId[] entries. Because of the locking we do, we can be
581 * certain that no subsequent call to MultiXactIdSetOldestMember can set
582 * an OldestMemberMXactId[] entry older than what we compute here. Therefore
583 * there is no live transaction, now or later, that can be a member of any
584 * MultiXactId older than the OldestVisibleMXactId we compute here.
585 */
586static void
614
615/*
616 * ReadNextMultiXactId
617 * Return the next MultiXactId to be assigned, but don't allocate it
618 */
621{
623
624 /* XXX we could presumably do this without a lock. */
628
629 return mxid;
630}
631
632/*
633 * ReadMultiXactIdRange
634 * Get the range of IDs that may still be referenced by a relation.
635 */
636void
644
645
646/*
647 * MultiXactIdCreateFromMembers
648 * Make a new MultiXactId from the specified set of members
649 *
650 * Make XLOG, SLRU and cache entries for a new MultiXactId, recording the
651 * given TransactionIds as members. Returns the newly created MultiXactId.
652 *
653 * NB: the passed members[] array will be sorted in-place.
654 */
657{
658 MultiXactId multi;
659 MultiXactOffset offset;
661
662 debug_elog3(DEBUG2, "Create: %s",
663 mxid_to_string(InvalidMultiXactId, nmembers, members));
664
665 /*
666 * See if the same set of members already exists in our cache; if so, just
667 * re-use that MultiXactId. (Note: it might seem that looking in our
668 * cache is insufficient, and we ought to search disk to see if a
669 * duplicate definition already exists. But since we only ever create
670 * MultiXacts containing our own XID, in most cases any such MultiXacts
671 * were in fact created by us, and so will be in our cache. There are
672 * corner cases where someone else added us to a MultiXact without our
673 * knowledge, but it's not worth checking for.)
674 */
675 multi = mXactCacheGetBySet(nmembers, members);
676 if (MultiXactIdIsValid(multi))
677 {
678 debug_elog2(DEBUG2, "Create: in cache!");
679 return multi;
680 }
681
682 /* Verify that there is at most one update Xid among the given members. */
683 {
684 int i;
685 bool has_update = false;
686
687 for (i = 0; i < nmembers; i++)
688 {
689 if (ISUPDATE_from_mxstatus(members[i].status))
690 {
691 if (has_update)
692 elog(ERROR, "new multixact has more than one updating member: %s",
693 mxid_to_string(InvalidMultiXactId, nmembers, members));
694 has_update = true;
695 }
696 }
697 }
698
699 /* Load the injection point before entering the critical section */
700 INJECTION_POINT_LOAD("multixact-create-from-members");
701
702 /*
703 * Assign the MXID and offsets range to use, and make sure there is space
704 * in the OFFSETs and MEMBERs files. NB: this routine does
705 * START_CRIT_SECTION().
706 *
707 * Note: unlike MultiXactIdCreate and MultiXactIdExpand, we do not check
708 * that we've called MultiXactIdSetOldestMember here. This is because
709 * this routine is used in some places to create new MultiXactIds of which
710 * the current backend is not a member, notably during freezing of multis
711 * in vacuum. During vacuum, in particular, it would be unacceptable to
712 * keep OldestMulti set, in case it runs for a long time.
713 */
714 multi = GetNewMultiXactId(nmembers, &offset);
715
716 INJECTION_POINT_CACHED("multixact-create-from-members", NULL);
717
718 /* Make an XLOG entry describing the new MXID. */
719 xlrec.mid = multi;
720 xlrec.moff = offset;
721 xlrec.nmembers = nmembers;
722
723 /*
724 * XXX Note: there's a lot of padding space in MultiXactMember. We could
725 * find a more compact representation of this Xlog record -- perhaps all
726 * the status flags in one XLogRecData, then all the xids in another one?
727 * Not clear that it's worth the trouble though.
728 */
731 XLogRegisterData(members, nmembers * sizeof(MultiXactMember));
732
734
735 /* Now enter the information into the OFFSETs and MEMBERs logs */
736 RecordNewMultiXact(multi, offset, nmembers, members);
737
738 /* Done with critical section */
740
741 /* Store the new MultiXactId in the local cache, too */
742 mXactCachePut(multi, nmembers, members);
743
744 debug_elog2(DEBUG2, "Create: all done");
745
746 return multi;
747}
748
749/*
750 * RecordNewMultiXact
751 * Write info about a new multixact into the offsets and members files
752 *
753 * This is broken out of MultiXactIdCreateFromMembers so that xlog replay can
754 * use it.
755 */
756static void
758 int nmembers, MultiXactMember *members)
759{
760 int64 pageno;
762 int entryno;
763 int slotno;
767 int next_entryno;
770 LWLock *lock;
772
773 /* position of this multixid in the offsets SLRU area */
774 pageno = MultiXactIdToOffsetPage(multi);
776
777 /* position of the next multixid */
778 next = NextMultiXactId(multi);
781
782 /*
783 * Set the starting offset of this multixid's members.
784 *
785 * In the common case, it has already been set by the previous
786 * RecordNewMultiXact call, because this multixid was the next multixid of
787 * the previous one. But if multiple backends are generating multixids
788 * concurrently, we might race ahead and get called before the call for
789 * the previous multixid.
790 */
793
794 /*
795 * Note: we pass the MultiXactId to SimpleLruReadPage as the "transaction"
796 * to complain about if there's any I/O error. This is kinda bogus, but
797 * since the errors will always give the full pathname, it should be clear
798 * enough that a MultiXactId is really involved. Perhaps someday we'll
799 * take the trouble to generalize the slru.c error reporting code.
800 */
801 slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
802 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
803 offptr += entryno;
804
805 if (*offptr != offset)
806 {
807 /* should already be set to the correct value, or not at all */
808 Assert(*offptr == 0);
809 *offptr = offset;
810 MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
811 }
812
813 /*
814 * Set the next multixid's offset to the end of this multixid's members.
815 */
816 if (next_pageno == pageno)
817 {
818 next_offptr = offptr + 1;
819 }
820 else
821 {
822 /* must be the first entry on the page */
824
825 /* Swap the lock for a lock on the next page */
826 LWLockRelease(lock);
829
831 next_offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
833 }
834
835 /* Like in GetNewMultiXactId(), skip over offset 0 */
836 next_offset = offset + nmembers;
837 if (next_offset == 0)
838 next_offset = 1;
839 if (*next_offptr != next_offset)
840 {
841 /* should already be set to the correct value, or not at all */
842 Assert(*next_offptr == 0);
844 MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
845 }
846
847 /* Release MultiXactOffset SLRU lock. */
848 LWLockRelease(lock);
849
850 prev_pageno = -1;
851
852 for (int i = 0; i < nmembers; i++, offset++)
853 {
857 int bshift;
858 int flagsoff;
859 int memberoff;
860
861 Assert(members[i].status <= MultiXactStatusUpdate);
862
863 pageno = MXOffsetToMemberPage(offset);
867
868 if (pageno != prev_pageno)
869 {
870 /*
871 * The MultiXactMember SLRU page has changed, so check whether the
872 * new page falls into a different SLRU bank; if so, release the old
873 * bank's lock and acquire the lock on the new bank.
874 */
876 if (lock != prevlock)
877 {
878 if (prevlock != NULL)
880
882 prevlock = lock;
883 }
884 slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
885 prev_pageno = pageno;
886 }
887
889 (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
890
891 *memberptr = members[i].xid;
892
893 flagsptr = (uint32 *)
894 (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
895
897 flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
898 flagsval |= (members[i].status << bshift);
900
901 MultiXactMemberCtl->shared->page_dirty[slotno] = true;
902 }
903
904 if (prevlock != NULL)
906}
907
908/*
909 * GetNewMultiXactId
910 * Get the next MultiXactId.
911 *
912 * Also, reserve the needed amount of space in the "members" area. The
913 * starting offset of the reserved space is returned in *offset.
914 *
915 * This may generate XLOG records for expansion of the offsets and/or members
916 * files. Unfortunately, we have to do that while holding MultiXactGenLock
917 * to avoid race conditions --- the XLOG record for zeroing a page must appear
918 * before any backend can possibly try to store data in that page!
919 *
920 * We start a critical section before advancing the shared counters. The
921 * caller must end the critical section after writing SLRU data.
922 */
923static MultiXactId
924GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
925{
926 MultiXactId result;
927 MultiXactOffset nextOffset;
928
929 debug_elog3(DEBUG2, "GetNew: for %d xids", nmembers);
930
931 /* safety check, we should never get this far in a HS standby */
932 if (RecoveryInProgress())
933 elog(ERROR, "cannot assign MultiXactIds during recovery");
934
936
937 /* Assign the MXID */
938 result = MultiXactState->nextMXact;
939
940 /*----------
941 * Check to see if it's safe to assign another MultiXactId. This protects
942 * against catastrophic data loss due to multixact wraparound. The basic
943 * rules are:
944 *
945 * If we're past multiVacLimit or the safe threshold for member storage
946 * space, or we don't know what the safe threshold for member storage is,
947 * start trying to force autovacuum cycles.
948 * If we're past multiWarnLimit, start issuing warnings.
949 * If we're past multiStopLimit, refuse to create new MultiXactIds.
950 *
951 * Note these are pretty much the same protections as in GetNewTransactionId.
952 *----------
953 */
955 {
956 /*
957 * For safety's sake, we release MultiXactGenLock while sending
958 * signals, warnings, etc. This is not so much because we care about
959 * preserving concurrency in this situation, as to avoid any
960 * possibility of deadlock while doing get_database_name(). First,
961 * copy all the shared values we'll need in this path.
962 */
963 MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit;
964 MultiXactId multiStopLimit = MultiXactState->multiStopLimit;
965 MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit;
967
969
970 if (IsUnderPostmaster &&
971 !MultiXactIdPrecedes(result, multiStopLimit))
972 {
974
975 /*
976 * Immediately kick autovacuum into action as we're already in
977 * ERROR territory.
978 */
980
981 /* complain even if that DB has disappeared */
982 if (oldest_datname)
985 errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database \"%s\"",
987 errhint("Execute a database-wide VACUUM in that database.\n"
988 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
989 else
992 errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database with OID %u",
994 errhint("Execute a database-wide VACUUM in that database.\n"
995 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
996 }
997
998 /*
999 * To avoid swamping the postmaster with signals, we issue the autovac
1000 * request only once per 64K multis generated. This still gives
1001 * plenty of chances before we get into real trouble.
1002 */
1003 if (IsUnderPostmaster && ((result % 65536) == 0 || result == FirstMultiXactId))
1005
1006 if (!MultiXactIdPrecedes(result, multiWarnLimit))
1007 {
1009
1010 /* complain even if that DB has disappeared */
1011 if (oldest_datname)
1013 (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
1014 "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
1015 multiWrapLimit - result,
1017 multiWrapLimit - result),
1018 errhint("Execute a database-wide VACUUM in that database.\n"
1019 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1020 else
1022 (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
1023 "database with OID %u must be vacuumed before %u more MultiXactIds are used",
1024 multiWrapLimit - result,
1026 multiWrapLimit - result),
1027 errhint("Execute a database-wide VACUUM in that database.\n"
1028 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1029 }
1030
1031 /* Re-acquire lock and start over */
1033 result = MultiXactState->nextMXact;
1034 }
1035
1036 /*
1037 * Make sure there is room for the next MXID in the file. Assigning this
1038 * MXID sets the next MXID's offset already.
1039 */
1041
1042 /*
1043 * Reserve the members space, similarly to above.
1044 */
1045 nextOffset = MultiXactState->nextOffset;
1046
1047 /*
1048 * Offsets are 64-bit integers and will never wrap around. Firstly, it
1049 * would take an unrealistic amount of time and resources to consume 2^64
1050 * offsets. Secondly, multixid creation is WAL-logged, so you would run
1051 * out of LSNs before reaching offset wraparound. Nevertheless, check for
1052 * wraparound as a sanity check.
1053 */
1054 if (nextOffset + nmembers < nextOffset)
1055 ereport(ERROR,
1057 errmsg("MultiXact members would wrap around")));
1058 *offset = nextOffset;
1059
1060 ExtendMultiXactMember(nextOffset, nmembers);
1061
1062 /*
1063 * Critical section from here until caller has written the data into the
1064 * just-reserved SLRU space; we don't want to error out with a partly
1065 * written MultiXact structure. (In particular, failing to write our
1066 * start offset after advancing nextMXact would effectively corrupt the
1067 * previous MultiXact.)
1068 */
1070
1071 /*
1072 * Advance counters. As in GetNewTransactionId(), this must not happen
1073 * until after file extension has succeeded!
1074 */
1076 MultiXactState->nextOffset += nmembers;
1077
1079
1080 debug_elog4(DEBUG2, "GetNew: returning %u offset %" PRIu64,
1081 result, *offset);
1082 return result;
1083}
1084
1085/*
1086 * GetMultiXactIdMembers
1087 * Return the set of MultiXactMembers that make up a MultiXactId
1088 *
1089 * Return value is the number of members found, or -1 if there are none,
1090 * and *members is set to a newly palloc'ed array of members. It's the
1091 * caller's responsibility to free it when done with it.
1092 *
1093 * from_pgupgrade must be passed as true if and only if the multixact
1094 * corresponds to a value from a tuple that was locked in a 9.2-or-older
1095 * installation and later pg_upgrade'd (that is, the infomask is
1096 * HEAP_LOCKED_UPGRADED). In this case, we know for certain that no members
1097 * can still be running, so we return -1 just like for an empty multixact
1098 * without any further checking. It would be wrong to try to resolve such a
1099 * multixact: either the multixact is within the current valid multixact
1100 * range, in which case the returned result would be bogus, or outside that
1101 * range, in which case an error would be raised.
1102 *
1103 * In all other cases, the passed multixact must be within the known valid
1104 * range, that is, greater than or equal to oldestMultiXactId, and less than
1105 * nextMXact. Otherwise, an error is raised.
1106 *
1107 * isLockOnly must be set to true if caller is certain that the given multi
1108 * is used only to lock tuples; can be false without loss of correctness,
1109 * but passing a true means we can return quickly without checking for
1110 * old updates.
1111 */
1112int
1114 bool from_pgupgrade, bool isLockOnly)
1115{
1116 int64 pageno;
1118 int entryno;
1119 int slotno;
1121 MultiXactOffset offset;
1123 int length;
1125 MultiXactId nextMXact;
1126 MultiXactMember *ptr;
1127 LWLock *lock;
1128
1129 debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
1130
1131 if (!MultiXactIdIsValid(multi) || from_pgupgrade)
1132 {
1133 *members = NULL;
1134 return -1;
1135 }
1136
1137 /* See if the MultiXactId is in the local cache */
1138 length = mXactCacheGetById(multi, members);
1139 if (length >= 0)
1140 {
1141 debug_elog3(DEBUG2, "GetMembers: found %s in the cache",
1142 mxid_to_string(multi, length, *members));
1143 return length;
1144 }
1145
1146 /* Set our OldestVisibleMXactId[] entry if we didn't already */
1148
1149 /*
1150 * If we know the multi is used only for locking and not for updates, then
1151 * we can skip checking if the value is older than our oldest visible
1152 * multi. It cannot possibly still be running.
1153 */
1154 if (isLockOnly &&
1156 {
1157 debug_elog2(DEBUG2, "GetMembers: a locker-only multi is too old");
1158 *members = NULL;
1159 return -1;
1160 }
1161
1162 /*
1163 * We check known limits on MultiXact before resorting to the SLRU area.
1164 *
1165 * An ID older than MultiXactState->oldestMultiXactId cannot possibly be
1166 * useful; it has already been removed, or will be removed shortly, by
1167 * truncation. If one is passed, an error is raised.
1168 *
1169 * Also, an ID >= nextMXact shouldn't ever be seen here; if it is seen, it
1170 * implies undetected ID wraparound has occurred. This raises a hard
1171 * error.
1172 *
1173 * Shared lock is enough here since we aren't modifying any global state.
1174 * Acquire it just long enough to grab the current counter values.
1175 */
1177
1179 nextMXact = MultiXactState->nextMXact;
1180
1182
1183 if (MultiXactIdPrecedes(multi, oldestMXact))
1184 ereport(ERROR,
1186 errmsg("MultiXactId %u does no longer exist -- apparent wraparound",
1187 multi)));
1188
1189 if (!MultiXactIdPrecedes(multi, nextMXact))
1190 ereport(ERROR,
1192 errmsg("MultiXactId %u has not been created yet -- apparent wraparound",
1193 multi)));
1194
1195 /*
1196 * Find out the offset at which we need to start reading MultiXactMembers
1197 * and the number of members in the multixact. We determine the latter as
1198 * the difference between this multixact's starting offset and the next
1199 * one's.
1200 */
1201 pageno = MultiXactIdToOffsetPage(multi);
1203
1204 /* Acquire the bank lock for the page we need. */
1207
1208 /* read this multi's offset */
1209 slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
1210 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1211 offptr += entryno;
1212 offset = *offptr;
1213
1214 if (offset == 0)
1215 ereport(ERROR,
1217 errmsg("MultiXact %u has invalid offset", multi)));
1218
1219 /* read next multi's offset */
1220 {
1222
1223 /* handle wraparound if needed */
1224 tmpMXact = NextMultiXactId(multi);
1225
1226 prev_pageno = pageno;
1227
1230
1231 if (pageno != prev_pageno)
1232 {
1233 LWLock *newlock;
1234
1235 /*
1236 * Since we're going to access a different SLRU page, if this page
1237 * falls under a different bank, release the old bank's lock and
1238 * acquire the lock of the new bank.
1239 */
1241 if (newlock != lock)
1242 {
1243 LWLockRelease(lock);
1245 lock = newlock;
1246 }
1248 }
1249
1250 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1251 offptr += entryno;
1253 }
1254
1255 LWLockRelease(lock);
1256 lock = NULL;
1257
1258 /* Sanity check the next offset */
1259 if (nextMXOffset == 0)
1260 ereport(ERROR,
1262 errmsg("MultiXact %u has invalid next offset", multi)));
1263 if (nextMXOffset == offset)
1264 ereport(ERROR,
1266 errmsg("MultiXact %u with offset (%" PRIu64 ") has zero members",
1267 multi, offset)));
1268 if (nextMXOffset < offset)
1269 ereport(ERROR,
1271 errmsg("MultiXact %u has offset (%" PRIu64 ") greater than its next offset (%" PRIu64 ")",
1272 multi, offset, nextMXOffset)));
1273 if (nextMXOffset - offset > INT32_MAX)
1274 ereport(ERROR,
1276 errmsg("MultiXact %u has too many members (%" PRIu64 ")",
1277 multi, nextMXOffset - offset)));
1278 length = nextMXOffset - offset;
1279
1280 /* read the members */
1281 ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
1282 prev_pageno = -1;
1283 for (int i = 0; i < length; i++, offset++)
1284 {
1287 int flagsoff;
1288 int bshift;
1289 int memberoff;
1290
1291 pageno = MXOffsetToMemberPage(offset);
1293
1294 if (pageno != prev_pageno)
1295 {
1296 LWLock *newlock;
1297
1298 /*
1299 * Since we're going to access a different SLRU page, if this page
1300 * falls under a different bank, release the old bank's lock and
1301 * acquire the lock of the new bank.
1302 */
1304 if (newlock != lock)
1305 {
1306 if (lock)
1307 LWLockRelease(lock);
1309 lock = newlock;
1310 }
1311
1312 slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
1313 prev_pageno = pageno;
1314 }
1315
1317 (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
1319
1322 flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
1323
1324 ptr[i].xid = *xactptr;
1326 }
1327
1328 LWLockRelease(lock);
1329
1330 /*
1331 * Copy the result into the local cache.
1332 */
1333 mXactCachePut(multi, length, ptr);
1334
1335 debug_elog3(DEBUG2, "GetMembers: no cache for %s",
1336 mxid_to_string(multi, length, ptr));
1337 *members = ptr;
1338 return length;
1339}
1340
1341/*
1342 * mxactMemberComparator
1343 * qsort comparison function for MultiXactMember
1344 *
1345 * We can't use wraparound comparison for XIDs because that does not respect
1346 * the triangle inequality! Any old sort order will do.
1347 */
1348static int
1349mxactMemberComparator(const void *arg1, const void *arg2)
1350{
	/*
	 * NOTE(review): the declarations of member1/member2 are not visible in
	 * this listing; presumably they are the MultiXactMember values that arg1
	 * and arg2 point to -- confirm against the full source.
	 */
1353
	/* Primary sort key: xid, compared with plain > and < (no wraparound logic) */
1354 if (member1.xid > member2.xid)
1355 return 1;
1356 if (member1.xid < member2.xid)
1357 return -1;
	/* Equal xids: break the tie on status */
1358 if (member1.status > member2.status)
1359 return 1;
1360 if (member1.status < member2.status)
1361 return -1;
	/* Same xid and same status */
1362 return 0;
1363}
1364
1365/*
1366 * mXactCacheGetBySet
1367 * returns a MultiXactId from the cache based on the set of
1368 * TransactionIds that compose it, or InvalidMultiXactId if
1369 * none matches.
1370 *
1371 * This is helpful, for example, if two transactions want to lock a huge
1372 * table. By using the cache, the second will use the same MultiXactId
1373 * for the majority of tuples, thus keeping MultiXactId usage low (saving
1374 * both I/O and wraparound issues).
1375 *
1376 * NB: the passed members array will be sorted in-place.
1377 */
1378static MultiXactId
1380{
1381 dlist_iter iter;
1382
1383 debug_elog3(DEBUG2, "CacheGet: looking for %s",
1384 mxid_to_string(InvalidMultiXactId, nmembers, members));
1385
1386 /* sort the array so comparison is easy */
1387 qsort(members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
1388
1390 {
1392 iter.cur);
1393
1394 if (entry->nmembers != nmembers)
1395 continue;
1396
1397 /*
1398 * We assume the cache entries are sorted, and that the unused bits in
1399 * "status" are zeroed.
1400 */
1401 if (memcmp(members, entry->members, nmembers * sizeof(MultiXactMember)) == 0)
1402 {
1403 debug_elog3(DEBUG2, "CacheGet: found %u", entry->multi);
1405 return entry->multi;
1406 }
1407 }
1408
1409 debug_elog2(DEBUG2, "CacheGet: not found :-(");
1410 return InvalidMultiXactId;
1411}
1412
1413/*
1414 * mXactCacheGetById
1415 * returns the composing MultiXactMember set from the cache for a
1416 * given MultiXactId, if present.
1417 *
1418 * If successful, *members is set to the address of a palloc'd copy of the
1419 * MultiXactMember set. Return value is number of members, or -1 on failure.
1420 */
1421static int
1423{
1424 dlist_iter iter;
1425
1426 debug_elog3(DEBUG2, "CacheGet: looking for %u", multi);
1427
1429 {
1431 iter.cur);
1432
1433 if (entry->multi == multi)
1434 {
1435 MultiXactMember *ptr;
1436 Size size;
1437
1438 size = sizeof(MultiXactMember) * entry->nmembers;
1439 ptr = (MultiXactMember *) palloc(size);
1440
1441 memcpy(ptr, entry->members, size);
1442
1443 debug_elog3(DEBUG2, "CacheGet: found %s",
1444 mxid_to_string(multi,
1445 entry->nmembers,
1446 entry->members));
1447
1448 /*
1449 * Note we modify the list while not using a modifiable iterator.
1450 * This is acceptable only because we exit the iteration
1451 * immediately afterwards.
1452 */
1454
1455 *members = ptr;
1456 return entry->nmembers;
1457 }
1458 }
1459
1460 debug_elog2(DEBUG2, "CacheGet: not found");
1461 return -1;
1462}
1463
1464/*
1465 * mXactCachePut
1466 * Add a new MultiXactId and its composing set into the local cache.
1467 */
1468static void
1469mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members)
1470{
1471 mXactCacheEnt *entry;
1472
1473 debug_elog3(DEBUG2, "CachePut: storing %s",
1474 mxid_to_string(multi, nmembers, members));
1475
1476 if (MXactContext == NULL)
1477 {
1478 /* The cache only lives as long as the current transaction */
1479 debug_elog2(DEBUG2, "CachePut: initializing memory context");
1481 "MultiXact cache context",
1483 }
1484
1485 entry = (mXactCacheEnt *)
1487 offsetof(mXactCacheEnt, members) +
1488 nmembers * sizeof(MultiXactMember));
1489
1490 entry->multi = multi;
1491 entry->nmembers = nmembers;
1492 memcpy(entry->members, members, nmembers * sizeof(MultiXactMember));
1493
1494 /* mXactCacheGetBySet assumes the entries are sorted, so sort them */
1495 qsort(entry->members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
1496
1497 dclist_push_head(&MXactCache, &entry->node);
1499 {
1500 dlist_node *node;
1501
1504
1505 entry = dclist_container(mXactCacheEnt, node, node);
1506 debug_elog3(DEBUG2, "CachePut: pruning cached multi %u",
1507 entry->multi);
1508
1509 pfree(entry);
1510 }
1511}
1512
/*
 * Map a multixact member status value to a short, constant label.
 *
 * Each recognized status yields a string literal ("keysh", "sh",
 * "fornokeyupd", "forupd", "nokeyupd" or "upd"), so the caller must not
 * free or modify the result.  Any other value is reported via elog(ERROR).
 *
 * NOTE(review): the signature line and the case labels are not visible in
 * this listing; which status constant maps to which label must be confirmed
 * against the full source.
 */
1513char *
1515{
1516 switch (status)
1517 {
1519 return "keysh";
1521 return "sh";
1523 return "fornokeyupd";
1525 return "forupd";
1527 return "nokeyupd";
1529 return "upd";
1530 default:
1531 elog(ERROR, "unrecognized multixact status %d", status);
	/* presumably only present to keep the compiler quiet -- TODO confirm */
1532 return "";
1533 }
1534}
1535
/*
 * mxid_to_string
 *		Format a MultiXactId and its member list as text.
 *
 * The text begins with the multi and the member count, followed by each
 * member's xid and status label: "multi nmembers[xid (status), ...".  The
 * callers visible in this file pass the result to debug_elog output.
 *
 * The returned pointer is the value of a function-local static; whatever an
 * earlier call left there is pfree'd on entry.  Hence the result of one call
 * becomes invalid as soon as the function is called again, and callers must
 * not free it themselves.
 *
 * members[0] is read unconditionally, so nmembers must be at least 1.
 *
 * NOTE(review): the lines that declare and initialize "buf" and that assign
 * "str" are not visible in this listing; confirm against the full source.
 */
1536char *
1537mxid_to_string(MultiXactId multi, int nmembers, MultiXactMember *members)
1538{
1539 static char *str = NULL;
1541 int i;
1542
	/* Release the string handed out by the previous call, if any */
1543 if (str != NULL)
1544 pfree(str);
1545
1547
	/* First member is emitted together with the "multi nmembers[" prefix */
1548 appendStringInfo(&buf, "%u %d[%u (%s)", multi, nmembers, members[0].xid,
1549 mxstatus_to_string(members[0].status));
1550
	/* Remaining members, each preceded by a comma separator */
1551 for (i = 1; i < nmembers; i++)
1552 appendStringInfo(&buf, ", %u (%s)", members[i].xid,
1553 mxstatus_to_string(members[i].status));
1554
1557 pfree(buf.data);
1558 return str;
1559}
1560
1561/*
1562 * AtEOXact_MultiXact
1563 * Handle transaction end for MultiXact
1564 *
1565 * This is called at top transaction commit or abort (we don't care which).
1566 */
1567void
1569{
1570 /*
1571 * Reset our OldestMemberMXactId and OldestVisibleMXactId values, both of
1572 * which should only be valid while within a transaction.
1573 *
1574 * We assume that storing a MultiXactId is atomic and so we need not take
1575 * MultiXactGenLock to do this.
1576 */
1579
1580 /*
1581 * Discard the local MultiXactId cache. Since MXactContext was created as
1582 * a child of TopTransactionContext, we needn't delete it explicitly.
1583 */
1586}
1587
1588/*
1589 * AtPrepare_MultiXact
1590 * Save multixact state at 2PC transaction prepare
1591 *
1592 * In this phase, we only store our OldestMemberMXactId value in the two-phase
1593 * state file.
1594 */
1595void
1604
1605/*
1606 * PostPrepare_MultiXact
1607 * Clean up after successful PREPARE TRANSACTION
1608 */
1609void
1611{
1613
1614 /*
1615 * Transfer our OldestMemberMXactId value to the slot reserved for the
1616 * prepared transaction.
1617 */
1620 {
1622
1623 /*
1624 * Even though storing MultiXactId is atomic, acquire lock to make
1625 * sure others see both changes, not just the reset of the slot of the
1626 * current backend. Using a volatile pointer might suffice, but this
1627 * isn't a hot spot.
1628 */
1630
1633
1635 }
1636
1637 /*
1638 * We don't need to transfer OldestVisibleMXactId value, because the
1639 * transaction is not going to be looking at any more multixacts once it's
1640 * prepared.
1641 *
1642 * We assume that storing a MultiXactId is atomic and so we need not take
1643 * MultiXactGenLock to do this.
1644 */
1646
1647 /*
1648 * Discard the local MultiXactId cache like in AtEOXact_MultiXact.
1649 */
1652}
1653
1654/*
1655 * multixact_twophase_recover
1656 * Recover the state of a prepared transaction at startup
1657 */
1658void
1660 void *recdata, uint32 len)
1661{
1664
1665 /*
1666 * Get the oldest member MultiXactId from the state file record, and set it in the
1667 * OldestMemberMXactId slot reserved for this prepared transaction.
1668 */
1669 Assert(len == sizeof(MultiXactId));
1671
1673}
1674
1675/*
1676 * multixact_twophase_postcommit
1677 * Similar to AtEOXact_MultiXact but for COMMIT PREPARED
1678 */
1679void
1689
1690/*
1691 * multixact_twophase_postabort
1692 * This is actually just the same as the COMMIT case.
1693 */
1694void
1700
1701/*
1702 * Initialization of shared memory for MultiXact. We use two SLRU areas,
1703 * thus double memory. Also, reserve space for the shared MultiXactState
1704 * struct and the per-backend MultiXactId arrays (two of those, too).
1705 */
1706Size
1708{
1709 Size size;
1710
1711 /* We need 2*MaxOldestSlot perBackendXactIds[] entries */
1712#define SHARED_MULTIXACT_STATE_SIZE \
1713 add_size(offsetof(MultiXactStateData, perBackendXactIds), \
1714 mul_size(sizeof(MultiXactId) * 2, MaxOldestSlot))
1715
1719
1720 return size;
1721}
1722
1723void
1725{
1726 bool found;
1727
1728 debug_elog2(DEBUG2, "Shared Memory Init for MultiXact");
1729
1732
1734 "multixact_offset", multixact_offset_buffers, 0,
1735 "pg_multixact/offsets", LWTRANCHE_MULTIXACTOFFSET_BUFFER,
1738 false);
1741 "multixact_member", multixact_member_buffers, 0,
1742 "pg_multixact/members", LWTRANCHE_MULTIXACTMEMBER_BUFFER,
1745 true);
1746 /* doesn't call SimpleLruTruncate() or meet criteria for unit tests */
1747
1748 /* Initialize our shared state struct */
1749 MultiXactState = ShmemInitStruct("Shared MultiXact State",
1751 &found);
1752 if (!IsUnderPostmaster)
1753 {
1754 Assert(!found);
1755
1756 /* Make sure we zero out the per-backend state */
1758 }
1759 else
1760 Assert(found);
1761
1762 /*
1763 * Set up array pointers.
1764 */
1767}
1768
1769/*
1770 * GUC check_hook for multixact_offset_buffers
1771 */
1772bool
1774{
1775 return check_slru_buffers("multixact_offset_buffers", newval);
1776}
1777
1778/*
1779 * GUC check_hook for multixact_member_buffers
1780 */
1781bool
1783{
1784 return check_slru_buffers("multixact_member_buffers", newval);
1785}
1786
1787/*
1788 * This func must be called ONCE on system install. It creates the initial
1789 * MultiXact segments. (The MultiXacts directories are assumed to have been
1790 * created by initdb, and MultiXactShmemInit must have been called already.)
1791 */
1792void
1794{
1795 /* Zero the initial pages and flush them to disk */
1798}
1799
1800/*
1801 * This must be called ONCE during postmaster or standalone-backend startup.
1802 *
1803 * StartupXLOG has already established nextMXact/nextOffset by calling
1804 * MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact, and the oldestMulti
1805 * info from pg_control and/or MultiXactAdvanceOldest, but we haven't yet
1806 * replayed WAL.
1807 */
1808void
1810{
1813 int64 pageno;
1814
1815 /*
1816 * Initialize offset's idea of the latest page number.
1817 */
1818 pageno = MultiXactIdToOffsetPage(multi);
1819 pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
1820 pageno);
1821
1822 /*
1823 * Initialize member's idea of the latest page number.
1824 */
1825 pageno = MXOffsetToMemberPage(offset);
1826 pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
1827 pageno);
1828}
1829
1830/*
1831 * This must be called ONCE at the end of startup/recovery.
1832 */
1833void
1835{
1836 MultiXactId nextMXact;
1837 MultiXactOffset offset;
1840 int64 pageno;
1841 int entryno;
1842 int flagsoff;
1843
1845 nextMXact = MultiXactState->nextMXact;
1846 offset = MultiXactState->nextOffset;
1850
1851 /* Clean up offsets state */
1852
1853 /*
1854 * (Re-)Initialize our idea of the latest page number for offsets.
1855 */
1856 pageno = MultiXactIdToOffsetPage(nextMXact);
1857 pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
1858 pageno);
1859
1860 /*
1861 * Set the offset of nextMXact on the offsets page. This is normally done
1862 * in RecordNewMultiXact() of the previous multixact, but let's be sure
1863 * the next page exists, if the nextMXact was reset with pg_resetwal for
1864 * example.
1865 *
1866 * Zero out the remainder of the page. See notes in TrimCLOG() for
1867 * background. Unlike CLOG, some WAL record covers every pg_multixact
1868 * SLRU mutation. Since, also unlike CLOG, we ignore the WAL rule "write
1869 * xlog before data," nextMXact successors may carry obsolete, nonzero
1870 * offset values.
1871 */
1872 entryno = MultiXactIdToOffsetEntry(nextMXact);
1873 {
1874 int slotno;
1877
1879 if (entryno == 0 || nextMXact == FirstMultiXactId)
1881 else
1882 slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, nextMXact);
1883 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1884 offptr += entryno;
1885
1886 *offptr = offset;
1887 if (entryno != 0 && (entryno + 1) * sizeof(MultiXactOffset) != BLCKSZ)
1888 MemSet(offptr + 1, 0, BLCKSZ - (entryno + 1) * sizeof(MultiXactOffset));
1889
1890 MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
1891 LWLockRelease(lock);
1892 }
1893
1894 /*
1895 * And the same for members.
1896 *
1897 * (Re-)Initialize our idea of the latest page number for members.
1898 */
1899 pageno = MXOffsetToMemberPage(offset);
1900 pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
1901 pageno);
1902
1903 /*
1904 * Zero out the remainder of the current members page. See notes in
1905 * TrimCLOG() for motivation.
1906 */
1908 if (flagsoff != 0)
1909 {
1910 int slotno;
1912 int memberoff;
1914
1917 slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, offset);
1918 xidptr = (TransactionId *)
1919 (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
1920
1922
1923 /*
1924 * Note: we don't need to zero out the flag bits in the remaining
1925 * members of the current group, because they are always reset before
1926 * writing.
1927 */
1928
1929 MultiXactMemberCtl->shared->page_dirty[slotno] = true;
1930 LWLockRelease(lock);
1931 }
1932
1933 /* signal that we're officially up */
1937
1938 /* Now compute how far away the next multixid wraparound is. */
1940}
1941
1942/*
1943 * Get the MultiXact data to save in a checkpoint record
1944 */
1945void
1947 MultiXactId *nextMulti,
1948 MultiXactOffset *nextMultiOffset,
1949 MultiXactId *oldestMulti,
1950 Oid *oldestMultiDB)
1951{
1953 *nextMulti = MultiXactState->nextMXact;
1954 *nextMultiOffset = MultiXactState->nextOffset;
1955 *oldestMulti = MultiXactState->oldestMultiXactId;
1956 *oldestMultiDB = MultiXactState->oldestMultiXactDB;
1958
1960 "MultiXact: checkpoint is nextMulti %u, nextOffset %" PRIu64 ", oldestMulti %u in DB %u",
1961 *nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB);
1962}
1963
1964/*
1965 * Perform a checkpoint --- either during shutdown, or on-the-fly
1966 */
1967void
1969{
1971
1972 /*
1973 * Write dirty MultiXact pages to disk. This may result in sync requests
1974 * queued for later handling by ProcessSyncRequests(), as part of the
1975 * checkpoint.
1976 */
1979
1981}
1982
1983/*
1984 * Set the next-to-be-assigned MultiXactId and offset
1985 *
1986 * This is used when we can determine the correct next ID/offset exactly
1987 * from a checkpoint record. Although this is only called during bootstrap
1988 * and XLog replay, we take the lock in case any hot-standby backends are
1989 * examining the values.
1990 */
1991void
1993 MultiXactOffset nextMultiOffset)
1994{
1995 Assert(MultiXactIdIsValid(nextMulti));
1996 debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %" PRIu64,
1997 nextMulti, nextMultiOffset);
1998
2000 MultiXactState->nextMXact = nextMulti;
2001 MultiXactState->nextOffset = nextMultiOffset;
2003}
2004
2005/*
2006 * Determine the last safe MultiXactId to allocate given the currently oldest
2007 * datminmxid (ie, the oldest MultiXactId that might exist in any database
2008 * of our cluster), and the OID of the (or a) database with that value.
2009 *
2010 * This also updates MultiXactState->oldestOffset, by looking up the offset of
2011 * MultiXactState->oldestMultiXactId.
2012 */
2013void
2015{
2016 MultiXactId multiVacLimit;
2017 MultiXactId multiWarnLimit;
2018 MultiXactId multiStopLimit;
2019 MultiXactId multiWrapLimit;
2021
2023
2024 /*
2025 * We pretend that a wrap will happen halfway through the multixact ID
2026 * space, but that's not really true, because multixacts wrap differently
2027 * from transaction IDs.
2028 */
2029 multiWrapLimit = oldest_datminmxid + (MaxMultiXactId >> 1);
2030 if (multiWrapLimit < FirstMultiXactId)
2031 multiWrapLimit += FirstMultiXactId;
2032
2033 /*
2034 * We'll refuse to continue assigning MultiXactIds once we get within 3M
2035 * multi of data loss. See SetTransactionIdLimit.
2036 */
2037 multiStopLimit = multiWrapLimit - 3000000;
2038 if (multiStopLimit < FirstMultiXactId)
2039 multiStopLimit -= FirstMultiXactId;
2040
2041 /*
2042 * We'll start complaining loudly when we get within 40M multis of data
2043 * loss. This is kind of arbitrary, but if you let your gas gauge get
2044 * down to 2% of full, would you be looking for the next gas station? We
2045 * need to be fairly liberal about this number because there are lots of
2046 * scenarios where most transactions are done by automatic clients that
2047 * won't pay attention to warnings. (No, we're not gonna make this
2048 * configurable. If you know enough to configure it, you know enough to
2049 * not get in this kind of trouble in the first place.)
2050 */
2051 multiWarnLimit = multiWrapLimit - 40000000;
2052 if (multiWarnLimit < FirstMultiXactId)
2053 multiWarnLimit -= FirstMultiXactId;
2054
2055 /*
2056 * We'll start trying to force autovacuums when oldest_datminmxid gets to
2057 * be more than autovacuum_multixact_freeze_max_age mxids old.
2058 *
2059 * Note: autovacuum_multixact_freeze_max_age is a PGC_POSTMASTER parameter
2060 * so that we don't have to worry about dealing with on-the-fly changes in
2061 * its value. See SetTransactionIdLimit.
2062 */
2064 if (multiVacLimit < FirstMultiXactId)
2065 multiVacLimit += FirstMultiXactId;
2066
2067 /* Grab lock for just long enough to set the new limit values */
2071 MultiXactState->multiVacLimit = multiVacLimit;
2072 MultiXactState->multiWarnLimit = multiWarnLimit;
2073 MultiXactState->multiStopLimit = multiStopLimit;
2074 MultiXactState->multiWrapLimit = multiWrapLimit;
2077
2078 /* Log the info */
2080 (errmsg_internal("MultiXactId wrap limit is %u, limited by database with OID %u",
2081 multiWrapLimit, oldest_datoid)));
2082
2083 /*
2084 * Computing the actual limits is only possible once the data directory is
2085 * in a consistent state. There's no need to compute the limits while
2086 * still replaying WAL - no decisions about new multis are made even
2087 * though multixact creations might be replayed. So we'll only do further
2088 * checks after TrimMultiXact() has been called.
2089 */
2091 return;
2092
2094
2095 /*
2096 * Offsets are 64 bits wide and never wrap around, so we don't need to
2097 * consider them for emergency autovacuum purposes. But now that we're in
2098 * a consistent state, determine MultiXactState->oldestOffset. It will be
2099 * used to adjust the freezing cutoff, to keep the offsets disk usage in
2100 * check.
2101 */
2103
2104 /*
2105 * If past the autovacuum force point, immediately signal an autovac
2106 * request. The reason for this is that autovac only processes one
2107 * database per invocation. Once it's finished cleaning up the oldest
2108 * database, it'll call here, and we'll signal the postmaster to start
2109 * another iteration immediately if there are still any old databases.
2110 */
2111 if (MultiXactIdPrecedes(multiVacLimit, curMulti) && IsUnderPostmaster)
2113
2114 /* Give an immediate warning if past the wrap warn point */
2115 if (MultiXactIdPrecedes(multiWarnLimit, curMulti))
2116 {
2117 char *oldest_datname;
2118
2119 /*
2120 * We can be called when not inside a transaction, for example during
2121 * StartupXLOG(). In such a case we cannot do database access, so we
2122 * must just report the oldest DB's OID.
2123 *
2124 * Note: it's also possible that get_database_name fails and returns
2125 * NULL, for example because the database just got dropped. We'll
2126 * still warn, even though the warning might now be unnecessary.
2127 */
2128 if (IsTransactionState())
2130 else
2132
2133 if (oldest_datname)
2135 (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
2136 "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
2137 multiWrapLimit - curMulti,
2139 multiWrapLimit - curMulti),
2140 errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
2141 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
2142 else
2144 (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
2145 "database with OID %u must be vacuumed before %u more MultiXactIds are used",
2146 multiWrapLimit - curMulti,
2148 multiWrapLimit - curMulti),
2149 errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
2150 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
2151 }
2152}
2153
2154/*
2155 * Ensure the next-to-be-assigned MultiXactId is at least minMulti,
2156 * and similarly nextOffset is at least minMultiOffset.
2157 *
2158 * This is used when we can determine minimum safe values from an XLog
2159 * record (either an on-line checkpoint or an mxact creation log entry).
2160 * Although this is only called during XLog replay, we take the lock in case
2161 * any hot-standby backends are examining the values.
2162 */
2163void
2183
2184/*
2185 * Update our oldestMultiXactId value, but only if it's more recent than what
2186 * we had.
2187 *
2188 * This may only be called during WAL replay.
2189 */
2190void
2191MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
2192{
2194
2196 SetMultiXactIdLimit(oldestMulti, oldestMultiDB);
2197}
2198
2199/*
2200 * Make sure that MultiXactOffset has room for a newly-allocated MultiXactId.
2201 *
2202 * NB: this is called while holding MultiXactGenLock. We want it to be very
2203 * fast most of the time; even when it's not so fast, no actual I/O need
2204 * happen unless we're forced to write out a dirty log or xlog page to make
2205 * room in shared memory.
2206 */
2207static void
2209{
2210 int64 pageno;
2211 LWLock *lock;
2212
2213 /*
2214 * No work except at first MultiXactId of a page. But beware: just after
2215 * wraparound, the first MultiXactId of page zero is FirstMultiXactId.
2216 */
2217 if (MultiXactIdToOffsetEntry(multi) != 0 &&
2218 multi != FirstMultiXactId)
2219 return;
2220
2221 pageno = MultiXactIdToOffsetPage(multi);
2223
2225
2226 /* Zero the page and make a WAL entry about it */
2229 pageno);
2230
2231 LWLockRelease(lock);
2232}
2233
2234/*
2235 * Make sure that MultiXactMember has room for the members of a newly-
2236 * allocated MultiXactId.
2237 *
2238 * Like the above routine, this is called while holding MultiXactGenLock;
2239 * same comments apply.
2240 */
2241static void
2243{
2244 /*
2245 * It's possible that the members span more than one page of the members
2246 * file, so we loop to ensure we consider each page. The coding is not
2247 * optimal if the members span several pages, but that seems unusual
2248 * enough to not worry much about.
2249 */
2250 while (nmembers > 0)
2251 {
2252 int flagsoff;
2253 int flagsbit;
2255
2256 /*
2257 * Only zero when at first entry of a page.
2258 */
2261 if (flagsoff == 0 && flagsbit == 0)
2262 {
2263 int64 pageno;
2264 LWLock *lock;
2265
2266 pageno = MXOffsetToMemberPage(offset);
2268
2270
2271 /* Zero the page and make a WAL entry about it */
2275
2276 LWLockRelease(lock);
2277 }
2278
2279 /* Compute the number of items till end of current page. */
2281
2282 /*
2283 * Advance to next page. OK if nmembers goes negative.
2284 */
2285 nmembers -= difference;
2286 offset += difference;
2287 }
2288}
2289
2290/*
2291 * GetOldestMultiXactId
2292 *
2293 * Return the oldest MultiXactId that's possibly still seen as live by
2294 * any running transaction. Older ones might still exist on disk, but they no
2295 * longer have any running member transaction.
2296 *
2297 * It's not safe to truncate MultiXact SLRU segments on the value returned by
2298 * this function; however, it can be set as the new relminmxid for any table
2299 * that VACUUM knows has no remaining MXIDs < the same value. It is only safe
2300 * to truncate SLRUs when no table can possibly still have a referencing MXID.
2301 */
2304{
2306 int i;
2307
2308 /*
2309 * This is the oldest valid value among all the OldestMemberMXactId[] and
2310 * OldestVisibleMXactId[] entries, or nextMXact if none are valid.
2311 */
2314 for (i = 0; i < MaxOldestSlot; i++)
2315 {
2317
2326 }
2327
2329
2330 return oldestMXact;
2331}
2332
2333/*
2334 * Calculate the oldest member offset and install it in MultiXactState, where
2335 * it can be used to adjust multixid freezing cutoffs.
2336 */
2337static void
2339{
2340 MultiXactId oldestMultiXactId;
2341 MultiXactId nextMXact;
2342 MultiXactOffset oldestOffset = 0; /* placate compiler */
2343 MultiXactOffset nextOffset;
2344 bool oldestOffsetKnown = false;
2345
2346 /*
2347 * NB: Have to prevent concurrent truncation, we might otherwise try to
2348 * lookup an oldestMulti that's concurrently getting truncated away.
2349 */
2351
2352 /* Read relevant fields from shared memory. */
2354 oldestMultiXactId = MultiXactState->oldestMultiXactId;
2355 nextMXact = MultiXactState->nextMXact;
2356 nextOffset = MultiXactState->nextOffset;
2359
2360 /*
2361 * Determine the offset of the oldest multixact. Normally, we can read
2362 * the offset from the multixact itself, but there's an important special
2363 * case: if there are no multixacts in existence at all, oldestMXact
2364 * obviously can't point to one. It will instead point to the multixact
2365 * ID that will be assigned the next time one is needed.
2366 */
2367 if (oldestMultiXactId == nextMXact)
2368 {
2369 /*
2370 * When the next multixact gets created, it will be stored at the next
2371 * offset.
2372 */
2373 oldestOffset = nextOffset;
2374 oldestOffsetKnown = true;
2375 }
2376 else
2377 {
2378 /*
2379 * Look up the offset at which the oldest existing multixact's members
2380 * are stored. If we cannot find it, be careful not to fail, and
2381 * leave oldestOffset unchanged. oldestOffset is initialized to zero
2382 * at system startup, which prevents truncating members until a proper
2383 * value is calculated.
2384 *
2385 * (We had bugs in early releases of PostgreSQL 9.3.X and 9.4.X where
2386 * the supposedly-earliest multixact might not really exist. Those
2387 * should be long gone by now, so this should not fail, but let's
2388 * still be defensive.)
2389 */
2391 find_multixact_start(oldestMultiXactId, &oldestOffset);
2392
2395 (errmsg_internal("oldest MultiXactId member is at offset %" PRIu64,
2396 oldestOffset)));
2397 else
2398 ereport(LOG,
2399 (errmsg("MultiXact member truncation is disabled because oldest checkpointed MultiXact %u does not exist on disk",
2400 oldestMultiXactId)));
2401 }
2402
2404
2405 /* Install the computed value */
2407 {
2409 MultiXactState->oldestOffset = oldestOffset;
2411 }
2412}
2413
2414/*
2415 * Find the starting offset of the given MultiXactId.
2416 *
2417 * Returns false if the file containing the multi does not exist on disk.
2418 * Otherwise, returns true and sets *result to the starting member offset.
2419 *
2420 * This function does not prevent concurrent truncation, so if that's
2421 * required, the caller has to protect against that.
2422 */
2423static bool
2425{
2426 MultiXactOffset offset;
2427 int64 pageno;
2428 int entryno;
2429 int slotno;
2431
2433
2434 pageno = MultiXactIdToOffsetPage(multi);
2436
2437 /*
2438 * Write out dirty data, so PhysicalPageExists can work correctly.
2439 */
2442
2444 return false;
2445
2446 /* lock is acquired by SimpleLruReadPage_ReadOnly */
2448 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
2449 offptr += entryno;
2450 offset = *offptr;
2452
2453 *result = offset;
2454 return true;
2455}
2456
2457/*
2458 * GetMultiXactInfo
2459 *
2460 * Returns information about the current MultiXact state, as of:
2461 * multixacts: Number of MultiXacts (nextMultiXactId - oldestMultiXactId)
2462 * nextOffset: Next-to-be-assigned offset
2463 * oldestMultiXactId: Oldest MultiXact ID still in use
2464 * oldestOffset: Oldest offset still in use
2465 */
2466void
2468 MultiXactId *oldestMultiXactId, MultiXactOffset *oldestOffset)
2469{
2471
2473 *nextOffset = MultiXactState->nextOffset;
2474 *oldestMultiXactId = MultiXactState->oldestMultiXactId;
2476 *oldestOffset = MultiXactState->oldestOffset;
2478
2479 *multixacts = nextMultiXactId - *oldestMultiXactId;
2480}
2481
2482/*
2483 * Multixact members can be removed once the multixacts that refer to them
2484 * are older than every datminmxid. autovacuum_multixact_freeze_max_age and
2485 * vacuum_multixact_freeze_table_age work together to make sure we never have
2486 * too many multixacts; we hope that, at least under normal circumstances,
2487 * this will also be sufficient to keep us from using too many offsets.
2488 * However, if the average multixact has many members, we might accumulate a
2489 * large amount of members, consuming disk space, while still using few enough
2490 * multixids that the multixid limits fail to trigger relminmxid advancement
2491 * by VACUUM.
2492 *
2493 * To prevent that, if the members space usage exceeds a threshold
2494 * (MULTIXACT_MEMBER_LOW_THRESHOLD), we effectively reduce
2495 * autovacuum_multixact_freeze_max_age to a value just less than the number of
2496 * multixacts in use. We hope that this will quickly trigger autovacuuming on
2497 * the table or tables with the oldest relminmxid, thus allowing datminmxid
2498 * values to advance and removing some members.
2499 *
2500 * As the amount of the member space in use grows, we become more aggressive
2501 * in clamping this value. That not only causes autovacuum to ramp up, but
2502 * also makes any manual vacuums the user issues more aggressive. This
2503 * happens because vacuum_get_cutoffs() will clamp the freeze table and the
2504 * minimum freeze age cutoffs based on the effective
2505 * autovacuum_multixact_freeze_max_age this function returns. At the extreme,
2506 * when the members usage reaches MULTIXACT_MEMBER_HIGH_THRESHOLD, we clamp
2507 * freeze_max_age to zero, and every vacuum of any table will freeze every
2508 * multixact.
2509 */
2510int
2512{
2515 double fraction;
2516 int result;
2517 MultiXactId oldestMultiXactId;
2518 MultiXactOffset oldestOffset;
2519 MultiXactOffset nextOffset;
2520 uint64 members;
2521
2522 /* Read the current offsets and multixact usage. */
2523 GetMultiXactInfo(&multixacts, &nextOffset, &oldestMultiXactId, &oldestOffset);
2524 members = nextOffset - oldestOffset;
2525
2526 /* If member space utilization is low, no special action is required. */
2527 if (members <= MULTIXACT_MEMBER_LOW_THRESHOLD)
2529
2530 /*
2531 * Compute a target for relminmxid advancement. The number of multixacts
2532 * we try to eliminate from the system is based on how far we are past
2533 * MULTIXACT_MEMBER_LOW_THRESHOLD.
2534 *
2535 * The way this formula works is that when members is exactly at the low
2536 * threshold, fraction = 0.0, and we set freeze_max_age equal to
2537 * mxid_age(oldestMultiXactId). As members grows further, towards the
2538 * high threshold, fraction grows linearly from 0.0 to 1.0, and the result
2539 * shrinks from mxid_age(oldestMultiXactId) to 0. Beyond the high
2540 * threshold, fraction > 1.0 and the result is clamped to 0.
2541 */
2544
2545 /* fraction could be > 1.0, but lowest possible freeze age is zero */
2546 if (fraction >= 1.0)
2547 return 0;
2548
2550 result = multixacts - victim_multixacts;
2551
2552 /*
2553 * Clamp to autovacuum_multixact_freeze_max_age, so that we never make
2554 * autovacuum less aggressive than it would otherwise be.
2555 */
2557}
2558
2559
2560/*
2561 * Delete members segments older than newOldestOffset
2562 */
2563static void
2569
2570/*
2571 * Delete offsets segments older than newOldestMulti
2572 */
2573static void
2575{
2576 /*
2577 * We step back one multixact to avoid passing a cutoff page that hasn't
2578 * been created yet in the rare case that oldestMulti would be the first
2579 * item on a page and oldestMulti == nextMulti. In that case, if we
2580 * didn't subtract one, we'd trigger SimpleLruTruncate's wraparound
2581 * detection.
2582 */
2585}
2586
2587/*
2588 * Remove all MultiXactOffset and MultiXactMember segments before the oldest
2589 * ones still of interest.
2590 *
2591 * This is only called on a primary as part of vacuum (via
2592 * vac_truncate_clog()). During recovery, truncation is done by replaying
2593 * truncation WAL records logged here.
2594 *
2595 * newOldestMulti is the oldest currently required multixact, newOldestMultiDB
2596 * is one of the databases preventing newOldestMulti from increasing.
2597 */
2598void
2600{
2601 MultiXactId oldestMulti;
2602 MultiXactId nextMulti;
2604 MultiXactOffset nextOffset;
2605
2609
2610 /*
2611 * We can only allow one truncation to happen at once. Otherwise parts of
2612 * members might vanish while we're doing lookups or similar. There's no
2613 * need to have an interlock with creating new multis or such, since those
2614 * are constrained by the limits (which only grow, never shrink).
2615 */
2617
2619 nextMulti = MultiXactState->nextMXact;
2620 nextOffset = MultiXactState->nextOffset;
2621 oldestMulti = MultiXactState->oldestMultiXactId;
2623
2624 /*
2625 * Make sure to only attempt truncation if there's values to truncate
2626 * away. In normal processing values shouldn't go backwards, but there's
2627 * some corner cases (due to bugs) where that's possible.
2628 */
2630 {
2632 return;
2633 }
2634
2635 /*
2636 * Compute up to where to truncate MultiXactMember. Lookup the
2637 * corresponding member offset for newOldestMulti for that.
2638 */
2639 if (newOldestMulti == nextMulti)
2640 {
2641 /* there are NO MultiXacts */
2642 newOldestOffset = nextOffset;
2643 }
2645 {
2646 ereport(LOG,
2647 (errmsg("cannot truncate up to MultiXact %u because it does not exist on disk, skipping truncation",
2648 newOldestMulti)));
2650 return;
2651 }
2652
2653 /*
2654 * On crash, MultiXactIdCreateFromMembers() can leave behind multixids
2655 * that were not yet written out and hence have zero offset on disk. If
2656 * such a multixid becomes oldestMulti, we won't be able to look up its
2657 * offset. That should be rare, so we don't try to do anything smart about
2658 * it. Just skip the truncation, and hope that by the next truncation
2659 * attempt, oldestMulti has advanced to a valid multixid.
2660 */
2661 if (newOldestOffset == 0)
2662 {
2663 ereport(LOG,
2664 (errmsg("cannot truncate up to MultiXact %u because it has invalid offset, skipping truncation",
2665 newOldestMulti)));
2667 return;
2668 }
2669
2670 elog(DEBUG1, "performing multixact truncation: "
2671 "oldestMulti %u (offsets segment %" PRIx64 "), "
2672 "oldestOffset %" PRIu64 " (members segment %" PRIx64 ")",
2677
2678 /*
2679 * Do truncation, and the WAL logging of the truncation, in a critical
2680 * section. That way offsets/members cannot get out of sync anymore, i.e.
2681 * once consistent the newOldestMulti will always exist in members, even
2682 * if we crashed in the wrong moment.
2683 */
2685
2686 /*
2687 * Prevent checkpoints from being scheduled concurrently. This is critical
2688 * because otherwise a truncation record might not be replayed after a
2689 * crash/basebackup, even though the state of the data directory would
2690 * require it.
2691 */
2694
2695 /* WAL log truncation */
2697
2698 /*
2699 * Update in-memory limits before performing the truncation, while inside
2700 * the critical section: Have to do it before truncation, to prevent
2701 * concurrent lookups of those values. Has to be inside the critical
2702 * section as otherwise a future call to this function would error out,
2703 * while looking up the oldest member in offsets, if our caller crashes
2704 * before updating the limits.
2705 */
2711
2712 /* First truncate members */
2714
2715 /* Then offsets */
2717
2719
2722}
2723
2724/*
2725 * Decide whether a MultiXactOffset page number is "older" for truncation
2726 * purposes. Analogous to CLOGPagePrecedes().
2727 *
2728 * Offsetting the values is optional, because MultiXactIdPrecedes() has
2729 * translational symmetry.
2730 */
2731static bool
2746
2747/*
2748 * Decide whether a MultiXactMember page number is "older" for truncation
2749 * purposes. There is no "invalid offset number" and members never wrap
2750 * around, so use the numbers verbatim.
2751 */
2752static bool
2757
2758/*
2759 * Decide which of two MultiXactIds is earlier.
2760 *
2761 * XXX do we need to do something special for InvalidMultiXactId?
2762 * (Doesn't look like it.)
2763 */
2764bool
2766{
2767 int32 diff = (int32) (multi1 - multi2);
2768
2769 return (diff < 0);
2770}
2771
2772/*
2773 * MultiXactIdPrecedesOrEquals -- is multi1 logically <= multi2?
2774 *
2775 * XXX do we need to do something special for InvalidMultiXactId?
2776 * (Doesn't look like it.)
2777 */
2778bool
2780{
2781 int32 diff = (int32) (multi1 - multi2);
2782
2783 return (diff <= 0);
2784}
2785
2786
2787/*
2788 * Write a TRUNCATE xlog record
2789 *
2790 * We must flush the xlog record to disk before returning --- see notes in
2791 * TruncateCLOG().
2792 */
2793static void
2795 MultiXactId oldestMulti,
2796 MultiXactOffset oldestOffset)
2797{
2800
2801 xlrec.oldestMultiDB = oldestMultiDB;
2802 xlrec.oldestMulti = oldestMulti;
2803 xlrec.oldestOffset = oldestOffset;
2804
2809}
2810
2811/*
2812 * MULTIXACT resource manager's routines
2813 */
2814void
2816{
2817 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
2818
2819 /* Backup blocks are not used in multixact records */
2821
2822 if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
2823 {
2824 int64 pageno;
2825
2826 memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
2828 }
2829 else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
2830 {
2831 int64 pageno;
2832
2833 memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
2835 }
2836 else if (info == XLOG_MULTIXACT_CREATE_ID)
2837 {
2841 int i;
2842
2843 /* Store the data back into the SLRU files */
2844 RecordNewMultiXact(xlrec->mid, xlrec->moff, xlrec->nmembers,
2845 xlrec->members);
2846
2847 /* Make sure nextMXact/nextOffset are beyond what this record has */
2849 xlrec->moff + xlrec->nmembers);
2850
2851 /*
2852 * Make sure nextXid is beyond any XID mentioned in the record. This
2853 * should be unnecessary, since any XID found here ought to have other
2854 * evidence in the XLOG, but let's be safe.
2855 */
2856 max_xid = XLogRecGetXid(record);
2857 for (i = 0; i < xlrec->nmembers; i++)
2858 {
2859 if (TransactionIdPrecedes(max_xid, xlrec->members[i].xid))
2860 max_xid = xlrec->members[i].xid;
2861 }
2862
2864 }
2865 else if (info == XLOG_MULTIXACT_TRUNCATE_ID)
2866 {
2868 int64 pageno;
2869
2870 memcpy(&xlrec, XLogRecGetData(record),
2872
2873 elog(DEBUG1, "replaying multixact truncation: "
2874 "oldestMulti %u (offsets segment %" PRIx64 "), "
2875 "oldestOffset %" PRIu64 " (members segment %" PRIx64 ")",
2876 xlrec.oldestMulti,
2877 MultiXactIdToOffsetSegment(xlrec.oldestMulti),
2878 xlrec.oldestOffset,
2879 MXOffsetToMemberSegment(xlrec.oldestOffset));
2880
2881 /* should not be required, but more than cheap enough */
2883
2884 /*
2885 * Advance the horizon values, so they're current at the end of
2886 * recovery.
2887 */
2888 SetMultiXactIdLimit(xlrec.oldestMulti, xlrec.oldestMultiDB);
2889
2890 PerformMembersTruncation(xlrec.oldestOffset);
2891
2892 /*
2893 * During XLOG replay, latest_page_number isn't necessarily set up
2894 * yet; insert a suitable value to bypass the sanity test in
2895 * SimpleLruTruncate.
2896 */
2897 pageno = MultiXactIdToOffsetPage(xlrec.oldestMulti);
2898 pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
2899 pageno);
2900 PerformOffsetsTruncation(xlrec.oldestMulti);
2901
2903 }
2904 else
2905 elog(PANIC, "multixact_redo: unknown op code %u", info);
2906}
2907
2908/*
2909 * Entrypoint for sync.c to sync offsets files.
2910 */
2911int
2912multixactoffsetssyncfiletag(const FileTag *ftag, char *path)
2913{
2914 return SlruSyncFileTag(MultiXactOffsetCtl, ftag, path);
2915}
2916
2917/*
2918 * Entrypoint for sync.c to sync members files.
2919 */
2920int
2921multixactmemberssyncfiletag(const FileTag *ftag, char *path)
2922{
2923 return SlruSyncFileTag(MultiXactMemberCtl, ftag, path);
2924}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition atomics.h:485
int autovacuum_multixact_freeze_max_age
Definition autovacuum.c:130
static int32 next
Definition blutils.c:225
#define Min(x, y)
Definition c.h:1007
uint8_t uint8
Definition c.h:554
#define Assert(condition)
Definition c.h:883
int64_t int64
Definition c.h:553
TransactionId MultiXactId
Definition c.h:686
#define FLEXIBLE_ARRAY_MEMBER
Definition c.h:490
uint64 MultiXactOffset
Definition c.h:688
int32_t int32
Definition c.h:552
uint64_t uint64
Definition c.h:557
uint16_t uint16
Definition c.h:555
uint32_t uint32
Definition c.h:556
#define MemSet(start, val, len)
Definition c.h:1023
uint32 TransactionId
Definition c.h:676
size_t Size
Definition c.h:629
int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition elog.c:1193
int errmsg_internal(const char *fmt,...)
Definition elog.c:1170
int errhint(const char *fmt,...)
Definition elog.c:1330
int errcode(int sqlerrcode)
Definition elog.c:863
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define LOG
Definition elog.h:31
#define WARNING
Definition elog.h:36
#define DEBUG2
Definition elog.h:29
#define PANIC
Definition elog.h:42
#define DEBUG1
Definition elog.h:30
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
#define palloc_array(type, count)
Definition fe_memutils.h:76
Datum difference(PG_FUNCTION_ARGS)
int multixact_offset_buffers
Definition globals.c:163
ProcNumber MyProcNumber
Definition globals.c:90
bool IsUnderPostmaster
Definition globals.c:120
int multixact_member_buffers
Definition globals.c:162
#define newval
GucSource
Definition guc.h:112
const char * str
#define dclist_container(type, membername, ptr)
Definition ilist.h:947
static uint32 dclist_count(const dclist_head *head)
Definition ilist.h:932
static void dclist_move_head(dclist_head *head, dlist_node *node)
Definition ilist.h:808
static dlist_node * dclist_tail_node(dclist_head *head)
Definition ilist.h:920
static void dclist_delete_from(dclist_head *head, dlist_node *node)
Definition ilist.h:763
#define DCLIST_STATIC_INIT(name)
Definition ilist.h:282
static void dclist_push_head(dclist_head *head, dlist_node *node)
Definition ilist.h:693
static void dclist_init(dclist_head *head)
Definition ilist.h:671
#define dclist_foreach(iter, lhead)
Definition ilist.h:970
#define INJECTION_POINT_CACHED(name, arg)
#define INJECTION_POINT_LOAD(name)
int j
Definition isn.c:78
int i
Definition isn.c:77
char * get_database_name(Oid dbid)
Definition lsyscache.c:1242
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1176
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1793
@ LW_SHARED
Definition lwlock.h:113
@ LW_EXCLUSIVE
Definition lwlock.h:112
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition mcxt.c:1768
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition mcxt.c:1232
MemoryContext TopTransactionContext
Definition mcxt.c:171
void pfree(void *pointer)
Definition mcxt.c:1616
MemoryContext TopMemoryContext
Definition mcxt.c:166
void * palloc(Size size)
Definition mcxt.c:1387
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_SMALL_SIZES
Definition memutils.h:170
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define END_CRIT_SECTION()
Definition miscadmin.h:152
static MultiXactId PreviousMultiXactId(MultiXactId multi)
Definition multixact.c:108
static SlruCtlData MultiXactOffsetCtlData
Definition multixact.c:116
void MultiXactShmemInit(void)
Definition multixact.c:1724
static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2)
Definition multixact.c:2753
static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
Definition multixact.c:924
static int mXactCacheGetById(MultiXactId multi, MultiXactMember **members)
Definition multixact.c:1422
MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status)
Definition multixact.c:352
static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
Definition multixact.c:2242
void ReadMultiXactIdRange(MultiXactId *oldest, MultiXactId *next)
Definition multixact.c:637
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2765
char * mxstatus_to_string(MultiXactStatus status)
Definition multixact.c:1514
void multixact_redo(XLogReaderState *record)
Definition multixact.c:2815
static void PerformOffsetsTruncation(MultiXactId newOldestMulti)
Definition multixact.c:2574
void multixact_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len)
Definition multixact.c:1680
#define debug_elog5(a, b, c, d, e)
Definition multixact.c:260
static void MultiXactIdSetOldestVisible(void)
Definition multixact.c:587
int multixactoffsetssyncfiletag(const FileTag *ftag, char *path)
Definition multixact.c:2912
static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result)
Definition multixact.c:2424
void PostPrepare_MultiXact(FullTransactionId fxid)
Definition multixact.c:1610
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition multixact.c:1992
#define MultiXactMemberCtl
Definition multixact.c:120
static void WriteMTruncateXlogRec(Oid oldestMultiDB, MultiXactId endTruncOff, MultiXactOffset endTruncMemb)
Definition multixact.c:2794
void AtPrepare_MultiXact(void)
Definition multixact.c:1596
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2779
void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
Definition multixact.c:2191
static void mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members)
Definition multixact.c:1469
void GetMultiXactInfo(uint32 *multixacts, MultiXactOffset *nextOffset, MultiXactId *oldestMultiXactId, MultiXactOffset *oldestOffset)
Definition multixact.c:2467
bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
Definition multixact.c:463
void MultiXactIdSetOldestMember(void)
Definition multixact.c:537
#define MULTIXACT_MEMBER_LOW_THRESHOLD
Definition multixact.c:98
static MemoryContext MXactContext
Definition multixact.c:248
#define SHARED_MULTIXACT_STATE_SIZE
static MultiXactId * OldestVisibleMXactId
Definition multixact.c:218
static int mxactMemberComparator(const void *arg1, const void *arg2)
Definition multixact.c:1349
static void ExtendMultiXactOffset(MultiXactId multi)
Definition multixact.c:2208
Size MultiXactShmemSize(void)
Definition multixact.c:1707
#define MultiXactOffsetCtl
Definition multixact.c:119
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition multixact.c:1946
static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, int nmembers, MultiXactMember *members)
Definition multixact.c:757
int multixactmemberssyncfiletag(const FileTag *ftag, char *path)
Definition multixact.c:2921
#define MAX_CACHE_ENTRIES
Definition multixact.c:246
static MultiXactId NextMultiXactId(MultiXactId multi)
Definition multixact.c:102
MultiXactId GetOldestMultiXactId(void)
Definition multixact.c:2303
void CheckPointMultiXact(void)
Definition multixact.c:1968
#define MaxOldestSlot
Definition multixact.c:213
MultiXactId MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
Definition multixact.c:656
static MultiXactId mXactCacheGetBySet(int nmembers, MultiXactMember *members)
Definition multixact.c:1379
static dclist_head MXactCache
Definition multixact.c:247
void TrimMultiXact(void)
Definition multixact.c:1834
#define debug_elog3(a, b, c)
Definition multixact.c:258
char * mxid_to_string(MultiXactId multi, int nmembers, MultiXactMember *members)
Definition multixact.c:1537
#define debug_elog4(a, b, c, d)
Definition multixact.c:259
void multixact_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len)
Definition multixact.c:1695
static void PerformMembersTruncation(MultiXactOffset newOldestOffset)
Definition multixact.c:2564
static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2)
Definition multixact.c:2732
int MultiXactMemberFreezeThreshold(void)
Definition multixact.c:2511
static void SetOldestOffset(void)
Definition multixact.c:2338
void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset)
Definition multixact.c:2164
static MultiXactId * OldestMemberMXactId
Definition multixact.c:217
static MultiXactStateData * MultiXactState
Definition multixact.c:216
MultiXactId ReadNextMultiXactId(void)
Definition multixact.c:620
void BootStrapMultiXact(void)
Definition multixact.c:1793
#define debug_elog6(a, b, c, d, e, f)
Definition multixact.c:261
void multixact_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len)
Definition multixact.c:1659
MultiXactId MultiXactIdCreate(TransactionId xid1, MultiXactStatus status1, TransactionId xid2, MultiXactStatus status2)
Definition multixact.c:299
void TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
Definition multixact.c:2599
bool check_multixact_offset_buffers(int *newval, void **extra, GucSource source)
Definition multixact.c:1773
bool check_multixact_member_buffers(int *newval, void **extra, GucSource source)
Definition multixact.c:1782
void AtEOXact_MultiXact(void)
Definition multixact.c:1568
#define MULTIXACT_MEMBER_HIGH_THRESHOLD
Definition multixact.c:99
static SlruCtlData MultiXactMemberCtlData
Definition multixact.c:117
#define debug_elog2(a, b)
Definition multixact.c:257
void StartupMultiXact(void)
Definition multixact.c:1809
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
Definition multixact.c:2014
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition multixact.c:1113
#define MultiXactIdIsValid(multi)
Definition multixact.h:29
#define XLOG_MULTIXACT_ZERO_MEM_PAGE
Definition multixact.h:68
#define XLOG_MULTIXACT_ZERO_OFF_PAGE
Definition multixact.h:67
#define FirstMultiXactId
Definition multixact.h:26
MultiXactStatus
Definition multixact.h:37
@ MultiXactStatusForShare
Definition multixact.h:39
@ MultiXactStatusForNoKeyUpdate
Definition multixact.h:40
@ MultiXactStatusNoKeyUpdate
Definition multixact.h:43
@ MultiXactStatusUpdate
Definition multixact.h:45
@ MultiXactStatusForUpdate
Definition multixact.h:41
@ MultiXactStatusForKeyShare
Definition multixact.h:38
#define ISUPDATE_from_mxstatus(status)
Definition multixact.h:51
#define InvalidMultiXactId
Definition multixact.h:25
#define XLOG_MULTIXACT_TRUNCATE_ID
Definition multixact.h:70
#define SizeOfMultiXactCreate
Definition multixact.h:80
#define SizeOfMultiXactTruncate
Definition multixact.h:93
#define XLOG_MULTIXACT_CREATE_ID
Definition multixact.h:69
#define MaxMultiXactId
Definition multixact.h:27
static int64 MultiXactIdToOffsetSegment(MultiXactId multi)
static int64 MXOffsetToMemberSegment(MultiXactOffset offset)
#define MXACT_MEMBER_BITS_PER_XACT
static int MXOffsetToFlagsBitShift(MultiXactOffset32 offset)
#define MXACT_MEMBER_XACT_BITMASK
static int64 MXOffsetToMemberPage(MultiXactOffset32 offset)
#define MULTIXACT_OFFSETS_PER_PAGE
static int MXOffsetToMemberOffset(MultiXactOffset32 offset)
static int MultiXactIdToOffsetEntry(MultiXactId multi)
static int64 MultiXactIdToOffsetPage(MultiXactId multi)
#define MULTIXACT_MEMBERS_PER_PAGE
static int MXOffsetToFlagsOffset(MultiXactOffset32 offset)
#define ERRCODE_DATA_CORRUPTED
const void size_t len
static rewind_source * source
Definition pg_rewind.c:89
static char buf[DEFAULT_XLOG_SEG_SIZE]
void SendPostmasterSignal(PMSignalReason reason)
Definition pmsignal.c:165
@ PMSIGNAL_START_AUTOVAC_LAUNCHER
Definition pmsignal.h:39
#define qsort(a, b, c, d)
Definition port.h:495
unsigned int Oid
static int fb(int x)
#define DELAY_CHKPT_START
Definition proc.h:135
bool TransactionIdIsInProgress(TransactionId xid)
Definition procarray.c:1404
int ProcNumber
Definition procnumber.h:24
Size add_size(Size s1, Size s2)
Definition shmem.c:495
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition shmem.c:389
void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, const char *subdir, int buffer_tranche_id, int bank_tranche_id, SyncRequestHandler sync_handler, bool long_segment_names)
Definition slru.c:252
int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno, TransactionId xid)
Definition slru.c:630
void SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied)
Definition slru.c:1347
bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int64 pageno)
Definition slru.c:771
int SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid)
Definition slru.c:527
int SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path)
Definition slru.c:1856
int SimpleLruZeroPage(SlruCtl ctl, int64 pageno)
Definition slru.c:375
void SimpleLruZeroAndWritePage(SlruCtl ctl, int64 pageno)
Definition slru.c:444
void SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage)
Definition slru.c:1433
Size SimpleLruShmemSize(int nslots, int nlsns)
Definition slru.c:198
bool check_slru_buffers(const char *name, int *newval)
Definition slru.c:355
static LWLock * SimpleLruGetBankLock(SlruCtl ctl, int64 pageno)
Definition slru.h:160
#define SlruPagePrecedesUnitTests(ctl, per_page)
Definition slru.h:185
PGPROC * MyProc
Definition proc.c:67
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendStringInfoChar(StringInfo str, char ch)
Definition stringinfo.c:242
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
Definition sync.h:51
TransactionId xid
Definition multixact.h:57
MultiXactStatus status
Definition multixact.h:58
MultiXactId multiWrapLimit
Definition multixact.c:158
MultiXactId multiStopLimit
Definition multixact.c:157
MultiXactId multiWarnLimit
Definition multixact.c:156
MultiXactId multiVacLimit
Definition multixact.c:155
MultiXactOffset nextOffset
Definition multixact.c:135
MultiXactId nextMXact
Definition multixact.c:132
MultiXactId oldestMultiXactId
Definition multixact.c:145
MultiXactId perBackendXactIds[FLEXIBLE_ARRAY_MEMBER]
Definition multixact.c:207
MultiXactOffset oldestOffset
Definition multixact.c:152
int delayChkptFlags
Definition proc.h:263
dlist_node * cur
Definition ilist.h:179
MultiXactId multi
Definition multixact.c:240
dlist_node node
Definition multixact.c:242
MultiXactMember members[FLEXIBLE_ARRAY_MEMBER]
Definition multixact.c:243
@ SYNC_HANDLER_MULTIXACT_MEMBER
Definition sync.h:41
@ SYNC_HANDLER_MULTIXACT_OFFSET
Definition sync.h:40
bool TransactionIdDidCommit(TransactionId transactionId)
Definition transam.c:126
#define TransactionIdEquals(id1, id2)
Definition transam.h:43
#define TransactionIdIsValid(xid)
Definition transam.h:41
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
ProcNumber TwoPhaseGetDummyProcNumber(FullTransactionId fxid, bool lock_held)
Definition twophase.c:908
void RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info, const void *data, uint32 len)
Definition twophase.c:1271
#define TWOPHASE_RM_MULTIXACT_ID
void AdvanceNextFullTransactionIdPastXid(TransactionId xid)
Definition varsup.c:304
bool IsTransactionState(void)
Definition xact.c:388
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition xact.c:942
bool RecoveryInProgress(void)
Definition xlog.c:6461
void XLogFlush(XLogRecPtr record)
Definition xlog.c:2784
uint64 XLogRecPtr
Definition xlogdefs.h:21
XLogRecPtr XLogSimpleInsertInt64(RmgrId rmid, uint8 info, int64 value)
Definition xloginsert.c:543
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:478
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:368
void XLogBeginInsert(void)
Definition xloginsert.c:152
#define XLogRecGetInfo(decoder)
Definition xlogreader.h:409
#define XLogRecGetData(decoder)
Definition xlogreader.h:414
#define XLogRecGetXid(decoder)
Definition xlogreader.h:411
#define XLogRecHasAnyBlockRefs(decoder)
Definition xlogreader.h:416
bool InRecovery
Definition xlogutils.c:50