PostgreSQL Source Code git master
multixact.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * multixact.c
4 * PostgreSQL multi-transaction-log manager
5 *
6 * The pg_multixact manager is a pg_xact-like manager that stores an array of
7 * MultiXactMember for each MultiXactId. It is a fundamental part of the
8 * shared-row-lock implementation. Each MultiXactMember is comprised of a
9 * TransactionId and a set of flag bits. The name is a bit historical:
10 * originally, a MultiXactId consisted of more than one TransactionId (except
11 * in rare corner cases), hence "multi". Nowadays, however, it's perfectly
12 * legitimate to have MultiXactIds that only include a single Xid.
13 *
14 * The meaning of the flag bits is opaque to this module, but they are mostly
15 * used in heapam.c to identify lock modes that each of the member transactions
16 * is holding on any given tuple. This module just contains support to store
17 * and retrieve the arrays.
18 *
19 * We use two SLRU areas, one for storing the offsets at which the data
20 * starts for each MultiXactId in the other one. This trick allows us to
21 * store variable length arrays of TransactionIds. (We could alternatively
22 * use one area containing counts and TransactionIds, with valid MultiXactId
23 * values pointing at slots containing counts; but that way seems less robust
24 * since it would get completely confused if someone inquired about a bogus
25 * MultiXactId that pointed to an intermediate slot containing an XID.)
26 *
27 * XLOG interactions: this module generates a record whenever a new OFFSETs or
28 * MEMBERs page is initialized to zeroes, as well as an
29 * XLOG_MULTIXACT_CREATE_ID record whenever a new MultiXactId is defined.
30 * This module ignores the WAL rule "write xlog before data," because it
31 * suffices that actions recording a MultiXactId in a heap xmax do follow that
32 * rule. The only way for the MXID to be referenced from any data page is for
33 * heap_lock_tuple() or heap_update() to have put it there, and each generates
34 * an XLOG record that must follow ours. The normal LSN interlock between the
35 * data page and that XLOG record will ensure that our XLOG record reaches
36 * disk first. If the SLRU members/offsets data reaches disk sooner than the
37 * XLOG records, we do not care; after recovery, no xmax will refer to it. On
38 * the flip side, to ensure that all referenced entries _do_ reach disk, this
39 * module's XLOG records completely rebuild the data entered since the last
40 * checkpoint. We flush and sync all dirty OFFSETs and MEMBERs pages to disk
41 * before each checkpoint is considered complete.
42 *
43 * Like clog.c, and unlike subtrans.c, we have to preserve state across
44 * crashes and ensure that MXID and offset numbering increases monotonically
45 * across a crash. We do this in the same way as it's done for transaction
46 * IDs: the WAL record is guaranteed to contain evidence of every MXID we
47 * could need to worry about, and we just make sure that at the end of
48 * replay, the next-MXID and next-offset counters are at least as large as
49 * anything we saw during replay.
50 *
51 * We are able to remove segments no longer necessary by carefully tracking
52 * each table's used values: during vacuum, any multixact older than a certain
53 * value is removed; the cutoff value is stored in pg_class. The minimum value
54 * across all tables in each database is stored in pg_database, and the global
55 * minimum across all databases is part of pg_control and is kept in shared
56 * memory. Whenever that minimum is advanced, the SLRUs are truncated.
57 *
58 * When new multixactid values are to be created, care is taken that the
59 * counter does not fall within the wraparound horizon considering the global
60 * minimum value.
61 *
62 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
63 * Portions Copyright (c) 1994, Regents of the University of California
64 *
65 * src/backend/access/transam/multixact.c
66 *
67 *-------------------------------------------------------------------------
68 */
69#include "postgres.h"
70
71#include "access/multixact.h"
72#include "access/slru.h"
73#include "access/transam.h"
74#include "access/twophase.h"
76#include "access/xact.h"
77#include "access/xlog.h"
78#include "access/xloginsert.h"
79#include "access/xlogutils.h"
80#include "commands/dbcommands.h"
81#include "funcapi.h"
82#include "lib/ilist.h"
83#include "miscadmin.h"
84#include "pg_trace.h"
85#include "pgstat.h"
87#include "storage/pmsignal.h"
88#include "storage/proc.h"
89#include "storage/procarray.h"
90#include "utils/fmgrprotos.h"
91#include "utils/guc_hooks.h"
93#include "utils/memutils.h"
94
95
96/*
97 * Defines for MultiXactOffset page sizes. A page is the same BLCKSZ as is
98 * used everywhere else in Postgres.
99 *
100 * Note: because MultiXactOffsets are 32 bits and wrap around at 0xFFFFFFFF,
101 * MultiXact page numbering also wraps around at
102 * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE, and segment numbering at
103 * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need
104 * take no explicit notice of that fact in this module, except when comparing
105 * segment and page numbers in TruncateMultiXact (see
106 * MultiXactOffsetPagePrecedes).
107 */
108
109/* We need four bytes per offset */
110#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
111
112static inline int64
114{
115 return multi / MULTIXACT_OFFSETS_PER_PAGE;
116}
117
118static inline int
120{
121 return multi % MULTIXACT_OFFSETS_PER_PAGE;
122}
123
124static inline int64
126{
128}
129
130/*
131 * The situation for members is a bit more complex: we store one byte of
132 * additional flag bits for each TransactionId. To do this without getting
133 * into alignment issues, we store four bytes of flags, and then the
134 * corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and
135 * are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409 groups
136 * per page. This wastes 12 bytes per page, but that's OK -- simplicity (and
137 * performance) trumps space efficiency here.
138 *
139 * Note that the "offset" macros work with byte offset, not array indexes, so
140 * arithmetic must be done using "char *" pointers.
141 */
142/* We need eight bits per xact, so one xact fits in a byte */
143#define MXACT_MEMBER_BITS_PER_XACT 8
144#define MXACT_MEMBER_FLAGS_PER_BYTE 1
145#define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)
146
147/* how many full bytes of flags are there in a group? */
148#define MULTIXACT_FLAGBYTES_PER_GROUP 4
149#define MULTIXACT_MEMBERS_PER_MEMBERGROUP \
150 (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
151/* size in bytes of a complete group */
152#define MULTIXACT_MEMBERGROUP_SIZE \
153 (sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
154#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
155#define MULTIXACT_MEMBERS_PER_PAGE \
156 (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
157
158/*
159 * Because the number of items per page is not a divisor of the last item
160 * number (member 0xFFFFFFFF), the last segment does not use the maximum number
161 * of pages, and moreover the last used page therein does not use the same
162 * number of items as previous pages. (Another way to say it is that the
163 * 0xFFFFFFFF member is somewhere in the middle of the last page, so the page
164 * has some empty space after that item.)
165 *
166 * This constant is the number of members in the last page of the last segment.
167 */
168#define MAX_MEMBERS_IN_LAST_MEMBERS_PAGE \
169 ((uint32) ((0xFFFFFFFF % MULTIXACT_MEMBERS_PER_PAGE) + 1))
170
171/* page in which a member is to be found */
172static inline int64
174{
175 return offset / MULTIXACT_MEMBERS_PER_PAGE;
176}
177
178static inline int64
180{
182}
183
184/* Location (byte offset within page) of flag word for a given member */
185static inline int
187{
189 int grouponpg = group % MULTIXACT_MEMBERGROUPS_PER_PAGE;
190 int byteoff = grouponpg * MULTIXACT_MEMBERGROUP_SIZE;
191
192 return byteoff;
193}
194
195static inline int
197{
198 int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
199 int bshift = member_in_group * MXACT_MEMBER_BITS_PER_XACT;
200
201 return bshift;
202}
203
204/* Location (byte offset within page) of TransactionId of given member */
205static inline int
207{
208 int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
209
210 return MXOffsetToFlagsOffset(offset) +
212 member_in_group * sizeof(TransactionId);
213}
214
215/* Multixact members wraparound thresholds. */
216#define MULTIXACT_MEMBER_SAFE_THRESHOLD (MaxMultiXactOffset / 2)
217#define MULTIXACT_MEMBER_DANGER_THRESHOLD \
218 (MaxMultiXactOffset - MaxMultiXactOffset / 4)
219
220static inline MultiXactId
222{
223 return multi == FirstMultiXactId ? MaxMultiXactId : multi - 1;
224}
225
226/*
227 * Links to shared-memory data structures for MultiXact control
228 */
231
232#define MultiXactOffsetCtl (&MultiXactOffsetCtlData)
233#define MultiXactMemberCtl (&MultiXactMemberCtlData)
234
235/*
236 * MultiXact state shared across all backends. All this state is protected
237 * by MultiXactGenLock. (We also use the SLRU bank locks of MultiXactOffset and
238 * MultiXactMember to guard accesses to the two sets of SLRU buffers. For
239 * concurrency's sake, we avoid holding more than one of these locks at a
240 * time.)
241 */
242typedef struct MultiXactStateData
243{
244 /* next-to-be-assigned MultiXactId */
246
247 /* next-to-be-assigned offset */
249
250 /* Have we completed multixact startup? */
252
253 /*
254 * Oldest multixact that is still potentially referenced by a relation.
255 * Anything older than this should not be consulted. These values are
256 * updated by vacuum.
257 */
260
261 /*
262 * Oldest multixact offset that is potentially referenced by a multixact
263 * referenced by a relation. We don't always know this value, so there's
264 * a flag here to indicate whether or not we currently do.
265 */
268
269 /* support for anti-wraparound measures */
274
275 /* support for members anti-wraparound measures */
276 MultiXactOffset offsetStopLimit; /* known if oldestOffsetKnown */
277
278 /*
279 * This is used to sleep until a multixact offset is written when we want
280 * to create the next one.
281 */
283
284 /*
285 * Per-backend data starts here. We have two arrays stored in the area
286 * immediately following the MultiXactStateData struct. Each is indexed by
287 * ProcNumber.
288 *
289 * In both arrays, there's a slot for all normal backends
290 * (0..MaxBackends-1) followed by a slot for max_prepared_xacts prepared
291 * transactions.
292 *
293 * OldestMemberMXactId[k] is the oldest MultiXactId each backend's current
294 * transaction(s) could possibly be a member of, or InvalidMultiXactId
295 * when the backend has no live transaction that could possibly be a
296 * member of a MultiXact. Each backend sets its entry to the current
297 * nextMXact counter just before first acquiring a shared lock in a given
298 * transaction, and clears it at transaction end. (This works because only
299 * during or after acquiring a shared lock could an XID possibly become a
300 * member of a MultiXact, and that MultiXact would have to be created
301 * during or after the lock acquisition.)
302 *
303 * OldestVisibleMXactId[k] is the oldest MultiXactId each backend's
304 * current transaction(s) think is potentially live, or InvalidMultiXactId
305 * when not in a transaction or not in a transaction that's paid any
306 * attention to MultiXacts yet. This is computed when first needed in a
307 * given transaction, and cleared at transaction end. We can compute it
308 * as the minimum of the valid OldestMemberMXactId[] entries at the time
309 * we compute it (using nextMXact if none are valid). Each backend is
310 * required not to attempt to access any SLRU data for MultiXactIds older
311 * than its own OldestVisibleMXactId[] setting; this is necessary because
312 * the relevant SLRU data can be concurrently truncated away.
313 *
314 * The oldest valid value among all of the OldestMemberMXactId[] and
315 * OldestVisibleMXactId[] entries is considered by vacuum as the earliest
316 * possible value still having any live member transaction -- OldestMxact.
317 * Any value older than that is typically removed from tuple headers, or
318 * "frozen" via being replaced with a new xmax. VACUUM can sometimes even
319 * remove an individual MultiXact xmax whose value is >= its OldestMxact
320 * cutoff, though typically only when no individual member XID is still
321 * running. See FreezeMultiXactId for full details.
322 *
323 * Whenever VACUUM advances relminmxid, then either its OldestMxact cutoff
324 * or the oldest extant Multi remaining in the table is used as the new
325 * pg_class.relminmxid value (whichever is earlier). The minimum of all
326 * relminmxid values in each database is stored in pg_database.datminmxid.
327 * In turn, the minimum of all of those values is stored in pg_control.
328 * This is used as the truncation point for pg_multixact when unneeded
329 * segments get removed by vac_truncate_clog() during vacuuming.
330 */
333
334/*
335 * Size of OldestMemberMXactId and OldestVisibleMXactId arrays.
336 */
337#define MaxOldestSlot (MaxBackends + max_prepared_xacts)
338
339/* Pointers to the state data in shared memory */
343
344
345/*
346 * Definitions for the backend-local MultiXactId cache.
347 *
348 * We use this cache to store known MultiXacts, so we don't need to go to
349 * SLRU areas every time.
350 *
351 * The cache lasts for the duration of a single transaction, the rationale
352 * for this being that most entries will contain our own TransactionId and
353 * so they will be uninteresting by the time our next transaction starts.
354 * (XXX not clear that this is correct --- other members of the MultiXact
355 * could hang around longer than we did. However, it's not clear what a
356 * better policy for flushing old cache entries would be.) FIXME actually
357 * this is plain wrong now that multixacts may contain update Xids.
358 *
359 * We allocate the cache entries in a memory context that is deleted at
360 * transaction end, so we don't need to do retail freeing of entries.
361 */
362typedef struct mXactCacheEnt
363{
369
370#define MAX_CACHE_ENTRIES 256
373
374#ifdef MULTIXACT_DEBUG
375#define debug_elog2(a,b) elog(a,b)
376#define debug_elog3(a,b,c) elog(a,b,c)
377#define debug_elog4(a,b,c,d) elog(a,b,c,d)
378#define debug_elog5(a,b,c,d,e) elog(a,b,c,d,e)
379#define debug_elog6(a,b,c,d,e,f) elog(a,b,c,d,e,f)
380#else
381#define debug_elog2(a,b)
382#define debug_elog3(a,b,c)
383#define debug_elog4(a,b,c,d)
384#define debug_elog5(a,b,c,d,e)
385#define debug_elog6(a,b,c,d,e,f)
386#endif
387
388/* internal MultiXactId management */
389static void MultiXactIdSetOldestVisible(void);
390static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
391 int nmembers, MultiXactMember *members);
392static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset);
393
394/* MultiXact cache management */
395static int mxactMemberComparator(const void *arg1, const void *arg2);
396static MultiXactId mXactCacheGetBySet(int nmembers, MultiXactMember *members);
397static int mXactCacheGetById(MultiXactId multi, MultiXactMember **members);
398static void mXactCachePut(MultiXactId multi, int nmembers,
399 MultiXactMember *members);
400
401static char *mxstatus_to_string(MultiXactStatus status);
402
403/* management of SLRU infrastructure */
404static int ZeroMultiXactOffsetPage(int64 pageno, bool writeXlog);
405static int ZeroMultiXactMemberPage(int64 pageno, bool writeXlog);
406static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2);
407static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2);
408static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
409 MultiXactOffset offset2);
410static void ExtendMultiXactOffset(MultiXactId multi);
411static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
412static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary,
413 MultiXactOffset start, uint32 distance);
414static bool SetOffsetVacuumLimit(bool is_startup);
415static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
416static void WriteMZeroPageXlogRec(int64 pageno, uint8 info);
417static void WriteMTruncateXlogRec(Oid oldestMultiDB,
418 MultiXactId startTruncOff,
419 MultiXactId endTruncOff,
420 MultiXactOffset startTruncMemb,
421 MultiXactOffset endTruncMemb);
422
423
424/*
425 * MultiXactIdCreate
426 * Construct a MultiXactId representing two TransactionIds.
427 *
428 * The two XIDs must be different, or be requesting different statuses.
429 *
430 * NB - we don't worry about our local MultiXactId cache here, because that
431 * is handled by the lower-level routines.
432 */
435 TransactionId xid2, MultiXactStatus status2)
436{
437 MultiXactId newMulti;
438 MultiXactMember members[2];
439
442
443 Assert(!TransactionIdEquals(xid1, xid2) || (status1 != status2));
444
445 /* MultiXactIdSetOldestMember() must have been called already. */
447
448 /*
449 * Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs
450 * are still running. In typical usage, xid2 will be our own XID and the
451 * caller just did a check on xid1, so it'd be wasted effort.
452 */
453
454 members[0].xid = xid1;
455 members[0].status = status1;
456 members[1].xid = xid2;
457 members[1].status = status2;
458
459 newMulti = MultiXactIdCreateFromMembers(2, members);
460
461 debug_elog3(DEBUG2, "Create: %s",
462 mxid_to_string(newMulti, 2, members));
463
464 return newMulti;
465}
466
467/*
468 * MultiXactIdExpand
469 * Add a TransactionId to a pre-existing MultiXactId.
470 *
471 * If the TransactionId is already a member of the passed MultiXactId with the
472 * same status, just return it as-is.
473 *
474 * Note that we do NOT actually modify the membership of a pre-existing
475 * MultiXactId; instead we create a new one. This is necessary to avoid
476 * a race condition against code trying to wait for one MultiXactId to finish;
477 * see notes in heapam.c.
478 *
479 * NB - we don't worry about our local MultiXactId cache here, because that
480 * is handled by the lower-level routines.
481 *
482 * Note: It is critical that MultiXactIds that come from an old cluster (i.e.
483 * one upgraded by pg_upgrade from a cluster older than this feature) are not
484 * passed in.
485 */
488{
489 MultiXactId newMulti;
490 MultiXactMember *members;
491 MultiXactMember *newMembers;
492 int nmembers;
493 int i;
494 int j;
495
498
499 /* MultiXactIdSetOldestMember() must have been called already. */
501
502 debug_elog5(DEBUG2, "Expand: received multi %u, xid %u status %s",
503 multi, xid, mxstatus_to_string(status));
504
505 /*
506 * Note: we don't allow for old multis here. The reason is that the only
507 * caller of this function does a check that the multixact is no longer
508 * running.
509 */
510 nmembers = GetMultiXactIdMembers(multi, &members, false, false);
511
512 if (nmembers < 0)
513 {
514 MultiXactMember member;
515
516 /*
517 * The MultiXactId is obsolete. This can only happen if all the
518 * MultiXactId members stop running between the caller checking and
519 * passing it to us. It would be better to return that fact to the
520 * caller, but it would complicate the API and it's unlikely to happen
521 * too often, so just deal with it by creating a singleton MultiXact.
522 */
523 member.xid = xid;
524 member.status = status;
525 newMulti = MultiXactIdCreateFromMembers(1, &member);
526
527 debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u",
528 multi, newMulti);
529 return newMulti;
530 }
531
532 /*
533 * If the TransactionId is already a member of the MultiXactId with the
534 * same status, just return the existing MultiXactId.
535 */
536 for (i = 0; i < nmembers; i++)
537 {
538 if (TransactionIdEquals(members[i].xid, xid) &&
539 (members[i].status == status))
540 {
541 debug_elog4(DEBUG2, "Expand: %u is already a member of %u",
542 xid, multi);
543 pfree(members);
544 return multi;
545 }
546 }
547
548 /*
549 * Determine which of the members of the MultiXactId are still of
550 * interest. This is any running transaction, and also any transaction
551 * that grabbed something stronger than just a lock and was committed. (An
552 * update that aborted is of no interest here; and having more than one
553 * update Xid in a multixact would cause errors elsewhere.)
554 *
555 * Removing dead members is not just an optimization: freezing of tuples
556 * whose Xmax are multis depends on this behavior.
557 *
558 * Note we have the same race condition here as above: j could be 0 at the
559 * end of the loop.
560 */
561 newMembers = (MultiXactMember *)
562 palloc(sizeof(MultiXactMember) * (nmembers + 1));
563
564 for (i = 0, j = 0; i < nmembers; i++)
565 {
566 if (TransactionIdIsInProgress(members[i].xid) ||
567 (ISUPDATE_from_mxstatus(members[i].status) &&
568 TransactionIdDidCommit(members[i].xid)))
569 {
570 newMembers[j].xid = members[i].xid;
571 newMembers[j++].status = members[i].status;
572 }
573 }
574
575 newMembers[j].xid = xid;
576 newMembers[j++].status = status;
577 newMulti = MultiXactIdCreateFromMembers(j, newMembers);
578
579 pfree(members);
580 pfree(newMembers);
581
582 debug_elog3(DEBUG2, "Expand: returning new multi %u", newMulti);
583
584 return newMulti;
585}
586
587/*
588 * MultiXactIdIsRunning
589 * Returns whether a MultiXactId is "running".
590 *
591 * We return true if at least one member of the given MultiXactId is still
592 * running. Note that a "false" result is certain not to change,
593 * because it is not legal to add members to an existing MultiXactId.
594 *
595 * Caller is expected to have verified that the multixact does not come from
596 * a pg_upgraded share-locked tuple.
597 */
598bool
599MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
600{
601 MultiXactMember *members;
602 int nmembers;
603 int i;
604
605 debug_elog3(DEBUG2, "IsRunning %u?", multi);
606
607 /*
608 * "false" here means we assume our callers have checked that the given
609 * multi cannot possibly come from a pg_upgraded database.
610 */
611 nmembers = GetMultiXactIdMembers(multi, &members, false, isLockOnly);
612
613 if (nmembers <= 0)
614 {
615 debug_elog2(DEBUG2, "IsRunning: no members");
616 return false;
617 }
618
619 /*
620 * Checking for myself is cheap compared to looking in shared memory;
621 * return true if any live subtransaction of the current top-level
622 * transaction is a member.
623 *
624 * This is not needed for correctness, it's just a fast path.
625 */
626 for (i = 0; i < nmembers; i++)
627 {
628 if (TransactionIdIsCurrentTransactionId(members[i].xid))
629 {
630 debug_elog3(DEBUG2, "IsRunning: I (%d) am running!", i);
631 pfree(members);
632 return true;
633 }
634 }
635
636 /*
637 * This could be made faster by having another entry point in procarray.c,
638 * walking the PGPROC array only once for all the members. But in most
639 * cases nmembers should be small enough that it doesn't much matter.
640 */
641 for (i = 0; i < nmembers; i++)
642 {
643 if (TransactionIdIsInProgress(members[i].xid))
644 {
645 debug_elog4(DEBUG2, "IsRunning: member %d (%u) is running",
646 i, members[i].xid);
647 pfree(members);
648 return true;
649 }
650 }
651
652 pfree(members);
653
654 debug_elog3(DEBUG2, "IsRunning: %u is not running", multi);
655
656 return false;
657}
658
659/*
660 * MultiXactIdSetOldestMember
661 * Save the oldest MultiXactId this transaction could be a member of.
662 *
663 * We set the OldestMemberMXactId for a given transaction the first time it's
664 * going to do some operation that might require a MultiXactId (tuple lock,
665 * update or delete). We need to do this even if we end up using a
666 * TransactionId instead of a MultiXactId, because there is a chance that
667 * another transaction would add our XID to a MultiXactId.
668 *
669 * The value to set is the next-to-be-assigned MultiXactId, so this is meant to
670 * be called just before doing any such possibly-MultiXactId-able operation.
671 */
672void
674{
676 {
677 MultiXactId nextMXact;
678
679 /*
680 * You might think we don't need to acquire a lock here, since
681 * fetching and storing of TransactionIds is probably atomic, but in
682 * fact we do: suppose we pick up nextMXact and then lose the CPU for
683 * a long time. Someone else could advance nextMXact, and then
684 * another someone else could compute an OldestVisibleMXactId that
685 * would be after the value we are going to store when we get control
686 * back. Which would be wrong.
687 *
688 * Note that a shared lock is sufficient, because it's enough to stop
689 * someone from advancing nextMXact; and nobody else could be trying
690 * to write to our OldestMember entry, only reading (and we assume
691 * storing it is atomic.)
692 */
693 LWLockAcquire(MultiXactGenLock, LW_SHARED);
694
695 /*
696 * We have to beware of the possibility that nextMXact is in the
697 * wrapped-around state. We don't fix the counter itself here, but we
698 * must be sure to store a valid value in our array entry.
699 */
700 nextMXact = MultiXactState->nextMXact;
701 if (nextMXact < FirstMultiXactId)
702 nextMXact = FirstMultiXactId;
703
705
706 LWLockRelease(MultiXactGenLock);
707
708 debug_elog4(DEBUG2, "MultiXact: setting OldestMember[%d] = %u",
709 MyProcNumber, nextMXact);
710 }
711}
712
713/*
714 * MultiXactIdSetOldestVisible
715 * Save the oldest MultiXactId this transaction considers possibly live.
716 *
717 * We set the OldestVisibleMXactId for a given transaction the first time
718 * it's going to inspect any MultiXactId. Once we have set this, we are
719 * guaranteed that SLRU data for MultiXactIds >= our own OldestVisibleMXactId
720 * won't be truncated away.
721 *
722 * The value to set is the oldest of nextMXact and all the valid per-backend
723 * OldestMemberMXactId[] entries. Because of the locking we do, we can be
724 * certain that no subsequent call to MultiXactIdSetOldestMember can set
725 * an OldestMemberMXactId[] entry older than what we compute here. Therefore
726 * there is no live transaction, now or later, that can be a member of any
727 * MultiXactId older than the OldestVisibleMXactId we compute here.
728 */
729static void
731{
733 {
734 MultiXactId oldestMXact;
735 int i;
736
737 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
738
739 /*
740 * We have to beware of the possibility that nextMXact is in the
741 * wrapped-around state. We don't fix the counter itself here, but we
742 * must be sure to store a valid value in our array entry.
743 */
744 oldestMXact = MultiXactState->nextMXact;
745 if (oldestMXact < FirstMultiXactId)
746 oldestMXact = FirstMultiXactId;
747
748 for (i = 0; i < MaxOldestSlot; i++)
749 {
750 MultiXactId thisoldest = OldestMemberMXactId[i];
751
752 if (MultiXactIdIsValid(thisoldest) &&
753 MultiXactIdPrecedes(thisoldest, oldestMXact))
754 oldestMXact = thisoldest;
755 }
756
757 OldestVisibleMXactId[MyProcNumber] = oldestMXact;
758
759 LWLockRelease(MultiXactGenLock);
760
761 debug_elog4(DEBUG2, "MultiXact: setting OldestVisible[%d] = %u",
762 MyProcNumber, oldestMXact);
763 }
764}
765
766/*
767 * ReadNextMultiXactId
768 * Return the next MultiXactId to be assigned, but don't allocate it
769 */
772{
773 MultiXactId mxid;
774
775 /* XXX we could presumably do this without a lock. */
776 LWLockAcquire(MultiXactGenLock, LW_SHARED);
778 LWLockRelease(MultiXactGenLock);
779
780 if (mxid < FirstMultiXactId)
781 mxid = FirstMultiXactId;
782
783 return mxid;
784}
785
786/*
787 * ReadMultiXactIdRange
788 * Get the range of IDs that may still be referenced by a relation.
789 */
790void
792{
793 LWLockAcquire(MultiXactGenLock, LW_SHARED);
796 LWLockRelease(MultiXactGenLock);
797
798 if (*oldest < FirstMultiXactId)
799 *oldest = FirstMultiXactId;
800 if (*next < FirstMultiXactId)
802}
803
804
805/*
806 * MultiXactIdCreateFromMembers
807 * Make a new MultiXactId from the specified set of members
808 *
809 * Make XLOG, SLRU and cache entries for a new MultiXactId, recording the
810 * given TransactionIds as members. Returns the newly created MultiXactId.
811 *
812 * NB: the passed members[] array will be sorted in-place.
813 */
816{
817 MultiXactId multi;
818 MultiXactOffset offset;
820
821 debug_elog3(DEBUG2, "Create: %s",
822 mxid_to_string(InvalidMultiXactId, nmembers, members));
823
824 /*
825 * See if the same set of members already exists in our cache; if so, just
826 * re-use that MultiXactId. (Note: it might seem that looking in our
827 * cache is insufficient, and we ought to search disk to see if a
828 * duplicate definition already exists. But since we only ever create
829 * MultiXacts containing our own XID, in most cases any such MultiXacts
830 * were in fact created by us, and so will be in our cache. There are
831 * corner cases where someone else added us to a MultiXact without our
832 * knowledge, but it's not worth checking for.)
833 */
834 multi = mXactCacheGetBySet(nmembers, members);
835 if (MultiXactIdIsValid(multi))
836 {
837 debug_elog2(DEBUG2, "Create: in cache!");
838 return multi;
839 }
840
841 /* Verify that there is at most one update Xid among the given members. */
842 {
843 int i;
844 bool has_update = false;
845
846 for (i = 0; i < nmembers; i++)
847 {
848 if (ISUPDATE_from_mxstatus(members[i].status))
849 {
850 if (has_update)
851 elog(ERROR, "new multixact has more than one updating member: %s",
852 mxid_to_string(InvalidMultiXactId, nmembers, members));
853 has_update = true;
854 }
855 }
856 }
857
858 /* Load the injection point before entering the critical section */
859 INJECTION_POINT_LOAD("multixact-create-from-members");
860
861 /*
862 * Assign the MXID and offsets range to use, and make sure there is space
863 * in the OFFSETs and MEMBERs files. NB: this routine does
864 * START_CRIT_SECTION().
865 *
866 * Note: unlike MultiXactIdCreate and MultiXactIdExpand, we do not check
867 * that we've called MultiXactIdSetOldestMember here. This is because
868 * this routine is used in some places to create new MultiXactIds of which
869 * the current backend is not a member, notably during freezing of multis
870 * in vacuum. During vacuum, in particular, it would be unacceptable to
871 * keep OldestMulti set, in case it runs for long.
872 */
873 multi = GetNewMultiXactId(nmembers, &offset);
874
875 INJECTION_POINT_CACHED("multixact-create-from-members");
876
877 /* Make an XLOG entry describing the new MXID. */
878 xlrec.mid = multi;
879 xlrec.moff = offset;
880 xlrec.nmembers = nmembers;
881
882 /*
883 * XXX Note: there's a lot of padding space in MultiXactMember. We could
884 * find a more compact representation of this Xlog record -- perhaps all
885 * the status flags in one XLogRecData, then all the xids in another one?
886 * Not clear that it's worth the trouble though.
887 */
889 XLogRegisterData((char *) (&xlrec), SizeOfMultiXactCreate);
890 XLogRegisterData((char *) members, nmembers * sizeof(MultiXactMember));
891
892 (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID);
893
894 /* Now enter the information into the OFFSETs and MEMBERs logs */
895 RecordNewMultiXact(multi, offset, nmembers, members);
896
897 /* Done with critical section */
899
900 /* Store the new MultiXactId in the local cache, too */
901 mXactCachePut(multi, nmembers, members);
902
903 debug_elog2(DEBUG2, "Create: all done");
904
905 return multi;
906}
907
908/*
909 * RecordNewMultiXact
910 * Write info about a new multixact into the offsets and members files
911 *
912 * This is broken out of MultiXactIdCreateFromMembers so that xlog replay can
913 * use it.
914 */
915static void
917 int nmembers, MultiXactMember *members)
918{
919 int64 pageno;
920 int64 prev_pageno;
921 int entryno;
922 int slotno;
923 MultiXactOffset *offptr;
924 int i;
925 LWLock *lock;
926 LWLock *prevlock = NULL;
927
928 pageno = MultiXactIdToOffsetPage(multi);
929 entryno = MultiXactIdToOffsetEntry(multi);
930
933
934 /*
935 * Note: we pass the MultiXactId to SimpleLruReadPage as the "transaction"
936 * to complain about if there's any I/O error. This is kinda bogus, but
937 * since the errors will always give the full pathname, it should be clear
938 * enough that a MultiXactId is really involved. Perhaps someday we'll
939 * take the trouble to generalize the slru.c error reporting code.
940 */
941 slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
942 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
943 offptr += entryno;
944
945 *offptr = offset;
946
947 MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
948
949 /* Release MultiXactOffset SLRU lock. */
950 LWLockRelease(lock);
951
952 /*
953 * If anybody was waiting to know the offset of this multixact ID we just
954 * wrote, they can read it now, so wake them up.
955 */
957
958 prev_pageno = -1;
959
960 for (i = 0; i < nmembers; i++, offset++)
961 {
962 TransactionId *memberptr;
963 uint32 *flagsptr;
964 uint32 flagsval;
965 int bshift;
966 int flagsoff;
967 int memberoff;
968
969 Assert(members[i].status <= MultiXactStatusUpdate);
970
971 pageno = MXOffsetToMemberPage(offset);
972 memberoff = MXOffsetToMemberOffset(offset);
973 flagsoff = MXOffsetToFlagsOffset(offset);
974 bshift = MXOffsetToFlagsBitShift(offset);
975
976 if (pageno != prev_pageno)
977 {
978 /*
979 * The MultiXactMember SLRU page has changed, so check whether the new
980 * page falls into a different SLRU bank; if it does, release the old
981 * bank's lock and acquire the lock on the new bank.
982 */
984 if (lock != prevlock)
985 {
986 if (prevlock != NULL)
987 LWLockRelease(prevlock);
988
990 prevlock = lock;
991 }
992 slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
993 prev_pageno = pageno;
994 }
995
996 memberptr = (TransactionId *)
997 (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
998
999 *memberptr = members[i].xid;
1000
1001 flagsptr = (uint32 *)
1002 (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
1003
1004 flagsval = *flagsptr;
1005 flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
1006 flagsval |= (members[i].status << bshift);
1007 *flagsptr = flagsval;
1008
1009 MultiXactMemberCtl->shared->page_dirty[slotno] = true;
1010 }
1011
1012 if (prevlock != NULL)
1013 LWLockRelease(prevlock);
1014}
1015
1016/*
1017 * GetNewMultiXactId
1018 * Get the next MultiXactId.
1019 *
1020 * Also, reserve the needed amount of space in the "members" area. The
1021 * starting offset of the reserved space is returned in *offset.
1022 *
1023 * This may generate XLOG records for expansion of the offsets and/or members
1024 * files. Unfortunately, we have to do that while holding MultiXactGenLock
1025 * to avoid race conditions --- the XLOG record for zeroing a page must appear
1026 * before any backend can possibly try to store data in that page!
1027 *
1028 * We start a critical section before advancing the shared counters. The
1029 * caller must end the critical section after writing SLRU data.
1030 */
1031static MultiXactId
1033{
1034 MultiXactId result;
1035 MultiXactOffset nextOffset;
1036
1037 debug_elog3(DEBUG2, "GetNew: for %d xids", nmembers);
1038
1039 /* safety check, we should never get this far in a HS standby */
1040 if (RecoveryInProgress())
1041 elog(ERROR, "cannot assign MultiXactIds during recovery");
1042
1043 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
1044
1045 /* Handle wraparound of the nextMXact counter */
1048
1049 /* Assign the MXID */
1050 result = MultiXactState->nextMXact;
1051
1052 /*----------
1053 * Check to see if it's safe to assign another MultiXactId. This protects
1054 * against catastrophic data loss due to multixact wraparound. The basic
1055 * rules are:
1056 *
1057 * If we're past multiVacLimit or the safe threshold for member storage
1058 * space, or we don't know what the safe threshold for member storage is,
1059 * start trying to force autovacuum cycles.
1060 * If we're past multiWarnLimit, start issuing warnings.
1061 * If we're past multiStopLimit, refuse to create new MultiXactIds.
1062 *
1063 * Note these are pretty much the same protections as in GetNewTransactionId.
1064 *----------
1065 */
1067 {
1068 /*
1069 * For safety's sake, we release MultiXactGenLock while sending
1070 * signals, warnings, etc. This is not so much because we care about
1071 * preserving concurrency in this situation, as to avoid any
1072 * possibility of deadlock while doing get_database_name(). First,
1073 * copy all the shared values we'll need in this path.
1074 */
1075 MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit;
1076 MultiXactId multiStopLimit = MultiXactState->multiStopLimit;
1077 MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit;
1078 Oid oldest_datoid = MultiXactState->oldestMultiXactDB;
1079
1080 LWLockRelease(MultiXactGenLock);
1081
1082 if (IsUnderPostmaster &&
1083 !MultiXactIdPrecedes(result, multiStopLimit))
1084 {
1085 char *oldest_datname = get_database_name(oldest_datoid);
1086
1087 /*
1088 * Immediately kick autovacuum into action as we're already in
1089 * ERROR territory.
1090 */
1092
1093 /* complain even if that DB has disappeared */
1094 if (oldest_datname)
1095 ereport(ERROR,
1096 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1097 errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database \"%s\"",
1098 oldest_datname),
1099 errhint("Execute a database-wide VACUUM in that database.\n"
1100 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1101 else
1102 ereport(ERROR,
1103 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1104 errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database with OID %u",
1105 oldest_datoid),
1106 errhint("Execute a database-wide VACUUM in that database.\n"
1107 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1108 }
1109
1110 /*
1111 * To avoid swamping the postmaster with signals, we issue the autovac
1112 * request only once per 64K multis generated. This still gives
1113 * plenty of chances before we get into real trouble.
1114 */
1115 if (IsUnderPostmaster && (result % 65536) == 0)
1117
1118 if (!MultiXactIdPrecedes(result, multiWarnLimit))
1119 {
1120 char *oldest_datname = get_database_name(oldest_datoid);
1121
1122 /* complain even if that DB has disappeared */
1123 if (oldest_datname)
1125 (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
1126 "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
1127 multiWrapLimit - result,
1128 oldest_datname,
1129 multiWrapLimit - result),
1130 errhint("Execute a database-wide VACUUM in that database.\n"
1131 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1132 else
1134 (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
1135 "database with OID %u must be vacuumed before %u more MultiXactIds are used",
1136 multiWrapLimit - result,
1137 oldest_datoid,
1138 multiWrapLimit - result),
1139 errhint("Execute a database-wide VACUUM in that database.\n"
1140 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1141 }
1142
1143 /* Re-acquire lock and start over */
1144 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
1145 result = MultiXactState->nextMXact;
1146 if (result < FirstMultiXactId)
1147 result = FirstMultiXactId;
1148 }
1149
1150 /* Make sure there is room for the MXID in the file. */
1151 ExtendMultiXactOffset(result);
1152
1153 /*
1154 * Reserve the members space, similarly to above. Also, be careful not to
1155 * return zero as the starting offset for any multixact. See
1156 * GetMultiXactIdMembers() for motivation.
1157 */
1158 nextOffset = MultiXactState->nextOffset;
1159 if (nextOffset == 0)
1160 {
1161 *offset = 1;
1162 nmembers++; /* allocate member slot 0 too */
1163 }
1164 else
1165 *offset = nextOffset;
1166
1167 /*----------
1168 * Protect against overrun of the members space as well, with the
1169 * following rules:
1170 *
1171 * If we're past offsetStopLimit, refuse to generate more multis.
1172 * If we're close to offsetStopLimit, emit a warning.
1173 *
1174 * Arbitrarily, we start emitting warnings when we're 20 segments or less
1175 * from offsetStopLimit.
1176 *
1177 * Note we haven't updated the shared state yet, so if we fail at this
1178 * point, the multixact ID we grabbed can still be used by the next guy.
1179 *
1180 * Note that there is no point in forcing autovacuum runs here: the
1181 * multixact freeze settings would have to be reduced for that to have any
1182 * effect.
1183 *----------
1184 */
1185#define OFFSET_WARN_SEGMENTS 20
1188 nmembers))
1189 {
1190 /* see comment in the corresponding offsets wraparound case */
1192
1193 ereport(ERROR,
1194 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1195 errmsg("multixact \"members\" limit exceeded"),
1196 errdetail_plural("This command would create a multixact with %u members, but the remaining space is only enough for %u member.",
1197 "This command would create a multixact with %u members, but the remaining space is only enough for %u members.",
1198 MultiXactState->offsetStopLimit - nextOffset - 1,
1199 nmembers,
1200 MultiXactState->offsetStopLimit - nextOffset - 1),
1201 errhint("Execute a database-wide VACUUM in database with OID %u with reduced \"vacuum_multixact_freeze_min_age\" and \"vacuum_multixact_freeze_table_age\" settings.",
1203 }
1204
1205 /*
1206 * Check whether we should kick autovacuum into action, to prevent members
1207 * wraparound. NB we use a much larger window to trigger autovacuum than
1208 * just the warning limit. The warning is just a measure of last resort -
1209 * this is in line with GetNewTransactionId's behaviour.
1210 */
1214 {
1215 /*
1216 * To avoid swamping the postmaster with signals, we issue the autovac
1217 * request only when crossing a segment boundary. With default
1218 * compilation settings that's roughly after 50k members. This still
1219 * gives plenty of chances before we get into real trouble.
1220 */
1221 if ((MXOffsetToMemberPage(nextOffset) / SLRU_PAGES_PER_SEGMENT) !=
1222 (MXOffsetToMemberPage(nextOffset + nmembers) / SLRU_PAGES_PER_SEGMENT))
1224 }
1225
1228 nextOffset,
1231 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1232 errmsg_plural("database with OID %u must be vacuumed before %d more multixact member is used",
1233 "database with OID %u must be vacuumed before %d more multixact members are used",
1234 MultiXactState->offsetStopLimit - nextOffset + nmembers,
1236 MultiXactState->offsetStopLimit - nextOffset + nmembers),
1237 errhint("Execute a database-wide VACUUM in that database with reduced \"vacuum_multixact_freeze_min_age\" and \"vacuum_multixact_freeze_table_age\" settings.")));
1238
1239 ExtendMultiXactMember(nextOffset, nmembers);
1240
1241 /*
1242 * Critical section from here until caller has written the data into the
1243 * just-reserved SLRU space; we don't want to error out with a partly
1244 * written MultiXact structure. (In particular, failing to write our
1245 * start offset after advancing nextMXact would effectively corrupt the
1246 * previous MultiXact.)
1247 */
1249
1250 /*
1251 * Advance counters. As in GetNewTransactionId(), this must not happen
1252 * until after file extension has succeeded!
1253 *
1254 * We don't care about MultiXactId wraparound here; it will be handled by
1255 * the next iteration. But note that nextMXact may be InvalidMultiXactId
1256 * or the first value on a segment-beginning page after this routine
1257 * exits, so anyone else looking at the variable must be prepared to deal
1258 * with either case. Similarly, nextOffset may be zero, but we won't use
1259 * that as the actual start offset of the next multixact.
1260 */
1262
1263 MultiXactState->nextOffset += nmembers;
1264
1265 LWLockRelease(MultiXactGenLock);
1266
1267 debug_elog4(DEBUG2, "GetNew: returning %u offset %u", result, *offset);
1268 return result;
1269}
1270
1271/*
1272 * GetMultiXactIdMembers
1273 * Return the set of MultiXactMembers that make up a MultiXactId
1274 *
1275 * Return value is the number of members found, or -1 if there are none,
1276 * and *members is set to a newly palloc'ed array of members. It's the
1277 * caller's responsibility to free it when done with it.
1278 *
1279 * from_pgupgrade must be passed as true if and only if the multixact
1280 * corresponds to a value from a tuple that was locked in a 9.2-or-older
1281 * installation and later pg_upgrade'd (that is, the infomask is
1282 * HEAP_LOCKED_UPGRADED). In this case, we know for certain that no members
1283 * can still be running, so we return -1 just like for an empty multixact
1284 * without any further checking. It would be wrong to try to resolve such a
1285 * multixact: either the multixact is within the current valid multixact
1286 * range, in which case the returned result would be bogus, or outside that
1287 * range, in which case an error would be raised.
1288 *
1289 * In all other cases, the passed multixact must be within the known valid
1290 * range, that is, greater than or equal to oldestMultiXactId, and less than
1291 * nextMXact. Otherwise, an error is raised.
1292 *
1293 * isLockOnly must be set to true if caller is certain that the given multi
1294 * is used only to lock tuples; can be false without loss of correctness,
1295 * but passing a true means we can return quickly without checking for
1296 * old updates.
1297 */
1298int
1300 bool from_pgupgrade, bool isLockOnly)
1301{
1302 int64 pageno;
1303 int64 prev_pageno;
1304 int entryno;
1305 int slotno;
1306 MultiXactOffset *offptr;
1307 MultiXactOffset offset;
1308 int length;
1309 int truelength;
1310 MultiXactId oldestMXact;
1311 MultiXactId nextMXact;
1312 MultiXactId tmpMXact;
1313 MultiXactOffset nextOffset;
1314 MultiXactMember *ptr;
1315 LWLock *lock;
1316 bool slept = false;
1317
1318 debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
1319
1320 if (!MultiXactIdIsValid(multi) || from_pgupgrade)
1321 {
1322 *members = NULL;
1323 return -1;
1324 }
1325
1326 /* See if the MultiXactId is in the local cache */
1327 length = mXactCacheGetById(multi, members);
1328 if (length >= 0)
1329 {
1330 debug_elog3(DEBUG2, "GetMembers: found %s in the cache",
1331 mxid_to_string(multi, length, *members));
1332 return length;
1333 }
1334
1335 /* Set our OldestVisibleMXactId[] entry if we didn't already */
1337
1338 /*
1339 * If we know the multi is used only for locking and not for updates, then
1340 * we can skip checking if the value is older than our oldest visible
1341 * multi. It cannot possibly still be running.
1342 */
1343 if (isLockOnly &&
1345 {
1346 debug_elog2(DEBUG2, "GetMembers: a locker-only multi is too old");
1347 *members = NULL;
1348 return -1;
1349 }
1350
1351 /*
1352 * We check known limits on MultiXact before resorting to the SLRU area.
1353 *
1354 * An ID older than MultiXactState->oldestMultiXactId cannot possibly be
1355 * useful; it has already been removed, or will be removed shortly, by
1356 * truncation. If one is passed, an error is raised.
1357 *
1358 * Also, an ID >= nextMXact shouldn't ever be seen here; if it is seen, it
1359 * implies undetected ID wraparound has occurred. This raises a hard
1360 * error.
1361 *
1362 * Shared lock is enough here since we aren't modifying any global state.
1363 * Acquire it just long enough to grab the current counter values. We may
1364 * need both nextMXact and nextOffset; see below.
1365 */
1366 LWLockAcquire(MultiXactGenLock, LW_SHARED);
1367
1368 oldestMXact = MultiXactState->oldestMultiXactId;
1369 nextMXact = MultiXactState->nextMXact;
1370 nextOffset = MultiXactState->nextOffset;
1371
1372 LWLockRelease(MultiXactGenLock);
1373
1374 if (MultiXactIdPrecedes(multi, oldestMXact))
1375 ereport(ERROR,
1376 (errcode(ERRCODE_INTERNAL_ERROR),
1377 errmsg("MultiXactId %u does no longer exist -- apparent wraparound",
1378 multi)));
1379
1380 if (!MultiXactIdPrecedes(multi, nextMXact))
1381 ereport(ERROR,
1382 (errcode(ERRCODE_INTERNAL_ERROR),
1383 errmsg("MultiXactId %u has not been created yet -- apparent wraparound",
1384 multi)));
1385
1386 /*
1387 * Find out the offset at which we need to start reading MultiXactMembers
1388 * and the number of members in the multixact. We determine the latter as
1389 * the difference between this multixact's starting offset and the next
1390 * one's. However, there are some corner cases to worry about:
1391 *
1392 * 1. This multixact may be the latest one created, in which case there is
1393 * no next one to look at. In this case the nextOffset value we just
1394 * saved is the correct endpoint.
1395 *
1396 * 2. The next multixact may still be in process of being filled in: that
1397 * is, another process may have done GetNewMultiXactId but not yet written
1398 * the offset entry for that ID. In that scenario, it is guaranteed that
1399 * the offset entry for that multixact exists (because GetNewMultiXactId
1400 * won't release MultiXactGenLock until it does) but contains zero
1401 * (because we are careful to pre-zero offset pages). Because
1402 * GetNewMultiXactId will never return zero as the starting offset for a
1403 * multixact, when we read zero as the next multixact's offset, we know we
1404 * have this case. We handle this by sleeping on the condition variable
1405 * we have just for this; the process in charge will signal the CV as soon
1406 * as it has finished writing the multixact offset.
1407 *
1408 * 3. Because GetNewMultiXactId increments offset zero to offset one to
1409 * handle case #2, there is an ambiguity near the point of offset
1410 * wraparound. If we see next multixact's offset is one, is that our
1411 * multixact's actual endpoint, or did it end at zero with a subsequent
1412 * increment? We handle this using the knowledge that if the zero'th
1413 * member slot wasn't filled, it'll contain zero, and zero isn't a valid
1414 * transaction ID so it can't be a multixact member. Therefore, if we
1415 * read a zero from the members array, just ignore it.
1416 *
1417 * This is all pretty messy, but the mess occurs only in infrequent corner
1418 * cases, so it seems better than holding the MultiXactGenLock for a long
1419 * time on every multixact creation.
1420 */
1421retry:
1422 pageno = MultiXactIdToOffsetPage(multi);
1423 entryno = MultiXactIdToOffsetEntry(multi);
1424
1425 /* Acquire the bank lock for the page we need. */
1428
1429 slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
1430 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1431 offptr += entryno;
1432 offset = *offptr;
1433
1434 Assert(offset != 0);
1435
1436 /*
1437 * Use the same increment rule as GetNewMultiXactId(), that is, don't
1438 * handle wraparound explicitly until needed.
1439 */
1440 tmpMXact = multi + 1;
1441
1442 if (nextMXact == tmpMXact)
1443 {
1444 /* Corner case 1: there is no next multixact */
1445 length = nextOffset - offset;
1446 }
1447 else
1448 {
1449 MultiXactOffset nextMXOffset;
1450
1451 /* handle wraparound if needed */
1452 if (tmpMXact < FirstMultiXactId)
1453 tmpMXact = FirstMultiXactId;
1454
1455 prev_pageno = pageno;
1456
1457 pageno = MultiXactIdToOffsetPage(tmpMXact);
1458 entryno = MultiXactIdToOffsetEntry(tmpMXact);
1459
1460 if (pageno != prev_pageno)
1461 {
1462 LWLock *newlock;
1463
1464 /*
1465 * Since we're going to access a different SLRU page, if this page
1466 * falls under a different bank, release the old bank's lock and
1467 * acquire the lock of the new bank.
1468 */
1469 newlock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
1470 if (newlock != lock)
1471 {
1472 LWLockRelease(lock);
1473 LWLockAcquire(newlock, LW_EXCLUSIVE);
1474 lock = newlock;
1475 }
1476 slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, tmpMXact);
1477 }
1478
1479 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1480 offptr += entryno;
1481 nextMXOffset = *offptr;
1482
1483 if (nextMXOffset == 0)
1484 {
1485 /* Corner case 2: next multixact is still being filled in */
1486 LWLockRelease(lock);
1488
1489 INJECTION_POINT("multixact-get-members-cv-sleep");
1490
1492 WAIT_EVENT_MULTIXACT_CREATION);
1493 slept = true;
1494 goto retry;
1495 }
1496
1497 length = nextMXOffset - offset;
1498 }
1499
1500 LWLockRelease(lock);
1501 lock = NULL;
1502
1503 /*
1504 * If we slept above, clean up state; it's no longer needed.
1505 */
1506 if (slept)
1508
1509 ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
1510
1511 truelength = 0;
1512 prev_pageno = -1;
1513 for (int i = 0; i < length; i++, offset++)
1514 {
1515 TransactionId *xactptr;
1516 uint32 *flagsptr;
1517 int flagsoff;
1518 int bshift;
1519 int memberoff;
1520
1521 pageno = MXOffsetToMemberPage(offset);
1522 memberoff = MXOffsetToMemberOffset(offset);
1523
1524 if (pageno != prev_pageno)
1525 {
1526 LWLock *newlock;
1527
1528 /*
1529 * Since we're going to access a different SLRU page, if this page
1530 * falls under a different bank, release the old bank's lock and
1531 * acquire the lock of the new bank.
1532 */
1533 newlock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
1534 if (newlock != lock)
1535 {
1536 if (lock)
1537 LWLockRelease(lock);
1538 LWLockAcquire(newlock, LW_EXCLUSIVE);
1539 lock = newlock;
1540 }
1541
1542 slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
1543 prev_pageno = pageno;
1544 }
1545
1546 xactptr = (TransactionId *)
1547 (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
1548
1549 if (!TransactionIdIsValid(*xactptr))
1550 {
1551 /* Corner case 3: we must be looking at unused slot zero */
1552 Assert(offset == 0);
1553 continue;
1554 }
1555
1556 flagsoff = MXOffsetToFlagsOffset(offset);
1557 bshift = MXOffsetToFlagsBitShift(offset);
1558 flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
1559
1560 ptr[truelength].xid = *xactptr;
1561 ptr[truelength].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
1562 truelength++;
1563 }
1564
1565 LWLockRelease(lock);
1566
1567 /* A multixid with zero members should not happen */
1568 Assert(truelength > 0);
1569
1570 /*
1571 * Copy the result into the local cache.
1572 */
1573 mXactCachePut(multi, truelength, ptr);
1574
1575 debug_elog3(DEBUG2, "GetMembers: no cache for %s",
1576 mxid_to_string(multi, truelength, ptr));
1577 *members = ptr;
1578 return truelength;
1579}
1580
1581/*
1582 * mxactMemberComparator
1583 * qsort comparison function for MultiXactMember
1584 *
1585 * We can't use wraparound comparison for XIDs because that does not respect
1586 * the triangle inequality! Any old sort order will do.
1587 */
1588static int
1589mxactMemberComparator(const void *arg1, const void *arg2)
1590{
1591 MultiXactMember member1 = *(const MultiXactMember *) arg1;
1592 MultiXactMember member2 = *(const MultiXactMember *) arg2;
1593
1594 if (member1.xid > member2.xid)
1595 return 1;
1596 if (member1.xid < member2.xid)
1597 return -1;
1598 if (member1.status > member2.status)
1599 return 1;
1600 if (member1.status < member2.status)
1601 return -1;
1602 return 0;
1603}
1604
1605/*
1606 * mXactCacheGetBySet
1607 * returns a MultiXactId from the cache based on the set of
1608 * TransactionIds that compose it, or InvalidMultiXactId if
1609 * none matches.
1610 *
1611 * This is helpful, for example, if two transactions want to lock a huge
1612 * table. By using the cache, the second will use the same MultiXactId
1613 * for the majority of tuples, thus keeping MultiXactId usage low (saving
1614 * both I/O and wraparound issues).
1615 *
1616 * NB: the passed members array will be sorted in-place.
1617 */
1618static MultiXactId
1620{
1621 dlist_iter iter;
1622
1623 debug_elog3(DEBUG2, "CacheGet: looking for %s",
1624 mxid_to_string(InvalidMultiXactId, nmembers, members));
1625
1626 /* sort the array so comparison is easy */
1627 qsort(members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
1628
1630 {
1632 iter.cur);
1633
1634 if (entry->nmembers != nmembers)
1635 continue;
1636
1637 /*
1638 * We assume the cache entries are sorted, and that the unused bits in
1639 * "status" are zeroed.
1640 */
1641 if (memcmp(members, entry->members, nmembers * sizeof(MultiXactMember)) == 0)
1642 {
1643 debug_elog3(DEBUG2, "CacheGet: found %u", entry->multi);
1645 return entry->multi;
1646 }
1647 }
1648
1649 debug_elog2(DEBUG2, "CacheGet: not found :-(");
1650 return InvalidMultiXactId;
1651}
1652
1653/*
1654 * mXactCacheGetById
1655 * returns the composing MultiXactMember set from the cache for a
1656 * given MultiXactId, if present.
1657 *
1658 * If successful, *members is set to the address of a palloc'd copy of the
1659 * MultiXactMember set. Return value is number of members, or -1 on failure.
1660 */
1661static int
1663{
1664 dlist_iter iter;
1665
1666 debug_elog3(DEBUG2, "CacheGet: looking for %u", multi);
1667
1669 {
1671 iter.cur);
1672
1673 if (entry->multi == multi)
1674 {
1675 MultiXactMember *ptr;
1676 Size size;
1677
1678 size = sizeof(MultiXactMember) * entry->nmembers;
1679 ptr = (MultiXactMember *) palloc(size);
1680
1681 memcpy(ptr, entry->members, size);
1682
1683 debug_elog3(DEBUG2, "CacheGet: found %s",
1684 mxid_to_string(multi,
1685 entry->nmembers,
1686 entry->members));
1687
1688 /*
1689 * Note we modify the list while not using a modifiable iterator.
1690 * This is acceptable only because we exit the iteration
1691 * immediately afterwards.
1692 */
1694
1695 *members = ptr;
1696 return entry->nmembers;
1697 }
1698 }
1699
1700 debug_elog2(DEBUG2, "CacheGet: not found");
1701 return -1;
1702}
1703
1704/*
1705 * mXactCachePut
1706 * Add a new MultiXactId and its composing set into the local cache.
1707 */
1708static void
1709mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members)
1710{
1711 mXactCacheEnt *entry;
1712
1713 debug_elog3(DEBUG2, "CachePut: storing %s",
1714 mxid_to_string(multi, nmembers, members));
1715
1716 if (MXactContext == NULL)
1717 {
1718 /* The cache only lives as long as the current transaction */
1719 debug_elog2(DEBUG2, "CachePut: initializing memory context");
1721 "MultiXact cache context",
1723 }
1724
1725 entry = (mXactCacheEnt *)
1727 offsetof(mXactCacheEnt, members) +
1728 nmembers * sizeof(MultiXactMember));
1729
1730 entry->multi = multi;
1731 entry->nmembers = nmembers;
1732 memcpy(entry->members, members, nmembers * sizeof(MultiXactMember));
1733
1734 /* mXactCacheGetBySet assumes the entries are sorted, so sort them */
1735 qsort(entry->members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
1736
1737 dclist_push_head(&MXactCache, &entry->node);
1739 {
1740 dlist_node *node;
1741
1744
1745 entry = dclist_container(mXactCacheEnt, node, node);
1746 debug_elog3(DEBUG2, "CachePut: pruning cached multi %u",
1747 entry->multi);
1748
1749 pfree(entry);
1750 }
1751}
1752
1753static char *
1755{
1756 switch (status)
1757 {
1759 return "keysh";
1761 return "sh";
1763 return "fornokeyupd";
1765 return "forupd";
1767 return "nokeyupd";
1769 return "upd";
1770 default:
1771 elog(ERROR, "unrecognized multixact status %d", status);
1772 return "";
1773 }
1774}
1775
1776char *
1777mxid_to_string(MultiXactId multi, int nmembers, MultiXactMember *members)
1778{
1779 static char *str = NULL;
1781 int i;
1782
1783 if (str != NULL)
1784 pfree(str);
1785
1787
1788 appendStringInfo(&buf, "%u %d[%u (%s)", multi, nmembers, members[0].xid,
1789 mxstatus_to_string(members[0].status));
1790
1791 for (i = 1; i < nmembers; i++)
1792 appendStringInfo(&buf, ", %u (%s)", members[i].xid,
1793 mxstatus_to_string(members[i].status));
1794
1797 pfree(buf.data);
1798 return str;
1799}
1800
1801/*
1802 * AtEOXact_MultiXact
1803 * Handle transaction end for MultiXact
1804 *
1805 * This is called at top transaction commit or abort (we don't care which).
1806 */
1807void
1809{
1810 /*
1811 * Reset our OldestMemberMXactId and OldestVisibleMXactId values, both of
1812 * which should only be valid while within a transaction.
1813 *
1814 * We assume that storing a MultiXactId is atomic and so we need not take
1815 * MultiXactGenLock to do this.
1816 */
1819
1820 /*
1821 * Discard the local MultiXactId cache. Since MXactContext was created as
1822 * a child of TopTransactionContext, we needn't delete it explicitly.
1823 */
1824 MXactContext = NULL;
1826}
1827
1828/*
1829 * AtPrepare_MultiXact
1830 * Save multixact state at 2PC transaction prepare
1831 *
1832 * In this phase, we only store our OldestMemberMXactId value in the two-phase
1833 * state file.
1834 */
1835void
1837{
1839
1840 if (MultiXactIdIsValid(myOldestMember))
1842 &myOldestMember, sizeof(MultiXactId));
1843}
1844
1845/*
1846 * PostPrepare_MultiXact
1847 * Clean up after successful PREPARE TRANSACTION
1848 */
1849void
1851{
1852 MultiXactId myOldestMember;
1853
1854 /*
1855 * Transfer our OldestMemberMXactId value to the slot reserved for the
1856 * prepared transaction.
1857 */
1858 myOldestMember = OldestMemberMXactId[MyProcNumber];
1859 if (MultiXactIdIsValid(myOldestMember))
1860 {
1861 ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(xid, false);
1862
1863 /*
1864 * Even though storing MultiXactId is atomic, acquire lock to make
1865 * sure others see both changes, not just the reset of the slot of the
1866 * current backend. Using a volatile pointer might suffice, but this
1867 * isn't a hot spot.
1868 */
1869 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
1870
1871 OldestMemberMXactId[dummyProcNumber] = myOldestMember;
1873
1874 LWLockRelease(MultiXactGenLock);
1875 }
1876
1877 /*
1878 * We don't need to transfer OldestVisibleMXactId value, because the
1879 * transaction is not going to be looking at any more multixacts once it's
1880 * prepared.
1881 *
1882 * We assume that storing a MultiXactId is atomic and so we need not take
1883 * MultiXactGenLock to do this.
1884 */
1886
1887 /*
1888 * Discard the local MultiXactId cache like in AtEOXact_MultiXact.
1889 */
1890 MXactContext = NULL;
1892}
1893
1894/*
1895 * multixact_twophase_recover
1896 * Recover the state of a prepared transaction at startup
1897 */
1898void
1900 void *recdata, uint32 len)
1901{
1902 ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(xid, false);
1903 MultiXactId oldestMember;
1904
1905 /*
1906 * Get the oldest member XID from the state file record, and set it in the
1907 * OldestMemberMXactId slot reserved for this prepared transaction.
1908 */
1909 Assert(len == sizeof(MultiXactId));
1910 oldestMember = *((MultiXactId *) recdata);
1911
1912 OldestMemberMXactId[dummyProcNumber] = oldestMember;
1913}
1914
1915/*
1916 * multixact_twophase_postcommit
1917 * Similar to AtEOXact_MultiXact but for COMMIT PREPARED
1918 */
1919void
1921 void *recdata, uint32 len)
1922{
1923 ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(xid, true);
1924
1925 Assert(len == sizeof(MultiXactId));
1926
1927 OldestMemberMXactId[dummyProcNumber] = InvalidMultiXactId;
1928}
1929
1930/*
1931 * multixact_twophase_postabort
1932 * This is actually just the same as the COMMIT case.
1933 */
1934void
1936 void *recdata, uint32 len)
1937{
1938 multixact_twophase_postcommit(xid, info, recdata, len);
1939}
1940
1941/*
1942 * Initialization of shared memory for MultiXact. We use two SLRU areas,
1943 * thus double memory. Also, reserve space for the shared MultiXactState
1944 * struct and the per-backend MultiXactId arrays (two of those, too).
1945 */
1946Size
1948{
1949 Size size;
1950
1951 /* We need 2*MaxOldestSlot perBackendXactIds[] entries */
1952#define SHARED_MULTIXACT_STATE_SIZE \
1953 add_size(offsetof(MultiXactStateData, perBackendXactIds), \
1954 mul_size(sizeof(MultiXactId) * 2, MaxOldestSlot))
1955
1959
1960 return size;
1961}
1962
1963void
1965{
1966 bool found;
1967
1968 debug_elog2(DEBUG2, "Shared Memory Init for MultiXact");
1969
1972
1974 "multixact_offset", multixact_offset_buffers, 0,
1975 "pg_multixact/offsets", LWTRANCHE_MULTIXACTOFFSET_BUFFER,
1978 false);
1981 "multixact_member", multixact_member_buffers, 0,
1982 "pg_multixact/members", LWTRANCHE_MULTIXACTMEMBER_BUFFER,
1985 false);
1986 /* doesn't call SimpleLruTruncate() or meet criteria for unit tests */
1987
1988 /* Initialize our shared state struct */
1989 MultiXactState = ShmemInitStruct("Shared MultiXact State",
1991 &found);
1992 if (!IsUnderPostmaster)
1993 {
1994 Assert(!found);
1995
1996 /* Make sure we zero out the per-backend state */
1999 }
2000 else
2001 Assert(found);
2002
2003 /*
2004 * Set up array pointers.
2005 */
2008}
2009
2010/*
2011 * GUC check_hook for multixact_offset_buffers
2012 */
2013bool
2015{
2016 return check_slru_buffers("multixact_offset_buffers", newval);
2017}
2018
2019/*
2020 * GUC check_hook for multixact_member_buffers
2021 */
2022bool
2024{
2025 return check_slru_buffers("multixact_member_buffers", newval);
2026}
2027
2028/*
2029 * This func must be called ONCE on system install. It creates the initial
2030 * MultiXact segments. (The MultiXacts directories are assumed to have been
2031 * created by initdb, and MultiXactShmemInit must have been called already.)
2032 */
2033void
2035{
2036 int slotno;
2037 LWLock *lock;
2038
2041
2042 /* Create and zero the first page of the offsets log */
2043 slotno = ZeroMultiXactOffsetPage(0, false);
2044
2045 /* Make sure it's written out */
2047 Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
2048
2049 LWLockRelease(lock);
2050
2053
2054 /* Create and zero the first page of the members log */
2055 slotno = ZeroMultiXactMemberPage(0, false);
2056
2057 /* Make sure it's written out */
2059 Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
2060
2061 LWLockRelease(lock);
2062}
2063
2064/*
2065 * Initialize (or reinitialize) a page of MultiXactOffset to zeroes.
2066 * If writeXlog is true, also emit an XLOG record saying we did this.
2067 *
2068 * The page is not actually written, just set up in shared memory.
2069 * The slot number of the new page is returned.
2070 *
2071 * Control lock must be held at entry, and will be held at exit.
2072 */
2073static int
2074ZeroMultiXactOffsetPage(int64 pageno, bool writeXlog)
2075{
2076 int slotno;
2077
2078 slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
2079
2080 if (writeXlog)
2082
2083 return slotno;
2084}
2085
2086/*
2087 * Ditto, for MultiXactMember
2088 */
2089static int
2090ZeroMultiXactMemberPage(int64 pageno, bool writeXlog)
2091{
2092 int slotno;
2093
2094 slotno = SimpleLruZeroPage(MultiXactMemberCtl, pageno);
2095
2096 if (writeXlog)
2098
2099 return slotno;
2100}
2101
2102/*
2103 * MaybeExtendOffsetSlru
2104 * Extend the offsets SLRU area, if necessary
2105 *
2106 * After a binary upgrade from <= 9.2, the pg_multixact/offsets SLRU area might
2107 * contain files that are shorter than necessary; this would occur if the old
2108 * installation had used multixacts beyond the first page (files cannot be
2109 * copied, because the on-disk representation is different). pg_upgrade would
2110 * update pg_control to set the next offset value to be at that position, so
2111 * that tuples marked as locked by such MultiXacts would be seen as visible
2112 * without having to consult multixact. However, trying to create and use a
2113 * new MultiXactId would result in an error because the page on which the new
2114 * value would reside does not exist. This routine is in charge of creating
2115 * such pages.
2116 */
2117static void
2119{
2120 int64 pageno;
2121 LWLock *lock;
2122
2125
2127
2129 {
2130 int slotno;
2131
2132 /*
2133 * Fortunately for us, SimpleLruWritePage is already prepared to deal
2134 * with creating a new segment file even if the page we're writing is
2135 * not the first in it, so this is enough.
2136 */
2137 slotno = ZeroMultiXactOffsetPage(pageno, false);
2139 }
2140
2141 LWLockRelease(lock);
2142}
2143
2144/*
2145 * This must be called ONCE during postmaster or standalone-backend startup.
2146 *
2147 * StartupXLOG has already established nextMXact/nextOffset by calling
2148 * MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact, and the oldestMulti
2149 * info from pg_control and/or MultiXactAdvanceOldest, but we haven't yet
2150 * replayed WAL.
2151 */
2152void
2154{
2157 int64 pageno;
2158
2159 /*
2160 * Initialize offset's idea of the latest page number.
2161 */
2162 pageno = MultiXactIdToOffsetPage(multi);
2163 pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
2164 pageno);
2165
2166 /*
2167 * Initialize member's idea of the latest page number.
2168 */
2169 pageno = MXOffsetToMemberPage(offset);
2170 pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
2171 pageno);
2172}
2173
2174/*
2175 * This must be called ONCE at the end of startup/recovery.
2176 */
2177void
2179{
2180 MultiXactId nextMXact;
2181 MultiXactOffset offset;
2182 MultiXactId oldestMXact;
2183 Oid oldestMXactDB;
2184 int64 pageno;
2185 int entryno;
2186 int flagsoff;
2187
2188 LWLockAcquire(MultiXactGenLock, LW_SHARED);
2189 nextMXact = MultiXactState->nextMXact;
2190 offset = MultiXactState->nextOffset;
2191 oldestMXact = MultiXactState->oldestMultiXactId;
2192 oldestMXactDB = MultiXactState->oldestMultiXactDB;
2193 LWLockRelease(MultiXactGenLock);
2194
2195 /* Clean up offsets state */
2196
2197 /*
2198 * (Re-)Initialize our idea of the latest page number for offsets.
2199 */
2200 pageno = MultiXactIdToOffsetPage(nextMXact);
2201 pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
2202 pageno);
2203
2204 /*
2205 * Zero out the remainder of the current offsets page. See notes in
2206 * TrimCLOG() for background. Unlike CLOG, some WAL record covers every
2207 * pg_multixact SLRU mutation. Since, also unlike CLOG, we ignore the WAL
2208 * rule "write xlog before data," nextMXact successors may carry obsolete,
2209 * nonzero offset values. Zero those so case 2 of GetMultiXactIdMembers()
2210 * operates normally.
2211 */
2212 entryno = MultiXactIdToOffsetEntry(nextMXact);
2213 if (entryno != 0)
2214 {
2215 int slotno;
2216 MultiXactOffset *offptr;
2218
2220 slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, nextMXact);
2221 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
2222 offptr += entryno;
2223
2224 MemSet(offptr, 0, BLCKSZ - (entryno * sizeof(MultiXactOffset)));
2225
2226 MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
2227 LWLockRelease(lock);
2228 }
2229
2230 /*
2231 * And the same for members.
2232 *
2233 * (Re-)Initialize our idea of the latest page number for members.
2234 */
2235 pageno = MXOffsetToMemberPage(offset);
2236 pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
2237 pageno);
2238
2239 /*
2240 * Zero out the remainder of the current members page. See notes in
2241 * TrimCLOG() for motivation.
2242 */
2243 flagsoff = MXOffsetToFlagsOffset(offset);
2244 if (flagsoff != 0)
2245 {
2246 int slotno;
2247 TransactionId *xidptr;
2248 int memberoff;
2250
2252 memberoff = MXOffsetToMemberOffset(offset);
2253 slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, offset);
2254 xidptr = (TransactionId *)
2255 (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
2256
2257 MemSet(xidptr, 0, BLCKSZ - memberoff);
2258
2259 /*
2260 * Note: we don't need to zero out the flag bits in the remaining
2261 * members of the current group, because they are always reset before
2262 * writing.
2263 */
2264
2265 MultiXactMemberCtl->shared->page_dirty[slotno] = true;
2266 LWLockRelease(lock);
2267 }
2268
2269 /* signal that we're officially up */
2270 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2272 LWLockRelease(MultiXactGenLock);
2273
2274 /* Now compute how far away the next members wraparound is. */
2275 SetMultiXactIdLimit(oldestMXact, oldestMXactDB, true);
2276}
2277
2278/*
2279 * Get the MultiXact data to save in a checkpoint record
2280 */
2281void
2283 MultiXactId *nextMulti,
2284 MultiXactOffset *nextMultiOffset,
2285 MultiXactId *oldestMulti,
2286 Oid *oldestMultiDB)
2287{
2288 LWLockAcquire(MultiXactGenLock, LW_SHARED);
2289 *nextMulti = MultiXactState->nextMXact;
2290 *nextMultiOffset = MultiXactState->nextOffset;
2291 *oldestMulti = MultiXactState->oldestMultiXactId;
2292 *oldestMultiDB = MultiXactState->oldestMultiXactDB;
2293 LWLockRelease(MultiXactGenLock);
2294
2296 "MultiXact: checkpoint is nextMulti %u, nextOffset %u, oldestMulti %u in DB %u",
2297 *nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB);
2298}
2299
2300/*
2301 * Perform a checkpoint --- either during shutdown, or on-the-fly
2302 */
2303void
2305{
2306 TRACE_POSTGRESQL_MULTIXACT_CHECKPOINT_START(true);
2307
2308 /*
2309 * Write dirty MultiXact pages to disk. This may result in sync requests
2310 * queued for later handling by ProcessSyncRequests(), as part of the
2311 * checkpoint.
2312 */
2315
2316 TRACE_POSTGRESQL_MULTIXACT_CHECKPOINT_DONE(true);
2317}
2318
2319/*
2320 * Set the next-to-be-assigned MultiXactId and offset
2321 *
2322 * This is used when we can determine the correct next ID/offset exactly
2323 * from a checkpoint record. Although this is only called during bootstrap
2324 * and XLog replay, we take the lock in case any hot-standby backends are
2325 * examining the values.
2326 */
2327void
2329 MultiXactOffset nextMultiOffset)
2330{
2331 debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %u",
2332 nextMulti, nextMultiOffset);
2333 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2334 MultiXactState->nextMXact = nextMulti;
2335 MultiXactState->nextOffset = nextMultiOffset;
2336 LWLockRelease(MultiXactGenLock);
2337
2338 /*
2339 * During a binary upgrade, make sure that the offsets SLRU is large
2340 * enough to contain the next value that would be created.
2341 *
2342 * We need to do this pretty early during the first startup in binary
2343 * upgrade mode: before StartupMultiXact() in fact, because this routine
2344 * is called even before that by StartupXLOG(). And we can't do it
2345 * earlier than at this point, because during that first call of this
2346 * routine we determine the MultiXactState->nextMXact value that
2347 * MaybeExtendOffsetSlru needs.
2348 */
2349 if (IsBinaryUpgrade)
2351}
2352
2353/*
2354 * Determine the last safe MultiXactId to allocate given the currently oldest
2355 * datminmxid (ie, the oldest MultiXactId that might exist in any database
2356 * of our cluster), and the OID of the (or a) database with that value.
2357 *
2358 * is_startup is true when we are just starting the cluster, false when we
2359 * are updating state in a running cluster. This only affects log messages.
2360 */
2361void
2362SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid,
2363 bool is_startup)
2364{
2365 MultiXactId multiVacLimit;
2366 MultiXactId multiWarnLimit;
2367 MultiXactId multiStopLimit;
2368 MultiXactId multiWrapLimit;
2369 MultiXactId curMulti;
2370 bool needs_offset_vacuum;
2371
2372 Assert(MultiXactIdIsValid(oldest_datminmxid));
2373
2374 /*
2375 * We pretend that a wrap will happen halfway through the multixact ID
2376 * space, but that's not really true, because multixacts wrap differently
2377 * from transaction IDs. Note that, separately from any concern about
2378 * multixact IDs wrapping, we must ensure that multixact members do not
2379 * wrap. Limits for that are set in SetOffsetVacuumLimit, not here.
2380 */
2381 multiWrapLimit = oldest_datminmxid + (MaxMultiXactId >> 1);
2382 if (multiWrapLimit < FirstMultiXactId)
2383 multiWrapLimit += FirstMultiXactId;
2384
2385 /*
2386 * We'll refuse to continue assigning MultiXactIds once we get within 3M
2387 * multi of data loss. See SetTransactionIdLimit.
2388 */
2389 multiStopLimit = multiWrapLimit - 3000000;
2390 if (multiStopLimit < FirstMultiXactId)
2391 multiStopLimit -= FirstMultiXactId;
2392
2393 /*
2394 * We'll start complaining loudly when we get within 40M multis of data
2395 * loss. This is kind of arbitrary, but if you let your gas gauge get
2396 * down to 2% of full, would you be looking for the next gas station? We
2397 * need to be fairly liberal about this number because there are lots of
2398 * scenarios where most transactions are done by automatic clients that
2399 * won't pay attention to warnings. (No, we're not gonna make this
2400 * configurable. If you know enough to configure it, you know enough to
2401 * not get in this kind of trouble in the first place.)
2402 */
2403 multiWarnLimit = multiWrapLimit - 40000000;
2404 if (multiWarnLimit < FirstMultiXactId)
2405 multiWarnLimit -= FirstMultiXactId;
2406
2407 /*
2408 * We'll start trying to force autovacuums when oldest_datminmxid gets to
2409 * be more than autovacuum_multixact_freeze_max_age mxids old.
2410 *
2411 * Note: autovacuum_multixact_freeze_max_age is a PGC_POSTMASTER parameter
2412 * so that we don't have to worry about dealing with on-the-fly changes in
2413 * its value. See SetTransactionIdLimit.
2414 */
2415 multiVacLimit = oldest_datminmxid + autovacuum_multixact_freeze_max_age;
2416 if (multiVacLimit < FirstMultiXactId)
2417 multiVacLimit += FirstMultiXactId;
2418
2419 /* Grab lock for just long enough to set the new limit values */
2420 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2421 MultiXactState->oldestMultiXactId = oldest_datminmxid;
2422 MultiXactState->oldestMultiXactDB = oldest_datoid;
2423 MultiXactState->multiVacLimit = multiVacLimit;
2424 MultiXactState->multiWarnLimit = multiWarnLimit;
2425 MultiXactState->multiStopLimit = multiStopLimit;
2426 MultiXactState->multiWrapLimit = multiWrapLimit;
2427 curMulti = MultiXactState->nextMXact;
2428 LWLockRelease(MultiXactGenLock);
2429
2430 /* Log the info */
2432 (errmsg_internal("MultiXactId wrap limit is %u, limited by database with OID %u",
2433 multiWrapLimit, oldest_datoid)));
2434
2435 /*
2436 * Computing the actual limits is only possible once the data directory is
2437 * in a consistent state. There's no need to compute the limits while
2438 * still replaying WAL - no decisions about new multis are made even
2439 * though multixact creations might be replayed. So we'll only do further
2440 * checks after TrimMultiXact() has been called.
2441 */
2443 return;
2444
2446
2447 /* Set limits for offset vacuum. */
2448 needs_offset_vacuum = SetOffsetVacuumLimit(is_startup);
2449
2450 /*
2451 * If past the autovacuum force point, immediately signal an autovac
2452 * request. The reason for this is that autovac only processes one
2453 * database per invocation. Once it's finished cleaning up the oldest
2454 * database, it'll call here, and we'll signal the postmaster to start
2455 * another iteration immediately if there are still any old databases.
2456 */
2457 if ((MultiXactIdPrecedes(multiVacLimit, curMulti) ||
2458 needs_offset_vacuum) && IsUnderPostmaster)
2460
2461 /* Give an immediate warning if past the wrap warn point */
2462 if (MultiXactIdPrecedes(multiWarnLimit, curMulti))
2463 {
2464 char *oldest_datname;
2465
2466 /*
2467 * We can be called when not inside a transaction, for example during
2468 * StartupXLOG(). In such a case we cannot do database access, so we
2469 * must just report the oldest DB's OID.
2470 *
2471 * Note: it's also possible that get_database_name fails and returns
2472 * NULL, for example because the database just got dropped. We'll
2473 * still warn, even though the warning might now be unnecessary.
2474 */
2475 if (IsTransactionState())
2476 oldest_datname = get_database_name(oldest_datoid);
2477 else
2478 oldest_datname = NULL;
2479
2480 if (oldest_datname)
2482 (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
2483 "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
2484 multiWrapLimit - curMulti,
2485 oldest_datname,
2486 multiWrapLimit - curMulti),
2487 errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
2488 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
2489 else
2491 (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
2492 "database with OID %u must be vacuumed before %u more MultiXactIds are used",
2493 multiWrapLimit - curMulti,
2494 oldest_datoid,
2495 multiWrapLimit - curMulti),
2496 errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
2497 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
2498 }
2499}
2500
2501/*
2502 * Ensure the next-to-be-assigned MultiXactId is at least minMulti,
2503 * and similarly nextOffset is at least minMultiOffset.
2504 *
2505 * This is used when we can determine minimum safe values from an XLog
2506 * record (either an on-line checkpoint or an mxact creation log entry).
2507 * Although this is only called during XLog replay, we take the lock in case
2508 * any hot-standby backends are examining the values.
2509 */
2510void
2512 MultiXactOffset minMultiOffset)
2513{
2514 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2516 {
2517 debug_elog3(DEBUG2, "MultiXact: setting next multi to %u", minMulti);
2518 MultiXactState->nextMXact = minMulti;
2519 }
2521 {
2522 debug_elog3(DEBUG2, "MultiXact: setting next offset to %u",
2523 minMultiOffset);
2524 MultiXactState->nextOffset = minMultiOffset;
2525 }
2526 LWLockRelease(MultiXactGenLock);
2527}
2528
2529/*
2530 * Update our oldestMultiXactId value, but only if it's more recent than what
2531 * we had.
2532 *
2533 * This may only be called during WAL replay.
2534 */
2535void
2536MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
2537{
2539
2541 SetMultiXactIdLimit(oldestMulti, oldestMultiDB, false);
2542}
2543
2544/*
2545 * Make sure that MultiXactOffset has room for a newly-allocated MultiXactId.
2546 *
2547 * NB: this is called while holding MultiXactGenLock. We want it to be very
2548 * fast most of the time; even when it's not so fast, no actual I/O need
2549 * happen unless we're forced to write out a dirty log or xlog page to make
2550 * room in shared memory.
2551 */
2552static void
2554{
2555 int64 pageno;
2556 LWLock *lock;
2557
2558 /*
2559 * No work except at first MultiXactId of a page. But beware: just after
2560 * wraparound, the first MultiXactId of page zero is FirstMultiXactId.
2561 */
2562 if (MultiXactIdToOffsetEntry(multi) != 0 &&
2563 multi != FirstMultiXactId)
2564 return;
2565
2566 pageno = MultiXactIdToOffsetPage(multi);
2568
2570
2571 /* Zero the page and make an XLOG entry about it */
2572 ZeroMultiXactOffsetPage(pageno, true);
2573
2574 LWLockRelease(lock);
2575}
2576
2577/*
2578 * Make sure that MultiXactMember has room for the members of a newly-
2579 * allocated MultiXactId.
2580 *
2581 * Like the above routine, this is called while holding MultiXactGenLock;
2582 * same comments apply.
2583 */
2584static void
2586{
2587 /*
2588 * It's possible that the members span more than one page of the members
2589 * file, so we loop to ensure we consider each page. The coding is not
2590 * optimal if the members span several pages, but that seems unusual
2591 * enough to not worry much about.
2592 */
2593 while (nmembers > 0)
2594 {
2595 int flagsoff;
2596 int flagsbit;
2598
2599 /*
2600 * Only zero when at first entry of a page.
2601 */
2602 flagsoff = MXOffsetToFlagsOffset(offset);
2603 flagsbit = MXOffsetToFlagsBitShift(offset);
2604 if (flagsoff == 0 && flagsbit == 0)
2605 {
2606 int64 pageno;
2607 LWLock *lock;
2608
2609 pageno = MXOffsetToMemberPage(offset);
2611
2613
2614 /* Zero the page and make an XLOG entry about it */
2615 ZeroMultiXactMemberPage(pageno, true);
2616
2617 LWLockRelease(lock);
2618 }
2619
2620 /*
2621 * Compute the number of items till end of current page. Careful: if
2622 * addition of unsigned ints wraps around, we're at the last page of
2623 * the last segment; since that page holds a different number of items
2624 * than other pages, we need to do it differently.
2625 */
2626 if (offset + MAX_MEMBERS_IN_LAST_MEMBERS_PAGE < offset)
2627 {
2628 /*
2629 * This is the last page of the last segment; we can compute the
2630 * number of items left to allocate in it without modulo
2631 * arithmetic.
2632 */
2633 difference = MaxMultiXactOffset - offset + 1;
2634 }
2635 else
2637
2638 /*
2639 * Advance to next page, taking care to properly handle the wraparound
2640 * case. OK if nmembers goes negative.
2641 */
2642 nmembers -= difference;
2643 offset += difference;
2644 }
2645}
2646
2647/*
2648 * GetOldestMultiXactId
2649 *
2650 * Return the oldest MultiXactId that's still possibly still seen as live by
2651 * any running transaction. Older ones might still exist on disk, but they no
2652 * longer have any running member transaction.
2653 *
2654 * It's not safe to truncate MultiXact SLRU segments on the value returned by
2655 * this function; however, it can be set as the new relminmxid for any table
2656 * that VACUUM knows has no remaining MXIDs < the same value. It is only safe
2657 * to truncate SLRUs when no table can possibly still have a referencing MXID.
2658 */
2661{
2662 MultiXactId oldestMXact;
2663 MultiXactId nextMXact;
2664 int i;
2665
2666 /*
2667 * This is the oldest valid value among all the OldestMemberMXactId[] and
2668 * OldestVisibleMXactId[] entries, or nextMXact if none are valid.
2669 */
2670 LWLockAcquire(MultiXactGenLock, LW_SHARED);
2671
2672 /*
2673 * We have to beware of the possibility that nextMXact is in the
2674 * wrapped-around state. We don't fix the counter itself here, but we
2675 * must be sure to use a valid value in our calculation.
2676 */
2677 nextMXact = MultiXactState->nextMXact;
2678 if (nextMXact < FirstMultiXactId)
2679 nextMXact = FirstMultiXactId;
2680
2681 oldestMXact = nextMXact;
2682 for (i = 0; i < MaxOldestSlot; i++)
2683 {
2684 MultiXactId thisoldest;
2685
2686 thisoldest = OldestMemberMXactId[i];
2687 if (MultiXactIdIsValid(thisoldest) &&
2688 MultiXactIdPrecedes(thisoldest, oldestMXact))
2689 oldestMXact = thisoldest;
2690 thisoldest = OldestVisibleMXactId[i];
2691 if (MultiXactIdIsValid(thisoldest) &&
2692 MultiXactIdPrecedes(thisoldest, oldestMXact))
2693 oldestMXact = thisoldest;
2694 }
2695
2696 LWLockRelease(MultiXactGenLock);
2697
2698 return oldestMXact;
2699}
2700
2701/*
2702 * Determine how aggressively we need to vacuum in order to prevent member
2703 * wraparound.
2704 *
2705 * To do so determine what's the oldest member offset and install the limit
2706 * info in MultiXactState, where it can be used to prevent overrun of old data
2707 * in the members SLRU area.
2708 *
2709 * The return value is true if emergency autovacuum is required and false
2710 * otherwise.
2711 */
2712static bool
2713SetOffsetVacuumLimit(bool is_startup)
2714{
2715 MultiXactId oldestMultiXactId;
2716 MultiXactId nextMXact;
2717 MultiXactOffset oldestOffset = 0; /* placate compiler */
2718 MultiXactOffset prevOldestOffset;
2719 MultiXactOffset nextOffset;
2720 bool oldestOffsetKnown = false;
2721 bool prevOldestOffsetKnown;
2722 MultiXactOffset offsetStopLimit = 0;
2723 MultiXactOffset prevOffsetStopLimit;
2724
2725 /*
2726 * NB: Have to prevent concurrent truncation, we might otherwise try to
2727 * lookup an oldestMulti that's concurrently getting truncated away.
2728 */
2729 LWLockAcquire(MultiXactTruncationLock, LW_SHARED);
2730
2731 /* Read relevant fields from shared memory. */
2732 LWLockAcquire(MultiXactGenLock, LW_SHARED);
2733 oldestMultiXactId = MultiXactState->oldestMultiXactId;
2734 nextMXact = MultiXactState->nextMXact;
2735 nextOffset = MultiXactState->nextOffset;
2736 prevOldestOffsetKnown = MultiXactState->oldestOffsetKnown;
2737 prevOldestOffset = MultiXactState->oldestOffset;
2738 prevOffsetStopLimit = MultiXactState->offsetStopLimit;
2740 LWLockRelease(MultiXactGenLock);
2741
2742 /*
2743 * Determine the offset of the oldest multixact. Normally, we can read
2744 * the offset from the multixact itself, but there's an important special
2745 * case: if there are no multixacts in existence at all, oldestMXact
2746 * obviously can't point to one. It will instead point to the multixact
2747 * ID that will be assigned the next time one is needed.
2748 */
2749 if (oldestMultiXactId == nextMXact)
2750 {
2751 /*
2752 * When the next multixact gets created, it will be stored at the next
2753 * offset.
2754 */
2755 oldestOffset = nextOffset;
2756 oldestOffsetKnown = true;
2757 }
2758 else
2759 {
2760 /*
2761 * Figure out where the oldest existing multixact's offsets are
2762 * stored. Due to bugs in early release of PostgreSQL 9.3.X and 9.4.X,
2763 * the supposedly-earliest multixact might not really exist. We are
2764 * careful not to fail in that case.
2765 */
2766 oldestOffsetKnown =
2767 find_multixact_start(oldestMultiXactId, &oldestOffset);
2768
2769 if (oldestOffsetKnown)
2771 (errmsg_internal("oldest MultiXactId member is at offset %u",
2772 oldestOffset)));
2773 else
2774 ereport(LOG,
2775 (errmsg("MultiXact member wraparound protections are disabled because oldest checkpointed MultiXact %u does not exist on disk",
2776 oldestMultiXactId)));
2777 }
2778
2779 LWLockRelease(MultiXactTruncationLock);
2780
2781 /*
2782 * If we can, compute limits (and install them MultiXactState) to prevent
2783 * overrun of old data in the members SLRU area. We can only do so if the
2784 * oldest offset is known though.
2785 */
2786 if (oldestOffsetKnown)
2787 {
2788 /* move back to start of the corresponding segment */
2789 offsetStopLimit = oldestOffset - (oldestOffset %
2791
2792 /* always leave one segment before the wraparound point */
2794
2795 if (!prevOldestOffsetKnown && !is_startup)
2796 ereport(LOG,
2797 (errmsg("MultiXact member wraparound protections are now enabled")));
2798
2800 (errmsg_internal("MultiXact member stop limit is now %u based on MultiXact %u",
2801 offsetStopLimit, oldestMultiXactId)));
2802 }
2803 else if (prevOldestOffsetKnown)
2804 {
2805 /*
2806 * If we failed to get the oldest offset this time, but we have a
2807 * value from a previous pass through this function, use the old
2808 * values rather than automatically forcing an emergency autovacuum
2809 * cycle again.
2810 */
2811 oldestOffset = prevOldestOffset;
2812 oldestOffsetKnown = true;
2813 offsetStopLimit = prevOffsetStopLimit;
2814 }
2815
2816 /* Install the computed values */
2817 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2818 MultiXactState->oldestOffset = oldestOffset;
2819 MultiXactState->oldestOffsetKnown = oldestOffsetKnown;
2820 MultiXactState->offsetStopLimit = offsetStopLimit;
2821 LWLockRelease(MultiXactGenLock);
2822
2823 /*
2824 * Do we need an emergency autovacuum? If we're not sure, assume yes.
2825 */
2826 return !oldestOffsetKnown ||
2827 (nextOffset - oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD);
2828}
2829
2830/*
2831 * Return whether adding "distance" to "start" would move past "boundary".
2832 *
2833 * We use this to determine whether the addition is "wrapping around" the
2834 * boundary point, hence the name. The reason we don't want to use the regular
2835 * 2^31-modulo arithmetic here is that we want to be able to use the whole of
2836 * the 2^32-1 space here, allowing for more multixacts than would fit
2837 * otherwise.
2838 */
2839static bool
2841 uint32 distance)
2842{
2843 MultiXactOffset finish;
2844
2845 /*
2846 * Note that offset number 0 is not used (see GetMultiXactIdMembers), so
2847 * if the addition wraps around the UINT_MAX boundary, skip that value.
2848 */
2849 finish = start + distance;
2850 if (finish < start)
2851 finish++;
2852
2853 /*-----------------------------------------------------------------------
2854 * When the boundary is numerically greater than the starting point, any
2855 * value numerically between the two is not wrapped:
2856 *
2857 * <----S----B---->
2858 * [---) = F wrapped past B (and UINT_MAX)
2859 * [---) = F not wrapped
2860 * [----] = F wrapped past B
2861 *
2862 * When the boundary is numerically less than the starting point (i.e. the
2863 * UINT_MAX wraparound occurs somewhere in between) then all values in
2864 * between are wrapped:
2865 *
2866 * <----B----S---->
2867 * [---) = F not wrapped past B (but wrapped past UINT_MAX)
2868 * [---) = F wrapped past B (and UINT_MAX)
2869 * [----] = F not wrapped
2870 *-----------------------------------------------------------------------
2871 */
2872 if (start < boundary)
2873 return finish >= boundary || finish < start;
2874 else
2875 return finish >= boundary && finish < start;
2876}
2877
2878/*
2879 * Find the starting offset of the given MultiXactId.
2880 *
2881 * Returns false if the file containing the multi does not exist on disk.
2882 * Otherwise, returns true and sets *result to the starting member offset.
2883 *
2884 * This function does not prevent concurrent truncation, so if that's
2885 * required, the caller has to protect against that.
2886 */
2887static bool
2889{
2890 MultiXactOffset offset;
2891 int64 pageno;
2892 int entryno;
2893 int slotno;
2894 MultiXactOffset *offptr;
2895
2897
2898 pageno = MultiXactIdToOffsetPage(multi);
2899 entryno = MultiXactIdToOffsetEntry(multi);
2900
2901 /*
2902 * Write out dirty data, so PhysicalPageExists can work correctly.
2903 */
2906
2908 return false;
2909
2910 /* lock is acquired by SimpleLruReadPage_ReadOnly */
2911 slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, multi);
2912 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
2913 offptr += entryno;
2914 offset = *offptr;
2916
2917 *result = offset;
2918 return true;
2919}
2920
2921/*
2922 * Determine how many multixacts, and how many multixact members, currently
2923 * exist. Return false if unable to determine.
2924 */
2925static bool
2927{
2928 MultiXactOffset nextOffset;
2929 MultiXactOffset oldestOffset;
2930 MultiXactId oldestMultiXactId;
2931 MultiXactId nextMultiXactId;
2932 bool oldestOffsetKnown;
2933
2934 LWLockAcquire(MultiXactGenLock, LW_SHARED);
2935 nextOffset = MultiXactState->nextOffset;
2936 oldestMultiXactId = MultiXactState->oldestMultiXactId;
2937 nextMultiXactId = MultiXactState->nextMXact;
2938 oldestOffset = MultiXactState->oldestOffset;
2939 oldestOffsetKnown = MultiXactState->oldestOffsetKnown;
2940 LWLockRelease(MultiXactGenLock);
2941
2942 if (!oldestOffsetKnown)
2943 return false;
2944
2945 *members = nextOffset - oldestOffset;
2946 *multixacts = nextMultiXactId - oldestMultiXactId;
2947 return true;
2948}
2949
2950/*
2951 * Multixact members can be removed once the multixacts that refer to them
2952 * are older than every datminmxid. autovacuum_multixact_freeze_max_age and
2953 * vacuum_multixact_freeze_table_age work together to make sure we never have
2954 * too many multixacts; we hope that, at least under normal circumstances,
2955 * this will also be sufficient to keep us from using too many offsets.
2956 * However, if the average multixact has many members, we might exhaust the
2957 * members space while still using few enough members that these limits fail
2958 * to trigger relminmxid advancement by VACUUM. At that point, we'd have no
2959 * choice but to start failing multixact-creating operations with an error.
2960 *
2961 * To prevent that, if more than a threshold portion of the members space is
2962 * used, we effectively reduce autovacuum_multixact_freeze_max_age and
2963 * to a value just less than the number of multixacts in use. We hope that
2964 * this will quickly trigger autovacuuming on the table or tables with the
2965 * oldest relminmxid, thus allowing datminmxid values to advance and removing
2966 * some members.
2967 *
2968 * As the fraction of the member space currently in use grows, we become
2969 * more aggressive in clamping this value. That not only causes autovacuum
2970 * to ramp up, but also makes any manual vacuums the user issues more
2971 * aggressive. This happens because vacuum_get_cutoffs() will clamp the
2972 * freeze table and the minimum freeze age cutoffs based on the effective
2973 * autovacuum_multixact_freeze_max_age this function returns. In the worst
2974 * case, we'll claim the freeze_max_age to zero, and every vacuum of any
2975 * table will freeze every multixact.
2976 */
2977int
2979{
2980 MultiXactOffset members;
2981 uint32 multixacts;
2982 uint32 victim_multixacts;
2983 double fraction;
2984 int result;
2985
2986 /* If we can't determine member space utilization, assume the worst. */
2987 if (!ReadMultiXactCounts(&multixacts, &members))
2988 return 0;
2989
2990 /* If member space utilization is low, no special action is required. */
2991 if (members <= MULTIXACT_MEMBER_SAFE_THRESHOLD)
2993
2994 /*
2995 * Compute a target for relminmxid advancement. The number of multixacts
2996 * we try to eliminate from the system is based on how far we are past
2997 * MULTIXACT_MEMBER_SAFE_THRESHOLD.
2998 */
2999 fraction = (double) (members - MULTIXACT_MEMBER_SAFE_THRESHOLD) /
3001 victim_multixacts = multixacts * fraction;
3002
3003 /* fraction could be > 1.0, but lowest possible freeze age is zero */
3004 if (victim_multixacts > multixacts)
3005 return 0;
3006 result = multixacts - victim_multixacts;
3007
3008 /*
3009 * Clamp to autovacuum_multixact_freeze_max_age, so that we never make
3010 * autovacuum less aggressive than it would otherwise be.
3011 */
3013}
3014
3015typedef struct mxtruncinfo
3016{
3019
3020/*
3021 * SlruScanDirectory callback
3022 * This callback determines the earliest existing page number.
3023 */
3024static bool
3026{
3027 mxtruncinfo *trunc = (mxtruncinfo *) data;
3028
3029 if (trunc->earliestExistingPage == -1 ||
3030 ctl->PagePrecedes(segpage, trunc->earliestExistingPage))
3031 {
3032 trunc->earliestExistingPage = segpage;
3033 }
3034
3035 return false; /* keep going */
3036}
3037
3038
3039/*
3040 * Delete members segments [oldest, newOldest)
3041 *
3042 * The members SLRU can, in contrast to the offsets one, be filled to almost
3043 * the full range at once. This means SimpleLruTruncate() can't trivially be
3044 * used - instead the to-be-deleted range is computed using the offsets
3045 * SLRU. C.f. TruncateMultiXact().
3046 */
3047static void
3049{
3051 int64 startsegment = MXOffsetToMemberSegment(oldestOffset);
3052 int64 endsegment = MXOffsetToMemberSegment(newOldestOffset);
3053 int64 segment = startsegment;
3054
3055 /*
3056 * Delete all the segments but the last one. The last segment can still
3057 * contain, possibly partially, valid data.
3058 */
3059 while (segment != endsegment)
3060 {
3061 elog(DEBUG2, "truncating multixact members segment %llx",
3062 (unsigned long long) segment);
3064
3065 /* move to next segment, handling wraparound correctly */
3066 if (segment == maxsegment)
3067 segment = 0;
3068 else
3069 segment += 1;
3070 }
3071}
3072
3073/*
3074 * Delete offsets segments [oldest, newOldest)
3075 */
3076static void
3078{
3079 /*
3080 * We step back one multixact to avoid passing a cutoff page that hasn't
3081 * been created yet in the rare case that oldestMulti would be the first
3082 * item on a page and oldestMulti == nextMulti. In that case, if we
3083 * didn't subtract one, we'd trigger SimpleLruTruncate's wraparound
3084 * detection.
3085 */
3088}
3089
3090/*
3091 * Remove all MultiXactOffset and MultiXactMember segments before the oldest
3092 * ones still of interest.
3093 *
3094 * This is only called on a primary as part of vacuum (via
3095 * vac_truncate_clog()). During recovery truncation is done by replaying
3096 * truncation WAL records logged here.
3097 *
3098 * newOldestMulti is the oldest currently required multixact, newOldestMultiDB
3099 * is one of the databases preventing newOldestMulti from increasing.
3100 */
3101void
3102TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
3103{
3104 MultiXactId oldestMulti;
3105 MultiXactId nextMulti;
3106 MultiXactOffset newOldestOffset;
3107 MultiXactOffset oldestOffset;
3108 MultiXactOffset nextOffset;
3109 mxtruncinfo trunc;
3110 MultiXactId earliest;
3111
3114
3115 /*
3116 * We can only allow one truncation to happen at once. Otherwise parts of
3117 * members might vanish while we're doing lookups or similar. There's no
3118 * need to have an interlock with creating new multis or such, since those
3119 * are constrained by the limits (which only grow, never shrink).
3120 */
3121 LWLockAcquire(MultiXactTruncationLock, LW_EXCLUSIVE);
3122
3123 LWLockAcquire(MultiXactGenLock, LW_SHARED);
3124 nextMulti = MultiXactState->nextMXact;
3125 nextOffset = MultiXactState->nextOffset;
3126 oldestMulti = MultiXactState->oldestMultiXactId;
3127 LWLockRelease(MultiXactGenLock);
3128 Assert(MultiXactIdIsValid(oldestMulti));
3129
3130 /*
3131 * Make sure to only attempt truncation if there's values to truncate
3132 * away. In normal processing values shouldn't go backwards, but there's
3133 * some corner cases (due to bugs) where that's possible.
3134 */
3135 if (MultiXactIdPrecedesOrEquals(newOldestMulti, oldestMulti))
3136 {
3137 LWLockRelease(MultiXactTruncationLock);
3138 return;
3139 }
3140
3141 /*
3142 * Note we can't just plow ahead with the truncation; it's possible that
3143 * there are no segments to truncate, which is a problem because we are
3144 * going to attempt to read the offsets page to determine where to
3145 * truncate the members SLRU. So we first scan the directory to determine
3146 * the earliest offsets page number that we can read without error.
3147 *
3148 * When nextMXact is less than one segment away from multiWrapLimit,
3149 * SlruScanDirCbFindEarliest can find some early segment other than the
3150 * actual earliest. (MultiXactOffsetPagePrecedes(EARLIEST, LATEST)
3151 * returns false, because not all pairs of entries have the same answer.)
3152 * That can also arise when an earlier truncation attempt failed unlink()
3153 * or returned early from this function. The only consequence is
3154 * returning early, which wastes space that we could have liberated.
3155 *
3156 * NB: It's also possible that the page that oldestMulti is on has already
3157 * been truncated away, and we crashed before updating oldestMulti.
3158 */
3159 trunc.earliestExistingPage = -1;
3162 if (earliest < FirstMultiXactId)
3163 earliest = FirstMultiXactId;
3164
3165 /* If there's nothing to remove, we can bail out early. */
3166 if (MultiXactIdPrecedes(oldestMulti, earliest))
3167 {
3168 LWLockRelease(MultiXactTruncationLock);
3169 return;
3170 }
3171
3172 /*
3173 * First, compute the safe truncation point for MultiXactMember. This is
3174 * the starting offset of the oldest multixact.
3175 *
3176 * Hopefully, find_multixact_start will always work here, because we've
3177 * already checked that it doesn't precede the earliest MultiXact on disk.
3178 * But if it fails, don't truncate anything, and log a message.
3179 */
3180 if (oldestMulti == nextMulti)
3181 {
3182 /* there are NO MultiXacts */
3183 oldestOffset = nextOffset;
3184 }
3185 else if (!find_multixact_start(oldestMulti, &oldestOffset))
3186 {
3187 ereport(LOG,
3188 (errmsg("oldest MultiXact %u not found, earliest MultiXact %u, skipping truncation",
3189 oldestMulti, earliest)));
3190 LWLockRelease(MultiXactTruncationLock);
3191 return;
3192 }
3193
3194 /*
3195 * Secondly compute up to where to truncate. Lookup the corresponding
3196 * member offset for newOldestMulti for that.
3197 */
3198 if (newOldestMulti == nextMulti)
3199 {
3200 /* there are NO MultiXacts */
3201 newOldestOffset = nextOffset;
3202 }
3203 else if (!find_multixact_start(newOldestMulti, &newOldestOffset))
3204 {
3205 ereport(LOG,
3206 (errmsg("cannot truncate up to MultiXact %u because it does not exist on disk, skipping truncation",
3207 newOldestMulti)));
3208 LWLockRelease(MultiXactTruncationLock);
3209 return;
3210 }
3211
3212 elog(DEBUG1, "performing multixact truncation: "
3213 "offsets [%u, %u), offsets segments [%llx, %llx), "
3214 "members [%u, %u), members segments [%llx, %llx)",
3215 oldestMulti, newOldestMulti,
3216 (unsigned long long) MultiXactIdToOffsetSegment(oldestMulti),
3217 (unsigned long long) MultiXactIdToOffsetSegment(newOldestMulti),
3218 oldestOffset, newOldestOffset,
3219 (unsigned long long) MXOffsetToMemberSegment(oldestOffset),
3220 (unsigned long long) MXOffsetToMemberSegment(newOldestOffset));
3221
3222 /*
3223 * Do truncation, and the WAL logging of the truncation, in a critical
3224 * section. That way offsets/members cannot get out of sync anymore, i.e.
3225 * once consistent the newOldestMulti will always exist in members, even
3226 * if we crashed in the wrong moment.
3227 */
3229
3230 /*
3231 * Prevent checkpoints from being scheduled concurrently. This is critical
3232 * because otherwise a truncation record might not be replayed after a
3233 * crash/basebackup, even though the state of the data directory would
3234 * require it.
3235 */
3238
3239 /* WAL log truncation */
3240 WriteMTruncateXlogRec(newOldestMultiDB,
3241 oldestMulti, newOldestMulti,
3242 oldestOffset, newOldestOffset);
3243
3244 /*
3245 * Update in-memory limits before performing the truncation, while inside
3246 * the critical section: Have to do it before truncation, to prevent
3247 * concurrent lookups of those values. Has to be inside the critical
3248 * section as otherwise a future call to this function would error out,
3249 * while looking up the oldest member in offsets, if our caller crashes
3250 * before updating the limits.
3251 */
3252 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
3253 MultiXactState->oldestMultiXactId = newOldestMulti;
3254 MultiXactState->oldestMultiXactDB = newOldestMultiDB;
3255 LWLockRelease(MultiXactGenLock);
3256
3257 /* First truncate members */
3258 PerformMembersTruncation(oldestOffset, newOldestOffset);
3259
3260 /* Then offsets */
3261 PerformOffsetsTruncation(oldestMulti, newOldestMulti);
3262
3263 MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
3264
3266 LWLockRelease(MultiXactTruncationLock);
3267}
3268
3269/*
3270 * Decide whether a MultiXactOffset page number is "older" for truncation
3271 * purposes. Analogous to CLOGPagePrecedes().
3272 *
3273 * Offsetting the values is optional, because MultiXactIdPrecedes() has
3274 * translational symmetry.
3275 */
3276static bool
3278{
3279 MultiXactId multi1;
3280 MultiXactId multi2;
3281
3282 multi1 = ((MultiXactId) page1) * MULTIXACT_OFFSETS_PER_PAGE;
3283 multi1 += FirstMultiXactId + 1;
3284 multi2 = ((MultiXactId) page2) * MULTIXACT_OFFSETS_PER_PAGE;
3285 multi2 += FirstMultiXactId + 1;
3286
3287 return (MultiXactIdPrecedes(multi1, multi2) &&
3288 MultiXactIdPrecedes(multi1,
3289 multi2 + MULTIXACT_OFFSETS_PER_PAGE - 1));
3290}
3291
3292/*
3293 * Decide whether a MultiXactMember page number is "older" for truncation
3294 * purposes. There is no "invalid offset number" so use the numbers verbatim.
3295 */
3296static bool
3298{
3299 MultiXactOffset offset1;
3300 MultiXactOffset offset2;
3301
3302 offset1 = ((MultiXactOffset) page1) * MULTIXACT_MEMBERS_PER_PAGE;
3303 offset2 = ((MultiXactOffset) page2) * MULTIXACT_MEMBERS_PER_PAGE;
3304
3305 return (MultiXactOffsetPrecedes(offset1, offset2) &&
3307 offset2 + MULTIXACT_MEMBERS_PER_PAGE - 1));
3308}
3309
3310/*
3311 * Decide which of two MultiXactIds is earlier.
3312 *
3313 * XXX do we need to do something special for InvalidMultiXactId?
3314 * (Doesn't look like it.)
3315 */
3316bool
3318{
3319 int32 diff = (int32) (multi1 - multi2);
3320
3321 return (diff < 0);
3322}
3323
3324/*
3325 * MultiXactIdPrecedesOrEquals -- is multi1 logically <= multi2?
3326 *
3327 * XXX do we need to do something special for InvalidMultiXactId?
3328 * (Doesn't look like it.)
3329 */
3330bool
3332{
3333 int32 diff = (int32) (multi1 - multi2);
3334
3335 return (diff <= 0);
3336}
3337
3338
3339/*
3340 * Decide which of two offsets is earlier.
3341 */
3342static bool
3344{
3345 int32 diff = (int32) (offset1 - offset2);
3346
3347 return (diff < 0);
3348}
3349
3350/*
3351 * Write an xlog record reflecting the zeroing of either a MEMBERs or
3352 * OFFSETs page (info shows which)
3353 */
3354static void
3356{
3358 XLogRegisterData((char *) (&pageno), sizeof(pageno));
3359 (void) XLogInsert(RM_MULTIXACT_ID, info);
3360}
3361
3362/*
3363 * Write a TRUNCATE xlog record
3364 *
3365 * We must flush the xlog record to disk before returning --- see notes in
3366 * TruncateCLOG().
3367 */
3368static void
3370 MultiXactId startTruncOff, MultiXactId endTruncOff,
3371 MultiXactOffset startTruncMemb, MultiXactOffset endTruncMemb)
3372{
3373 XLogRecPtr recptr;
3375
3376 xlrec.oldestMultiDB = oldestMultiDB;
3377
3378 xlrec.startTruncOff = startTruncOff;
3379 xlrec.endTruncOff = endTruncOff;
3380
3381 xlrec.startTruncMemb = startTruncMemb;
3382 xlrec.endTruncMemb = endTruncMemb;
3383
3385 XLogRegisterData((char *) (&xlrec), SizeOfMultiXactTruncate);
3386 recptr = XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_TRUNCATE_ID);
3387 XLogFlush(recptr);
3388}
3389
3390/*
3391 * MULTIXACT resource manager's routines
3392 */
3393void
3395{
3396 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
3397
3398 /* Backup blocks are not used in multixact records */
3400
3401 if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
3402 {
3403 int64 pageno;
3404 int slotno;
3405 LWLock *lock;
3406
3407 memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
3408
3411
3412 slotno = ZeroMultiXactOffsetPage(pageno, false);
3414 Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
3415
3416 LWLockRelease(lock);
3417 }
3418 else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
3419 {
3420 int64 pageno;
3421 int slotno;
3422 LWLock *lock;
3423
3424 memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
3425
3428
3429 slotno = ZeroMultiXactMemberPage(pageno, false);
3431 Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
3432
3433 LWLockRelease(lock);
3434 }
3435 else if (info == XLOG_MULTIXACT_CREATE_ID)
3436 {
3437 xl_multixact_create *xlrec =
3439 TransactionId max_xid;
3440 int i;
3441
3442 /* Store the data back into the SLRU files */
3443 RecordNewMultiXact(xlrec->mid, xlrec->moff, xlrec->nmembers,
3444 xlrec->members);
3445
3446 /* Make sure nextMXact/nextOffset are beyond what this record has */
3447 MultiXactAdvanceNextMXact(xlrec->mid + 1,
3448 xlrec->moff + xlrec->nmembers);
3449
3450 /*
3451 * Make sure nextXid is beyond any XID mentioned in the record. This
3452 * should be unnecessary, since any XID found here ought to have other
3453 * evidence in the XLOG, but let's be safe.
3454 */
3455 max_xid = XLogRecGetXid(record);
3456 for (i = 0; i < xlrec->nmembers; i++)
3457 {
3458 if (TransactionIdPrecedes(max_xid, xlrec->members[i].xid))
3459 max_xid = xlrec->members[i].xid;
3460 }
3461
3463 }
3464 else if (info == XLOG_MULTIXACT_TRUNCATE_ID)
3465 {
3467 int64 pageno;
3468
3469 memcpy(&xlrec, XLogRecGetData(record),
3471
3472 elog(DEBUG1, "replaying multixact truncation: "
3473 "offsets [%u, %u), offsets segments [%llx, %llx), "
3474 "members [%u, %u), members segments [%llx, %llx)",
3475 xlrec.startTruncOff, xlrec.endTruncOff,
3476 (unsigned long long) MultiXactIdToOffsetSegment(xlrec.startTruncOff),
3477 (unsigned long long) MultiXactIdToOffsetSegment(xlrec.endTruncOff),
3478 xlrec.startTruncMemb, xlrec.endTruncMemb,
3479 (unsigned long long) MXOffsetToMemberSegment(xlrec.startTruncMemb),
3480 (unsigned long long) MXOffsetToMemberSegment(xlrec.endTruncMemb));
3481
3482 /* should not be required, but more than cheap enough */
3483 LWLockAcquire(MultiXactTruncationLock, LW_EXCLUSIVE);
3484
3485 /*
3486 * Advance the horizon values, so they're current at the end of
3487 * recovery.
3488 */
3489 SetMultiXactIdLimit(xlrec.endTruncOff, xlrec.oldestMultiDB, false);
3490
3492
3493 /*
3494 * During XLOG replay, latest_page_number isn't necessarily set up
3495 * yet; insert a suitable value to bypass the sanity test in
3496 * SimpleLruTruncate.
3497 */
3498 pageno = MultiXactIdToOffsetPage(xlrec.endTruncOff);
3499 pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
3500 pageno);
3502
3503 LWLockRelease(MultiXactTruncationLock);
3504 }
3505 else
3506 elog(PANIC, "multixact_redo: unknown op code %u", info);
3507}
3508
3509Datum
3511{
3512 typedef struct
3513 {
3514 MultiXactMember *members;
3515 int nmembers;
3516 int iter;
3517 } mxact;
3519 mxact *multi;
3520 FuncCallContext *funccxt;
3521
3522 if (mxid < FirstMultiXactId)
3523 ereport(ERROR,
3524 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3525 errmsg("invalid MultiXactId: %u", mxid)));
3526
3527 if (SRF_IS_FIRSTCALL())
3528 {
3529 MemoryContext oldcxt;
3530 TupleDesc tupdesc;
3531
3532 funccxt = SRF_FIRSTCALL_INIT();
3534
3535 multi = palloc(sizeof(mxact));
3536 /* no need to allow for old values here */
3537 multi->nmembers = GetMultiXactIdMembers(mxid, &multi->members, false,
3538 false);
3539 multi->iter = 0;
3540
3541 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
3542 elog(ERROR, "return type must be a row type");
3543 funccxt->tuple_desc = tupdesc;
3544 funccxt->attinmeta = TupleDescGetAttInMetadata(tupdesc);
3545 funccxt->user_fctx = multi;
3546
3547 MemoryContextSwitchTo(oldcxt);
3548 }
3549
3550 funccxt = SRF_PERCALL_SETUP();
3551 multi = (mxact *) funccxt->user_fctx;
3552
3553 while (multi->iter < multi->nmembers)
3554 {
3555 HeapTuple tuple;
3556 char *values[2];
3557
3558 values[0] = psprintf("%u", multi->members[multi->iter].xid);
3559 values[1] = mxstatus_to_string(multi->members[multi->iter].status);
3560
3561 tuple = BuildTupleFromCStrings(funccxt->attinmeta, values);
3562
3563 multi->iter++;
3564 pfree(values[0]);
3565 SRF_RETURN_NEXT(funccxt, HeapTupleGetDatum(tuple));
3566 }
3567
3568 SRF_RETURN_DONE(funccxt);
3569}
3570
3571/*
3572 * Entrypoint for sync.c to sync offsets files.
3573 */
3574int
3575multixactoffsetssyncfiletag(const FileTag *ftag, char *path)
3576{
3577 return SlruSyncFileTag(MultiXactOffsetCtl, ftag, path);
3578}
3579
3580/*
3581 * Entrypoint for sync.c to sync members files.
3582 */
3583int
3584multixactmemberssyncfiletag(const FileTag *ftag, char *path)
3585{
3586 return SlruSyncFileTag(MultiXactMemberCtl, ftag, path);
3587}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:485
int autovacuum_multixact_freeze_max_age
Definition: autovacuum.c:130
static int32 next
Definition: blutils.c:221
static Datum values[MAXATTR]
Definition: bootstrap.c:151
#define Min(x, y)
Definition: c.h:961
uint8_t uint8
Definition: c.h:486
#define Assert(condition)
Definition: c.h:815
int64_t int64
Definition: c.h:485
uint32 MultiXactOffset
Definition: c.h:621
TransactionId MultiXactId
Definition: c.h:619
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:420
int32_t int32
Definition: c.h:484
uint16_t uint16
Definition: c.h:487
uint32_t uint32
Definition: c.h:488
#define MemSet(start, val, len)
Definition: c.h:977
uint32 TransactionId
Definition: c.h:609
size_t Size
Definition: c.h:562
bool ConditionVariableCancelSleep(void)
void ConditionVariableBroadcast(ConditionVariable *cv)
void ConditionVariableInit(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
char * get_database_name(Oid dbid)
Definition: dbcommands.c:3187
int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1180
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
int errdetail_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1295
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define LOG
Definition: elog.h:31
#define WARNING
Definition: elog.h:36
#define DEBUG2
Definition: elog.h:29
#define PANIC
Definition: elog.h:42
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
HeapTuple BuildTupleFromCStrings(AttInMetadata *attinmeta, char **values)
Definition: execTuples.c:2322
AttInMetadata * TupleDescGetAttInMetadata(TupleDesc tupdesc)
Definition: execTuples.c:2273
#define PG_GETARG_TRANSACTIONID(n)
Definition: fmgr.h:279
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:276
#define SRF_IS_FIRSTCALL()
Definition: funcapi.h:304
#define SRF_PERCALL_SETUP()
Definition: funcapi.h:308
@ TYPEFUNC_COMPOSITE
Definition: funcapi.h:149
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition: funcapi.h:310
#define SRF_FIRSTCALL_INIT()
Definition: funcapi.h:306
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition: funcapi.h:230
#define SRF_RETURN_DONE(_funcctx)
Definition: funcapi.h:328
Datum difference(PG_FUNCTION_ARGS)
int multixact_offset_buffers
Definition: globals.c:162
bool IsBinaryUpgrade
Definition: globals.c:120
ProcNumber MyProcNumber
Definition: globals.c:89
bool IsUnderPostmaster
Definition: globals.c:119
int multixact_member_buffers
Definition: globals.c:161
#define newval
GucSource
Definition: guc.h:112
return str start
const char * str
#define dclist_container(type, membername, ptr)
Definition: ilist.h:947
static uint32 dclist_count(const dclist_head *head)
Definition: ilist.h:932
static void dclist_move_head(dclist_head *head, dlist_node *node)
Definition: ilist.h:808
static dlist_node * dclist_tail_node(dclist_head *head)
Definition: ilist.h:920
static void dclist_delete_from(dclist_head *head, dlist_node *node)
Definition: ilist.h:763
#define DCLIST_STATIC_INIT(name)
Definition: ilist.h:282
static void dclist_push_head(dclist_head *head, dlist_node *node)
Definition: ilist.h:693
static void dclist_init(dclist_head *head)
Definition: ilist.h:671
#define dclist_foreach(iter, lhead)
Definition: ilist.h:970
#define INJECTION_POINT(name)
#define INJECTION_POINT_CACHED(name)
#define INJECTION_POINT_LOAD(name)
int j
Definition: isn.c:73
int i
Definition: isn.c:72
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1168
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1781
@ LWTRANCHE_MULTIXACTOFFSET_SLRU
Definition: lwlock.h:212
@ LWTRANCHE_MULTIXACTMEMBER_SLRU
Definition: lwlock.h:211
@ LWTRANCHE_MULTIXACTMEMBER_BUFFER
Definition: lwlock.h:183
@ LWTRANCHE_MULTIXACTOFFSET_BUFFER
Definition: lwlock.h:182
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition: mcxt.c:1683
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1181
MemoryContext TopTransactionContext
Definition: mcxt.c:154
void pfree(void *pointer)
Definition: mcxt.c:1521
MemoryContext TopMemoryContext
Definition: mcxt.c:149
void * palloc(Size size)
Definition: mcxt.c:1317
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_SMALL_SIZES
Definition: memutils.h:170
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
static void WriteMTruncateXlogRec(Oid oldestMultiDB, MultiXactId startTruncOff, MultiXactId endTruncOff, MultiXactOffset startTruncMemb, MultiXactOffset endTruncMemb)
Definition: multixact.c:3369
static MultiXactId PreviousMultiXactId(MultiXactId multi)
Definition: multixact.c:221
static SlruCtlData MultiXactOffsetCtlData
Definition: multixact.c:229
void MultiXactShmemInit(void)
Definition: multixact.c:1964
#define MULTIXACT_MEMBER_SAFE_THRESHOLD
Definition: multixact.c:216
static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2)
Definition: multixact.c:3297
static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
Definition: multixact.c:1032
static int mXactCacheGetById(MultiXactId multi, MultiXactMember **members)
Definition: multixact.c:1662
MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status)
Definition: multixact.c:487
static int ZeroMultiXactMemberPage(int64 pageno, bool writeXlog)
Definition: multixact.c:2090
static int64 MXOffsetToMemberPage(MultiXactOffset offset)
Definition: multixact.c:173
#define MXACT_MEMBER_BITS_PER_XACT
Definition: multixact.c:143
static int64 MultiXactIdToOffsetSegment(MultiXactId multi)
Definition: multixact.c:125
static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
Definition: multixact.c:2585
void ReadMultiXactIdRange(MultiXactId *oldest, MultiXactId *next)
Definition: multixact.c:791
static void PerformOffsetsTruncation(MultiXactId oldestMulti, MultiXactId newOldestMulti)
Definition: multixact.c:3077
#define MXACT_MEMBER_XACT_BITMASK
Definition: multixact.c:145
#define MULTIXACT_FLAGBYTES_PER_GROUP
Definition: multixact.c:148
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3317
void multixact_redo(XLogReaderState *record)
Definition: multixact.c:3394
#define MULTIXACT_OFFSETS_PER_PAGE
Definition: multixact.c:110
#define debug_elog5(a, b, c, d, e)
Definition: multixact.c:384
static void MultiXactIdSetOldestVisible(void)
Definition: multixact.c:730
int multixactoffsetssyncfiletag(const FileTag *ftag, char *path)
Definition: multixact.c:3575
void multixact_twophase_postcommit(TransactionId xid, uint16 info, void *recdata, uint32 len)
Definition: multixact.c:1920
static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result)
Definition: multixact.c:2888
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition: multixact.c:2328
void multixact_twophase_recover(TransactionId xid, uint16 info, void *recdata, uint32 len)
Definition: multixact.c:1899
#define MultiXactMemberCtl
Definition: multixact.c:233
static bool SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int64 segpage, void *data)
Definition: multixact.c:3025
void AtPrepare_MultiXact(void)
Definition: multixact.c:1836
static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start, uint32 distance)
Definition: multixact.c:2840
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3331
void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
Definition: multixact.c:2536
static int MultiXactIdToOffsetEntry(MultiXactId multi)
Definition: multixact.c:119
static void mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members)
Definition: multixact.c:1709
static void MaybeExtendOffsetSlru(void)
Definition: multixact.c:2118
bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
Definition: multixact.c:599
void MultiXactIdSetOldestMember(void)
Definition: multixact.c:673
static void PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset newOldestOffset)
Definition: multixact.c:3048
static MemoryContext MXactContext
Definition: multixact.c:372
#define SHARED_MULTIXACT_STATE_SIZE
static MultiXactId * OldestVisibleMXactId
Definition: multixact.c:342
struct mxtruncinfo mxtruncinfo
static int mxactMemberComparator(const void *arg1, const void *arg2)
Definition: multixact.c:1589
struct MultiXactStateData MultiXactStateData
static void ExtendMultiXactOffset(MultiXactId multi)
Definition: multixact.c:2553
void PostPrepare_MultiXact(TransactionId xid)
Definition: multixact.c:1850
Size MultiXactShmemSize(void)
Definition: multixact.c:1947
#define MULTIXACT_MEMBERGROUPS_PER_PAGE
Definition: multixact.c:154
#define MultiXactOffsetCtl
Definition: multixact.c:232
void multixact_twophase_postabort(TransactionId xid, uint16 info, void *recdata, uint32 len)
Definition: multixact.c:1935
static int MXOffsetToMemberOffset(MultiXactOffset offset)
Definition: multixact.c:206
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition: multixact.c:2282
static void WriteMZeroPageXlogRec(int64 pageno, uint8 info)
Definition: multixact.c:3355
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup)
Definition: multixact.c:2362
static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, int nmembers, MultiXactMember *members)
Definition: multixact.c:916
int multixactmemberssyncfiletag(const FileTag *ftag, char *path)
Definition: multixact.c:3584
#define MAX_CACHE_ENTRIES
Definition: multixact.c:370
static int64 MultiXactIdToOffsetPage(MultiXactId multi)
Definition: multixact.c:113
MultiXactId GetOldestMultiXactId(void)
Definition: multixact.c:2660
void CheckPointMultiXact(void)
Definition: multixact.c:2304
#define MaxOldestSlot
Definition: multixact.c:337
MultiXactId MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
Definition: multixact.c:815
static bool ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members)
Definition: multixact.c:2926
struct mXactCacheEnt mXactCacheEnt
static int64 MXOffsetToMemberSegment(MultiXactOffset offset)
Definition: multixact.c:179
static MultiXactId mXactCacheGetBySet(int nmembers, MultiXactMember *members)
Definition: multixact.c:1619
static dclist_head MXactCache
Definition: multixact.c:371
void TrimMultiXact(void)
Definition: multixact.c:2178
#define debug_elog3(a, b, c)
Definition: multixact.c:382
char * mxid_to_string(MultiXactId multi, int nmembers, MultiXactMember *members)
Definition: multixact.c:1777
#define MULTIXACT_MEMBERGROUP_SIZE
Definition: multixact.c:152
#define debug_elog4(a, b, c, d)
Definition: multixact.c:383
static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2)
Definition: multixact.c:3277
static bool SetOffsetVacuumLimit(bool is_startup)
Definition: multixact.c:2713
static int MXOffsetToFlagsOffset(MultiXactOffset offset)
Definition: multixact.c:186
int MultiXactMemberFreezeThreshold(void)
Definition: multixact.c:2978
void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset)
Definition: multixact.c:2511
static MultiXactId * OldestMemberMXactId
Definition: multixact.c:341
#define MAX_MEMBERS_IN_LAST_MEMBERS_PAGE
Definition: multixact.c:168
static MultiXactStateData * MultiXactState
Definition: multixact.c:340
static int ZeroMultiXactOffsetPage(int64 pageno, bool writeXlog)
Definition: multixact.c:2074
#define MULTIXACT_MEMBERS_PER_MEMBERGROUP
Definition: multixact.c:149
static char * mxstatus_to_string(MultiXactStatus status)
Definition: multixact.c:1754
#define OFFSET_WARN_SEGMENTS
Datum pg_get_multixact_members(PG_FUNCTION_ARGS)
Definition: multixact.c:3510
MultiXactId ReadNextMultiXactId(void)
Definition: multixact.c:771
void BootStrapMultiXact(void)
Definition: multixact.c:2034
#define debug_elog6(a, b, c, d, e, f)
Definition: multixact.c:385
#define MULTIXACT_MEMBERS_PER_PAGE
Definition: multixact.c:155
MultiXactId MultiXactIdCreate(TransactionId xid1, MultiXactStatus status1, TransactionId xid2, MultiXactStatus status2)
Definition: multixact.c:434
void TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
Definition: multixact.c:3102
#define MULTIXACT_MEMBER_DANGER_THRESHOLD
Definition: multixact.c:217
static int MXOffsetToFlagsBitShift(MultiXactOffset offset)
Definition: multixact.c:196
bool check_multixact_offset_buffers(int *newval, void **extra, GucSource source)
Definition: multixact.c:2014
static bool MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
Definition: multixact.c:3343
bool check_multixact_member_buffers(int *newval, void **extra, GucSource source)
Definition: multixact.c:2023
void AtEOXact_MultiXact(void)
Definition: multixact.c:1808
static SlruCtlData MultiXactMemberCtlData
Definition: multixact.c:230
#define debug_elog2(a, b)
Definition: multixact.c:381
void StartupMultiXact(void)
Definition: multixact.c:2153
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition: multixact.c:1299
#define MultiXactIdIsValid(multi)
Definition: multixact.h:28
#define XLOG_MULTIXACT_ZERO_MEM_PAGE
Definition: multixact.h:69
#define XLOG_MULTIXACT_ZERO_OFF_PAGE
Definition: multixact.h:68
#define FirstMultiXactId
Definition: multixact.h:25
MultiXactStatus
Definition: multixact.h:38
@ MultiXactStatusForShare
Definition: multixact.h:40
@ MultiXactStatusForNoKeyUpdate
Definition: multixact.h:41
@ MultiXactStatusNoKeyUpdate
Definition: multixact.h:44
@ MultiXactStatusUpdate
Definition: multixact.h:46
@ MultiXactStatusForUpdate
Definition: multixact.h:42
@ MultiXactStatusForKeyShare
Definition: multixact.h:39
#define ISUPDATE_from_mxstatus(status)
Definition: multixact.h:52
#define InvalidMultiXactId
Definition: multixact.h:24
#define XLOG_MULTIXACT_TRUNCATE_ID
Definition: multixact.h:71
#define SizeOfMultiXactCreate
Definition: multixact.h:81
#define SizeOfMultiXactTruncate
Definition: multixact.h:96
#define XLOG_MULTIXACT_CREATE_ID
Definition: multixact.h:70
#define MaxMultiXactOffset
Definition: multixact.h:30
#define MaxMultiXactId
Definition: multixact.h:26
struct MultiXactMember MultiXactMember
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
const void size_t len
const void * data
while(p+4<=pend)
static char * filename
Definition: pg_dumpall.c:119
static rewind_source * source
Definition: pg_rewind.c:89
static char * buf
Definition: pg_test_fsync.c:72
void SendPostmasterSignal(PMSignalReason reason)
Definition: pmsignal.c:165
@ PMSIGNAL_START_AUTOVAC_LAUNCHER
Definition: pmsignal.h:38
#define qsort(a, b, c, d)
Definition: port.h:475
uintptr_t Datum
Definition: postgres.h:69
unsigned int Oid
Definition: postgres_ext.h:32
#define DELAY_CHKPT_START
Definition: proc.h:119
bool TransactionIdIsInProgress(TransactionId xid)
Definition: procarray.c:1402
int ProcNumber
Definition: procnumber.h:24
char * psprintf(const char *fmt,...)
Definition: psprintf.c:43
tree ctl
Definition: radixtree.h:1838
Size add_size(Size s1, Size s2)
Definition: shmem.c:488
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:382
static pg_noinline void Size size
Definition: slab.c:607
void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, const char *subdir, int buffer_tranche_id, int bank_tranche_id, SyncRequestHandler sync_handler, bool long_segment_names)
Definition: slru.c:252
int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno, TransactionId xid)
Definition: slru.c:605
void SimpleLruWritePage(SlruCtl ctl, int slotno)
Definition: slru.c:732
void SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied)
Definition: slru.c:1322
bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int64 pageno)
Definition: slru.c:746
void SlruDeleteSegment(SlruCtl ctl, int64 segno)
Definition: slru.c:1526
bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
Definition: slru.c:1791
int SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid)
Definition: slru.c:502
int SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path)
Definition: slru.c:1831
int SimpleLruZeroPage(SlruCtl ctl, int64 pageno)
Definition: slru.c:375
void SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage)
Definition: slru.c:1408
Size SimpleLruShmemSize(int nslots, int nlsns)
Definition: slru.c:199
bool check_slru_buffers(const char *name, int *newval)
Definition: slru.c:355
static LWLock * SimpleLruGetBankLock(SlruCtl ctl, int64 pageno)
Definition: slru.h:175
#define SlruPagePrecedesUnitTests(ctl, per_page)
Definition: slru.h:199
#define SLRU_PAGES_PER_SEGMENT
Definition: slru.h:39
PGPROC * MyProc
Definition: proc.c:66
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:242
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
Definition: sync.h:51
void * user_fctx
Definition: funcapi.h:82
AttInMetadata * attinmeta
Definition: funcapi.h:91
MemoryContext multi_call_memory_ctx
Definition: funcapi.h:101
TupleDesc tuple_desc
Definition: funcapi.h:112
Definition: lwlock.h:42
TransactionId xid
Definition: multixact.h:58
MultiXactStatus status
Definition: multixact.h:59
MultiXactId multiWrapLimit
Definition: multixact.c:273
MultiXactId multiStopLimit
Definition: multixact.c:272
MultiXactId multiWarnLimit
Definition: multixact.c:271
MultiXactId multiVacLimit
Definition: multixact.c:270
MultiXactOffset offsetStopLimit
Definition: multixact.c:276
MultiXactOffset nextOffset
Definition: multixact.c:248
MultiXactId nextMXact
Definition: multixact.c:245
MultiXactId oldestMultiXactId
Definition: multixact.c:258
MultiXactId perBackendXactIds[FLEXIBLE_ARRAY_MEMBER]
Definition: multixact.c:331
MultiXactOffset oldestOffset
Definition: multixact.c:266
ConditionVariable nextoff_cv
Definition: multixact.c:282
int delayChkptFlags
Definition: proc.h:240
dlist_node * cur
Definition: ilist.h:179
MultiXactId multi
Definition: multixact.c:364
dlist_node node
Definition: multixact.c:366
MultiXactMember members[FLEXIBLE_ARRAY_MEMBER]
Definition: multixact.c:367
int64 earliestExistingPage
Definition: multixact.c:3017
MultiXactId mid
Definition: multixact.h:75
MultiXactMember members[FLEXIBLE_ARRAY_MEMBER]
Definition: multixact.h:78
MultiXactOffset moff
Definition: multixact.h:76
MultiXactId endTruncOff
Definition: multixact.h:89
MultiXactOffset startTruncMemb
Definition: multixact.h:92
MultiXactOffset endTruncMemb
Definition: multixact.h:93
MultiXactId startTruncOff
Definition: multixact.h:88
@ SYNC_HANDLER_MULTIXACT_MEMBER
Definition: sync.h:41
@ SYNC_HANDLER_MULTIXACT_OFFSET
Definition: sync.h:40
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:126
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
#define TransactionIdIsValid(xid)
Definition: transam.h:41
void RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info, const void *data, uint32 len)
Definition: twophase.c:1264
ProcNumber TwoPhaseGetDummyProcNumber(TransactionId xid, bool lock_held)
Definition: twophase.c:903
#define TWOPHASE_RM_MULTIXACT_ID
Definition: twophase_rmgr.h:27
void AdvanceNextFullTransactionIdPastXid(TransactionId xid)
Definition: varsup.c:304
bool IsTransactionState(void)
Definition: xact.c:386
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:940
bool RecoveryInProgress(void)
Definition: xlog.c:6355
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2805
uint64 XLogRecPtr
Definition: xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterData(const char *data, uint32 len)
Definition: xloginsert.c:364
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:410
#define XLogRecGetData(decoder)
Definition: xlogreader.h:415
#define XLogRecGetXid(decoder)
Definition: xlogreader.h:412
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:417
bool InRecovery
Definition: xlogutils.c:50