PostgreSQL Source Code git master
multixact_read_v18.c
Go to the documentation of this file.
1/*
2 * multixact_read_v18.c
3 *
4 * Functions to read multixact SLRUs from clusters of PostgreSQL version 18
5 * and older. In version 19, the multixid offsets were expanded from 32 to 64
6 * bits.
7 *
8 * Copyright (c) 2025, PostgreSQL Global Development Group
9 * src/bin/pg_upgrade/multixact_read_v18.c
10 */
11
12#include "postgres_fe.h"
13
14#include "multixact_read_v18.h"
15#include "pg_upgrade.h"
16
17/*
18 * NOTE: below are a bunch of definitions that are copy-pasted from
19 * multixact.c from version 18. It's important that this file doesn't
20 * #include the new definitions with same names from "multixact_internal.h"!
21 *
22 * To further avoid confusion in the functions exposed outside this source
23 * file, we use MultiXactOffset32 to represent the old-style 32-bit multixid
24 * offsets. The new 64-bit MultiXactOffset should not be used anywhere in
25 * this file.
26 */
27#ifdef MULTIXACT_INTERNAL_H
28#error multixact_internal.h should not be included in multixact_read_v18.c
29#endif
30#define MultiXactOffset should_not_be_used
31
/*
 * Each multixid's entry in the old offsets SLRU is a single 32-bit
 * MultiXactOffset32, so a page holds BLCKSZ / 4 entries.  Nothing else is
 * stored on the page.
 */
#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset32))
34
35static inline int64
37{
38 return multi / MULTIXACT_OFFSETS_PER_PAGE;
39}
40
41static inline int
43{
44 return multi % MULTIXACT_OFFSETS_PER_PAGE;
45}
46
/*
 * The situation for members is a bit more complex: we store one byte of
 * additional flag bits for each TransactionId.  To do this without getting
 * into alignment issues, we store four bytes of flags, and then the
 * corresponding 4 Xids.  Each such 5-word (20-byte) set is called a "group",
 * and groups are stored as a whole in pages.  Thus, with 8kB BLCKSZ, we keep
 * 409 groups per page.  This wastes 12 bytes per page, but that's OK --
 * simplicity (and performance) trumps space efficiency here.
 *
 * Note that the "offset" macros work with byte offset, not array indexes, so
 * arithmetic must be done using "char *" pointers.
 */
/* We need eight bits per xact, so one xact fits in a byte */
#define MXACT_MEMBER_BITS_PER_XACT			8
#define MXACT_MEMBER_FLAGS_PER_BYTE			1
/* mask selecting one member's flag bits once shifted down to bit 0 */
#define MXACT_MEMBER_XACT_BITMASK	((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)

/* how many full bytes of flags are there in a group? */
#define MULTIXACT_FLAGBYTES_PER_GROUP		4
/* number of member Xids that share one group's flag bytes */
#define MULTIXACT_MEMBERS_PER_MEMBERGROUP	\
	(MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
/* size in bytes of a complete group */
#define MULTIXACT_MEMBERGROUP_SIZE \
	(sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
/* whole groups per page; any remainder of the page is left unused */
#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
/* members per page */
#define MULTIXACT_MEMBERS_PER_PAGE	\
	(MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
74
75/* page in which a member is to be found */
76static inline int64
78{
79 return offset / MULTIXACT_MEMBERS_PER_PAGE;
80}
81
82/* Location (byte offset within page) of flag word for a given member */
83static inline int
85{
87 int grouponpg = group % MULTIXACT_MEMBERGROUPS_PER_PAGE;
88 int byteoff = grouponpg * MULTIXACT_MEMBERGROUP_SIZE;
89
90 return byteoff;
91}
92
93/* Location (byte offset within page) of TransactionId of given member */
94static inline int
96{
97 int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
98
99 return MXOffsetToFlagsOffset(offset) +
101 member_in_group * sizeof(TransactionId);
102}
103
104static inline int
106{
107 int member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
108 int bshift = member_in_group * MXACT_MEMBER_BITS_PER_XACT;
109
110 return bshift;
111}
112
113/*
114 * Construct reader of old multixacts.
115 *
116 * Returns the malloced memory used by the all other calls in this module.
117 */
119AllocOldMultiXactRead(char *pgdata, MultiXactId nextMulti,
120 MultiXactOffset32 nextOffset)
121{
123 char dir[MAXPGPATH] = {0};
124
125 state->nextMXact = nextMulti;
126 state->nextOffset = nextOffset;
127
128 pg_sprintf(dir, "%s/pg_multixact/offsets", pgdata);
129 state->offset = AllocSlruRead(dir, false);
130
131 pg_sprintf(dir, "%s/pg_multixact/members", pgdata);
132 state->members = AllocSlruRead(dir, false);
133
134 return state;
135}
136
137/*
138 * This is a simplified version of the GetMultiXactIdMembers() server
139 * function:
140 *
141 * - Only return the updating member, if any. Upgrade only cares about the
142 * updaters. If there is no updating member, return somewhat arbitrarily
143 * the first locking-only member, because we don't have any way to represent
144 * "no members".
145 *
146 * - Because there's no concurrent activity, we don't need to worry about
147 * locking and some corner cases.
148 *
149 * - Don't bail out on invalid entries. If the server crashes, it can leave
150 * invalid or half-written entries on disk. Such multixids won't appear
151 * anywhere else on disk, so the server will never try to read them. During
152 * upgrade, however, we scan through all multixids in order, and will
153 * encounter such invalid but unreferenced multixids too.
154 *
155 * Returns true on success, false if the multixact was invalid.
156 */
157bool
159 MultiXactMember *member)
160{
161 MultiXactId nextMXact,
162 nextOffset,
163 tmpMXact;
164 int64 pageno,
165 prev_pageno;
166 int entryno,
167 length;
168 char *buf;
169 MultiXactOffset32 *offptr,
170 offset;
171 MultiXactOffset32 nextMXOffset;
173 MultiXactStatus result_status = 0;
174
175 nextMXact = state->nextMXact;
176 nextOffset = state->nextOffset;
177
178 /*
179 * Comment copied from GetMultiXactIdMembers in PostgreSQL v18
180 * multixact.c:
181 *
182 * Find out the offset at which we need to start reading MultiXactMembers
183 * and the number of members in the multixact. We determine the latter as
184 * the difference between this multixact's starting offset and the next
185 * one's. However, there are some corner cases to worry about:
186 *
187 * 1. This multixact may be the latest one created, in which case there is
188 * no next one to look at. The next multixact's offset should be set
189 * already, as we set it in RecordNewMultiXact(), but we used to not do
190 * that in older minor versions. To cope with that case, if this
191 * multixact is the latest one created, use the nextOffset value we read
192 * above as the endpoint.
193 *
194 * 2. Because GetNewMultiXactId skips over offset zero, to reserve zero
195 * for to mean "unset", there is an ambiguity near the point of offset
196 * wraparound. If we see next multixact's offset is one, is that our
197 * multixact's actual endpoint, or did it end at zero with a subsequent
198 * increment? We handle this using the knowledge that if the zero'th
199 * member slot wasn't filled, it'll contain zero, and zero isn't a valid
200 * transaction ID so it can't be a multixact member. Therefore, if we
201 * read a zero from the members array, just ignore it.
202 */
203
204 pageno = MultiXactIdToOffsetPage(multi);
205 entryno = MultiXactIdToOffsetEntry(multi);
206
207 buf = SlruReadSwitchPage(state->offset, pageno);
208 offptr = (MultiXactOffset32 *) buf;
209 offptr += entryno;
210 offset = *offptr;
211
212 if (offset == 0)
213 {
214 /* Invalid entry */
215 return false;
216 }
217
218 /*
219 * Use the same increment rule as GetNewMultiXactId(), that is, don't
220 * handle wraparound explicitly until needed.
221 */
222 tmpMXact = multi + 1;
223
224 if (nextMXact == tmpMXact)
225 {
226 /* Corner case 1: there is no next multixact */
227 nextMXOffset = nextOffset;
228 }
229 else
230 {
231 /* handle wraparound if needed */
232 if (tmpMXact < FirstMultiXactId)
233 tmpMXact = FirstMultiXactId;
234
235 prev_pageno = pageno;
236
237 pageno = MultiXactIdToOffsetPage(tmpMXact);
238 entryno = MultiXactIdToOffsetEntry(tmpMXact);
239
240 if (pageno != prev_pageno)
241 buf = SlruReadSwitchPage(state->offset, pageno);
242
243 offptr = (MultiXactOffset32 *) buf;
244 offptr += entryno;
245 nextMXOffset = *offptr;
246 }
247
248 if (nextMXOffset == 0)
249 {
250 /* Invalid entry */
251 return false;
252 }
253 length = nextMXOffset - offset;
254
255 /* read the members */
256 prev_pageno = -1;
257 for (int i = 0; i < length; i++, offset++)
258 {
259 TransactionId *xactptr;
260 uint32 *flagsptr;
261 int flagsoff;
262 int bshift;
263 int memberoff;
264 MultiXactStatus status;
265
266 pageno = MXOffsetToMemberPage(offset);
267 memberoff = MXOffsetToMemberOffset(offset);
268
269 if (pageno != prev_pageno)
270 {
271 buf = SlruReadSwitchPage(state->members, pageno);
272 prev_pageno = pageno;
273 }
274
275 xactptr = (TransactionId *) (buf + memberoff);
276 if (!TransactionIdIsValid(*xactptr))
277 {
278 /*
279 * Corner case 2: we are looking at unused slot zero
280 */
281 if (offset == 0)
282 continue;
283
284 /*
285 * Otherwise this is an invalid entry that should not be
286 * referenced from anywhere in the heap. We could return 'false'
287 * here, but we prefer to continue reading the members and
288 * converting them the best we can, to preserve evidence in case
289 * this is corruption that should not happen.
290 */
291 }
292
293 flagsoff = MXOffsetToFlagsOffset(offset);
294 bshift = MXOffsetToFlagsBitShift(offset);
295 flagsptr = (uint32 *) (buf + flagsoff);
296
297 status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
298
299 /*
300 * Remember the updating XID among the members, or first locking XID
301 * if no updating XID.
302 */
303 if (ISUPDATE_from_mxstatus(status))
304 {
305 /* sanity check */
306 if (ISUPDATE_from_mxstatus(result_status))
307 {
308 /*
309 * We don't expect to see more than one updating member, even
310 * if the server had crashed.
311 */
312 pg_fatal("multixact %u has more than one updating member",
313 multi);
314 }
315 result_xid = *xactptr;
316 result_status = status;
317 }
318 else if (!TransactionIdIsValid(result_xid))
319 {
320 result_xid = *xactptr;
321 result_status = status;
322 }
323 }
324
325 member->xid = result_xid;
326 member->status = result_status;
327 return true;
328}
329
330/*
331 * Frees the malloced reader.
332 */
333void
335{
336 FreeSlruRead(state->offset);
337 FreeSlruRead(state->members);
338
339 pfree(state);
340}
int64_t int64
Definition: c.h:549
TransactionId MultiXactId
Definition: c.h:681
uint32_t uint32
Definition: c.h:552
uint32 TransactionId
Definition: c.h:671
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
int i
Definition: isn.c:77
void pfree(void *pointer)
Definition: mcxt.c:1594
#define FirstMultiXactId
Definition: multixact.h:26
MultiXactStatus
Definition: multixact.h:37
#define ISUPDATE_from_mxstatus(status)
Definition: multixact.h:51
#define MXACT_MEMBER_BITS_PER_XACT
static int MXOffsetToFlagsBitShift(MultiXactOffset32 offset)
#define MXACT_MEMBER_XACT_BITMASK
#define MULTIXACT_FLAGBYTES_PER_GROUP
static int64 MXOffsetToMemberPage(MultiXactOffset32 offset)
#define MULTIXACT_OFFSETS_PER_PAGE
bool GetOldMultiXactIdSingleMember(OldMultiXactReader *state, MultiXactId multi, MultiXactMember *member)
static int MXOffsetToMemberOffset(MultiXactOffset32 offset)
static int MultiXactIdToOffsetEntry(MultiXactId multi)
#define MULTIXACT_MEMBERGROUPS_PER_PAGE
static int64 MultiXactIdToOffsetPage(MultiXactId multi)
OldMultiXactReader * AllocOldMultiXactRead(char *pgdata, MultiXactId nextMulti, MultiXactOffset32 nextOffset)
#define MULTIXACT_MEMBERGROUP_SIZE
#define MULTIXACT_MEMBERS_PER_MEMBERGROUP
#define MULTIXACT_MEMBERS_PER_PAGE
void FreeOldMultiXactReader(OldMultiXactReader *state)
static int MXOffsetToFlagsOffset(MultiXactOffset32 offset)
uint32 MultiXactOffset32
#define pg_fatal(...)
#define MAXPGPATH
static char buf[DEFAULT_XLOG_SEG_SIZE]
Definition: pg_test_fsync.c:71
int int int int pg_sprintf(char *str, const char *fmt,...) pg_attribute_printf(2
SlruSegState * AllocSlruRead(const char *dir, bool long_segment_names)
Definition: slru_io.c:62
void FreeSlruRead(SlruSegState *state)
Definition: slru_io.c:153
static char * SlruReadSwitchPage(SlruSegState *state, uint64 pageno)
Definition: slru_io.h:33
TransactionId xid
Definition: multixact.h:57
MultiXactStatus status
Definition: multixact.h:58
Definition: regguts.h:323
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41