PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
predicate_internals.h
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * predicate_internals.h
4 * POSTGRES internal predicate locking definitions.
5 *
6 *
7 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * src/include/storage/predicate_internals.h
11 *
12 *-------------------------------------------------------------------------
13 */
14#ifndef PREDICATE_INTERNALS_H
15#define PREDICATE_INTERNALS_H
16
17#include "lib/ilist.h"
18#include "storage/lock.h"
19#include "storage/lwlock.h"
20
21/*
22 * Commit number.
23 */
25
26/*
27 * Reserved commit sequence numbers:
28 * - 0 is reserved to indicate a non-existent SLRU entry; it cannot be
29 * used as a SerCommitSeqNo, even an invalid one
30 * - InvalidSerCommitSeqNo is used to indicate a transaction that
31 * hasn't committed yet, so use a number greater than all valid
32 * ones to make comparison do the expected thing
33 * - RecoverySerCommitSeqNo is used to refer to transactions that
34 * happened before a crash/recovery, since we restart the sequence
35 * at that point. It's earlier than all normal sequence numbers,
36 * and is only used by recovered prepared transactions
37 */
38#define InvalidSerCommitSeqNo ((SerCommitSeqNo) PG_UINT64_MAX)
39#define RecoverySerCommitSeqNo ((SerCommitSeqNo) 1)
40#define FirstNormalSerCommitSeqNo ((SerCommitSeqNo) 2)
41
42/*
43 * The SERIALIZABLEXACT struct contains information needed for each
44 * serializable database transaction to support SSI techniques.
45 *
46 * A home-grown list is maintained in shared memory to manage these.
47 * An entry is used when the serializable transaction acquires a snapshot.
48 * Unless the transaction is rolled back, this entry must generally remain
49 * until all concurrent transactions have completed. (There are special
50 * optimizations for READ ONLY transactions which often allow them to be
51 * cleaned up earlier.) A transaction which is rolled back is cleaned up
52 * as soon as possible.
53 *
54 * Eligibility for cleanup of committed transactions is generally determined
55 * by comparing the transaction's finishedBefore field to
56 * SxactGlobalXmin.
57 */
58typedef struct SERIALIZABLEXACT
59{
60 VirtualTransactionId vxid; /* The executing process always has one of
61 * these. */
62
63 /*
64 * We use two numbers to track the order that transactions commit. Before
65 * commit, a transaction is marked as prepared, and prepareSeqNo is set.
66 * Shortly after commit, it's marked as committed, and commitSeqNo is set.
67 * This doesn't give a strict commit order, but these two values together
68 * are good enough for us, as we can always err on the safe side and
69 * assume that there's a conflict, if we can't be sure of the exact
70 * ordering of two commits.
71 *
72 * Note that a transaction is marked as prepared for a short period during
73 * commit processing, even if two-phase commit is not used. But with
74 * two-phase commit, a transaction can stay in prepared state for some
75 * time.
76 */
79
80 /* these values are not both interesting at the same time */
81 union
82 {
84 * conflict out */
85 SerCommitSeqNo lastCommitBeforeSnapshot; /* when not committed or
86 * no conflict out */
88 dlist_head outConflicts; /* list of write transactions whose data we
89 * couldn't read. */
90 dlist_head inConflicts; /* list of read transactions which couldn't
91 * see our write. */
92 dlist_head predicateLocks; /* list of associated PREDICATELOCK objects */
93 dlist_node finishedLink; /* list link in
94 * FinishedSerializableTransactions */
95 dlist_node xactLink; /* PredXact->activeList/availableList */
96
97 /*
98 * perXactPredicateListLock is only used in parallel queries: it protects
99 * this SERIALIZABLEXACT's predicate lock list against other workers of
100 * the same session.
101 */
103
104 /*
105 * for r/o transactions: list of concurrent r/w transactions that we could
106 * potentially have conflicts with, and vice versa for r/w transactions
107 */
109
110 TransactionId topXid; /* top level xid for the transaction, if one
111 * exists; else invalid */
112 TransactionId finishedBefore; /* invalid means still running; else the
113 * struct expires when no serializable
114 * xids are before this. */
115 TransactionId xmin; /* the transaction's snapshot xmin */
116 uint32 flags; /* OR'd combination of values defined below */
117 int pid; /* pid of associated process */
118 int pgprocno; /* pgprocno of associated process */
120
121#define SXACT_FLAG_COMMITTED 0x00000001 /* already committed */
122#define SXACT_FLAG_PREPARED 0x00000002 /* about to commit */
123#define SXACT_FLAG_ROLLED_BACK 0x00000004 /* already rolled back */
124#define SXACT_FLAG_DOOMED 0x00000008 /* will roll back */
125/*
126 * The following flag actually means that the flagged transaction has a
127 * conflict out *to a transaction which committed ahead of it*. It's hard
128 * to get that into a name of a reasonable length.
129 */
130#define SXACT_FLAG_CONFLICT_OUT 0x00000010
131#define SXACT_FLAG_READ_ONLY 0x00000020
132#define SXACT_FLAG_DEFERRABLE_WAITING 0x00000040
133#define SXACT_FLAG_RO_SAFE 0x00000080
134#define SXACT_FLAG_RO_UNSAFE 0x00000100
135#define SXACT_FLAG_SUMMARY_CONFLICT_IN 0x00000200
136#define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400
137/*
138 * The following flag means the transaction has been partially released
139 * already, but is being preserved because parallel workers might have a
140 * reference to it. It'll be recycled by the leader at end-of-transaction.
141 */
142#define SXACT_FLAG_PARTIALLY_RELEASED 0x00000800
143
144typedef struct PredXactListData
145{
148
149 /*
150 * These global variables are maintained when registering and cleaning up
151 * serializable transactions. They must be global across all backends,
152 * but are not needed outside the predicate.c source file. Protected by
153 * SerializableXactHashLock.
154 */
155 TransactionId SxactGlobalXmin; /* global xmin for active serializable
156 * transactions */
157 int SxactGlobalXminCount; /* how many active serializable
158 * transactions have this xmin */
159 int WritableSxactCount; /* how many non-read-only serializable
160 * transactions are active */
161 SerCommitSeqNo LastSxactCommitSeqNo; /* a strictly monotonically
162 * increasing number for commits
163 * of serializable transactions */
164 /* Protected by SerializableXactHashLock. */
165 SerCommitSeqNo CanPartialClearThrough; /* can clear predicate locks and
166 * inConflicts for committed
167 * transactions through this seq
168 * no */
169 /* Protected by SerializableFinishedListLock. */
170 SerCommitSeqNo HavePartialClearedThrough; /* have cleared through this
171 * seq no */
172 SERIALIZABLEXACT *OldCommittedSxact; /* shared copy of dummy sxact */
173
176
178
179#define PredXactListDataSize \
180 ((Size)MAXALIGN(sizeof(PredXactListData)))
181
182
183/*
184 * The following types are used to provide lists of rw-conflicts between
185 * pairs of transactions. Since exactly the same information is needed,
186 * they are also used to record possible unsafe transaction relationships
187 * for purposes of identifying safe snapshots for read-only transactions.
188 *
189 * When a RWConflictData is not in use to record either type of relationship
190 * between a pair of transactions, it is kept on an "available" list. The
191 * outLink field is used for maintaining that list.
192 */
193typedef struct RWConflictData
194{
195 dlist_node outLink; /* link for list of conflicts out from a sxact */
196 dlist_node inLink; /* link for list of conflicts in to a sxact */
200
202
203#define RWConflictDataSize \
204 ((Size)MAXALIGN(sizeof(RWConflictData)))
205
207{
211
213
214#define RWConflictPoolHeaderDataSize \
215 ((Size)MAXALIGN(sizeof(RWConflictPoolHeaderData)))
216
217
218/*
219 * The SERIALIZABLEXIDTAG struct identifies an xid assigned to a serializable
220 * transaction or any of its subtransactions.
221 */
222typedef struct SERIALIZABLEXIDTAG
223{
226
227/*
228 * The SERIALIZABLEXID struct provides a link from a TransactionId for a
229 * serializable transaction to the related SERIALIZABLEXACT record, even if
230 * the transaction has completed and its connection has been closed.
231 *
232 * These are created as new top level transaction IDs are first assigned to
233 * transactions which are participating in predicate locking. This may
234 * never happen for a particular transaction if it doesn't write anything.
235 * They are removed with their related serializable transaction objects.
236 *
237 * The SubTransGetTopmostTransaction method is used where necessary to get
238 * from an XID which might be from a subtransaction to the top level XID.
239 */
240typedef struct SERIALIZABLEXID
241{
242 /* hash key */
244
245 /* data */
246 SERIALIZABLEXACT *myXact; /* pointer to the top level transaction data */
248
249
250/*
251 * The PREDICATELOCKTARGETTAG struct identifies a database object which can
252 * be the target of predicate locks.
253 *
254 * Note that the hash function being used doesn't properly respect tag
255 * length -- if the length of the structure isn't a multiple of four bytes it
256 * will go to a four byte boundary past the end of the tag. If you change
257 * this struct, make sure any slack space is initialized, so that any random
258 * bytes in the middle or at the end are not included in the hash.
259 *
260 * TODO SSI: If we always use the same fields for the same type of value, we
261 * should rename these. Holding off until it's clear there are no exceptions.
262 * Since indexes are relations with blocks and tuples, it's looking likely that
263 * the rename will be possible. If not, we may need to divide the last field
264 * and use part of it for a target type, so that we know how to interpret the
265 * data..
266 */
268{
269 uint32 locktag_field1; /* a 32-bit ID field */
270 uint32 locktag_field2; /* a 32-bit ID field */
271 uint32 locktag_field3; /* a 32-bit ID field */
272 uint32 locktag_field4; /* a 32-bit ID field */
274
275/*
276 * The PREDICATELOCKTARGET struct represents a database object on which there
277 * are predicate locks.
278 *
279 * A hash list of these objects is maintained in shared memory. An entry is
280 * added when a predicate lock is requested on an object which doesn't
281 * already have one. An entry is removed when the last lock is removed from
282 * its list.
283 */
285{
286 /* hash key */
287 PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
288
289 /* data */
290 dlist_head predicateLocks; /* list of PREDICATELOCK objects assoc. with
291 * predicate lock target */
293
294
295/*
296 * The PREDICATELOCKTAG struct identifies an individual predicate lock.
297 *
298 * It is the combination of predicate lock target (which is a lockable
299 * object) and a serializable transaction which has acquired a lock on that
300 * target.
301 */
302typedef struct PREDICATELOCKTAG
303{
307
308/*
309 * The PREDICATELOCK struct represents an individual lock.
310 *
311 * An entry can be created here when the related database object is read, or
312 * by promotion of multiple finer-grained targets. All entries related to a
313 * serializable transaction are removed when that serializable transaction is
314 * cleaned up. Entries can also be removed when they are combined into a
315 * single coarser-grained lock entry.
316 */
317typedef struct PREDICATELOCK
318{
319 /* hash key */
320 PREDICATELOCKTAG tag; /* unique identifier of lock */
321
322 /* data */
323 dlist_node targetLink; /* list link in PREDICATELOCKTARGET's list of
324 * predicate locks */
325 dlist_node xactLink; /* list link in SERIALIZABLEXACT's list of
326 * predicate locks */
327 SerCommitSeqNo commitSeqNo; /* only used for summarized predicate locks */
329
330
331/*
332 * The LOCALPREDICATELOCK struct represents a local copy of data which is
333 * also present in the PREDICATELOCK table, organized for fast access without
334 * needing to acquire a LWLock. It is strictly for optimization.
335 *
336 * Each serializable transaction creates its own local hash table to hold a
337 * collection of these. This information is used to determine when a number
338 * of fine-grained locks should be promoted to a single coarser-grained lock.
339 * The information is maintained more-or-less in parallel to the
340 * PREDICATELOCK data, but because this data is not protected by locks and is
341 * only used in an optimization heuristic, it is allowed to drift in a few
342 * corner cases where maintaining exact data would be expensive.
343 *
344 * The hash table is created when the serializable transaction acquires its
345 * snapshot, and its memory is released upon completion of the transaction.
346 */
347typedef struct LOCALPREDICATELOCK
348{
349 /* hash key */
350 PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
351
352 /* data */
353 bool held; /* is lock held, or just its children? */
354 int childLocks; /* number of child locks currently held */
356
357
358/*
359 * The types of predicate locks which can be acquired.
360 */
362{
366 /* TODO SSI: Other types may be needed for index locking */
368
369
370/*
371 * This structure is used to quickly capture a copy of all predicate
372 * locks. This is currently used only by the pg_lock_status function,
373 * which in turn is used by the pg_locks view.
374 */
375typedef struct PredicateLockData
376{
381
382
383/*
384 * These macros define how we map logical IDs of lockable objects into the
385 * physical fields of PREDICATELOCKTARGETTAG. Use these to set up values,
386 * rather than accessing the fields directly. Note multiple eval of target!
387 */
388#define SET_PREDICATELOCKTARGETTAG_RELATION(locktag,dboid,reloid) \
389 ((locktag).locktag_field1 = (dboid), \
390 (locktag).locktag_field2 = (reloid), \
391 (locktag).locktag_field3 = InvalidBlockNumber, \
392 (locktag).locktag_field4 = InvalidOffsetNumber)
393
394#define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
395 ((locktag).locktag_field1 = (dboid), \
396 (locktag).locktag_field2 = (reloid), \
397 (locktag).locktag_field3 = (blocknum), \
398 (locktag).locktag_field4 = InvalidOffsetNumber)
399
400#define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
401 ((locktag).locktag_field1 = (dboid), \
402 (locktag).locktag_field2 = (reloid), \
403 (locktag).locktag_field3 = (blocknum), \
404 (locktag).locktag_field4 = (offnum))
405
406#define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
407 ((Oid) (locktag).locktag_field1)
408#define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \
409 ((Oid) (locktag).locktag_field2)
410#define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \
411 ((BlockNumber) (locktag).locktag_field3)
412#define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
413 ((OffsetNumber) (locktag).locktag_field4)
414#define GET_PREDICATELOCKTARGETTAG_TYPE(locktag) \
415 (((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \
416 (((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE : \
417 PREDLOCKTAG_RELATION))
418
419/*
420 * Two-phase commit statefile records. There are two types: for each
421 * transaction, we generate one per-transaction record and a variable
422 * number of per-predicate-lock records.
423 */
425{
429
430/*
431 * Per-transaction information to reconstruct a SERIALIZABLEXACT. Not
432 * much is needed because most of it not meaningful for a recovered
433 * prepared transaction.
434 *
435 * In particular, we do not record the in and out conflict lists for a
436 * prepared transaction because the associated SERIALIZABLEXACTs will
437 * not be available after recovery. Instead, we simply record the
438 * existence of each type of conflict by setting the transaction's
439 * summary conflict in/out flag.
440 */
442{
446
447/* Per-lock state */
449{
451 uint32 filler; /* to avoid length change in back-patched fix */
453
455{
457 union
458 {
463
464/*
465 * Define a macro to use for an "empty" SERIALIZABLEXACT reference.
466 */
467#define InvalidSerializableXact ((SERIALIZABLEXACT *) NULL)
468
469
470/*
471 * Function definitions for functions needing awareness of predicate
472 * locking internals.
473 */
475extern int GetSafeSnapshotBlockingPids(int blocked_pid,
476 int *output, int output_size);
477
478#endif /* PREDICATE_INTERNALS_H */
uint64_t uint64
Definition: c.h:489
uint32_t uint32
Definition: c.h:488
uint32 TransactionId
Definition: c.h:609
FILE * output
PredicateLockData * GetPredicateLockStatusData(void)
Definition: predicate.c:1435
struct RWConflictData RWConflictData
struct PredXactListData * PredXactList
struct TwoPhasePredicateRecord TwoPhasePredicateRecord
struct RWConflictPoolHeaderData RWConflictPoolHeaderData
struct SERIALIZABLEXACT SERIALIZABLEXACT
TwoPhasePredicateRecordType
@ TWOPHASEPREDICATERECORD_XACT
@ TWOPHASEPREDICATERECORD_LOCK
int GetSafeSnapshotBlockingPids(int blocked_pid, int *output, int output_size)
Definition: predicate.c:1618
struct TwoPhasePredicateLockRecord TwoPhasePredicateLockRecord
PredicateLockTargetType
@ PREDLOCKTAG_RELATION
@ PREDLOCKTAG_PAGE
@ PREDLOCKTAG_TUPLE
struct PREDICATELOCKTAG PREDICATELOCKTAG
struct TwoPhasePredicateXactRecord TwoPhasePredicateXactRecord
struct LOCALPREDICATELOCK LOCALPREDICATELOCK
struct RWConflictPoolHeaderData * RWConflictPoolHeader
struct SERIALIZABLEXIDTAG SERIALIZABLEXIDTAG
struct PREDICATELOCKTARGET PREDICATELOCKTARGET
struct PREDICATELOCKTARGETTAG PREDICATELOCKTARGETTAG
struct SERIALIZABLEXID SERIALIZABLEXID
struct PredicateLockData PredicateLockData
struct RWConflictData * RWConflict
uint64 SerCommitSeqNo
struct PredXactListData PredXactListData
struct PREDICATELOCK PREDICATELOCK
PREDICATELOCKTARGETTAG tag
Definition: lwlock.h:42
SERIALIZABLEXACT * myXact
PREDICATELOCKTARGET * myTarget
PREDICATELOCKTARGETTAG tag
PREDICATELOCKTAG tag
SerCommitSeqNo commitSeqNo
SERIALIZABLEXACT * element
SerCommitSeqNo LastSxactCommitSeqNo
SerCommitSeqNo CanPartialClearThrough
SERIALIZABLEXACT * OldCommittedSxact
SerCommitSeqNo HavePartialClearedThrough
TransactionId SxactGlobalXmin
PREDICATELOCKTARGETTAG * locktags
SERIALIZABLEXACT * xacts
SERIALIZABLEXACT * sxactIn
SERIALIZABLEXACT * sxactOut
VirtualTransactionId vxid
SerCommitSeqNo lastCommitBeforeSnapshot
dlist_head possibleUnsafeConflicts
SerCommitSeqNo prepareSeqNo
SerCommitSeqNo commitSeqNo
TransactionId finishedBefore
SerCommitSeqNo earliestOutConflictCommit
union SERIALIZABLEXACT::@122 SeqNo
SERIALIZABLEXIDTAG tag
SERIALIZABLEXACT * myXact
PREDICATELOCKTARGETTAG target
TwoPhasePredicateRecordType type
TwoPhasePredicateLockRecord lockRecord
TwoPhasePredicateXactRecord xactRecord
union TwoPhasePredicateRecord::@123 data