PostgreSQL Source Code  git master
predicate_internals.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * predicate_internals.h
4  * POSTGRES internal predicate locking definitions.
5  *
6  *
7  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/include/storage/predicate_internals.h
11  *
12  *-------------------------------------------------------------------------
13  */
14 #ifndef PREDICATE_INTERNALS_H
15 #define PREDICATE_INTERNALS_H
16 
17 #include "lib/ilist.h"
18 #include "storage/lock.h"
19 #include "storage/lwlock.h"
20 
21 /*
22  * Commit number.
23  */
24 typedef uint64 SerCommitSeqNo;
25 
26 /*
27  * Reserved commit sequence numbers:
28  * - 0 is reserved to indicate a non-existent SLRU entry; it cannot be
29  * used as a SerCommitSeqNo, even an invalid one
30  * - InvalidSerCommitSeqNo is used to indicate a transaction that
31  * hasn't committed yet, so use a number greater than all valid
32  * ones to make comparison do the expected thing
33  * - RecoverySerCommitSeqNo is used to refer to transactions that
34  * happened before a crash/recovery, since we restart the sequence
35  * at that point. It's earlier than all normal sequence numbers,
36  * and is only used by recovered prepared transactions
37  */
38 #define InvalidSerCommitSeqNo ((SerCommitSeqNo) PG_UINT64_MAX)
39 #define RecoverySerCommitSeqNo ((SerCommitSeqNo) 1)
40 #define FirstNormalSerCommitSeqNo ((SerCommitSeqNo) 2)
41 
42 /*
43  * The SERIALIZABLEXACT struct contains information needed for each
44  * serializable database transaction to support SSI techniques.
45  *
46  * A home-grown list is maintained in shared memory to manage these.
47  * An entry is used when the serializable transaction acquires a snapshot.
48  * Unless the transaction is rolled back, this entry must generally remain
49  * until all concurrent transactions have completed. (There are special
50  * optimizations for READ ONLY transactions which often allow them to be
51  * cleaned up earlier.) A transaction which is rolled back is cleaned up
52  * as soon as possible.
53  *
54  * Eligibility for cleanup of committed transactions is generally determined
55  * by comparing the transaction's finishedBefore field to
56  * SxactGlobalXmin.
57  */
58 typedef struct SERIALIZABLEXACT
59 {
60  VirtualTransactionId vxid; /* The executing process always has one of
61  * these. */
62 
63  /*
64  * We use two numbers to track the order that transactions commit. Before
65  * commit, a transaction is marked as prepared, and prepareSeqNo is set.
66  * Shortly after commit, it's marked as committed, and commitSeqNo is set.
67  * This doesn't give a strict commit order, but these two values together
68  * are good enough for us, as we can always err on the safe side and
69  * assume that there's a conflict, if we can't be sure of the exact
70  * ordering of two commits.
71  *
72  * Note that a transaction is marked as prepared for a short period during
73  * commit processing, even if two-phase commit is not used. But with
74  * two-phase commit, a transaction can stay in prepared state for some
75  * time.
76  */
79 
80  /* these values are not both interesting at the same time */
81  union
82  {
83  SerCommitSeqNo earliestOutConflictCommit; /* when committed with
84  * conflict out */
85  SerCommitSeqNo lastCommitBeforeSnapshot; /* when not committed or
86  * no conflict out */
87  } SeqNo;
88  dlist_head outConflicts; /* list of write transactions whose data we
89  * couldn't read. */
90  dlist_head inConflicts; /* list of read transactions which couldn't
91  * see our write. */
92  dlist_head predicateLocks; /* list of associated PREDICATELOCK objects */
93  dlist_node finishedLink; /* list link in
94  * FinishedSerializableTransactions */
95  dlist_node xactLink; /* PredXact->activeList/availableList */
96 
97  /*
98  * perXactPredicateListLock is only used in parallel queries: it protects
99  * this SERIALIZABLEXACT's predicate lock list against other workers of
100  * the same session.
101  */
103 
104  /*
105  * for r/o transactions: list of concurrent r/w transactions that we could
106  * potentially have conflicts with, and vice versa for r/w transactions
107  */
109 
110  TransactionId topXid; /* top level xid for the transaction, if one
111  * exists; else invalid */
112  TransactionId finishedBefore; /* invalid means still running; else the
113  * struct expires when no serializable
114  * xids are before this. */
115  TransactionId xmin; /* the transaction's snapshot xmin */
116  uint32 flags; /* OR'd combination of values defined below */
117  int pid; /* pid of associated process */
118  int pgprocno; /* pgprocno of associated process */
120 
121 #define SXACT_FLAG_COMMITTED 0x00000001 /* already committed */
122 #define SXACT_FLAG_PREPARED 0x00000002 /* about to commit */
123 #define SXACT_FLAG_ROLLED_BACK 0x00000004 /* already rolled back */
124 #define SXACT_FLAG_DOOMED 0x00000008 /* will roll back */
125 /*
126  * The following flag actually means that the flagged transaction has a
127  * conflict out *to a transaction which committed ahead of it*. It's hard
128  * to get that into a name of a reasonable length.
129  */
130 #define SXACT_FLAG_CONFLICT_OUT 0x00000010
131 #define SXACT_FLAG_READ_ONLY 0x00000020
132 #define SXACT_FLAG_DEFERRABLE_WAITING 0x00000040
133 #define SXACT_FLAG_RO_SAFE 0x00000080
134 #define SXACT_FLAG_RO_UNSAFE 0x00000100
135 #define SXACT_FLAG_SUMMARY_CONFLICT_IN 0x00000200
136 #define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400
137 /*
138  * The following flag means the transaction has been partially released
139  * already, but is being preserved because parallel workers might have a
140  * reference to it. It'll be recycled by the leader at end-of-transaction.
141  */
142 #define SXACT_FLAG_PARTIALLY_RELEASED 0x00000800
143 
144 typedef struct PredXactListData
145 {
148 
149  /*
150  * These global variables are maintained when registering and cleaning up
151  * serializable transactions. They must be global across all backends,
152  * but are not needed outside the predicate.c source file. Protected by
153  * SerializableXactHashLock.
154  */
155  TransactionId SxactGlobalXmin; /* global xmin for active serializable
156  * transactions */
157  int SxactGlobalXminCount; /* how many active serializable
158  * transactions have this xmin */
159  int WritableSxactCount; /* how many non-read-only serializable
160  * transactions are active */
161  SerCommitSeqNo LastSxactCommitSeqNo; /* a strictly monotonically
162  * increasing number for commits
163  * of serializable transactions */
164  /* Protected by SerializableXactHashLock. */
165  SerCommitSeqNo CanPartialClearThrough; /* can clear predicate locks and
166  * inConflicts for committed
167  * transactions through this seq
168  * no */
169  /* Protected by SerializableFinishedListLock. */
170  SerCommitSeqNo HavePartialClearedThrough; /* have cleared through this
171  * seq no */
172  SERIALIZABLEXACT *OldCommittedSxact; /* shared copy of dummy sxact */
173 
176 
178 
179 #define PredXactListDataSize \
180  ((Size)MAXALIGN(sizeof(PredXactListData)))
181 
182 
183 /*
184  * The following types are used to provide lists of rw-conflicts between
185  * pairs of transactions. Since exactly the same information is needed,
186  * they are also used to record possible unsafe transaction relationships
187  * for purposes of identifying safe snapshots for read-only transactions.
188  *
189  * When a RWConflictData is not in use to record either type of relationship
190  * between a pair of transactions, it is kept on an "available" list. The
191  * outLink field is used for maintaining that list.
192  */
193 typedef struct RWConflictData
194 {
195  dlist_node outLink; /* link for list of conflicts out from a sxact */
196  dlist_node inLink; /* link for list of conflicts in to a sxact */
200 
201 typedef struct RWConflictData *RWConflict;
202 
203 #define RWConflictDataSize \
204  ((Size)MAXALIGN(sizeof(RWConflictData)))
205 
207 {
211 
213 
214 #define RWConflictPoolHeaderDataSize \
215  ((Size)MAXALIGN(sizeof(RWConflictPoolHeaderData)))
216 
217 
218 /*
219  * The SERIALIZABLEXIDTAG struct identifies an xid assigned to a serializable
220  * transaction or any of its subtransactions.
221  */
222 typedef struct SERIALIZABLEXIDTAG
223 {
226 
227 /*
228  * The SERIALIZABLEXID struct provides a link from a TransactionId for a
229  * serializable transaction to the related SERIALIZABLEXACT record, even if
230  * the transaction has completed and its connection has been closed.
231  *
232  * These are created as new top level transaction IDs are first assigned to
233  * transactions which are participating in predicate locking. This may
234  * never happen for a particular transaction if it doesn't write anything.
235  * They are removed with their related serializable transaction objects.
236  *
237  * The SubTransGetTopmostTransaction method is used where necessary to get
238  * from an XID which might be from a subtransaction to the top level XID.
239  */
240 typedef struct SERIALIZABLEXID
241 {
242  /* hash key */
244 
245  /* data */
246  SERIALIZABLEXACT *myXact; /* pointer to the top level transaction data */
248 
249 
250 /*
251  * The PREDICATELOCKTARGETTAG struct identifies a database object which can
252  * be the target of predicate locks.
253  *
254  * Note that the hash function being used doesn't properly respect tag
255  * length -- if the length of the structure isn't a multiple of four bytes it
256  * will go to a four byte boundary past the end of the tag. If you change
257  * this struct, make sure any slack space is initialized, so that any random
258  * bytes in the middle or at the end are not included in the hash.
259  *
260  * TODO SSI: If we always use the same fields for the same type of value, we
261  * should rename these. Holding off until it's clear there are no exceptions.
262  * Since indexes are relations with blocks and tuples, it's looking likely that
263  * the rename will be possible. If not, we may need to divide the last field
264  * and use part of it for a target type, so that we know how to interpret the
265  * data..
266  */
268 {
269  uint32 locktag_field1; /* a 32-bit ID field */
270  uint32 locktag_field2; /* a 32-bit ID field */
271  uint32 locktag_field3; /* a 32-bit ID field */
272  uint32 locktag_field4; /* a 32-bit ID field */
274 
275 /*
276  * The PREDICATELOCKTARGET struct represents a database object on which there
277  * are predicate locks.
278  *
279  * A hash list of these objects is maintained in shared memory. An entry is
280  * added when a predicate lock is requested on an object which doesn't
281  * already have one. An entry is removed when the last lock is removed from
282  * its list.
283  */
284 typedef struct PREDICATELOCKTARGET
285 {
286  /* hash key */
287  PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
288 
289  /* data */
290  dlist_head predicateLocks; /* list of PREDICATELOCK objects assoc. with
291  * predicate lock target */
293 
294 
295 /*
296  * The PREDICATELOCKTAG struct identifies an individual predicate lock.
297  *
298  * It is the combination of predicate lock target (which is a lockable
299  * object) and a serializable transaction which has acquired a lock on that
300  * target.
301  */
302 typedef struct PREDICATELOCKTAG
303 {
307 
308 /*
309  * The PREDICATELOCK struct represents an individual lock.
310  *
311  * An entry can be created here when the related database object is read, or
312  * by promotion of multiple finer-grained targets. All entries related to a
313  * serializable transaction are removed when that serializable transaction is
314  * cleaned up. Entries can also be removed when they are combined into a
315  * single coarser-grained lock entry.
316  */
317 typedef struct PREDICATELOCK
318 {
319  /* hash key */
320  PREDICATELOCKTAG tag; /* unique identifier of lock */
321 
322  /* data */
323  dlist_node targetLink; /* list link in PREDICATELOCKTARGET's list of
324  * predicate locks */
325  dlist_node xactLink; /* list link in SERIALIZABLEXACT's list of
326  * predicate locks */
327  SerCommitSeqNo commitSeqNo; /* only used for summarized predicate locks */
329 
330 
331 /*
332  * The LOCALPREDICATELOCK struct represents a local copy of data which is
333  * also present in the PREDICATELOCK table, organized for fast access without
334  * needing to acquire a LWLock. It is strictly for optimization.
335  *
336  * Each serializable transaction creates its own local hash table to hold a
337  * collection of these. This information is used to determine when a number
338  * of fine-grained locks should be promoted to a single coarser-grained lock.
339  * The information is maintained more-or-less in parallel to the
340  * PREDICATELOCK data, but because this data is not protected by locks and is
341  * only used in an optimization heuristic, it is allowed to drift in a few
342  * corner cases where maintaining exact data would be expensive.
343  *
344  * The hash table is created when the serializable transaction acquires its
345  * snapshot, and its memory is released upon completion of the transaction.
346  */
347 typedef struct LOCALPREDICATELOCK
348 {
349  /* hash key */
350  PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
351 
352  /* data */
353  bool held; /* is lock held, or just its children? */
354  int childLocks; /* number of child locks currently held */
356 
357 
358 /*
359  * The types of predicate locks which can be acquired.
360  */
362 {
366  /* TODO SSI: Other types may be needed for index locking */
368 
369 
370 /*
371  * This structure is used to quickly capture a copy of all predicate
372  * locks. This is currently used only by the pg_lock_status function,
373  * which in turn is used by the pg_locks view.
374  */
375 typedef struct PredicateLockData
376 {
381 
382 
383 /*
384  * These macros define how we map logical IDs of lockable objects into the
385  * physical fields of PREDICATELOCKTARGETTAG. Use these to set up values,
386  * rather than accessing the fields directly. Note multiple eval of target!
387  */
388 #define SET_PREDICATELOCKTARGETTAG_RELATION(locktag,dboid,reloid) \
389  ((locktag).locktag_field1 = (dboid), \
390  (locktag).locktag_field2 = (reloid), \
391  (locktag).locktag_field3 = InvalidBlockNumber, \
392  (locktag).locktag_field4 = InvalidOffsetNumber)
393 
394 #define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
395  ((locktag).locktag_field1 = (dboid), \
396  (locktag).locktag_field2 = (reloid), \
397  (locktag).locktag_field3 = (blocknum), \
398  (locktag).locktag_field4 = InvalidOffsetNumber)
399 
400 #define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
401  ((locktag).locktag_field1 = (dboid), \
402  (locktag).locktag_field2 = (reloid), \
403  (locktag).locktag_field3 = (blocknum), \
404  (locktag).locktag_field4 = (offnum))
405 
406 #define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
407  ((Oid) (locktag).locktag_field1)
408 #define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \
409  ((Oid) (locktag).locktag_field2)
410 #define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \
411  ((BlockNumber) (locktag).locktag_field3)
412 #define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
413  ((OffsetNumber) (locktag).locktag_field4)
414 #define GET_PREDICATELOCKTARGETTAG_TYPE(locktag) \
415  (((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \
416  (((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE : \
417  PREDLOCKTAG_RELATION))
418 
419 /*
420  * Two-phase commit statefile records. There are two types: for each
421  * transaction, we generate one per-transaction record and a variable
422  * number of per-predicate-lock records.
423  */
425 {
429 
430 /*
431  * Per-transaction information to reconstruct a SERIALIZABLEXACT. Not
432  * much is needed because most of it not meaningful for a recovered
433  * prepared transaction.
434  *
435  * In particular, we do not record the in and out conflict lists for a
436  * prepared transaction because the associated SERIALIZABLEXACTs will
437  * not be available after recovery. Instead, we simply record the
438  * existence of each type of conflict by setting the transaction's
439  * summary conflict in/out flag.
440  */
442 {
446 
447 /* Per-lock state */
449 {
451  uint32 filler; /* to avoid length change in back-patched fix */
453 
455 {
457  union
458  {
461  } data;
463 
464 /*
465  * Define a macro to use for an "empty" SERIALIZABLEXACT reference.
466  */
467 #define InvalidSerializableXact ((SERIALIZABLEXACT *) NULL)
468 
469 
470 /*
471  * Function definitions for functions needing awareness of predicate
472  * locking internals.
473  */
475 extern int GetSafeSnapshotBlockingPids(int blocked_pid,
476  int *output, int output_size);
477 
478 #endif /* PREDICATE_INTERNALS_H */
unsigned int uint32
Definition: c.h:493
uint32 TransactionId
Definition: c.h:639
FILE * output
struct RWConflictData RWConflictData
struct PredXactListData * PredXactList
struct TwoPhasePredicateRecord TwoPhasePredicateRecord
struct RWConflictPoolHeaderData RWConflictPoolHeaderData
PredicateLockData * GetPredicateLockStatusData(void)
Definition: predicate.c:1425
struct SERIALIZABLEXACT SERIALIZABLEXACT
TwoPhasePredicateRecordType
@ TWOPHASEPREDICATERECORD_XACT
@ TWOPHASEPREDICATERECORD_LOCK
int GetSafeSnapshotBlockingPids(int blocked_pid, int *output, int output_size)
Definition: predicate.c:1608
struct TwoPhasePredicateLockRecord TwoPhasePredicateLockRecord
PredicateLockTargetType
@ PREDLOCKTAG_RELATION
@ PREDLOCKTAG_PAGE
@ PREDLOCKTAG_TUPLE
struct PREDICATELOCKTAG PREDICATELOCKTAG
struct TwoPhasePredicateXactRecord TwoPhasePredicateXactRecord
struct LOCALPREDICATELOCK LOCALPREDICATELOCK
struct RWConflictPoolHeaderData * RWConflictPoolHeader
struct SERIALIZABLEXIDTAG SERIALIZABLEXIDTAG
struct PREDICATELOCKTARGET PREDICATELOCKTARGET
struct PREDICATELOCKTARGETTAG PREDICATELOCKTARGETTAG
struct SERIALIZABLEXID SERIALIZABLEXID
struct PredicateLockData PredicateLockData
struct RWConflictData * RWConflict
uint64 SerCommitSeqNo
struct PredXactListData PredXactListData
struct PREDICATELOCK PREDICATELOCK
PREDICATELOCKTARGETTAG tag
Definition: lwlock.h:41
SERIALIZABLEXACT * myXact
PREDICATELOCKTARGET * myTarget
PREDICATELOCKTARGETTAG tag
PREDICATELOCKTAG tag
SerCommitSeqNo commitSeqNo
SERIALIZABLEXACT * element
SerCommitSeqNo LastSxactCommitSeqNo
SerCommitSeqNo CanPartialClearThrough
SERIALIZABLEXACT * OldCommittedSxact
SerCommitSeqNo HavePartialClearedThrough
TransactionId SxactGlobalXmin
PREDICATELOCKTARGETTAG * locktags
SERIALIZABLEXACT * xacts
SERIALIZABLEXACT * sxactIn
SERIALIZABLEXACT * sxactOut
VirtualTransactionId vxid
SerCommitSeqNo lastCommitBeforeSnapshot
dlist_head possibleUnsafeConflicts
SerCommitSeqNo prepareSeqNo
SerCommitSeqNo commitSeqNo
TransactionId finishedBefore
SerCommitSeqNo earliestOutConflictCommit
union SERIALIZABLEXACT::@110 SeqNo
SERIALIZABLEXIDTAG tag
SERIALIZABLEXACT * myXact
PREDICATELOCKTARGETTAG target
TwoPhasePredicateRecordType type
union TwoPhasePredicateRecord::@111 data
TwoPhasePredicateLockRecord lockRecord
TwoPhasePredicateXactRecord xactRecord