PostgreSQL Source Code  git master
predicate_internals.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * predicate_internals.h
4  * POSTGRES internal predicate locking definitions.
5  *
6  *
7  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/include/storage/predicate_internals.h
11  *
12  *-------------------------------------------------------------------------
13  */
14 #ifndef PREDICATE_INTERNALS_H
15 #define PREDICATE_INTERNALS_H
16 
17 #include "storage/lock.h"
18 #include "storage/lwlock.h"
19 
20 /*
21  * Commit number.
22  */
23 typedef uint64 SerCommitSeqNo;
24 
25 /*
26  * Reserved commit sequence numbers:
27  * - 0 is reserved to indicate a non-existent SLRU entry; it cannot be
28  * used as a SerCommitSeqNo, even an invalid one
29  * - InvalidSerCommitSeqNo is used to indicate a transaction that
30  * hasn't committed yet, so use a number greater than all valid
31  * ones to make comparison do the expected thing
32  * - RecoverySerCommitSeqNo is used to refer to transactions that
33  * happened before a crash/recovery, since we restart the sequence
34  * at that point. It's earlier than all normal sequence numbers,
35  * and is only used by recovered prepared transactions
36  */
37 #define InvalidSerCommitSeqNo ((SerCommitSeqNo) PG_UINT64_MAX)
38 #define RecoverySerCommitSeqNo ((SerCommitSeqNo) 1)
39 #define FirstNormalSerCommitSeqNo ((SerCommitSeqNo) 2)
40 
41 /*
42  * The SERIALIZABLEXACT struct contains information needed for each
43  * serializable database transaction to support SSI techniques.
44  *
45  * A home-grown list is maintained in shared memory to manage these.
46  * An entry is used when the serializable transaction acquires a snapshot.
47  * Unless the transaction is rolled back, this entry must generally remain
48  * until all concurrent transactions have completed. (There are special
49  * optimizations for READ ONLY transactions which often allow them to be
50  * cleaned up earlier.) A transaction which is rolled back is cleaned up
51  * as soon as possible.
52  *
53  * Eligibility for cleanup of committed transactions is generally determined
54  * by comparing the transaction's finishedBefore field to
55  * SxactGlobalXmin.
56  */
57 typedef struct SERIALIZABLEXACT
58 {
59  VirtualTransactionId vxid; /* The executing process always has one of
60  * these. */
61 
62  /*
63  * We use two numbers to track the order that transactions commit. Before
64  * commit, a transaction is marked as prepared, and prepareSeqNo is set.
65  * Shortly after commit, it's marked as committed, and commitSeqNo is set.
66  * This doesn't give a strict commit order, but these two values together
67  * are good enough for us, as we can always err on the safe side and
68  * assume that there's a conflict, if we can't be sure of the exact
69  * ordering of two commits.
70  *
71  * Note that a transaction is marked as prepared for a short period during
72  * commit processing, even if two-phase commit is not used. But with
73  * two-phase commit, a transaction can stay in prepared state for some
74  * time.
75  */
78 
79  /* these values are not both interesting at the same time */
80  union
81  {
82  SerCommitSeqNo earliestOutConflictCommit; /* when committed with
83  * conflict out */
84  SerCommitSeqNo lastCommitBeforeSnapshot; /* when not committed or
85  * no conflict out */
86  } SeqNo;
87  SHM_QUEUE outConflicts; /* list of write transactions whose data we
88  * couldn't read. */
89  SHM_QUEUE inConflicts; /* list of read transactions which couldn't
90  * see our write. */
91  SHM_QUEUE predicateLocks; /* list of associated PREDICATELOCK objects */
92  SHM_QUEUE finishedLink; /* list link in
93  * FinishedSerializableTransactions */
94 
95  /*
96  * perXactPredicateListLock is only used in parallel queries: it protects
97  * this SERIALIZABLEXACT's predicate lock list against other workers of
98  * the same session.
99  */
101 
102  /*
103  * for r/o transactions: list of concurrent r/w transactions that we could
104  * potentially have conflicts with, and vice versa for r/w transactions
105  */
107 
108  TransactionId topXid; /* top level xid for the transaction, if one
109  * exists; else invalid */
110  TransactionId finishedBefore; /* invalid means still running; else the
111  * struct expires when no serializable
112  * xids are before this. */
113  TransactionId xmin; /* the transaction's snapshot xmin */
114  uint32 flags; /* OR'd combination of values defined below */
115  int pid; /* pid of associated process */
116  int pgprocno; /* pgprocno of associated process */
118 
119 #define SXACT_FLAG_COMMITTED 0x00000001 /* already committed */
120 #define SXACT_FLAG_PREPARED 0x00000002 /* about to commit */
121 #define SXACT_FLAG_ROLLED_BACK 0x00000004 /* already rolled back */
122 #define SXACT_FLAG_DOOMED 0x00000008 /* will roll back */
123 /*
124  * The following flag actually means that the flagged transaction has a
125  * conflict out *to a transaction which committed ahead of it*. It's hard
126  * to get that into a name of a reasonable length.
127  */
128 #define SXACT_FLAG_CONFLICT_OUT 0x00000010
129 #define SXACT_FLAG_READ_ONLY 0x00000020
130 #define SXACT_FLAG_DEFERRABLE_WAITING 0x00000040
131 #define SXACT_FLAG_RO_SAFE 0x00000080
132 #define SXACT_FLAG_RO_UNSAFE 0x00000100
133 #define SXACT_FLAG_SUMMARY_CONFLICT_IN 0x00000200
134 #define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400
135 /*
136  * The following flag means the transaction has been partially released
137  * already, but is being preserved because parallel workers might have a
138  * reference to it. It'll be recycled by the leader at end-of-transaction.
139  */
140 #define SXACT_FLAG_PARTIALLY_RELEASED 0x00000800
141 
142 /*
143  * The following types are used to provide an ad hoc list for holding
144  * SERIALIZABLEXACT objects. An HTAB is overkill, since there is no need to
145  * access these by key -- there are direct pointers to these objects where
146  * needed. If a shared memory list is created, these types can probably be
147  * eliminated in favor of using the general solution.
148  */
150 {
154 
156 
157 #define PredXactListElementDataSize \
158  ((Size)MAXALIGN(sizeof(PredXactListElementData)))
159 
160 typedef struct PredXactListData
161 {
164 
165  /*
166  * These global variables are maintained when registering and cleaning up
167  * serializable transactions. They must be global across all backends,
168  * but are not needed outside the predicate.c source file. Protected by
169  * SerializableXactHashLock.
170  */
171  TransactionId SxactGlobalXmin; /* global xmin for active serializable
172  * transactions */
173  int SxactGlobalXminCount; /* how many active serializable
174  * transactions have this xmin */
175  int WritableSxactCount; /* how many non-read-only serializable
176  * transactions are active */
177  SerCommitSeqNo LastSxactCommitSeqNo; /* a strictly monotonically
178  * increasing number for commits
179  * of serializable transactions */
180  /* Protected by SerializableXactHashLock. */
181  SerCommitSeqNo CanPartialClearThrough; /* can clear predicate locks and
182  * inConflicts for committed
183  * transactions through this seq
184  * no */
185  /* Protected by SerializableFinishedListLock. */
186  SerCommitSeqNo HavePartialClearedThrough; /* have cleared through this
187  * seq no */
188  SERIALIZABLEXACT *OldCommittedSxact; /* shared copy of dummy sxact */
189 
192 
194 
195 #define PredXactListDataSize \
196  ((Size)MAXALIGN(sizeof(PredXactListData)))
197 
198 
199 /*
200  * The following types are used to provide lists of rw-conflicts between
201  * pairs of transactions. Since exactly the same information is needed,
202  * they are also used to record possible unsafe transaction relationships
203  * for purposes of identifying safe snapshots for read-only transactions.
204  *
205  * When a RWConflictData is not in use to record either type of relationship
206  * between a pair of transactions, it is kept on an "available" list. The
207  * outLink field is used for maintaining that list.
208  */
209 typedef struct RWConflictData
210 {
211  SHM_QUEUE outLink; /* link for list of conflicts out from a sxact */
212  SHM_QUEUE inLink; /* link for list of conflicts in to a sxact */
216 
217 typedef struct RWConflictData *RWConflict;
218 
219 #define RWConflictDataSize \
220  ((Size)MAXALIGN(sizeof(RWConflictData)))
221 
223 {
227 
229 
230 #define RWConflictPoolHeaderDataSize \
231  ((Size)MAXALIGN(sizeof(RWConflictPoolHeaderData)))
232 
233 
234 /*
235  * The SERIALIZABLEXIDTAG struct identifies an xid assigned to a serializable
236  * transaction or any of its subtransactions.
237  */
238 typedef struct SERIALIZABLEXIDTAG
239 {
242 
243 /*
244  * The SERIALIZABLEXID struct provides a link from a TransactionId for a
245  * serializable transaction to the related SERIALIZABLEXACT record, even if
246  * the transaction has completed and its connection has been closed.
247  *
248  * These are created as new top level transaction IDs are first assigned to
249  * transactions which are participating in predicate locking. This may
250  * never happen for a particular transaction if it doesn't write anything.
251  * They are removed with their related serializable transaction objects.
252  *
253  * The SubTransGetTopmostTransaction method is used where necessary to get
254  * from an XID which might be from a subtransaction to the top level XID.
255  */
256 typedef struct SERIALIZABLEXID
257 {
258  /* hash key */
260 
261  /* data */
262  SERIALIZABLEXACT *myXact; /* pointer to the top level transaction data */
264 
265 
266 /*
267  * The PREDICATELOCKTARGETTAG struct identifies a database object which can
268  * be the target of predicate locks.
269  *
270  * Note that the hash function being used doesn't properly respect tag
271  * length -- if the length of the structure isn't a multiple of four bytes it
272  * will go to a four byte boundary past the end of the tag. If you change
273  * this struct, make sure any slack space is initialized, so that any random
274  * bytes in the middle or at the end are not included in the hash.
275  *
276  * TODO SSI: If we always use the same fields for the same type of value, we
277  * should rename these. Holding off until it's clear there are no exceptions.
278  * Since indexes are relations with blocks and tuples, it's looking likely that
279  * the rename will be possible. If not, we may need to divide the last field
280  * and use part of it for a target type, so that we know how to interpret the
281  * data..
282  */
284 {
285  uint32 locktag_field1; /* a 32-bit ID field */
286  uint32 locktag_field2; /* a 32-bit ID field */
287  uint32 locktag_field3; /* a 32-bit ID field */
288  uint32 locktag_field4; /* a 32-bit ID field */
290 
291 /*
292  * The PREDICATELOCKTARGET struct represents a database object on which there
293  * are predicate locks.
294  *
295  * A hash list of these objects is maintained in shared memory. An entry is
296  * added when a predicate lock is requested on an object which doesn't
297  * already have one. An entry is removed when the last lock is removed from
298  * its list.
299  */
300 typedef struct PREDICATELOCKTARGET
301 {
302  /* hash key */
303  PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
304 
305  /* data */
306  SHM_QUEUE predicateLocks; /* list of PREDICATELOCK objects assoc. with
307  * predicate lock target */
309 
310 
311 /*
312  * The PREDICATELOCKTAG struct identifies an individual predicate lock.
313  *
314  * It is the combination of predicate lock target (which is a lockable
315  * object) and a serializable transaction which has acquired a lock on that
316  * target.
317  */
318 typedef struct PREDICATELOCKTAG
319 {
323 
324 /*
325  * The PREDICATELOCK struct represents an individual lock.
326  *
327  * An entry can be created here when the related database object is read, or
328  * by promotion of multiple finer-grained targets. All entries related to a
329  * serializable transaction are removed when that serializable transaction is
330  * cleaned up. Entries can also be removed when they are combined into a
331  * single coarser-grained lock entry.
332  */
333 typedef struct PREDICATELOCK
334 {
335  /* hash key */
336  PREDICATELOCKTAG tag; /* unique identifier of lock */
337 
338  /* data */
339  SHM_QUEUE targetLink; /* list link in PREDICATELOCKTARGET's list of
340  * predicate locks */
341  SHM_QUEUE xactLink; /* list link in SERIALIZABLEXACT's list of
342  * predicate locks */
343  SerCommitSeqNo commitSeqNo; /* only used for summarized predicate locks */
345 
346 
347 /*
348  * The LOCALPREDICATELOCK struct represents a local copy of data which is
349  * also present in the PREDICATELOCK table, organized for fast access without
350  * needing to acquire a LWLock. It is strictly for optimization.
351  *
352  * Each serializable transaction creates its own local hash table to hold a
353  * collection of these. This information is used to determine when a number
354  * of fine-grained locks should be promoted to a single coarser-grained lock.
355  * The information is maintained more-or-less in parallel to the
356  * PREDICATELOCK data, but because this data is not protected by locks and is
357  * only used in an optimization heuristic, it is allowed to drift in a few
358  * corner cases where maintaining exact data would be expensive.
359  *
360  * The hash table is created when the serializable transaction acquires its
361  * snapshot, and its memory is released upon completion of the transaction.
362  */
363 typedef struct LOCALPREDICATELOCK
364 {
365  /* hash key */
366  PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
367 
368  /* data */
369  bool held; /* is lock held, or just its children? */
370  int childLocks; /* number of child locks currently held */
372 
373 
374 /*
375  * The types of predicate locks which can be acquired.
376  */
378 {
382  /* TODO SSI: Other types may be needed for index locking */
384 
385 
386 /*
387  * This structure is used to quickly capture a copy of all predicate
388  * locks. This is currently used only by the pg_lock_status function,
389  * which in turn is used by the pg_locks view.
390  */
391 typedef struct PredicateLockData
392 {
397 
398 
399 /*
400  * These macros define how we map logical IDs of lockable objects into the
401  * physical fields of PREDICATELOCKTARGETTAG. Use these to set up values,
402  * rather than accessing the fields directly. Note multiple eval of target!
403  */
404 #define SET_PREDICATELOCKTARGETTAG_RELATION(locktag,dboid,reloid) \
405  ((locktag).locktag_field1 = (dboid), \
406  (locktag).locktag_field2 = (reloid), \
407  (locktag).locktag_field3 = InvalidBlockNumber, \
408  (locktag).locktag_field4 = InvalidOffsetNumber)
409 
410 #define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
411  ((locktag).locktag_field1 = (dboid), \
412  (locktag).locktag_field2 = (reloid), \
413  (locktag).locktag_field3 = (blocknum), \
414  (locktag).locktag_field4 = InvalidOffsetNumber)
415 
416 #define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
417  ((locktag).locktag_field1 = (dboid), \
418  (locktag).locktag_field2 = (reloid), \
419  (locktag).locktag_field3 = (blocknum), \
420  (locktag).locktag_field4 = (offnum))
421 
422 #define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
423  ((Oid) (locktag).locktag_field1)
424 #define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \
425  ((Oid) (locktag).locktag_field2)
426 #define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \
427  ((BlockNumber) (locktag).locktag_field3)
428 #define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
429  ((OffsetNumber) (locktag).locktag_field4)
430 #define GET_PREDICATELOCKTARGETTAG_TYPE(locktag) \
431  (((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \
432  (((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE : \
433  PREDLOCKTAG_RELATION))
434 
435 /*
436  * Two-phase commit statefile records. There are two types: for each
437  * transaction, we generate one per-transaction record and a variable
438  * number of per-predicate-lock records.
439  */
441 {
445 
446 /*
447  * Per-transaction information to reconstruct a SERIALIZABLEXACT. Not
448  * much is needed because most of it not meaningful for a recovered
449  * prepared transaction.
450  *
451  * In particular, we do not record the in and out conflict lists for a
452  * prepared transaction because the associated SERIALIZABLEXACTs will
453  * not be available after recovery. Instead, we simply record the
454  * existence of each type of conflict by setting the transaction's
455  * summary conflict in/out flag.
456  */
458 {
462 
463 /* Per-lock state */
465 {
467  uint32 filler; /* to avoid length change in back-patched fix */
469 
471 {
473  union
474  {
477  } data;
479 
480 /*
481  * Define a macro to use for an "empty" SERIALIZABLEXACT reference.
482  */
483 #define InvalidSerializableXact ((SERIALIZABLEXACT *) NULL)
484 
485 
486 /*
487  * Function definitions for functions needing awareness of predicate
488  * locking internals.
489  */
491 extern int GetSafeSnapshotBlockingPids(int blocked_pid,
492  int *output, int output_size);
493 
494 #endif /* PREDICATE_INTERNALS_H */
unsigned int uint32
Definition: c.h:441
uint32 TransactionId
Definition: c.h:587
static void output(uint64 loop_count)
struct RWConflictData RWConflictData
struct PredXactListData * PredXactList
struct TwoPhasePredicateRecord TwoPhasePredicateRecord
struct RWConflictPoolHeaderData RWConflictPoolHeaderData
PredicateLockData * GetPredicateLockStatusData(void)
Definition: predicate.c:1444
struct SERIALIZABLEXACT SERIALIZABLEXACT
TwoPhasePredicateRecordType
@ TWOPHASEPREDICATERECORD_XACT
@ TWOPHASEPREDICATERECORD_LOCK
int GetSafeSnapshotBlockingPids(int blocked_pid, int *output, int output_size)
Definition: predicate.c:1629
struct TwoPhasePredicateLockRecord TwoPhasePredicateLockRecord
PredicateLockTargetType
@ PREDLOCKTAG_RELATION
@ PREDLOCKTAG_PAGE
@ PREDLOCKTAG_TUPLE
struct PREDICATELOCKTAG PREDICATELOCKTAG
struct TwoPhasePredicateXactRecord TwoPhasePredicateXactRecord
struct LOCALPREDICATELOCK LOCALPREDICATELOCK
struct RWConflictPoolHeaderData * RWConflictPoolHeader
struct SERIALIZABLEXIDTAG SERIALIZABLEXIDTAG
struct PredXactListElementData * PredXactListElement
struct PREDICATELOCKTARGET PREDICATELOCKTARGET
struct PredXactListElementData PredXactListElementData
struct PREDICATELOCKTARGETTAG PREDICATELOCKTARGETTAG
struct SERIALIZABLEXID SERIALIZABLEXID
struct PredicateLockData PredicateLockData
struct RWConflictData * RWConflict
uint64 SerCommitSeqNo
struct PredXactListData PredXactListData
struct PREDICATELOCK PREDICATELOCK
PREDICATELOCKTARGETTAG tag
Definition: lwlock.h:32
SERIALIZABLEXACT * myXact
PREDICATELOCKTARGET * myTarget
PREDICATELOCKTARGETTAG tag
PREDICATELOCKTAG tag
SerCommitSeqNo commitSeqNo
PredXactListElement element
SerCommitSeqNo LastSxactCommitSeqNo
SerCommitSeqNo CanPartialClearThrough
SERIALIZABLEXACT * OldCommittedSxact
SerCommitSeqNo HavePartialClearedThrough
TransactionId SxactGlobalXmin
PREDICATELOCKTARGETTAG * locktags
SERIALIZABLEXACT * xacts
SERIALIZABLEXACT * sxactIn
SERIALIZABLEXACT * sxactOut
SHM_QUEUE possibleUnsafeConflicts
VirtualTransactionId vxid
union SERIALIZABLEXACT::@115 SeqNo
SerCommitSeqNo lastCommitBeforeSnapshot
SerCommitSeqNo prepareSeqNo
SerCommitSeqNo commitSeqNo
TransactionId finishedBefore
SerCommitSeqNo earliestOutConflictCommit
SERIALIZABLEXIDTAG tag
SERIALIZABLEXACT * myXact
PREDICATELOCKTARGETTAG target
TwoPhasePredicateRecordType type
union TwoPhasePredicateRecord::@116 data
TwoPhasePredicateLockRecord lockRecord
TwoPhasePredicateXactRecord xactRecord