PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
predicate_internals.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * predicate_internals.h
4  * POSTGRES internal predicate locking definitions.
5  *
6  *
7  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/include/storage/predicate_internals.h
11  *
12  *-------------------------------------------------------------------------
13  */
14 #ifndef PREDICATE_INTERNALS_H
15 #define PREDICATE_INTERNALS_H
16 
17 #include "storage/lock.h"
18 
19 /*
20  * Commit number.
21  */
22 typedef uint64 SerCommitSeqNo;
23 
24 /*
25  * Reserved commit sequence numbers:
26  * - 0 is reserved to indicate a non-existent SLRU entry; it cannot be
27  * used as a SerCommitSeqNo, even an invalid one
28  * - InvalidSerCommitSeqNo is used to indicate a transaction that
29  * hasn't committed yet, so use a number greater than all valid
30  * ones to make comparison do the expected thing
31  * - RecoverySerCommitSeqNo is used to refer to transactions that
32  * happened before a crash/recovery, since we restart the sequence
33  * at that point. It's earlier than all normal sequence numbers,
34  * and is only used by recovered prepared transactions
35  */
36 #define InvalidSerCommitSeqNo ((SerCommitSeqNo) PG_UINT64_MAX)
37 #define RecoverySerCommitSeqNo ((SerCommitSeqNo) 1)
38 #define FirstNormalSerCommitSeqNo ((SerCommitSeqNo) 2)
39 
40 /*
41  * The SERIALIZABLEXACT struct contains information needed for each
42  * serializable database transaction to support SSI techniques.
43  *
44  * A home-grown list is maintained in shared memory to manage these.
45  * An entry is used when the serializable transaction acquires a snapshot.
46  * Unless the transaction is rolled back, this entry must generally remain
47  * until all concurrent transactions have completed. (There are special
48  * optimizations for READ ONLY transactions which often allow them to be
49  * cleaned up earlier.) A transaction which is rolled back is cleaned up
50  * as soon as possible.
51  *
52  * Eligibility for cleanup of committed transactions is generally determined
53  * by comparing the transaction's finishedBefore field to
54  * SerializableGlobalXmin.
55  */
56 typedef struct SERIALIZABLEXACT
57 {
58  VirtualTransactionId vxid; /* The executing process always has one of
59  * these. */
60 
61  /*
62  * We use two numbers to track the order that transactions commit. Before
63  * commit, a transaction is marked as prepared, and prepareSeqNo is set.
64  * Shortly after commit, it's marked as committed, and commitSeqNo is set.
65  * This doesn't give a strict commit order, but these two values together
66  * are good enough for us, as we can always err on the safe side and
67  * assume that there's a conflict, if we can't be sure of the exact
68  * ordering of two commits.
69  *
70  * Note that a transaction is marked as prepared for a short period during
71  * commit processing, even if two-phase commit is not used. But with
72  * two-phase commit, a transaction can stay in prepared state for some
73  * time.
74  */
77 
78  /* these values are not both interesting at the same time */
79  union
80  {
81  SerCommitSeqNo earliestOutConflictCommit; /* when committed with
82  * conflict out */
83  SerCommitSeqNo lastCommitBeforeSnapshot; /* when not committed or
84  * no conflict out */
85  } SeqNo;
86  SHM_QUEUE outConflicts; /* list of write transactions whose data we
87  * couldn't read. */
88  SHM_QUEUE inConflicts; /* list of read transactions which couldn't
89  * see our write. */
90  SHM_QUEUE predicateLocks; /* list of associated PREDICATELOCK objects */
91  SHM_QUEUE finishedLink; /* list link in
92  * FinishedSerializableTransactions */
93 
94  /*
95  * for r/o transactions: list of concurrent r/w transactions that we could
96  * potentially have conflicts with, and vice versa for r/w transactions
97  */
99 
100  TransactionId topXid; /* top level xid for the transaction, if one
101  * exists; else invalid */
102  TransactionId finishedBefore; /* invalid means still running; else
103  * the struct expires when no
104  * serializable xids are before this. */
105  TransactionId xmin; /* the transaction's snapshot xmin */
106  uint32 flags; /* OR'd combination of values defined below */
107  int pid; /* pid of associated process */
109 
110 #define SXACT_FLAG_COMMITTED 0x00000001 /* already committed */
111 #define SXACT_FLAG_PREPARED 0x00000002 /* about to commit */
112 #define SXACT_FLAG_ROLLED_BACK 0x00000004 /* already rolled back */
113 #define SXACT_FLAG_DOOMED 0x00000008 /* will roll back */
114 /*
115  * The following flag actually means that the flagged transaction has a
116  * conflict out *to a transaction which committed ahead of it*. It's hard
117  * to get that into a name of a reasonable length.
118  */
119 #define SXACT_FLAG_CONFLICT_OUT 0x00000010
120 #define SXACT_FLAG_READ_ONLY 0x00000020
121 #define SXACT_FLAG_DEFERRABLE_WAITING 0x00000040
122 #define SXACT_FLAG_RO_SAFE 0x00000080
123 #define SXACT_FLAG_RO_UNSAFE 0x00000100
124 #define SXACT_FLAG_SUMMARY_CONFLICT_IN 0x00000200
125 #define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400
126 
127 /*
128  * The following types are used to provide an ad hoc list for holding
129  * SERIALIZABLEXACT objects. An HTAB is overkill, since there is no need to
130  * access these by key -- there are direct pointers to these objects where
131  * needed. If a shared memory list is created, these types can probably be
132  * eliminated in favor of using the general solution.
133  */
135 {
139 
141 
142 #define PredXactListElementDataSize \
143  ((Size)MAXALIGN(sizeof(PredXactListElementData)))
144 
145 typedef struct PredXactListData
146 {
149 
150  /*
151  * These global variables are maintained when registering and cleaning up
152  * serializable transactions. They must be global across all backends,
153  * but are not needed outside the predicate.c source file. Protected by
154  * SerializableXactHashLock.
155  */
156  TransactionId SxactGlobalXmin; /* global xmin for active serializable
157  * transactions */
158  int SxactGlobalXminCount; /* how many active serializable
159  * transactions have this xmin */
160  int WritableSxactCount; /* how many non-read-only serializable
161  * transactions are active */
162  SerCommitSeqNo LastSxactCommitSeqNo; /* a strictly monotonically
163  * increasing number for
164  * commits of serializable
165  * transactions */
166  /* Protected by SerializableXactHashLock. */
167  SerCommitSeqNo CanPartialClearThrough; /* can clear predicate locks
168  * and inConflicts for
169  * committed transactions
170  * through this seq no */
171  /* Protected by SerializableFinishedListLock. */
172  SerCommitSeqNo HavePartialClearedThrough; /* have cleared through this
173  * seq no */
174  SERIALIZABLEXACT *OldCommittedSxact; /* shared copy of dummy sxact */
175 
176  PredXactListElement element;
178 
180 
181 #define PredXactListDataSize \
182  ((Size)MAXALIGN(sizeof(PredXactListData)))
183 
184 
185 /*
186  * The following types are used to provide lists of rw-conflicts between
187  * pairs of transactions. Since exactly the same information is needed,
188  * they are also used to record possible unsafe transaction relationships
189  * for purposes of identifying safe snapshots for read-only transactions.
190  *
191  * When a RWConflictData is not in use to record either type of relationship
192  * between a pair of transactions, it is kept on an "available" list. The
193  * outLink field is used for maintaining that list.
194  */
195 typedef struct RWConflictData
196 {
197  SHM_QUEUE outLink; /* link for list of conflicts out from a sxact */
198  SHM_QUEUE inLink; /* link for list of conflicts in to a sxact */
202 
203 typedef struct RWConflictData *RWConflict;
204 
205 #define RWConflictDataSize \
206  ((Size)MAXALIGN(sizeof(RWConflictData)))
207 
209 {
211  RWConflict element;
213 
215 
216 #define RWConflictPoolHeaderDataSize \
217  ((Size)MAXALIGN(sizeof(RWConflictPoolHeaderData)))
218 
219 
220 /*
221  * The SERIALIZABLEXIDTAG struct identifies an xid assigned to a serializable
222  * transaction or any of its subtransactions.
223  */
224 typedef struct SERIALIZABLEXIDTAG
225 {
228 
229 /*
230  * The SERIALIZABLEXID struct provides a link from a TransactionId for a
231  * serializable transaction to the related SERIALIZABLEXACT record, even if
232  * the transaction has completed and its connection has been closed.
233  *
234  * These are created as new top level transaction IDs are first assigned to
235  * transactions which are participating in predicate locking. This may
236  * never happen for a particular transaction if it doesn't write anything.
237  * They are removed with their related serializable transaction objects.
238  *
239  * The SubTransGetTopmostTransaction method is used where necessary to get
240  * from an XID which might be from a subtransaction to the top level XID.
241  */
242 typedef struct SERIALIZABLEXID
243 {
244  /* hash key */
246 
247  /* data */
248  SERIALIZABLEXACT *myXact; /* pointer to the top level transaction data */
250 
251 
252 /*
253  * The PREDICATELOCKTARGETTAG struct identifies a database object which can
254  * be the target of predicate locks.
255  *
256  * Note that the hash function being used doesn't properly respect tag
257  * length -- if the length of the structure isn't a multiple of four bytes it
258  * will go to a four byte boundary past the end of the tag. If you change
259  * this struct, make sure any slack space is initialized, so that any random
260  * bytes in the middle or at the end are not included in the hash.
261  *
262  * TODO SSI: If we always use the same fields for the same type of value, we
263  * should rename these. Holding off until it's clear there are no exceptions.
264  * Since indexes are relations with blocks and tuples, it's looking likely that
265  * the rename will be possible. If not, we may need to divide the last field
266  * and use part of it for a target type, so that we know how to interpret the
267  * data..
268  */
270 {
271  uint32 locktag_field1; /* a 32-bit ID field */
272  uint32 locktag_field2; /* a 32-bit ID field */
273  uint32 locktag_field3; /* a 32-bit ID field */
274  uint32 locktag_field4; /* a 32-bit ID field */
276 
277 /*
278  * The PREDICATELOCKTARGET struct represents a database object on which there
279  * are predicate locks.
280  *
281  * A hash list of these objects is maintained in shared memory. An entry is
282  * added when a predicate lock is requested on an object which doesn't
283  * already have one. An entry is removed when the last lock is removed from
284  * its list.
285  */
286 typedef struct PREDICATELOCKTARGET
287 {
288  /* hash key */
289  PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
290 
291  /* data */
292  SHM_QUEUE predicateLocks; /* list of PREDICATELOCK objects assoc. with
293  * predicate lock target */
295 
296 
297 /*
298  * The PREDICATELOCKTAG struct identifies an individual predicate lock.
299  *
300  * It is the combination of predicate lock target (which is a lockable
301  * object) and a serializable transaction which has acquired a lock on that
302  * target.
303  */
304 typedef struct PREDICATELOCKTAG
305 {
309 
310 /*
311  * The PREDICATELOCK struct represents an individual lock.
312  *
313  * An entry can be created here when the related database object is read, or
314  * by promotion of multiple finer-grained targets. All entries related to a
315  * serializable transaction are removed when that serializable transaction is
316  * cleaned up. Entries can also be removed when they are combined into a
317  * single coarser-grained lock entry.
318  */
319 typedef struct PREDICATELOCK
320 {
321  /* hash key */
322  PREDICATELOCKTAG tag; /* unique identifier of lock */
323 
324  /* data */
325  SHM_QUEUE targetLink; /* list link in PREDICATELOCKTARGET's list of
326  * predicate locks */
327  SHM_QUEUE xactLink; /* list link in SERIALIZABLEXACT's list of
328  * predicate locks */
329  SerCommitSeqNo commitSeqNo; /* only used for summarized predicate locks */
330 } PREDICATELOCK;
331 
332 
333 /*
334  * The LOCALPREDICATELOCK struct represents a local copy of data which is
335  * also present in the PREDICATELOCK table, organized for fast access without
336  * needing to acquire a LWLock. It is strictly for optimization.
337  *
338  * Each serializable transaction creates its own local hash table to hold a
339  * collection of these. This information is used to determine when a number
340  * of fine-grained locks should be promoted to a single coarser-grained lock.
341  * The information is maintained more-or-less in parallel to the
342  * PREDICATELOCK data, but because this data is not protected by locks and is
343  * only used in an optimization heuristic, it is allowed to drift in a few
344  * corner cases where maintaining exact data would be expensive.
345  *
346  * The hash table is created when the serializable transaction acquires its
347  * snapshot, and its memory is released upon completion of the transaction.
348  */
349 typedef struct LOCALPREDICATELOCK
350 {
351  /* hash key */
352  PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
353 
354  /* data */
355  bool held; /* is lock held, or just its children? */
356  int childLocks; /* number of child locks currently held */
358 
359 
360 /*
361  * The types of predicate locks which can be acquired.
362  */
364 {
368  /* TODO SSI: Other types may be needed for index locking */
370 
371 
372 /*
373  * This structure is used to quickly capture a copy of all predicate
374  * locks. This is currently used only by the pg_lock_status function,
375  * which in turn is used by the pg_locks view.
376  */
377 typedef struct PredicateLockData
378 {
383 
384 
385 /*
386  * These macros define how we map logical IDs of lockable objects into the
387  * physical fields of PREDICATELOCKTARGETTAG. Use these to set up values,
388  * rather than accessing the fields directly. Note multiple eval of target!
389  */
390 #define SET_PREDICATELOCKTARGETTAG_RELATION(locktag,dboid,reloid) \
391  ((locktag).locktag_field1 = (dboid), \
392  (locktag).locktag_field2 = (reloid), \
393  (locktag).locktag_field3 = InvalidBlockNumber, \
394  (locktag).locktag_field4 = InvalidOffsetNumber)
395 
396 #define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
397  ((locktag).locktag_field1 = (dboid), \
398  (locktag).locktag_field2 = (reloid), \
399  (locktag).locktag_field3 = (blocknum), \
400  (locktag).locktag_field4 = InvalidOffsetNumber)
401 
402 #define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
403  ((locktag).locktag_field1 = (dboid), \
404  (locktag).locktag_field2 = (reloid), \
405  (locktag).locktag_field3 = (blocknum), \
406  (locktag).locktag_field4 = (offnum))
407 
408 #define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
409  ((Oid) (locktag).locktag_field1)
410 #define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \
411  ((Oid) (locktag).locktag_field2)
412 #define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \
413  ((BlockNumber) (locktag).locktag_field3)
414 #define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
415  ((OffsetNumber) (locktag).locktag_field4)
416 #define GET_PREDICATELOCKTARGETTAG_TYPE(locktag) \
417  (((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \
418  (((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE : \
419  PREDLOCKTAG_RELATION))
420 
421 /*
422  * Two-phase commit statefile records. There are two types: for each
423  * transaction, we generate one per-transaction record and a variable
424  * number of per-predicate-lock records.
425  */
427 {
431 
432 /*
433  * Per-transaction information to reconstruct a SERIALIZABLEXACT. Not
434  * much is needed because most of it not meaningful for a recovered
435  * prepared transaction.
436  *
437  * In particular, we do not record the in and out conflict lists for a
438  * prepared transaction because the associated SERIALIZABLEXACTs will
439  * not be available after recovery. Instead, we simply record the
440  * existence of each type of conflict by setting the transaction's
441  * summary conflict in/out flag.
442  */
444 {
448 
449 /* Per-lock state */
451 {
453  uint32 filler; /* to avoid length change in back-patched fix */
455 
457 {
459  union
460  {
463  } data;
465 
466 /*
467  * Define a macro to use for an "empty" SERIALIZABLEXACT reference.
468  */
469 #define InvalidSerializableXact ((SERIALIZABLEXACT *) NULL)
470 
471 
472 /*
473  * Function definitions for functions needing awareness of predicate
474  * locking internals.
475  */
477 
478 #endif /* PREDICATE_INTERNALS_H */
TransactionId finishedBefore
struct RWConflictPoolHeaderData * RWConflictPoolHeader
struct TwoPhasePredicateRecord TwoPhasePredicateRecord
struct SERIALIZABLEXID SERIALIZABLEXID
PredicateLockTargetType
uint32 TransactionId
Definition: c.h:393
struct RWConflictPoolHeaderData RWConflictPoolHeaderData
PredicateLockData * GetPredicateLockStatusData(void)
Definition: predicate.c:1376
TransactionId SxactGlobalXmin
struct SERIALIZABLEXIDTAG SERIALIZABLEXIDTAG
struct PREDICATELOCKTARGET PREDICATELOCKTARGET
struct RWConflictData * RWConflict
struct TwoPhasePredicateLockRecord TwoPhasePredicateLockRecord
SERIALIZABLEXACT * xacts
SERIALIZABLEXACT * myXact
TwoPhasePredicateRecordType type
union TwoPhasePredicateRecord::@57 data
PREDICATELOCKTARGETTAG target
SERIALIZABLEXACT * sxactIn
struct PredXactListData PredXactListData
struct RWConflictData RWConflictData
SHM_QUEUE possibleUnsafeConflicts
union SERIALIZABLEXACT::@56 SeqNo
struct PREDICATELOCK PREDICATELOCK
struct PREDICATELOCKTAG PREDICATELOCKTAG
TwoPhasePredicateXactRecord xactRecord
struct LOCALPREDICATELOCK LOCALPREDICATELOCK
PREDICATELOCKTARGETTAG tag
VirtualTransactionId vxid
TwoPhasePredicateRecordType
unsigned int uint32
Definition: c.h:265
struct PredXactListElementData PredXactListElementData
SerCommitSeqNo lastCommitBeforeSnapshot
PREDICATELOCKTARGETTAG * locktags
SerCommitSeqNo commitSeqNo
SerCommitSeqNo HavePartialClearedThrough
PREDICATELOCKTAG tag
SerCommitSeqNo CanPartialClearThrough
SerCommitSeqNo earliestOutConflictCommit
PREDICATELOCKTARGETTAG tag
SerCommitSeqNo commitSeqNo
uint64 SerCommitSeqNo
struct PredicateLockData PredicateLockData
struct PREDICATELOCKTARGETTAG PREDICATELOCKTARGETTAG
SerCommitSeqNo prepareSeqNo
SERIALIZABLEXIDTAG tag
SerCommitSeqNo LastSxactCommitSeqNo
struct TwoPhasePredicateXactRecord TwoPhasePredicateXactRecord
SERIALIZABLEXACT * OldCommittedSxact
SERIALIZABLEXACT * myXact
struct PredXactListElementData * PredXactListElement
TwoPhasePredicateLockRecord lockRecord
PredXactListElement element
struct SERIALIZABLEXACT SERIALIZABLEXACT
PREDICATELOCKTARGET * myTarget
struct PredXactListData * PredXactList
SERIALIZABLEXACT * sxactOut