PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
predicate_internals.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * predicate_internals.h
4  * POSTGRES internal predicate locking definitions.
5  *
6  *
7  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/include/storage/predicate_internals.h
11  *
12  *-------------------------------------------------------------------------
13  */
14 #ifndef PREDICATE_INTERNALS_H
15 #define PREDICATE_INTERNALS_H
16 
17 #include "storage/lock.h"
18 
19 /*
20  * Commit number.
21  */
22 typedef uint64 SerCommitSeqNo;
23 
24 /*
25  * Reserved commit sequence numbers:
26  * - 0 is reserved to indicate a non-existent SLRU entry; it cannot be
27  * used as a SerCommitSeqNo, even an invalid one
28  * - InvalidSerCommitSeqNo is used to indicate a transaction that
29  * hasn't committed yet, so use a number greater than all valid
30  * ones to make comparison do the expected thing
31  * - RecoverySerCommitSeqNo is used to refer to transactions that
32  * happened before a crash/recovery, since we restart the sequence
33  * at that point. It's earlier than all normal sequence numbers,
34  * and is only used by recovered prepared transactions
35  */
36 #define InvalidSerCommitSeqNo ((SerCommitSeqNo) PG_UINT64_MAX)
37 #define RecoverySerCommitSeqNo ((SerCommitSeqNo) 1)
38 #define FirstNormalSerCommitSeqNo ((SerCommitSeqNo) 2)
39 
40 /*
41  * The SERIALIZABLEXACT struct contains information needed for each
42  * serializable database transaction to support SSI techniques.
43  *
44  * A home-grown list is maintained in shared memory to manage these.
45  * An entry is used when the serializable transaction acquires a snapshot.
46  * Unless the transaction is rolled back, this entry must generally remain
47  * until all concurrent transactions have completed. (There are special
48  * optimizations for READ ONLY transactions which often allow them to be
49  * cleaned up earlier.) A transaction which is rolled back is cleaned up
50  * as soon as possible.
51  *
52  * Eligibility for cleanup of committed transactions is generally determined
53  * by comparing the transaction's finishedBefore field to
54  * SerializableGlobalXmin.
55  */
56 typedef struct SERIALIZABLEXACT
57 {
58  VirtualTransactionId vxid; /* The executing process always has one of
59  * these. */
60 
61  /*
62  * We use two numbers to track the order that transactions commit. Before
63  * commit, a transaction is marked as prepared, and prepareSeqNo is set.
64  * Shortly after commit, it's marked as committed, and commitSeqNo is set.
65  * This doesn't give a strict commit order, but these two values together
66  * are good enough for us, as we can always err on the safe side and
67  * assume that there's a conflict, if we can't be sure of the exact
68  * ordering of two commits.
69  *
70  * Note that a transaction is marked as prepared for a short period during
71  * commit processing, even if two-phase commit is not used. But with
72  * two-phase commit, a transaction can stay in prepared state for some
73  * time.
74  */
77 
78  /* these values are not both interesting at the same time */
79  union
80  {
81  SerCommitSeqNo earliestOutConflictCommit; /* when committed with
82  * conflict out */
83  SerCommitSeqNo lastCommitBeforeSnapshot; /* when not committed or
84  * no conflict out */
85  } SeqNo;
86  SHM_QUEUE outConflicts; /* list of write transactions whose data we
87  * couldn't read. */
88  SHM_QUEUE inConflicts; /* list of read transactions which couldn't
89  * see our write. */
90  SHM_QUEUE predicateLocks; /* list of associated PREDICATELOCK objects */
91  SHM_QUEUE finishedLink; /* list link in
92  * FinishedSerializableTransactions */
93 
94  /*
95  * for r/o transactions: list of concurrent r/w transactions that we could
96  * potentially have conflicts with, and vice versa for r/w transactions
97  */
99 
100  TransactionId topXid; /* top level xid for the transaction, if one
101  * exists; else invalid */
102  TransactionId finishedBefore; /* invalid means still running; else
103  * the struct expires when no
104  * serializable xids are before this. */
105  TransactionId xmin; /* the transaction's snapshot xmin */
106  uint32 flags; /* OR'd combination of values defined below */
107  int pid; /* pid of associated process */
109 
110 #define SXACT_FLAG_COMMITTED 0x00000001 /* already committed */
111 #define SXACT_FLAG_PREPARED 0x00000002 /* about to commit */
112 #define SXACT_FLAG_ROLLED_BACK 0x00000004 /* already rolled back */
113 #define SXACT_FLAG_DOOMED 0x00000008 /* will roll back */
114 /*
115  * The following flag actually means that the flagged transaction has a
116  * conflict out *to a transaction which committed ahead of it*. It's hard
117  * to get that into a name of a reasonable length.
118  */
119 #define SXACT_FLAG_CONFLICT_OUT 0x00000010
120 #define SXACT_FLAG_READ_ONLY 0x00000020
121 #define SXACT_FLAG_DEFERRABLE_WAITING 0x00000040
122 #define SXACT_FLAG_RO_SAFE 0x00000080
123 #define SXACT_FLAG_RO_UNSAFE 0x00000100
124 #define SXACT_FLAG_SUMMARY_CONFLICT_IN 0x00000200
125 #define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400
126 
127 /*
128  * The following types are used to provide an ad hoc list for holding
129  * SERIALIZABLEXACT objects. An HTAB is overkill, since there is no need to
130  * access these by key -- there are direct pointers to these objects where
131  * needed. If a shared memory list is created, these types can probably be
132  * eliminated in favor of using the general solution.
133  */
135 {
139 
141 
142 #define PredXactListElementDataSize \
143  ((Size)MAXALIGN(sizeof(PredXactListElementData)))
144 
145 typedef struct PredXactListData
146 {
149 
150  /*
151  * These global variables are maintained when registering and cleaning up
152  * serializable transactions. They must be global across all backends,
153  * but are not needed outside the predicate.c source file. Protected by
154  * SerializableXactHashLock.
155  */
156  TransactionId SxactGlobalXmin; /* global xmin for active serializable
157  * transactions */
158  int SxactGlobalXminCount; /* how many active serializable
159  * transactions have this xmin */
160  int WritableSxactCount; /* how many non-read-only serializable
161  * transactions are active */
162  SerCommitSeqNo LastSxactCommitSeqNo; /* a strictly monotonically
163  * increasing number for
164  * commits of serializable
165  * transactions */
166  /* Protected by SerializableXactHashLock. */
167  SerCommitSeqNo CanPartialClearThrough; /* can clear predicate locks
168  * and inConflicts for
169  * committed transactions
170  * through this seq no */
171  /* Protected by SerializableFinishedListLock. */
172  SerCommitSeqNo HavePartialClearedThrough; /* have cleared through this
173  * seq no */
174  SERIALIZABLEXACT *OldCommittedSxact; /* shared copy of dummy sxact */
175 
176  PredXactListElement element;
178 
180 
181 #define PredXactListDataSize \
182  ((Size)MAXALIGN(sizeof(PredXactListData)))
183 
184 
185 /*
186  * The following types are used to provide lists of rw-conflicts between
187  * pairs of transactions. Since exactly the same information is needed,
188  * they are also used to record possible unsafe transaction relationships
189  * for purposes of identifying safe snapshots for read-only transactions.
190  *
191  * When a RWConflictData is not in use to record either type of relationship
192  * between a pair of transactions, it is kept on an "available" list. The
193  * outLink field is used for maintaining that list.
194  */
195 typedef struct RWConflictData
196 {
197  SHM_QUEUE outLink; /* link for list of conflicts out from a sxact */
198  SHM_QUEUE inLink; /* link for list of conflicts in to a sxact */
202 
203 typedef struct RWConflictData *RWConflict;
204 
205 #define RWConflictDataSize \
206  ((Size)MAXALIGN(sizeof(RWConflictData)))
207 
209 {
211  RWConflict element;
213 
215 
216 #define RWConflictPoolHeaderDataSize \
217  ((Size)MAXALIGN(sizeof(RWConflictPoolHeaderData)))
218 
219 
220 /*
221  * The SERIALIZABLEXIDTAG struct identifies an xid assigned to a serializable
222  * transaction or any of its subtransactions.
223  */
224 typedef struct SERIALIZABLEXIDTAG
225 {
228 
229 /*
230  * The SERIALIZABLEXID struct provides a link from a TransactionId for a
231  * serializable transaction to the related SERIALIZABLEXACT record, even if
232  * the transaction has completed and its connection has been closed.
233  *
234  * These are created as new top level transaction IDs are first assigned to
235  * transactions which are participating in predicate locking. This may
236  * never happen for a particular transaction if it doesn't write anything.
237  * They are removed with their related serializable transaction objects.
238  *
239  * The SubTransGetTopmostTransaction method is used where necessary to get
240  * from an XID which might be from a subtransaction to the top level XID.
241  */
242 typedef struct SERIALIZABLEXID
243 {
244  /* hash key */
246 
247  /* data */
248  SERIALIZABLEXACT *myXact; /* pointer to the top level transaction data */
250 
251 
252 /*
253  * The PREDICATELOCKTARGETTAG struct identifies a database object which can
254  * be the target of predicate locks.
255  *
256  * Note that the hash function being used doesn't properly respect tag
257  * length -- if the length of the structure isn't a multiple of four bytes it
258  * will go to a four byte boundary past the end of the tag. If you change
259  * this struct, make sure any slack space is initialized, so that any random
260  * bytes in the middle or at the end are not included in the hash.
261  *
262  * TODO SSI: If we always use the same fields for the same type of value, we
263  * should rename these. Holding off until it's clear there are no exceptions.
264  * Since indexes are relations with blocks and tuples, it's looking likely that
265  * the rename will be possible. If not, we may need to divide the last field
266  * and use part of it for a target type, so that we know how to interpret the
267  * data..
268  */
270 {
271  uint32 locktag_field1; /* a 32-bit ID field */
272  uint32 locktag_field2; /* a 32-bit ID field */
273  uint32 locktag_field3; /* a 32-bit ID field */
274  uint32 locktag_field4; /* a 32-bit ID field */
276 
277 /*
278  * The PREDICATELOCKTARGET struct represents a database object on which there
279  * are predicate locks.
280  *
281  * A hash list of these objects is maintained in shared memory. An entry is
282  * added when a predicate lock is requested on an object which doesn't
283  * already have one. An entry is removed when the last lock is removed from
284  * its list.
285  */
286 typedef struct PREDICATELOCKTARGET
287 {
288  /* hash key */
289  PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
290 
291  /* data */
292  SHM_QUEUE predicateLocks; /* list of PREDICATELOCK objects assoc. with
293  * predicate lock target */
295 
296 
297 /*
298  * The PREDICATELOCKTAG struct identifies an individual predicate lock.
299  *
300  * It is the combination of predicate lock target (which is a lockable
301  * object) and a serializable transaction which has acquired a lock on that
302  * target.
303  */
304 typedef struct PREDICATELOCKTAG
305 {
309 
310 /*
311  * The PREDICATELOCK struct represents an individual lock.
312  *
313  * An entry can be created here when the related database object is read, or
314  * by promotion of multiple finer-grained targets. All entries related to a
315  * serializable transaction are removed when that serializable transaction is
316  * cleaned up. Entries can also be removed when they are combined into a
317  * single coarser-grained lock entry.
318  */
319 typedef struct PREDICATELOCK
320 {
321  /* hash key */
322  PREDICATELOCKTAG tag; /* unique identifier of lock */
323 
324  /* data */
325  SHM_QUEUE targetLink; /* list link in PREDICATELOCKTARGET's list of
326  * predicate locks */
327  SHM_QUEUE xactLink; /* list link in SERIALIZABLEXACT's list of
328  * predicate locks */
329  SerCommitSeqNo commitSeqNo; /* only used for summarized predicate locks */
330 } PREDICATELOCK;
331 
332 
333 /*
334  * The LOCALPREDICATELOCK struct represents a local copy of data which is
335  * also present in the PREDICATELOCK table, organized for fast access without
336  * needing to acquire a LWLock. It is strictly for optimization.
337  *
338  * Each serializable transaction creates its own local hash table to hold a
339  * collection of these. This information is used to determine when a number
340  * of fine-grained locks should be promoted to a single coarser-grained lock.
341  * The information is maintained more-or-less in parallel to the
342  * PREDICATELOCK data, but because this data is not protected by locks and is
343  * only used in an optimization heuristic, it is allowed to drift in a few
344  * corner cases where maintaining exact data would be expensive.
345  *
346  * The hash table is created when the serializable transaction acquires its
347  * snapshot, and its memory is released upon completion of the transaction.
348  */
349 typedef struct LOCALPREDICATELOCK
350 {
351  /* hash key */
352  PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
353 
354  /* data */
355  bool held; /* is lock held, or just its children? */
356  int childLocks; /* number of child locks currently held */
358 
359 
360 /*
361  * The types of predicate locks which can be acquired.
362  */
364 {
368  /* TODO SSI: Other types may be needed for index locking */
370 
371 
372 /*
373  * This structure is used to quickly capture a copy of all predicate
374  * locks. This is currently used only by the pg_lock_status function,
375  * which in turn is used by the pg_locks view.
376  */
377 typedef struct PredicateLockData
378 {
383 
384 
385 /*
386  * These macros define how we map logical IDs of lockable objects into the
387  * physical fields of PREDICATELOCKTARGETTAG. Use these to set up values,
388  * rather than accessing the fields directly. Note multiple eval of target!
389  */
390 #define SET_PREDICATELOCKTARGETTAG_RELATION(locktag,dboid,reloid) \
391  ((locktag).locktag_field1 = (dboid), \
392  (locktag).locktag_field2 = (reloid), \
393  (locktag).locktag_field3 = InvalidBlockNumber, \
394  (locktag).locktag_field4 = InvalidOffsetNumber)
395 
396 #define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
397  ((locktag).locktag_field1 = (dboid), \
398  (locktag).locktag_field2 = (reloid), \
399  (locktag).locktag_field3 = (blocknum), \
400  (locktag).locktag_field4 = InvalidOffsetNumber)
401 
402 #define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
403  ((locktag).locktag_field1 = (dboid), \
404  (locktag).locktag_field2 = (reloid), \
405  (locktag).locktag_field3 = (blocknum), \
406  (locktag).locktag_field4 = (offnum))
407 
408 #define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
409  ((Oid) (locktag).locktag_field1)
410 #define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \
411  ((Oid) (locktag).locktag_field2)
412 #define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \
413  ((BlockNumber) (locktag).locktag_field3)
414 #define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
415  ((OffsetNumber) (locktag).locktag_field4)
416 #define GET_PREDICATELOCKTARGETTAG_TYPE(locktag) \
417  (((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \
418  (((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE : \
419  PREDLOCKTAG_RELATION))
420 
421 /*
422  * Two-phase commit statefile records. There are two types: for each
423  * transaction, we generate one per-transaction record and a variable
424  * number of per-predicate-lock records.
425  */
427 {
431 
432 /*
433  * Per-transaction information to reconstruct a SERIALIZABLEXACT. Not
434  * much is needed because most of it not meaningful for a recovered
435  * prepared transaction.
436  *
437  * In particular, we do not record the in and out conflict lists for a
438  * prepared transaction because the associated SERIALIZABLEXACTs will
439  * not be available after recovery. Instead, we simply record the
440  * existence of each type of conflict by setting the transaction's
441  * summary conflict in/out flag.
442  */
444 {
448 
449 /* Per-lock state */
451 {
453  uint32 filler; /* to avoid length change in back-patched fix */
455 
457 {
459  union
460  {
463  } data;
465 
466 /*
467  * Define a macro to use for an "empty" SERIALIZABLEXACT reference.
468  */
469 #define InvalidSerializableXact ((SERIALIZABLEXACT *) NULL)
470 
471 
472 /*
473  * Function definitions for functions needing awareness of predicate
474  * locking internals.
475  */
477 extern int GetSafeSnapshotBlockingPids(int blocked_pid,
478  int *output, int output_size);
479 
480 #endif /* PREDICATE_INTERNALS_H */
TransactionId finishedBefore
struct RWConflictPoolHeaderData * RWConflictPoolHeader
struct TwoPhasePredicateRecord TwoPhasePredicateRecord
struct SERIALIZABLEXID SERIALIZABLEXID
PredicateLockTargetType
uint32 TransactionId
Definition: c.h:397
struct RWConflictPoolHeaderData RWConflictPoolHeaderData
static void output(uint64 loop_count)
PredicateLockData * GetPredicateLockStatusData(void)
Definition: predicate.c:1383
TransactionId SxactGlobalXmin
struct SERIALIZABLEXIDTAG SERIALIZABLEXIDTAG
struct PREDICATELOCKTARGET PREDICATELOCKTARGET
struct RWConflictData * RWConflict
struct TwoPhasePredicateLockRecord TwoPhasePredicateLockRecord
SERIALIZABLEXACT * xacts
SERIALIZABLEXACT * myXact
TwoPhasePredicateRecordType type
PREDICATELOCKTARGETTAG target
SERIALIZABLEXACT * sxactIn
struct PredXactListData PredXactListData
struct RWConflictData RWConflictData
SHM_QUEUE possibleUnsafeConflicts
struct PREDICATELOCK PREDICATELOCK
struct PREDICATELOCKTAG PREDICATELOCKTAG
TwoPhasePredicateXactRecord xactRecord
int GetSafeSnapshotBlockingPids(int blocked_pid, int *output, int output_size)
Definition: predicate.c:1568
struct LOCALPREDICATELOCK LOCALPREDICATELOCK
PREDICATELOCKTARGETTAG tag
VirtualTransactionId vxid
TwoPhasePredicateRecordType
unsigned int uint32
Definition: c.h:268
struct PredXactListElementData PredXactListElementData
SerCommitSeqNo lastCommitBeforeSnapshot
PREDICATELOCKTARGETTAG * locktags
SerCommitSeqNo commitSeqNo
SerCommitSeqNo HavePartialClearedThrough
PREDICATELOCKTAG tag
SerCommitSeqNo CanPartialClearThrough
SerCommitSeqNo earliestOutConflictCommit
union SERIALIZABLEXACT::@100 SeqNo
PREDICATELOCKTARGETTAG tag
SerCommitSeqNo commitSeqNo
uint64 SerCommitSeqNo
struct PredicateLockData PredicateLockData
struct PREDICATELOCKTARGETTAG PREDICATELOCKTARGETTAG
SerCommitSeqNo prepareSeqNo
SERIALIZABLEXIDTAG tag
SerCommitSeqNo LastSxactCommitSeqNo
struct TwoPhasePredicateXactRecord TwoPhasePredicateXactRecord
SERIALIZABLEXACT * OldCommittedSxact
union TwoPhasePredicateRecord::@101 data
SERIALIZABLEXACT * myXact
struct PredXactListElementData * PredXactListElement
TwoPhasePredicateLockRecord lockRecord
PredXactListElement element
struct SERIALIZABLEXACT SERIALIZABLEXACT
PREDICATELOCKTARGET * myTarget
struct PredXactListData * PredXactList
SERIALIZABLEXACT * sxactOut