PostgreSQL Source Code  git master
predicate_internals.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * predicate_internals.h
4  * POSTGRES internal predicate locking definitions.
5  *
6  *
7  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/include/storage/predicate_internals.h
11  *
12  *-------------------------------------------------------------------------
13  */
14 #ifndef PREDICATE_INTERNALS_H
15 #define PREDICATE_INTERNALS_H
16 
17 #include "storage/lock.h"
18 
19 /*
20  * Commit number.
21  */
22 typedef uint64 SerCommitSeqNo;
23 
24 /*
25  * Reserved commit sequence numbers:
26  * - 0 is reserved to indicate a non-existent SLRU entry; it cannot be
27  * used as a SerCommitSeqNo, even an invalid one
28  * - InvalidSerCommitSeqNo is used to indicate a transaction that
29  * hasn't committed yet, so use a number greater than all valid
30  * ones to make comparison do the expected thing
31  * - RecoverySerCommitSeqNo is used to refer to transactions that
32  * happened before a crash/recovery, since we restart the sequence
33  * at that point. It's earlier than all normal sequence numbers,
34  * and is only used by recovered prepared transactions
35  */
36 #define InvalidSerCommitSeqNo ((SerCommitSeqNo) PG_UINT64_MAX)
37 #define RecoverySerCommitSeqNo ((SerCommitSeqNo) 1)
38 #define FirstNormalSerCommitSeqNo ((SerCommitSeqNo) 2)
39 
40 /*
41  * The SERIALIZABLEXACT struct contains information needed for each
42  * serializable database transaction to support SSI techniques.
43  *
44  * A home-grown list is maintained in shared memory to manage these.
45  * An entry is used when the serializable transaction acquires a snapshot.
46  * Unless the transaction is rolled back, this entry must generally remain
47  * until all concurrent transactions have completed. (There are special
48  * optimizations for READ ONLY transactions which often allow them to be
49  * cleaned up earlier.) A transaction which is rolled back is cleaned up
50  * as soon as possible.
51  *
52  * Eligibility for cleanup of committed transactions is generally determined
53  * by comparing the transaction's finishedBefore field to
54  * SerializableGlobalXmin.
55  */
56 typedef struct SERIALIZABLEXACT
57 {
58  VirtualTransactionId vxid; /* The executing process always has one of
59  * these. */
60 
61  /*
62  * We use two numbers to track the order that transactions commit. Before
63  * commit, a transaction is marked as prepared, and prepareSeqNo is set.
64  * Shortly after commit, it's marked as committed, and commitSeqNo is set.
65  * This doesn't give a strict commit order, but these two values together
66  * are good enough for us, as we can always err on the safe side and
67  * assume that there's a conflict, if we can't be sure of the exact
68  * ordering of two commits.
69  *
70  * Note that a transaction is marked as prepared for a short period during
71  * commit processing, even if two-phase commit is not used. But with
72  * two-phase commit, a transaction can stay in prepared state for some
73  * time.
74  */
77 
78  /* these values are not both interesting at the same time */
79  union
80  {
81  SerCommitSeqNo earliestOutConflictCommit; /* when committed with
82  * conflict out */
83  SerCommitSeqNo lastCommitBeforeSnapshot; /* when not committed or
84  * no conflict out */
85  } SeqNo;
86  SHM_QUEUE outConflicts; /* list of write transactions whose data we
87  * couldn't read. */
88  SHM_QUEUE inConflicts; /* list of read transactions which couldn't
89  * see our write. */
90  SHM_QUEUE predicateLocks; /* list of associated PREDICATELOCK objects */
91  SHM_QUEUE finishedLink; /* list link in
92  * FinishedSerializableTransactions */
93 
94  /*
95  * for r/o transactions: list of concurrent r/w transactions that we could
96  * potentially have conflicts with, and vice versa for r/w transactions
97  */
99 
100  TransactionId topXid; /* top level xid for the transaction, if one
101  * exists; else invalid */
102  TransactionId finishedBefore; /* invalid means still running; else the
103  * struct expires when no serializable
104  * xids are before this. */
105  TransactionId xmin; /* the transaction's snapshot xmin */
106  uint32 flags; /* OR'd combination of values defined below */
107  int pid; /* pid of associated process */
109 
110 #define SXACT_FLAG_COMMITTED 0x00000001 /* already committed */
111 #define SXACT_FLAG_PREPARED 0x00000002 /* about to commit */
112 #define SXACT_FLAG_ROLLED_BACK 0x00000004 /* already rolled back */
113 #define SXACT_FLAG_DOOMED 0x00000008 /* will roll back */
114 /*
115  * The following flag actually means that the flagged transaction has a
116  * conflict out *to a transaction which committed ahead of it*. It's hard
117  * to get that into a name of a reasonable length.
118  */
119 #define SXACT_FLAG_CONFLICT_OUT 0x00000010
120 #define SXACT_FLAG_READ_ONLY 0x00000020
121 #define SXACT_FLAG_DEFERRABLE_WAITING 0x00000040
122 #define SXACT_FLAG_RO_SAFE 0x00000080
123 #define SXACT_FLAG_RO_UNSAFE 0x00000100
124 #define SXACT_FLAG_SUMMARY_CONFLICT_IN 0x00000200
125 #define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400
126 
127 /*
128  * The following types are used to provide an ad hoc list for holding
129  * SERIALIZABLEXACT objects. An HTAB is overkill, since there is no need to
130  * access these by key -- there are direct pointers to these objects where
131  * needed. If a shared memory list is created, these types can probably be
132  * eliminated in favor of using the general solution.
133  */
135 {
139 
141 
142 #define PredXactListElementDataSize \
143  ((Size)MAXALIGN(sizeof(PredXactListElementData)))
144 
145 typedef struct PredXactListData
146 {
149 
150  /*
151  * These global variables are maintained when registering and cleaning up
152  * serializable transactions. They must be global across all backends,
153  * but are not needed outside the predicate.c source file. Protected by
154  * SerializableXactHashLock.
155  */
156  TransactionId SxactGlobalXmin; /* global xmin for active serializable
157  * transactions */
158  int SxactGlobalXminCount; /* how many active serializable
159  * transactions have this xmin */
160  int WritableSxactCount; /* how many non-read-only serializable
161  * transactions are active */
162  SerCommitSeqNo LastSxactCommitSeqNo; /* a strictly monotonically
163  * increasing number for commits
164  * of serializable transactions */
165  /* Protected by SerializableXactHashLock. */
166  SerCommitSeqNo CanPartialClearThrough; /* can clear predicate locks and
167  * inConflicts for committed
168  * transactions through this seq
169  * no */
170  /* Protected by SerializableFinishedListLock. */
171  SerCommitSeqNo HavePartialClearedThrough; /* have cleared through this
172  * seq no */
173  SERIALIZABLEXACT *OldCommittedSxact; /* shared copy of dummy sxact */
174 
175  PredXactListElement element;
177 
179 
180 #define PredXactListDataSize \
181  ((Size)MAXALIGN(sizeof(PredXactListData)))
182 
183 
184 /*
185  * The following types are used to provide lists of rw-conflicts between
186  * pairs of transactions. Since exactly the same information is needed,
187  * they are also used to record possible unsafe transaction relationships
188  * for purposes of identifying safe snapshots for read-only transactions.
189  *
190  * When a RWConflictData is not in use to record either type of relationship
191  * between a pair of transactions, it is kept on an "available" list. The
192  * outLink field is used for maintaining that list.
193  */
194 typedef struct RWConflictData
195 {
196  SHM_QUEUE outLink; /* link for list of conflicts out from a sxact */
197  SHM_QUEUE inLink; /* link for list of conflicts in to a sxact */
201 
202 typedef struct RWConflictData *RWConflict;
203 
204 #define RWConflictDataSize \
205  ((Size)MAXALIGN(sizeof(RWConflictData)))
206 
208 {
210  RWConflict element;
212 
214 
215 #define RWConflictPoolHeaderDataSize \
216  ((Size)MAXALIGN(sizeof(RWConflictPoolHeaderData)))
217 
218 
219 /*
220  * The SERIALIZABLEXIDTAG struct identifies an xid assigned to a serializable
221  * transaction or any of its subtransactions.
222  */
223 typedef struct SERIALIZABLEXIDTAG
224 {
227 
228 /*
229  * The SERIALIZABLEXID struct provides a link from a TransactionId for a
230  * serializable transaction to the related SERIALIZABLEXACT record, even if
231  * the transaction has completed and its connection has been closed.
232  *
233  * These are created as new top level transaction IDs are first assigned to
234  * transactions which are participating in predicate locking. This may
235  * never happen for a particular transaction if it doesn't write anything.
236  * They are removed with their related serializable transaction objects.
237  *
238  * The SubTransGetTopmostTransaction method is used where necessary to get
239  * from an XID which might be from a subtransaction to the top level XID.
240  */
241 typedef struct SERIALIZABLEXID
242 {
243  /* hash key */
245 
246  /* data */
247  SERIALIZABLEXACT *myXact; /* pointer to the top level transaction data */
249 
250 
251 /*
252  * The PREDICATELOCKTARGETTAG struct identifies a database object which can
253  * be the target of predicate locks.
254  *
255  * Note that the hash function being used doesn't properly respect tag
256  * length -- if the length of the structure isn't a multiple of four bytes it
257  * will go to a four byte boundary past the end of the tag. If you change
258  * this struct, make sure any slack space is initialized, so that any random
259  * bytes in the middle or at the end are not included in the hash.
260  *
261  * TODO SSI: If we always use the same fields for the same type of value, we
262  * should rename these. Holding off until it's clear there are no exceptions.
263  * Since indexes are relations with blocks and tuples, it's looking likely that
264  * the rename will be possible. If not, we may need to divide the last field
265  * and use part of it for a target type, so that we know how to interpret the
266  * data..
267  */
269 {
270  uint32 locktag_field1; /* a 32-bit ID field */
271  uint32 locktag_field2; /* a 32-bit ID field */
272  uint32 locktag_field3; /* a 32-bit ID field */
273  uint32 locktag_field4; /* a 32-bit ID field */
275 
276 /*
277  * The PREDICATELOCKTARGET struct represents a database object on which there
278  * are predicate locks.
279  *
280  * A hash list of these objects is maintained in shared memory. An entry is
281  * added when a predicate lock is requested on an object which doesn't
282  * already have one. An entry is removed when the last lock is removed from
283  * its list.
284  */
285 typedef struct PREDICATELOCKTARGET
286 {
287  /* hash key */
288  PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
289 
290  /* data */
291  SHM_QUEUE predicateLocks; /* list of PREDICATELOCK objects assoc. with
292  * predicate lock target */
294 
295 
296 /*
297  * The PREDICATELOCKTAG struct identifies an individual predicate lock.
298  *
299  * It is the combination of predicate lock target (which is a lockable
300  * object) and a serializable transaction which has acquired a lock on that
301  * target.
302  */
303 typedef struct PREDICATELOCKTAG
304 {
308 
309 /*
310  * The PREDICATELOCK struct represents an individual lock.
311  *
312  * An entry can be created here when the related database object is read, or
313  * by promotion of multiple finer-grained targets. All entries related to a
314  * serializable transaction are removed when that serializable transaction is
315  * cleaned up. Entries can also be removed when they are combined into a
316  * single coarser-grained lock entry.
317  */
318 typedef struct PREDICATELOCK
319 {
320  /* hash key */
321  PREDICATELOCKTAG tag; /* unique identifier of lock */
322 
323  /* data */
324  SHM_QUEUE targetLink; /* list link in PREDICATELOCKTARGET's list of
325  * predicate locks */
326  SHM_QUEUE xactLink; /* list link in SERIALIZABLEXACT's list of
327  * predicate locks */
328  SerCommitSeqNo commitSeqNo; /* only used for summarized predicate locks */
329 } PREDICATELOCK;
330 
331 
332 /*
333  * The LOCALPREDICATELOCK struct represents a local copy of data which is
334  * also present in the PREDICATELOCK table, organized for fast access without
335  * needing to acquire a LWLock. It is strictly for optimization.
336  *
337  * Each serializable transaction creates its own local hash table to hold a
338  * collection of these. This information is used to determine when a number
339  * of fine-grained locks should be promoted to a single coarser-grained lock.
340  * The information is maintained more-or-less in parallel to the
341  * PREDICATELOCK data, but because this data is not protected by locks and is
342  * only used in an optimization heuristic, it is allowed to drift in a few
343  * corner cases where maintaining exact data would be expensive.
344  *
345  * The hash table is created when the serializable transaction acquires its
346  * snapshot, and its memory is released upon completion of the transaction.
347  */
348 typedef struct LOCALPREDICATELOCK
349 {
350  /* hash key */
351  PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
352 
353  /* data */
354  bool held; /* is lock held, or just its children? */
355  int childLocks; /* number of child locks currently held */
357 
358 
359 /*
360  * The types of predicate locks which can be acquired.
361  */
363 {
367  /* TODO SSI: Other types may be needed for index locking */
369 
370 
371 /*
372  * This structure is used to quickly capture a copy of all predicate
373  * locks. This is currently used only by the pg_lock_status function,
374  * which in turn is used by the pg_locks view.
375  */
376 typedef struct PredicateLockData
377 {
382 
383 
384 /*
385  * These macros define how we map logical IDs of lockable objects into the
386  * physical fields of PREDICATELOCKTARGETTAG. Use these to set up values,
387  * rather than accessing the fields directly. Note multiple eval of target!
388  */
389 #define SET_PREDICATELOCKTARGETTAG_RELATION(locktag,dboid,reloid) \
390  ((locktag).locktag_field1 = (dboid), \
391  (locktag).locktag_field2 = (reloid), \
392  (locktag).locktag_field3 = InvalidBlockNumber, \
393  (locktag).locktag_field4 = InvalidOffsetNumber)
394 
395 #define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
396  ((locktag).locktag_field1 = (dboid), \
397  (locktag).locktag_field2 = (reloid), \
398  (locktag).locktag_field3 = (blocknum), \
399  (locktag).locktag_field4 = InvalidOffsetNumber)
400 
401 #define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
402  ((locktag).locktag_field1 = (dboid), \
403  (locktag).locktag_field2 = (reloid), \
404  (locktag).locktag_field3 = (blocknum), \
405  (locktag).locktag_field4 = (offnum))
406 
407 #define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
408  ((Oid) (locktag).locktag_field1)
409 #define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \
410  ((Oid) (locktag).locktag_field2)
411 #define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \
412  ((BlockNumber) (locktag).locktag_field3)
413 #define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
414  ((OffsetNumber) (locktag).locktag_field4)
415 #define GET_PREDICATELOCKTARGETTAG_TYPE(locktag) \
416  (((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \
417  (((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE : \
418  PREDLOCKTAG_RELATION))
419 
420 /*
421  * Two-phase commit statefile records. There are two types: for each
422  * transaction, we generate one per-transaction record and a variable
423  * number of per-predicate-lock records.
424  */
426 {
430 
431 /*
432  * Per-transaction information to reconstruct a SERIALIZABLEXACT. Not
433  * much is needed because most of it not meaningful for a recovered
434  * prepared transaction.
435  *
436  * In particular, we do not record the in and out conflict lists for a
437  * prepared transaction because the associated SERIALIZABLEXACTs will
438  * not be available after recovery. Instead, we simply record the
439  * existence of each type of conflict by setting the transaction's
440  * summary conflict in/out flag.
441  */
443 {
447 
448 /* Per-lock state */
450 {
452  uint32 filler; /* to avoid length change in back-patched fix */
454 
456 {
458  union
459  {
462  } data;
464 
465 /*
466  * Define a macro to use for an "empty" SERIALIZABLEXACT reference.
467  */
468 #define InvalidSerializableXact ((SERIALIZABLEXACT *) NULL)
469 
470 
471 /*
472  * Function definitions for functions needing awareness of predicate
473  * locking internals.
474  */
476 extern int GetSafeSnapshotBlockingPids(int blocked_pid,
477  int *output, int output_size);
478 
479 #endif /* PREDICATE_INTERNALS_H */
TransactionId finishedBefore
struct RWConflictPoolHeaderData * RWConflictPoolHeader
struct TwoPhasePredicateRecord TwoPhasePredicateRecord
struct SERIALIZABLEXID SERIALIZABLEXID
PredicateLockTargetType
uint32 TransactionId
Definition: c.h:463
static void output(uint64 loop_count)
PredicateLockData * GetPredicateLockStatusData(void)
Definition: predicate.c:1399
TransactionId SxactGlobalXmin
struct SERIALIZABLEXIDTAG SERIALIZABLEXIDTAG
struct PREDICATELOCKTARGET PREDICATELOCKTARGET
struct RWConflictData * RWConflict
struct TwoPhasePredicateLockRecord TwoPhasePredicateLockRecord
SERIALIZABLEXACT * xacts
SERIALIZABLEXACT * myXact
TwoPhasePredicateRecordType type
PREDICATELOCKTARGETTAG target
struct RWConflictPoolHeaderData RWConflictPoolHeaderData
SERIALIZABLEXACT * sxactIn
SHM_QUEUE possibleUnsafeConflicts
union SERIALIZABLEXACT::@109 SeqNo
struct PREDICATELOCK PREDICATELOCK
struct PREDICATELOCKTAG PREDICATELOCKTAG
TwoPhasePredicateXactRecord xactRecord
int GetSafeSnapshotBlockingPids(int blocked_pid, int *output, int output_size)
Definition: predicate.c:1584
struct LOCALPREDICATELOCK LOCALPREDICATELOCK
PREDICATELOCKTARGETTAG tag
VirtualTransactionId vxid
TwoPhasePredicateRecordType
unsigned int uint32
Definition: c.h:314
SerCommitSeqNo lastCommitBeforeSnapshot
PREDICATELOCKTARGETTAG * locktags
SerCommitSeqNo commitSeqNo
SerCommitSeqNo HavePartialClearedThrough
PREDICATELOCKTAG tag
SerCommitSeqNo CanPartialClearThrough
SerCommitSeqNo earliestOutConflictCommit
PREDICATELOCKTARGETTAG tag
SerCommitSeqNo commitSeqNo
uint64 SerCommitSeqNo
struct PredicateLockData PredicateLockData
struct PREDICATELOCKTARGETTAG PREDICATELOCKTARGETTAG
SerCommitSeqNo prepareSeqNo
SERIALIZABLEXIDTAG tag
SerCommitSeqNo LastSxactCommitSeqNo
struct TwoPhasePredicateXactRecord TwoPhasePredicateXactRecord
SERIALIZABLEXACT * OldCommittedSxact
struct PredXactListData PredXactListData
struct RWConflictData RWConflictData
SERIALIZABLEXACT * myXact
struct PredXactListElementData * PredXactListElement
struct PredXactListElementData PredXactListElementData
TwoPhasePredicateLockRecord lockRecord
PredXactListElement element
struct SERIALIZABLEXACT SERIALIZABLEXACT
PREDICATELOCKTARGET * myTarget
struct PredXactListData * PredXactList
SERIALIZABLEXACT * sxactOut