PostgreSQL Source Code git master
Loading...
Searching...
No Matches
predicate.c File Reference
#include "postgres.h"
#include "access/parallel.h"
#include "access/slru.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "access/twophase_rmgr.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "port/pg_lfind.h"
#include "storage/predicate.h"
#include "storage/predicate_internals.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/shmem.h"
#include "storage/subsystems.h"
#include "utils/guc_hooks.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
#include "utils/wait_event.h"
Include dependency graph for predicate.c:

Go to the source code of this file.

Data Structures

struct  SerialControlData
 

Macros

#define TargetTagIsCoveredBy(covered_target, covering_target)
 
#define PredicateLockHashPartition(hashcode)    ((hashcode) % NUM_PREDICATELOCK_PARTITIONS)
 
#define PredicateLockHashPartitionLock(hashcode)
 
#define PredicateLockHashPartitionLockByIndex(i)    (&MainLWLockArray[PREDICATELOCK_MANAGER_LWLOCK_OFFSET + (i)].lock)
 
#define NPREDICATELOCKTARGETENTS()    mul_size(max_predicate_locks_per_xact, add_size(MaxBackends, max_prepared_xacts))
 
#define SxactIsOnFinishedList(sxact)   (!dlist_node_is_detached(&(sxact)->finishedLink))
 
#define SxactIsCommitted(sxact)   (((sxact)->flags & SXACT_FLAG_COMMITTED) != 0)
 
#define SxactIsPrepared(sxact)   (((sxact)->flags & SXACT_FLAG_PREPARED) != 0)
 
#define SxactIsRolledBack(sxact)   (((sxact)->flags & SXACT_FLAG_ROLLED_BACK) != 0)
 
#define SxactIsDoomed(sxact)   (((sxact)->flags & SXACT_FLAG_DOOMED) != 0)
 
#define SxactIsReadOnly(sxact)   (((sxact)->flags & SXACT_FLAG_READ_ONLY) != 0)
 
#define SxactHasSummaryConflictIn(sxact)   (((sxact)->flags & SXACT_FLAG_SUMMARY_CONFLICT_IN) != 0)
 
#define SxactHasSummaryConflictOut(sxact)   (((sxact)->flags & SXACT_FLAG_SUMMARY_CONFLICT_OUT) != 0)
 
#define SxactHasConflictOut(sxact)   (((sxact)->flags & SXACT_FLAG_CONFLICT_OUT) != 0)
 
#define SxactIsDeferrableWaiting(sxact)   (((sxact)->flags & SXACT_FLAG_DEFERRABLE_WAITING) != 0)
 
#define SxactIsROSafe(sxact)   (((sxact)->flags & SXACT_FLAG_RO_SAFE) != 0)
 
#define SxactIsROUnsafe(sxact)   (((sxact)->flags & SXACT_FLAG_RO_UNSAFE) != 0)
 
#define SxactIsPartiallyReleased(sxact)   (((sxact)->flags & SXACT_FLAG_PARTIALLY_RELEASED) != 0)
 
#define PredicateLockTargetTagHashCode(predicatelocktargettag)    get_hash_value(PredicateLockTargetHash, predicatelocktargettag)
 
#define PredicateLockHashCodeFromTargetHashCode(predicatelocktag, targethash)
 
#define SerialSlruCtl   (&SerialSlruDesc)
 
#define SERIAL_PAGESIZE   BLCKSZ
 
#define SERIAL_ENTRYSIZE   sizeof(SerCommitSeqNo)
 
#define SERIAL_ENTRIESPERPAGE   (SERIAL_PAGESIZE / SERIAL_ENTRYSIZE)
 
#define SERIAL_MAX_PAGE   (MaxTransactionId / SERIAL_ENTRIESPERPAGE)
 
#define SerialNextPage(page)   (((page) >= SERIAL_MAX_PAGE) ? 0 : (page) + 1)
 
#define SerialValue(slotno, xid)
 
#define SerialPage(xid)   (((uint32) (xid)) / SERIAL_ENTRIESPERPAGE)
 

Typedefs

typedef struct SerialControlData SerialControlData
 
typedef struct SerialControlData * SerialControl
 

Functions

static bool SerialPagePrecedesLogically (int64 page1, int64 page2)
 
static int serial_errdetail_for_io_error (const void *opaque_data)
 
static void PredicateLockShmemRequest (void *arg)
 
static void PredicateLockShmemInit (void *arg)
 
static void PredicateLockShmemAttach (void *arg)
 
static SERIALIZABLEXACT * CreatePredXact (void)
 
static void ReleasePredXact (SERIALIZABLEXACT *sxact)
 
static bool RWConflictExists (const SERIALIZABLEXACT *reader, const SERIALIZABLEXACT *writer)
 
static void SetRWConflict (SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer)
 
static void SetPossibleUnsafeConflict (SERIALIZABLEXACT *roXact, SERIALIZABLEXACT *activeXact)
 
static void ReleaseRWConflict (RWConflict conflict)
 
static void FlagSxactUnsafe (SERIALIZABLEXACT *sxact)
 
static void SerialAdd (TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo)
 
static SerCommitSeqNo SerialGetMinConflictCommitSeqNo (TransactionId xid)
 
static void SerialSetActiveSerXmin (TransactionId xid)
 
static uint32 predicatelock_hash (const void *key, Size keysize)
 
static void SummarizeOldestCommittedSxact (void)
 
static Snapshot GetSafeSnapshot (Snapshot origSnapshot)
 
static Snapshot GetSerializableTransactionSnapshotInt (Snapshot snapshot, VirtualTransactionId *sourcevxid, int sourcepid)
 
static bool PredicateLockExists (const PREDICATELOCKTARGETTAG *targettag)
 
static bool GetParentPredicateLockTag (const PREDICATELOCKTARGETTAG *tag, PREDICATELOCKTARGETTAG *parent)
 
static bool CoarserLockCovers (const PREDICATELOCKTARGETTAG *newtargettag)
 
static void RemoveScratchTarget (bool lockheld)
 
static void RestoreScratchTarget (bool lockheld)
 
static void RemoveTargetIfNoLongerUsed (PREDICATELOCKTARGET *target, uint32 targettaghash)
 
static void DeleteChildTargetLocks (const PREDICATELOCKTARGETTAG *newtargettag)
 
static int MaxPredicateChildLocks (const PREDICATELOCKTARGETTAG *tag)
 
static bool CheckAndPromotePredicateLockRequest (const PREDICATELOCKTARGETTAG *reqtag)
 
static void DecrementParentLocks (const PREDICATELOCKTARGETTAG *targettag)
 
static void CreatePredicateLock (const PREDICATELOCKTARGETTAG *targettag, uint32 targettaghash, SERIALIZABLEXACT *sxact)
 
static void DeleteLockTarget (PREDICATELOCKTARGET *target, uint32 targettaghash)
 
static bool TransferPredicateLocksToNewTarget (PREDICATELOCKTARGETTAG oldtargettag, PREDICATELOCKTARGETTAG newtargettag, bool removeOld)
 
static void PredicateLockAcquire (const PREDICATELOCKTARGETTAG *targettag)
 
static void DropAllPredicateLocksFromTable (Relation relation, bool transfer)
 
static void SetNewSxactGlobalXmin (void)
 
static void ClearOldPredicateLocks (void)
 
static void ReleaseOneSerializableXact (SERIALIZABLEXACT *sxact, bool partial, bool summarize)
 
static bool XidIsConcurrent (TransactionId xid)
 
static void CheckTargetForConflictsIn (PREDICATELOCKTARGETTAG *targettag)
 
static void FlagRWConflict (SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer)
 
static void OnConflict_CheckForSerializationFailure (const SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer)
 
static void CreateLocalPredicateLockHash (void)
 
static void ReleasePredicateLocksLocal (void)
 
static bool PredicateLockingNeededForRelation (Relation relation)
 
static bool SerializationNeededForRead (Relation relation, Snapshot snapshot)
 
static bool SerializationNeededForWrite (Relation relation)
 
bool check_serial_buffers (int *newval, void **extra, GucSource source)
 
void CheckPointPredicate (void)
 
PredicateLockData * GetPredicateLockStatusData (void)
 
int GetSafeSnapshotBlockingPids (int blocked_pid, int *output, int output_size)
 
Snapshot GetSerializableTransactionSnapshot (Snapshot snapshot)
 
void SetSerializableTransactionSnapshot (Snapshot snapshot, VirtualTransactionId *sourcevxid, int sourcepid)
 
void RegisterPredicateLockingXid (TransactionId xid)
 
bool PageIsPredicateLocked (Relation relation, BlockNumber blkno)
 
void PredicateLockRelation (Relation relation, Snapshot snapshot)
 
void PredicateLockPage (Relation relation, BlockNumber blkno, Snapshot snapshot)
 
void PredicateLockTID (Relation relation, const ItemPointerData *tid, Snapshot snapshot, TransactionId tuple_xid)
 
void TransferPredicateLocksToHeapRelation (Relation relation)
 
void PredicateLockPageSplit (Relation relation, BlockNumber oldblkno, BlockNumber newblkno)
 
void PredicateLockPageCombine (Relation relation, BlockNumber oldblkno, BlockNumber newblkno)
 
void ReleasePredicateLocks (bool isCommit, bool isReadOnlySafe)
 
bool CheckForSerializableConflictOutNeeded (Relation relation, Snapshot snapshot)
 
void CheckForSerializableConflictOut (Relation relation, TransactionId xid, Snapshot snapshot)
 
void CheckForSerializableConflictIn (Relation relation, const ItemPointerData *tid, BlockNumber blkno)
 
void CheckTableForSerializableConflictIn (Relation relation)
 
void PreCommit_CheckForSerializationFailure (void)
 
void AtPrepare_PredicateLocks (void)
 
void PostPrepare_PredicateLocks (FullTransactionId fxid)
 
void PredicateLockTwoPhaseFinish (FullTransactionId fxid, bool isCommit)
 
void predicatelock_twophase_recover (FullTransactionId fxid, uint16 info, void *recdata, uint32 len)
 
SerializableXactHandle ShareSerializableXact (void)
 
void AttachSerializableXact (SerializableXactHandle handle)
 

Variables

static SlruDesc SerialSlruDesc
 
static SerialControl serialControl
 
static SERIALIZABLEXACT * OldCommittedSxact
 
int max_predicate_locks_per_xact
 
int max_predicate_locks_per_relation
 
int max_predicate_locks_per_page
 
static PredXactList PredXact
 
const ShmemCallbacks PredicateLockShmemCallbacks
 
static RWConflictPoolHeader RWConflictPool
 
static HTAB * SerializableXidHash
 
static HTAB * PredicateLockTargetHash
 
static HTAB * PredicateLockHash
 
static dlist_head * FinishedSerializableTransactions
 
static const PREDICATELOCKTARGETTAG ScratchTargetTag = {0, 0, 0, 0}
 
static uint32 ScratchTargetTagHash
 
static LWLock * ScratchPartitionLock
 
static HTAB * LocalPredicateLockHash = NULL
 
static SERIALIZABLEXACT * MySerializableXact = InvalidSerializableXact
 
static bool MyXactDidWrite = false
 
static SERIALIZABLEXACT * SavedSerializableXact = InvalidSerializableXact
 
static int64 max_serializable_xacts
 

Macro Definition Documentation

◆ NPREDICATELOCKTARGETENTS

Definition at line 263 of file predicate.c.

338 : (page) + 1)
339
340#define SerialValue(slotno, xid) (*((SerCommitSeqNo *) \
341 (SerialSlruCtl->shared->page_buffer[slotno] + \
342 ((((uint32) (xid)) % SERIAL_ENTRIESPERPAGE) * SERIAL_ENTRYSIZE))))
343
344#define SerialPage(xid) (((uint32) (xid)) / SERIAL_ENTRIESPERPAGE)
345
346typedef struct SerialControlData
347{
348 int64 headPage; /* newest initialized page */
349 TransactionId headXid; /* newest valid Xid in the SLRU */
350 TransactionId tailXid; /* oldest xmin we might be interested in */
352
353typedef struct SerialControlData *SerialControl;
354
356
357/*
358 * When the oldest committed transaction on the "finished" list is moved to
359 * SLRU, its predicate locks will be moved to this "dummy" transaction,
360 * collapsing duplicate targets. When a duplicate is found, the later
361 * commitSeqNo is used.
362 */
364
365
366/*
367 * These configuration variables are used to set the predicate lock table size
368 * and to control promotion of predicate locks to coarser granularity in an
369 * attempt to degrade performance (mostly as false positive serialization
370 * failure) gracefully in the face of memory pressure.
371 */
372int max_predicate_locks_per_xact; /* in guc_tables.c */
373int max_predicate_locks_per_relation; /* in guc_tables.c */
374int max_predicate_locks_per_page; /* in guc_tables.c */
375
376/*
377 * This provides a list of objects in order to track transactions
378 * participating in predicate locking. Entries in the list are fixed size,
379 * and reside in shared memory. The memory address of an entry must remain
380 * fixed during its lifetime. The list will be protected from concurrent
381 * update externally; no provision is made in this code to manage that. The
382 * number of entries in the list, and the size allowed for each entry is
383 * fixed upon creation.
384 */
386
387static void PredicateLockShmemRequest(void *arg);
388static void PredicateLockShmemInit(void *arg);
389static void PredicateLockShmemAttach(void *arg);
390
393 .init_fn = PredicateLockShmemInit,
394 .attach_fn = PredicateLockShmemAttach,
395};
396
397
398/*
399 * This provides a pool of RWConflict data elements to use in conflict lists
400 * between transactions.
401 */
403
404/*
405 * The predicate locking hash tables are in shared memory.
406 * Each backend keeps pointers to them.
407 */
410static HTAB *PredicateLockHash;
412
413/*
414 * Tag for a dummy entry in PredicateLockTargetHash. By temporarily removing
415 * this entry, you can ensure that there's enough scratch space available for
416 * inserting one entry in the hash table. This is an otherwise-invalid tag.
417 */
418static const PREDICATELOCKTARGETTAG ScratchTargetTag = {0, 0, 0, 0};
421
422/*
423 * The local hash table used to determine when to combine multiple fine-
424 * grained locks into a single coarser-grained lock.
425 */
427
428/*
429 * Keep a pointer to the currently-running serializable transaction (if any)
430 * for quick reference. Also, remember if we have written anything that could
431 * cause a rw-conflict.
432 */
434static bool MyXactDidWrite = false;
435
436/*
437 * The SXACT_FLAG_RO_UNSAFE optimization might lead us to release
438 * MySerializableXact early. If that happens in a parallel query, the leader
439 * needs to defer the destruction of the SERIALIZABLEXACT until end of
440 * transaction, because the workers still have a reference to it. In that
441 * case, the leader stores it here.
442 */
444
446
447/* local functions */
448
449static SERIALIZABLEXACT *CreatePredXact(void);
451
452static bool RWConflictExists(const SERIALIZABLEXACT *reader, const SERIALIZABLEXACT *writer);
457
461
462static uint32 predicatelock_hash(const void *key, Size keysize);
463
464static void SummarizeOldestCommittedSxact(void);
468 int sourcepid);
471 PREDICATELOCKTARGETTAG *parent);
473static void RemoveScratchTarget(bool lockheld);
474static void RestoreScratchTarget(bool lockheld);
487 bool removeOld);
489static void DropAllPredicateLocksFromTable(Relation relation,
490 bool transfer);
491static void SetNewSxactGlobalXmin(void);
492static void ClearOldPredicateLocks(void);
493static void ReleaseOneSerializableXact(SERIALIZABLEXACT *sxact, bool partial,
494 bool summarize);
495static bool XidIsConcurrent(TransactionId xid);
500static void CreateLocalPredicateLockHash(void);
501static void ReleasePredicateLocksLocal(void);
502
503
504/*------------------------------------------------------------------------*/
505
506/*
507 * Does this relation participate in predicate locking? Temporary and system
508 * relations are exempt.
509 */
510static inline bool
512{
513 return !(relation->rd_id < FirstUnpinnedObjectId ||
514 RelationUsesLocalBuffers(relation));
515}
516
517/*
518 * When a public interface method is called for a read, this is the test to
519 * see if we should do a quick return.
520 *
521 * Note: this function has side-effects! If this transaction has been flagged
522 * as RO-safe since the last call, we release all predicate locks and reset
523 * MySerializableXact. That makes subsequent calls return quickly.
524 *
525 * This is marked as 'inline' to eliminate the function call overhead in the
526 * common case that serialization is not needed.
527 */
528static inline bool
530{
531 /* Nothing to do if this is not a serializable transaction */
533 return false;
534
535 /*
536 * Don't acquire locks or conflict when scanning with a special snapshot.
537 * This excludes things like CLUSTER and REINDEX. They use the wholesale
538 * functions TransferPredicateLocksToHeapRelation() and
539 * CheckTableForSerializableConflictIn() to participate in serialization,
540 * but the scans involved don't need serialization.
541 */
542 if (!IsMVCCSnapshot(snapshot))
543 return false;
544
545 /*
546 * Check if we have just become "RO-safe". If we have, immediately release
547 * all locks as they're not needed anymore. This also resets
548 * MySerializableXact, so that subsequent calls to this function can exit
549 * quickly.
550 *
551 * A transaction is flagged as RO_SAFE if all concurrent R/W transactions
552 * commit without having conflicts out to an earlier snapshot, thus
553 * ensuring that no conflicts are possible for this transaction.
554 */
556 {
557 ReleasePredicateLocks(false, true);
558 return false;
559 }
560
561 /* Check if the relation doesn't participate in predicate locking */
563 return false;
564
565 return true; /* no excuse to skip predicate locking */
566}
567
568/*
569 * Like SerializationNeededForRead(), but called on writes.
570 * The logic is the same, but there is no snapshot and we can't be RO-safe.
571 */
572static inline bool
574{
575 /* Nothing to do if this is not a serializable transaction */
577 return false;
578
579 /* Check if the relation doesn't participate in predicate locking */
581 return false;
582
583 return true; /* no excuse to skip predicate locking */
584}
585
586
587/*------------------------------------------------------------------------*/
588
589/*
590 * These functions are a simple implementation of a list for this specific
591 * type of struct. If there is ever a generalized shared memory list, we
592 * should probably switch to that.
593 */
594static SERIALIZABLEXACT *
595CreatePredXact(void)
596{
598
600 return NULL;
601
605 return sxact;
606}
607
608static void
610{
612
613 dlist_delete(&sxact->xactLink);
615}
616
617/*------------------------------------------------------------------------*/
618
619/*
620 * These functions manage primitive access to the RWConflict pool and lists.
621 */
622static bool
624{
625 dlist_iter iter;
626
627 Assert(reader != writer);
628
629 /* Check the ends of the purported conflict first. */
630 if (SxactIsDoomed(reader)
632 || dlist_is_empty(&reader->outConflicts)
633 || dlist_is_empty(&writer->inConflicts))
634 return false;
635
636 /*
637 * A conflict is possible; walk the list to find out.
638 *
639 * The unconstify is needed as we have no const version of
640 * dlist_foreach().
641 */
642 dlist_foreach(iter, &unconstify(SERIALIZABLEXACT *, reader)->outConflicts)
643 {
645 dlist_container(RWConflictData, outLink, iter.cur);
646
647 if (conflict->sxactIn == writer)
648 return true;
649 }
650
651 /* No conflict found. */
652 return false;
653}
654
655static void
657{
659
660 Assert(reader != writer);
661 Assert(!RWConflictExists(reader, writer));
662
666 errmsg("not enough elements in RWConflictPool to record a read/write conflict"),
667 errhint("You might need to run fewer transactions at a time or increase \"max_connections\".")));
668
670 dlist_delete(&conflict->outLink);
671
672 conflict->sxactOut = reader;
673 conflict->sxactIn = writer;
674 dlist_push_tail(&reader->outConflicts, &conflict->outLink);
675 dlist_push_tail(&writer->inConflicts, &conflict->inLink);
676}
677
678static void
681{
683
687
691 errmsg("not enough elements in RWConflictPool to record a potential read/write conflict"),
692 errhint("You might need to run fewer transactions at a time or increase \"max_connections\".")));
693
695 dlist_delete(&conflict->outLink);
696
697 conflict->sxactOut = activeXact;
698 conflict->sxactIn = roXact;
699 dlist_push_tail(&activeXact->possibleUnsafeConflicts, &conflict->outLink);
700 dlist_push_tail(&roXact->possibleUnsafeConflicts, &conflict->inLink);
701}
702
703static void
705{
706 dlist_delete(&conflict->inLink);
707 dlist_delete(&conflict->outLink);
709}
710
711static void
713{
715
718
719 sxact->flags |= SXACT_FLAG_RO_UNSAFE;
720
721 /*
722 * We know this isn't a safe snapshot, so we can stop looking for other
723 * potential conflicts.
724 */
725 dlist_foreach_modify(iter, &sxact->possibleUnsafeConflicts)
726 {
728 dlist_container(RWConflictData, inLink, iter.cur);
729
730 Assert(!SxactIsReadOnly(conflict->sxactOut));
731 Assert(sxact == conflict->sxactIn);
732
734 }
735}
736
737/*------------------------------------------------------------------------*/
738
739/*
740 * Decide whether a Serial page number is "older" for truncation purposes.
741 * Analogous to CLOGPagePrecedes().
742 */
743static bool
745{
748
753
754 return (TransactionIdPrecedes(xid1, xid2) &&
756}
757
758static int
760{
761 TransactionId xid = *(const TransactionId *) opaque_data;
762
763 return errdetail("Could not access serializable CSN of transaction %u.", xid);
764}
765
766#ifdef USE_ASSERT_CHECKING
767static void
769{
771 offset = per_page / 2;
774 headPage,
777 oldestXact;
778
779 /* GetNewTransactionId() has assigned the last XID it can safely use. */
780 newestPage = 2 * SLRU_PAGES_PER_SEGMENT - 1; /* nothing special */
781 newestXact = newestPage * per_page + offset;
783 oldestXact = newestXact + 1;
784 oldestXact -= 1U << 31;
785 oldestPage = oldestXact / per_page;
786
787 /*
788 * In this scenario, the SLRU headPage pertains to the last ~1000 XIDs
789 * assigned. oldestXact finishes, ~2B XIDs having elapsed since it
790 * started. Further transactions cause us to summarize oldestXact to
791 * tailPage. Function must return false so SerialAdd() doesn't zero
792 * tailPage (which may contain entries for other old, recently-finished
793 * XIDs) and half the SLRU. Reaching this requires burning ~2B XIDs in
794 * single-user mode, a negligible possibility.
795 */
799
800 /*
801 * In this scenario, the SLRU headPage pertains to oldestXact. We're
802 * summarizing an XID near newestXact. (Assume few other XIDs used
803 * SERIALIZABLE, hence the minimal headPage advancement. Assume
804 * oldestXact was long-running and only recently reached the SLRU.)
805 * Function must return true to make SerialAdd() create targetPage.
806 *
807 * Today's implementation mishandles this case, but it doesn't matter
808 * enough to fix. Verify that the defect affects just one page by
809 * asserting correct treatment of its prior page. Reaching this case
810 * requires burning ~2B XIDs in single-user mode, a negligible
811 * possibility. Moreover, if it does happen, the consequence would be
812 * mild, namely a new transaction failing in SimpleLruReadPage().
813 */
817#if 0
819#endif
820}
821#endif
822
823/*
824 * GUC check_hook for serializable_buffers
825 */
826bool
827check_serial_buffers(int *newval, void **extra, GucSource source)
828{
829 return check_slru_buffers("serializable_buffers", newval);
830}
831
832/*
833 * Record a committed read-write serializable xid and the minimum
834 * commitSeqNo of any transactions to which this xid had a rw-conflict out.
835 * An invalid commitSeqNo means that there were no conflicts out from xid.
836 */
837static void
839{
842 int slotno;
844 bool isNewPage;
845 LWLock *lock;
846
848
849 targetPage = SerialPage(xid);
851
852 /*
853 * In this routine, we must hold both SerialControlLock and the SLRU bank
854 * lock simultaneously while making the SLRU data catch up with the new
855 * state that we determine.
856 */
858
859 /*
860 * If 'xid' is older than the global xmin (== tailXid), there's no need to
861 * store it, after all. This can happen if the oldest transaction holding
862 * back the global xmin just finished, making 'xid' uninteresting, but
863 * ClearOldPredicateLocks() has not yet run.
864 */
867 {
869 return;
870 }
871
872 /*
873 * If the SLRU is currently unused, zero out the whole active region from
874 * tailXid to headXid before taking it into use. Otherwise zero out only
875 * any new pages that enter the tailXid-headXid range as we advance
876 * headXid.
877 */
878 if (serialControl->headPage < 0)
879 {
881 isNewPage = true;
882 }
883 else
884 {
887 targetPage);
888 }
889
892 serialControl->headXid = xid;
893 if (isNewPage)
895
896 if (isNewPage)
897 {
898 /* Initialize intervening pages; might involve trading locks */
899 for (;;)
900 {
905 break;
907 LWLockRelease(lock);
908 }
909 }
910 else
911 {
914 }
915
917 SerialSlruCtl->shared->page_dirty[slotno] = true;
918
919 LWLockRelease(lock);
921}
922
923/*
924 * Get the minimum commitSeqNo for any conflict out for the given xid. For
925 * a transaction which exists but has no conflict out, InvalidSerCommitSeqNo
926 * will be returned.
927 */
928static SerCommitSeqNo
930{
934 int slotno;
935
937
942
944 return 0;
945
947
950 return 0;
951
952 /*
953 * The following function must be called without holding SLRU bank lock,
954 * but will return with that lock held, which must then be released.
955 */
957 SerialPage(xid), &xid);
958 val = SerialValue(slotno, xid);
960 return val;
961}
962
963/*
964 * Call this whenever there is a new xmin for active serializable
965 * transactions. We don't need to keep information on transactions which
966 * precede that. InvalidTransactionId means none active, so everything in
967 * the SLRU can be discarded.
968 */
969static void
971{
973
974 /*
975 * When no sxacts are active, nothing overlaps, set the xid values to
976 * invalid to show that there are no valid entries. Don't clear headPage,
977 * though. A new xmin might still land on that page, and we don't want to
978 * repeatedly zero out the same page.
979 */
980 if (!TransactionIdIsValid(xid))
981 {
985 return;
986 }
987
988 /*
989 * When we're recovering prepared transactions, the global xmin might move
990 * backwards depending on the order they're recovered. Normally that's not
991 * OK, but during recovery no serializable transactions will commit, so
992 * the SLRU is empty and we can get away with it.
993 */
994 if (RecoveryInProgress())
995 {
999 {
1000 serialControl->tailXid = xid;
1001 }
1003 return;
1004 }
1005
1008
1009 serialControl->tailXid = xid;
1010
1012}
1013
1014/*
1015 * Perform a checkpoint --- either during shutdown, or on-the-fly
1016 *
1017 * We don't have any data that needs to survive a restart, but this is a
1018 * convenient place to truncate the SLRU.
1019 */
1020void
1022{
1024
1026
1027 /* Exit quickly if the SLRU is currently not in use. */
1028 if (serialControl->headPage < 0)
1029 {
1031 return;
1032 }
1033
1035 {
1037
1039
1040 /*
1041 * It is possible for the tailXid to be ahead of the headXid. This
1042 * occurs if we checkpoint while there are in-progress serializable
1043 * transaction(s) advancing the tail but we are yet to summarize the
1044 * transactions. In this case, we cut off up to the headPage and the
1045 * next summary will advance the headXid.
1046 */
1048 {
1049 /* We can truncate the SLRU up to the page containing tailXid */
1051 }
1052 else
1054 }
1055 else
1056 {
1057 /*----------
1058 * The SLRU is no longer needed. Truncate to head before we set head
1059 * invalid.
1060 *
1061 * XXX: It's possible that the SLRU is not needed again until XID
1062 * wrap-around has happened, so that the segment containing headPage
1063 * that we leave behind will appear to be new again. In that case it
1064 * won't be removed until XID horizon advances enough to make it
1065 * current again.
1066 *
1067 * XXX: This should happen in vac_truncate_clog(), not in checkpoints.
1068 * Consider this scenario, starting from a system with no in-progress
1069 * transactions and VACUUM FREEZE having maximized oldestXact:
1070 * - Start a SERIALIZABLE transaction.
1071 * - Start, finish, and summarize a SERIALIZABLE transaction, creating
1072 * one SLRU page.
1073 * - Consume XIDs to reach xidStopLimit.
1074 * - Finish all transactions. Due to the long-running SERIALIZABLE
1075 * transaction, earlier checkpoints did not touch headPage. The
1076 * next checkpoint will change it, but that checkpoint happens after
1077 * the end of the scenario.
1078 * - VACUUM to advance XID limits.
1079 * - Consume ~2M XIDs, crossing the former xidWrapLimit.
1080 * - Start, finish, and summarize a SERIALIZABLE transaction.
1081 * SerialAdd() declines to create the targetPage, because headPage
1082 * is not regarded as in the past relative to that targetPage. The
1083 * transaction instigating the summarize fails in
1084 * SimpleLruReadPage().
1085 */
1087 serialControl->headPage = -1;
1088 }
1089
1091
1092 /*
1093 * Truncate away pages that are no longer required. Note that no
1094 * additional locking is required, because this is only called as part of
1095 * a checkpoint, and the validity limits have already been determined.
1096 */
1098
1099 /*
1100 * Write dirty SLRU pages to disk
1101 *
1102 * This is not actually necessary from a correctness point of view. We do
1103 * it merely as a debugging aid.
1104 *
1105 * We're doing this after the truncation to avoid writing pages right
1106 * before deleting the file in which they sit, which would be completely
1107 * pointless.
1108 */
1110}
1111
1112/*------------------------------------------------------------------------*/
1113
1114/*
1115 * PredicateLockShmemRequest -- Register the predicate locking data structures.
1116 */
1117static void
1119{
1123
1124 /*
1125 * Register hash table for PREDICATELOCKTARGET structs. This stores
1126 * per-predicate-lock-target information.
1127 */
1129
1130 ShmemRequestHash(.name = "PREDICATELOCKTARGET hash",
1133 .hash_info.keysize = sizeof(PREDICATELOCKTARGETTAG),
1134 .hash_info.entrysize = sizeof(PREDICATELOCKTARGET),
1135 .hash_info.num_partitions = NUM_PREDICATELOCK_PARTITIONS,
1137 );
1138
1139 /*
1140 * Allocate hash table for PREDICATELOCK structs. This stores per
1141 * xact-lock-of-a-target information.
1142 *
1143 * Assume an average of 2 xacts per target.
1144 */
1146
1147 ShmemRequestHash(.name = "PREDICATELOCK hash",
1148 .nelems = max_predicate_locks,
1149 .ptr = &PredicateLockHash,
1150 .hash_info.keysize = sizeof(PREDICATELOCKTAG),
1151 .hash_info.entrysize = sizeof(PREDICATELOCK),
1152 .hash_info.hash = predicatelock_hash,
1153 .hash_info.num_partitions = NUM_PREDICATELOCK_PARTITIONS,
1155 );
1156
1157 /*
1158 * Compute size for serializable transaction hashtable. Note these
1159 * calculations must agree with PredicateLockShmemSize!
1160 *
1161 * Assume an average of 10 predicate locking transactions per backend.
1162 * This allows aggressive cleanup while detail is present before data must
1163 * be summarized for storage in SLRU and the "dummy" transaction.
1164 */
1166
1167 /*
1168 * Register a list to hold information on transactions participating in
1169 * predicate locking.
1170 */
1171 ShmemRequestStruct(.name = "PredXactList",
1174 sizeof(SERIALIZABLEXACT)))),
1175 .ptr = (void **) &PredXact,
1176 );
1177
1178 /*
1179 * Register hash table for SERIALIZABLEXID structs. This stores per-xid
1180 * information for serializable transactions which have accessed data.
1181 */
1182 ShmemRequestHash(.name = "SERIALIZABLEXID hash",
1183 .nelems = max_serializable_xacts,
1184 .ptr = &SerializableXidHash,
1185 .hash_info.keysize = sizeof(SERIALIZABLEXIDTAG),
1186 .hash_info.entrysize = sizeof(SERIALIZABLEXID),
1187 .hash_flags = HASH_ELEM | HASH_BLOBS | HASH_FIXED_SIZE,
1188 );
1189
1190 /*
1191 * Allocate space for tracking rw-conflicts in lists attached to the
1192 * transactions.
1193 *
1194 * Assume an average of 5 conflicts per transaction. Calculations suggest
1195 * that this will prevent resource exhaustion in even the most pessimal
1196 * loads up to max_connections = 200 with all 200 connections pounding the
1197 * database with serializable transactions. Beyond that, there may be
1198 * occasional transactions canceled when trying to flag conflicts. That's
1199 * probably OK.
1200 */
1202
1203 ShmemRequestStruct(.name = "RWConflictPool",
1206 .ptr = (void **) &RWConflictPool,
1207 );
1208
1209 ShmemRequestStruct(.name = "FinishedSerializableTransactions",
1210 .size = sizeof(dlist_head),
1211 .ptr = (void **) &FinishedSerializableTransactions,
1212 );
1213
1214 /*
1215 * Initialize the SLRU storage for old committed serializable
1216 * transactions.
1217 */
1219 .name = "serializable",
1220 .Dir = "pg_serial",
1221 .long_segment_names = false,
1222
1223 .nslots = serializable_buffers,
1224
1225 .sync_handler = SYNC_HANDLER_NONE,
1226 .PagePrecedes = SerialPagePrecedesLogically,
1227 .errdetail_for_io_error = serial_errdetail_for_io_error,
1228
1229 .buffer_tranche_id = LWTRANCHE_SERIAL_BUFFER,
1230 .bank_tranche_id = LWTRANCHE_SERIAL_SLRU,
1231 );
1232#ifdef USE_ASSERT_CHECKING
1234#endif
1235
1236 ShmemRequestStruct(.name = "SerialControlData",
1237 .size = sizeof(SerialControlData),
1238 .ptr = (void **) &serialControl,
1239 );
1240}
1241
1242static void
1244{
1245 int max_rw_conflicts;
1246 bool found;
1247
1248 /*
1249 * Reserve a dummy entry in the hash table; we use it to make sure there's
1250 * always one entry available when we need to split or combine a page,
1251 * because running out of space there could mean aborting a
1252 * non-serializable transaction.
1253 */
1255 HASH_ENTER, &found);
1256 Assert(!found);
1257
1268 /* Add all elements to available list, clean. */
1269 for (int i = 0; i < max_serializable_xacts; i++)
1270 {
1274 }
1291
1292 /* Initialize the rw-conflict pool */
1296
1298
1299 /* Add all elements to available list, clean. */
1300 for (int i = 0; i < max_rw_conflicts; i++)
1301 {
1304 }
1305
1306 /* Initialize the list of finished serializable transactions */
1308
1309 /* Initialize SerialControl to reflect empty SLRU. */
1311 serialControl->headPage = -1;
1315
1317
1318 /* This never changes, so let's keep a local copy. */
1320
1321 /* Pre-calculate the hash and partition lock of the scratch entry */
1324}
1325
1326static void
1328{
1329 /* This never changes, so let's keep a local copy. */
1331
1332 /* Pre-calculate the hash and partition lock of the scratch entry */
1335}
1336
1337/*
1338 * Compute the hash code associated with a PREDICATELOCKTAG.
1339 *
1340 * Because we want to use just one set of partition locks for both the
1341 * PREDICATELOCKTARGET and PREDICATELOCK hash tables, we have to make sure
1342 * that PREDICATELOCKs fall into the same partition number as their
1343 * associated PREDICATELOCKTARGETs. dynahash.c expects the partition number
1344 * to be the low-order bits of the hash code, and therefore a
1345 * PREDICATELOCKTAG's hash code must have the same low-order bits as the
1346 * associated PREDICATELOCKTARGETTAG's hash code. We achieve this with this
1347 * specialized hash function.
1348 */
1349static uint32
1350predicatelock_hash(const void *key, Size keysize)
1351{
1352 const PREDICATELOCKTAG *predicatelocktag = (const PREDICATELOCKTAG *) key;
1354
1355 Assert(keysize == sizeof(PREDICATELOCKTAG));
1356
1357 /* Look into the associated target object, and compute its hash code */
1359
1361}
1362
1363
1364/*
1365 * GetPredicateLockStatusData
1366 * Return a table containing the internal state of the predicate
1367 * lock manager for use in pg_lock_status.
1368 *
1369 * Like GetLockStatusData, this function tries to hold the partition LWLocks
1370 * for as short a time as possible by returning two arrays that simply
1371 * contain the PREDICATELOCKTARGETTAG and SERIALIZABLEXACT for each lock
1372 * table entry. Multiple copies of the same PREDICATELOCKTARGETTAG and
1373 * SERIALIZABLEXACT will likely appear.
1374 */
1377{
1379 int i;
1380 int els,
1381 el;
1384
1386
1387 /*
1388 * To ensure consistency, hold all of the partition locks simultaneously,
1389 * acquiring them in ascending order, followed by SerializableXactHashLock.
1390 */
1391 for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
1394
1395 /* Get number of locks and allocate appropriately-sized arrays. */
1397 data->nelements = els;
1400
1401
1402 /* Scan through PredicateLockHash and copy contents */
1404
1405 el = 0;
1406
1408 {
1409 data->locktags[el] = predlock->tag.myTarget->tag;
1410 data->xacts[el] = *predlock->tag.myXact;
1411 el++;
1412 }
1413
1414 Assert(el == els);
1415
1416 /* Release locks in reverse order */
1418 for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
1420
1421 return data;
1422}
1423
1424/*
1425 * Free up shared memory structures by pushing the oldest sxact (the one at
1426 * the front of the FinishedSerializableTransactions list) into summary form.
1427 * Each call will free exactly one SERIALIZABLEXACT structure and may also
1428 * free one or more of these structures: SERIALIZABLEXID, PREDICATELOCK,
1429 * PREDICATELOCKTARGET, RWConflictData.
1430 */
1431static void
1433{
1435
1437
1438 /*
1439 * This function is only called if there are no sxact slots available.
1440 * Some of them must belong to old, already-finished transactions, so
1441 * there should be something in FinishedSerializableTransactions list that
1442 * we can summarize. However, there's a race condition: while we were not
1443 * holding any locks, a transaction might have ended and cleaned up all
1444 * the finished sxact entries already, freeing up their sxact slots. In
1445 * that case, we have nothing to do here. The caller will find one of the
1446 * slots released by the other backend when it retries.
1447 */
1449 {
1451 return;
1452 }
1453
1454 /*
1455 * Grab the first sxact off the finished list -- this will be the earliest
1456 * commit. Remove it from the list.
1457 */
1460 dlist_delete_thoroughly(&sxact->finishedLink);
1461
1462 /* Add to SLRU summary information. */
1465 ? sxact->SeqNo.earliestOutConflictCommit : InvalidSerCommitSeqNo);
1466
1467 /* Summarize and release the detail. */
1468 ReleaseOneSerializableXact(sxact, false, true);
1469
1471}
1472
1473/*
1474 * GetSafeSnapshot
1475 * Obtain and register a snapshot for a READ ONLY DEFERRABLE
1476 * transaction. Ensures that the snapshot is "safe", i.e. a
1477 * read-only transaction running on it can execute serializably
1478 * without further checks. This requires waiting for concurrent
1479 * transactions to complete, and retrying with a new snapshot if
1480 * one of them could possibly create a conflict.
1481 *
1482 * As with GetSerializableTransactionSnapshot (which this is a subroutine
1483 * for), the passed-in Snapshot pointer should reference a static data
1484 * area that can safely be passed to GetSnapshotData.
1485 */
1486static Snapshot
1488{
1489 Snapshot snapshot;
1490
1492
1493 while (true)
1494 {
1495 /*
1496 * GetSerializableTransactionSnapshotInt is going to call
1497 * GetSnapshotData, so we need to provide it the static snapshot area
1498 * our caller passed to us. The pointer returned is actually the same
1499 * one passed to it, but we avoid assuming that here.
1500 */
1502 NULL, InvalidPid);
1503
1505 return snapshot; /* no concurrent r/w xacts; it's safe */
1506
1508
1509 /*
1510 * Wait for concurrent transactions to finish. Stop early if one of
1511 * them marked us as conflicted.
1512 */
1516 {
1520 }
1522
1524 {
1526 break; /* success */
1527 }
1528
1530
1531 /* else, need to retry... */
1534 errmsg_internal("deferrable snapshot was unsafe; trying a new one")));
1535 ReleasePredicateLocks(false, false);
1536 }
1537
1538 /*
1539 * Now we have a safe snapshot, so we don't need to do any further checks.
1540 */
1542 ReleasePredicateLocks(false, true);
1543
1544 return snapshot;
1545}
1546
1547/*
1548 * GetSafeSnapshotBlockingPids
1549 * If the specified process is currently blocked in GetSafeSnapshot,
1550 * write the process IDs of all processes that it is blocked by
1551 * into the caller-supplied buffer output[]. The list is truncated at
1552 * output_size, and the number of PIDs written into the buffer is
1553 * returned. Returns zero if the given PID is not currently blocked
1554 * in GetSafeSnapshot.
1555 */
1556int
1558{
1559 int num_written = 0;
1560 dlist_iter iter;
1562
1564
1565 /* Find blocked_pid's SERIALIZABLEXACT by linear search. */
1567 {
1569 dlist_container(SERIALIZABLEXACT, xactLink, iter.cur);
1570
1571 if (sxact->pid == blocked_pid)
1572 {
1574 break;
1575 }
1576 }
1577
1578 /* Did we find it, and is it currently waiting in GetSafeSnapshot? */
1580 {
1581 /* Traverse the list of possible unsafe conflicts collecting PIDs. */
1582 dlist_foreach(iter, &blocking_sxact->possibleUnsafeConflicts)
1583 {
1585 dlist_container(RWConflictData, inLink, iter.cur);
1586
1587 output[num_written++] = possibleUnsafeConflict->sxactOut->pid;
1588
1589 if (num_written >= output_size)
1590 break;
1591 }
1592 }
1593
1595
1596 return num_written;
1597}
1598
1599/*
1600 * Acquire a snapshot that can be used for the current transaction.
1601 *
1602 * Make sure we have a SERIALIZABLEXACT reference in MySerializableXact.
1603 * It should be current for this process and be contained in PredXact.
1604 *
1605 * The passed-in Snapshot pointer should reference a static data area that
1606 * can safely be passed to GetSnapshotData. The return value is actually
1607 * always this same pointer; no new snapshot data structure is allocated
1608 * within this function.
1609 */
1612{
1614
1615 /*
1616 * Can't use serializable mode while recovery is still active, as it is,
1617 * for example, on a hot standby. We could get here despite the check in
1618 * check_transaction_isolation() if default_transaction_isolation is set
1619 * to serializable, so phrase the hint accordingly.
1620 */
1621 if (RecoveryInProgress())
1622 ereport(ERROR,
1624 errmsg("cannot use serializable mode in a hot standby"),
1625 errdetail("\"default_transaction_isolation\" is set to \"serializable\"."),
1626 errhint("You can use \"SET default_transaction_isolation = 'repeatable read'\" to change the default.")));
1627
1628 /*
1629 * A special optimization is available for SERIALIZABLE READ ONLY
1630 * DEFERRABLE transactions -- we can wait for a suitable snapshot and
1631 * thereby avoid all SSI overhead once it's running.
1632 */
1634 return GetSafeSnapshot(snapshot);
1635
1637 NULL, InvalidPid);
1638}
1639
1640/*
1641 * Import a snapshot to be used for the current transaction.
1642 *
1643 * This is nearly the same as GetSerializableTransactionSnapshot, except that
1644 * we don't take a new snapshot, but rather use the data we're handed.
1645 *
1646 * The caller must have verified that the snapshot came from a serializable
1647 * transaction; and if we're read-write, the source transaction must not be
1648 * read-only.
1649 */
1650void
1653 int sourcepid)
1654{
1656
1657 /*
1658 * If this is called by parallel.c in a parallel worker, we don't want to
1659 * create a SERIALIZABLEXACT just yet because the leader's
1660 * SERIALIZABLEXACT will be installed with AttachSerializableXact(). We
1661 * also don't want to reject SERIALIZABLE READ ONLY DEFERRABLE in this
1662 * case, because the leader has already determined that the snapshot it
1663 * has passed us is safe. So there is nothing for us to do.
1664 */
1665 if (IsParallelWorker())
1666 return;
1667
1668 /*
1669 * We do not allow SERIALIZABLE READ ONLY DEFERRABLE transactions to
1670 * import snapshots, since there's no way to wait for a safe snapshot when
1671 * we're using the snapshot we're told to. (XXX instead of throwing an error,
1672 * we could just ignore the XactDeferrable flag?)
1673 */
1675 ereport(ERROR,
1677 errmsg("a snapshot-importing transaction must not be READ ONLY DEFERRABLE")));
1678
1680 sourcepid);
1681}
1682
1683/*
1684 * Guts of GetSerializableTransactionSnapshot
1685 *
1686 * If sourcevxid is valid, this is actually an import operation and we should
1687 * skip calling GetSnapshotData, because the snapshot contents are already
1688 * loaded up. HOWEVER: to avoid race conditions, we must check that the
1689 * source xact is still running after we acquire SerializableXactHashLock.
1690 * We do that by calling ProcArrayInstallImportedXmin.
1691 */
1692static Snapshot
1695 int sourcepid)
1696{
1697 PGPROC *proc;
1700 *othersxact;
1701
1702 /* We only do this for serializable transactions. Once. */
1704
1706
1707 /*
1708 * Since all parts of a serializable transaction must use the same
1709 * snapshot, it is too late to establish one after a parallel operation
1710 * has begun.
1711 */
1712 if (IsInParallelMode())
1713 elog(ERROR, "cannot establish serializable snapshot during a parallel operation");
1714
1715 proc = MyProc;
1716 Assert(proc != NULL);
1717 GET_VXID_FROM_PGPROC(vxid, *proc);
1718
1719 /*
1720 * First we get the sxact structure, which may involve looping and access
1721 * to the "finished" list to free a structure for use.
1722 *
1723 * We must hold SerializableXactHashLock when taking/checking the snapshot
1724 * to avoid race conditions, for much the same reasons that
1725 * GetSnapshotData takes the ProcArrayLock. Since we might have to
1726 * release SerializableXactHashLock to call SummarizeOldestCommittedSxact,
1727 * this means we have to create the sxact first, which is a bit annoying
1728 * (in particular, an elog(ERROR) in procarray.c would cause us to leak
1729 * the sxact). Consider refactoring to avoid this.
1730 */
1731#ifdef TEST_SUMMARIZE_SERIAL
1733#endif
1735 do
1736 {
1738 /* If null, push out committed sxact to SLRU summary & retry. */
1739 if (!sxact)
1740 {
1744 }
1745 } while (!sxact);
1746
1747 /* Get the snapshot, or check that it's safe to use */
1748 if (!sourcevxid)
1749 snapshot = GetSnapshotData(snapshot);
1750 else if (!ProcArrayInstallImportedXmin(snapshot->xmin, sourcevxid))
1751 {
1754 ereport(ERROR,
1756 errmsg("could not import the requested snapshot"),
1757 errdetail("The source process with PID %d is not running anymore.",
1758 sourcepid)));
1759 }
1760
1761 /*
1762 * If there are no serializable transactions which are not read-only, we
1763 * can "opt out" of predicate locking and conflict checking for a
1764 * read-only transaction.
1765 *
1766 * The reason this is safe is that a read-only transaction can only become
1767 * part of a dangerous structure if it overlaps a writable transaction
1768 * which in turn overlaps a writable transaction which committed before
1769 * the read-only transaction started. A new writable transaction can
1770 * overlap this one, but it can't meet the other condition of overlapping
1771 * a transaction which committed before this one started.
1772 */
1774 {
1777 return snapshot;
1778 }
1779
1780 /* Initialize the structure. */
1781 sxact->vxid = vxid;
1782 sxact->SeqNo.lastCommitBeforeSnapshot = PredXact->LastSxactCommitSeqNo;
1783 sxact->prepareSeqNo = InvalidSerCommitSeqNo;
1784 sxact->commitSeqNo = InvalidSerCommitSeqNo;
1785 dlist_init(&(sxact->outConflicts));
1786 dlist_init(&(sxact->inConflicts));
1787 dlist_init(&(sxact->possibleUnsafeConflicts));
1788 sxact->topXid = GetTopTransactionIdIfAny();
1789 sxact->finishedBefore = InvalidTransactionId;
1790 sxact->xmin = snapshot->xmin;
1791 sxact->pid = MyProcPid;
1792 sxact->pgprocno = MyProcNumber;
1793 dlist_init(&sxact->predicateLocks);
1794 dlist_node_init(&sxact->finishedLink);
1795 sxact->flags = 0;
1796 if (XactReadOnly)
1797 {
1798 dlist_iter iter;
1799
1800 sxact->flags |= SXACT_FLAG_READ_ONLY;
1801
1802 /*
1803 * Register all concurrent r/w transactions as possible conflicts; if
1804 * all of them commit without any outgoing conflicts to earlier
1805 * transactions then this snapshot can be deemed safe (and we can run
1806 * without tracking predicate locks).
1807 */
1809 {
1811
1815 {
1817 }
1818 }
1819
1820 /*
1821 * If we didn't find any possibly unsafe conflicts because every
1822 * uncommitted writable transaction turned out to be doomed, then we
1823 * can "opt out" immediately. See comments above the earlier check
1824 * for PredXact->WritableSxactCount == 0.
1825 */
1826 if (dlist_is_empty(&sxact->possibleUnsafeConflicts))
1827 {
1830 return snapshot;
1831 }
1832 }
1833 else
1834 {
1838 }
1839
1840 /* Maintain serializable global xmin info. */
1842 {
1844 PredXact->SxactGlobalXmin = snapshot->xmin;
1846 SerialSetActiveSerXmin(snapshot->xmin);
1847 }
1848 else if (TransactionIdEquals(snapshot->xmin, PredXact->SxactGlobalXmin))
1849 {
1852 }
1853 else
1854 {
1856 }
1857
1859 MyXactDidWrite = false; /* haven't written anything yet */
1860
1862
1864
1865 return snapshot;
1866}
1867
1868static void
1870{
1872
1873 /* Initialize the backend-local hash table of parent locks */
1875 hash_ctl.keysize = sizeof(PREDICATELOCKTARGETTAG);
1876 hash_ctl.entrysize = sizeof(LOCALPREDICATELOCK);
1877 LocalPredicateLockHash = hash_create("Local predicate lock",
1879 &hash_ctl,
1881}
1882
1883/*
1884 * Register the top level XID in SerializableXidHash.
1885 * Also store it for easy reference in MySerializableXact.
1886 */
1887void
1889{
1892 bool found;
1893
1894 /*
1895 * If we're not tracking predicate lock data for this transaction, we
1896 * should ignore the request and return quickly.
1897 */
1899 return;
1900
1901 /* We should have a valid XID and be at the top level. */
1903
1905
1906 /* This should only be done once per transaction. */
1908
1910
1911 sxidtag.xid = xid;
1913 &sxidtag,
1914 HASH_ENTER, &found);
1915 Assert(!found);
1916
1917 /* Initialize the structure. */
1918 sxid->myXact = MySerializableXact;
1920}
1921
1922
1923/*
1924 * Check whether there are any predicate locks held by any transaction
1925 * for the page at the given block number.
1926 *
1927 * Note that the transaction may be completed but not yet subject to
1928 * cleanup due to overlapping serializable transactions. This must
1929 * return valid information regardless of transaction isolation level.
1930 *
1931 * Also note that this doesn't check for a conflicting relation lock,
1932 * just a lock specifically on the given page.
1933 *
1934 * One use is to support proper behavior during GiST index vacuum.
1935 */
1936bool
1938{
1942 PREDICATELOCKTARGET *target;
1943
1945 relation->rd_locator.dbOid,
1946 relation->rd_id,
1947 blkno);
1948
1952 target = (PREDICATELOCKTARGET *)
1955 HASH_FIND, NULL);
1957
1958 return (target != NULL);
1959}
1960
1961
1962/*
1963 * Check whether a particular lock is held by this transaction.
1964 *
1965 * Important note: this function may return false even if the lock is
1966 * being held, because it uses the local lock table which is not
1967 * updated if another transaction modifies our lock list (e.g. to
1968 * split an index page). It can also return true when a coarser
1969 * granularity lock that covers this target is being held. Be careful
1970 * to only use this function in circumstances where such errors are
1971 * acceptable!
1972 */
1973static bool
1975{
1976 LOCALPREDICATELOCK *lock;
1977
1978 /* check local hash table */
1980 targettag,
1981 HASH_FIND, NULL);
1982
1983 if (!lock)
1984 return false;
1985
1986 /*
1987 * Found entry in the table, but still need to check whether it's actually
1988 * held -- it could just be a parent of some held lock.
1989 */
1990 return lock->held;
1991}
1992
1993/*
1994 * Return the parent lock tag in the lock hierarchy: the next coarser
1995 * lock that covers the provided tag.
1996 *
1997 * Returns true and sets *parent to the parent tag if one exists,
1998 * returns false if none exists.
1999 */
2000static bool
2002 PREDICATELOCKTARGETTAG *parent)
2003{
2004 switch (GET_PREDICATELOCKTARGETTAG_TYPE(*tag))
2005 {
2007 /* relation locks have no parent lock */
2008 return false;
2009
2010 case PREDLOCKTAG_PAGE:
2011 /* parent lock is relation lock */
2015
2016 return true;
2017
2018 case PREDLOCKTAG_TUPLE:
2019 /* parent lock is page lock */
2024 return true;
2025 }
2026
2027 /* not reachable */
2028 Assert(false);
2029 return false;
2030}
2031
2032/*
2033 * Check whether the lock we are considering is already covered by a
2034 * coarser lock for our transaction.
2035 *
2036 * Like PredicateLockExists, this function might return a false
2037 * negative, but it will never return a false positive.
2038 */
2039static bool
2041{
2043 parenttag;
2044
2046
2047 /* check parents iteratively until no more */
2049 {
2052 return true;
2053 }
2054
2055 /* no more parents to check; lock is not covered */
2056 return false;
2057}
2058
2059/*
2060 * Remove the dummy entry from the predicate lock target hash, to free up some
2061 * scratch space. The caller must be holding SerializablePredicateListLock,
2062 * and must restore the entry with RestoreScratchTarget() before releasing the
2063 * lock.
2064 *
2065 * If lockheld is true, the caller is already holding the partition lock
2066 * of the partition containing the scratch entry.
2067 */
2068static void
2070{
2071 bool found;
2072
2074
2075 if (!lockheld)
2080 HASH_REMOVE, &found);
2081 Assert(found);
2082 if (!lockheld)
2084}
2085
2086/*
2087 * Re-insert the dummy entry in predicate lock target hash.
2088 */
2089static void
2091{
2092 bool found;
2093
2095
2096 if (!lockheld)
2101 HASH_ENTER, &found);
2102 Assert(!found);
2103 if (!lockheld)
2105}
2106
2107/*
2108 * Check whether the list of related predicate locks is empty for a
2109 * predicate lock target, and remove the target if it is.
2110 */
2111static void
2113{
2115
2117
2118 /* Can't remove it until no locks at this target. */
2119 if (!dlist_is_empty(&target->predicateLocks))
2120 return;
2121
2122 /* Actually remove the target. */
2124 &target->tag,
2126 HASH_REMOVE, NULL);
2127 Assert(rmtarget == target);
2128}
2129
2130/*
2131 * Delete child target locks owned by this process.
2132 * This implementation is assuming that the usage of each target tag field
2133 * is uniform. No need to make this hard if we don't have to.
2134 *
2135 * We acquire an LWLock in the case of parallel mode, because worker
2136 * backends have access to the leader's SERIALIZABLEXACT. Otherwise,
2137 * we aren't acquiring LWLocks for the predicate lock or lock
2138 * target structures associated with this transaction unless we're going
2139 * to modify them, because no other process is permitted to modify our
2140 * locks.
2141 */
2142static void
2144{
2147 dlist_mutable_iter iter;
2148
2151 if (IsInParallelMode())
2152 LWLockAcquire(&sxact->perXactPredicateListLock, LW_EXCLUSIVE);
2153
2154 dlist_foreach_modify(iter, &sxact->predicateLocks)
2155 {
2159
2160 predlock = dlist_container(PREDICATELOCK, xactLink, iter.cur);
2161
2162 oldlocktag = predlock->tag;
2163 Assert(oldlocktag.myXact == sxact);
2164 oldtarget = oldlocktag.myTarget;
2165 oldtargettag = oldtarget->tag;
2166
2168 {
2172
2175
2177
2178 dlist_delete(&predlock->xactLink);
2179 dlist_delete(&predlock->targetLink);
2182 &oldlocktag,
2185 HASH_REMOVE, NULL);
2187
2189
2191
2193 }
2194 }
2195 if (IsInParallelMode())
2196 LWLockRelease(&sxact->perXactPredicateListLock);
2198}
2199
2200/*
2201 * Returns the promotion limit for a given predicate lock target. This is the
2202 * max number of descendant locks allowed before promoting to the specified
2203 * tag. Note that the limit includes non-direct descendants (e.g., both tuples
2204 * and pages for a relation lock).
2205 *
2206 * Currently the default limit is 2 for a page lock, and half of the value of
2207 * max_pred_locks_per_transaction - 1 for a relation lock, to match behavior
2208 * of earlier releases when upgrading.
2209 *
2210 * TODO SSI: We should probably add additional GUCs to allow a maximum ratio
2211 * of page and tuple locks based on the pages in a relation, and the maximum
2212 * ratio of tuple locks to tuples in a page. This would provide more
2213 * generally "balanced" allocation of locks to where they are most useful,
2214 * while still allowing the absolute numbers to prevent one relation from
2215 * tying up all predicate lock resources.
2216 */
2217static int
2219{
2220 switch (GET_PREDICATELOCKTARGETTAG_TYPE(*tag))
2221 {
2227
2228 case PREDLOCKTAG_PAGE:
2230
2231 case PREDLOCKTAG_TUPLE:
2232
2233 /*
2234 * not reachable: nothing is finer-granularity than a tuple, so we
2235 * should never try to promote to it.
2236 */
2237 Assert(false);
2238 return 0;
2239 }
2240
2241 /* not reachable */
2242 Assert(false);
2243 return 0;
2244}
2245
2246/*
2247 * For all ancestors of a newly-acquired predicate lock, increment
2248 * their child count in the parent hash table. If any of them have
2249 * more descendants than their promotion threshold, acquire the
2250 * coarsest such lock.
2251 *
2252 * Returns true if a parent lock was acquired and false otherwise.
2253 */
2254static bool
2256{
2258 nexttag,
2261 bool found,
2262 promote;
2263
2264 promote = false;
2265
2266 targettag = *reqtag;
2267
2268 /* check parents iteratively */
2270 {
2273 &targettag,
2274 HASH_ENTER,
2275 &found);
2276 if (!found)
2277 {
2278 parentlock->held = false;
2279 parentlock->childLocks = 1;
2280 }
2281 else
2282 parentlock->childLocks++;
2283
2284 if (parentlock->childLocks >
2286 {
2287 /*
2288 * We should promote to this parent lock. Continue to check its
2289 * ancestors, however, both to get their child counts right and to
2290 * check whether we should just go ahead and promote to one of
2291 * them.
2292 */
2294 promote = true;
2295 }
2296 }
2297
2298 if (promote)
2299 {
2300 /* acquire coarsest ancestor eligible for promotion */
2302 return true;
2303 }
2304 else
2305 return false;
2306}
2307
2308/*
2309 * When releasing a lock, decrement the child count on all ancestor
2310 * locks.
2311 *
2312 * This is called only when releasing a lock via
2313 * DeleteChildTargetLocks (i.e. when a lock becomes redundant because
2314 * we've acquired its parent, possibly due to promotion) or when a new
2315 * MVCC write lock makes the predicate lock unnecessary. There's no
2316 * point in calling it when locks are released at transaction end, as
2317 * this information is no longer needed.
2318 */
2319static void
2321{
2323 nexttag;
2324
2326
2328 {
2332
2338 HASH_FIND, NULL);
2339
2340 /*
2341 * There's a small chance the parent lock doesn't exist in the lock
2342 * table. This can happen if we prematurely removed it because an
2343 * index split caused the child refcount to be off.
2344 */
2345 if (parentlock == NULL)
2346 continue;
2347
2348 parentlock->childLocks--;
2349
2350 /*
2351 * Under similar circumstances the parent lock's refcount might be
2352 * zero. This only happens if we're holding that lock (otherwise we
2353 * would have removed the entry).
2354 */
2355 if (parentlock->childLocks < 0)
2356 {
2357 Assert(parentlock->held);
2358 parentlock->childLocks = 0;
2359 }
2360
2361 if ((parentlock->childLocks == 0) && (!parentlock->held))
2362 {
2366 HASH_REMOVE, NULL);
2368 }
2369 }
2370}
2371
2372/*
2373 * Indicate that a predicate lock on the given target is held by the
2374 * specified transaction. Has no effect if the lock is already held.
2375 *
2376 * This updates the lock table and the sxact's lock list, and creates
2377 * the lock target if necessary, but does *not* do anything related to
2378 * granularity promotion or the local lock table. See
2379 * PredicateLockAcquire for that.
2380 */
2381static void
2385{
2386 PREDICATELOCKTARGET *target;
2387 PREDICATELOCKTAG locktag;
2388 PREDICATELOCK *lock;
2390 bool found;
2391
2393
2395 if (IsInParallelMode())
2396 LWLockAcquire(&sxact->perXactPredicateListLock, LW_EXCLUSIVE);
2398
2399 /* Make sure that the target is represented. */
2400 target = (PREDICATELOCKTARGET *)
2403 HASH_ENTER_NULL, &found);
2404 if (!target)
2405 ereport(ERROR,
2407 errmsg("out of shared memory"),
2408 errhint("You might need to increase \"%s\".", "max_pred_locks_per_transaction")));
2409 if (!found)
2410 dlist_init(&target->predicateLocks);
2411
2412 /* We've got the sxact and target, make sure they're joined. */
2413 locktag.myTarget = target;
2414 locktag.myXact = sxact;
2415 lock = (PREDICATELOCK *)
2418 HASH_ENTER_NULL, &found);
2419 if (!lock)
2420 ereport(ERROR,
2422 errmsg("out of shared memory"),
2423 errhint("You might need to increase \"%s\".", "max_pred_locks_per_transaction")));
2424
2425 if (!found)
2426 {
2427 dlist_push_tail(&target->predicateLocks, &lock->targetLink);
2428 dlist_push_tail(&sxact->predicateLocks, &lock->xactLink);
2430 }
2431
2433 if (IsInParallelMode())
2434 LWLockRelease(&sxact->perXactPredicateListLock);
2436}
2437
2438/*
2439 * Acquire a predicate lock on the specified target for the current
2440 * connection if not already held. This updates the local lock table
2441 * and uses it to implement granularity promotion. It will consolidate
2442 * multiple locks into a coarser lock if warranted, and will release
2443 * any finer-grained locks covered by the new one.
2444 */
2445static void
2447{
2449 bool found;
2451
2452 /* Do we have the lock already, or a covering lock? */
2454 return;
2455
2457 return;
2458
2459 /* the same hash and LW lock apply to the lock target and the local lock. */
2461
2462 /* Acquire lock in local table */
2466 HASH_ENTER, &found);
2467 locallock->held = true;
2468 if (!found)
2469 locallock->childLocks = 0;
2470
2471 /* Actually create the lock */
2473
2474 /*
2475 * Lock has been acquired. Check whether it should be promoted to a
2476 * coarser granularity, or whether there are finer-granularity locks to
2477 * clean up.
2478 */
2480 {
2481 /*
2482 * Lock request was promoted to a coarser-granularity lock, and that
2483 * lock was acquired. It will delete this lock and any of its
2484 * children, so we're done.
2485 */
2486 }
2487 else
2488 {
2489 /* Clean up any finer-granularity locks */
2492 }
2493}
2494
2495
2496/*
2497 * PredicateLockRelation
2498 *
2499 * Gets a predicate lock at the relation level.
2500 * Skip if not in full serializable transaction isolation level.
2501 * Skip if this is a temporary table.
2502 * Clear any finer-grained predicate locks this session has on the relation.
2503 */
2504void
2505PredicateLockRelation(Relation relation, Snapshot snapshot)
2506{
2508
2509 if (!SerializationNeededForRead(relation, snapshot))
2510 return;
2511
2513 relation->rd_locator.dbOid,
2514 relation->rd_id);
2516}
2517
2518/*
2519 * PredicateLockPage
2520 *
2521 * Gets a predicate lock at the page level.
2522 * Skip if not in full serializable transaction isolation level.
2523 * Skip if this is a temporary table.
2524 * Skip if a coarser predicate lock already covers this page.
2525 * Clear any finer-grained predicate locks this session has on the page.
2526 */
2527void
2528PredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot)
2529{
2531
2532 if (!SerializationNeededForRead(relation, snapshot))
2533 return;
2534
2536 relation->rd_locator.dbOid,
2537 relation->rd_id,
2538 blkno);
2540}
2541
2542/*
2543 * PredicateLockTID
2544 *
2545 * Gets a predicate lock at the tuple level.
2546 * Skip if not in full serializable transaction isolation level.
2547 * Skip if this is a temporary table.
2548 */
2549void
2550PredicateLockTID(Relation relation, const ItemPointerData *tid, Snapshot snapshot,
2552{
2554
2555 if (!SerializationNeededForRead(relation, snapshot))
2556 return;
2557
2558 /*
2559 * Return if this xact wrote it.
2560 */
2561 if (relation->rd_index == NULL)
2562 {
2563 /* If we wrote it, we already have a write lock. */
2565 return;
2566 }
2567
2568 /*
2569 * Do quick-but-not-definitive test for a relation lock first. This will
2570 * never cause a return when the relation is *not* locked, but will
2571 * occasionally let the check continue when there really *is* a relation
2572 * level lock.
2573 */
2575 relation->rd_locator.dbOid,
2576 relation->rd_id);
2577 if (PredicateLockExists(&tag))
2578 return;
2579
2581 relation->rd_locator.dbOid,
2582 relation->rd_id,
2586}
2587
2588
2589/*
2590 * DeleteLockTarget
2591 *
2592 * Remove a predicate lock target along with any locks held for it.
2593 *
2594 * Caller must hold SerializablePredicateListLock and the
2595 * appropriate hash partition lock for the target.
2596 */
2597static void
2599{
2600 dlist_mutable_iter iter;
2601
2603 LW_EXCLUSIVE));
2605
2607
2608 dlist_foreach_modify(iter, &target->predicateLocks)
2609 {
2611 dlist_container(PREDICATELOCK, targetLink, iter.cur);
2612 bool found;
2613
2614 dlist_delete(&(predlock->xactLink));
2615 dlist_delete(&(predlock->targetLink));
2616
2619 &predlock->tag,
2622 HASH_REMOVE, &found);
2623 Assert(found);
2624 }
2626
2627 /* Remove the target itself, if possible. */
2629}
2630
2631
2632/*
2633 * TransferPredicateLocksToNewTarget
2634 *
2635 * Move or copy all the predicate locks for a lock target, for use by
2636 * index page splits/combines and other things that create or replace
2637 * lock targets. If 'removeOld' is true, the old locks and the target
2638 * will be removed.
2639 *
2640 * Returns true on success, or false if we ran out of shared memory to
2641 * allocate the new target or locks. Guaranteed to always succeed if
2642 * removeOld is set (by using the scratch entry in PredicateLockTargetHash
2643 * for scratch space).
2644 *
2645 * Warning: the "removeOld" option should be used only with care,
2646 * because this function does not (indeed, can not) update other
2647 * backends' LocalPredicateLockHash. If we are only adding new
2648 * entries, this is not a problem: the local lock table is used only
2649 * as a hint, so missing entries for locks that are held are
2650 * OK. Having entries for locks that are no longer held, as can happen
2651 * when using "removeOld", is not in general OK. We can only use it
2652 * safely when replacing a lock with a coarser-granularity lock that
2653 * covers it, or if we are absolutely certain that no one will need to
2654 * refer to that lock in the future.
2655 *
2656 * Caller must hold SerializablePredicateListLock exclusively.
2657 */
2658static bool
2661 bool removeOld)
2662{
2668 bool found;
2669 bool outOfShmem = false;
2670
2672 LW_EXCLUSIVE));
2673
2678
2679 if (removeOld)
2680 {
2681 /*
2682 * Remove the dummy entry to give us scratch space, so we know we'll
2683 * be able to create the new lock target.
2684 */
2685 RemoveScratchTarget(false);
2686 }
2687
2688 /*
2689 * We must get the partition locks in ascending sequence to avoid
2690 * deadlocks. If old and new partitions are the same, we must request the
2691 * lock only once.
2692 */
2694 {
2698 }
2700 {
2704 }
2705 else
2707
2708 /*
2709 * Look for the old target. If not found, that's OK; no predicate locks
2710 * are affected, so we can just clean up and return. If it does exist,
2711 * walk its list of predicate locks and move or copy them to the new
2712 * target.
2713 */
2715 &oldtargettag,
2717 HASH_FIND, NULL);
2718
2719 if (oldtarget)
2720 {
2723 dlist_mutable_iter iter;
2724
2726 &newtargettag,
2728 HASH_ENTER_NULL, &found);
2729
2730 if (!newtarget)
2731 {
2732 /* Failed to allocate due to insufficient shmem */
2733 outOfShmem = true;
2734 goto exit;
2735 }
2736
2737 /* If we created a new entry, initialize it */
2738 if (!found)
2739 dlist_init(&newtarget->predicateLocks);
2740
2741 newpredlocktag.myTarget = newtarget;
2742
2743 /*
2744 * Loop through all the locks on the old target, replacing them with
2745 * locks on the new target.
2746 */
2748
2749 dlist_foreach_modify(iter, &oldtarget->predicateLocks)
2750 {
2752 dlist_container(PREDICATELOCK, targetLink, iter.cur);
2755
2756 newpredlocktag.myXact = oldpredlock->tag.myXact;
2757
2758 if (removeOld)
2759 {
2760 dlist_delete(&(oldpredlock->xactLink));
2761 dlist_delete(&(oldpredlock->targetLink));
2762
2765 &oldpredlock->tag,
2768 HASH_REMOVE, &found);
2769 Assert(found);
2770 }
2771
2778 &found);
2779 if (!newpredlock)
2780 {
2781 /* Out of shared memory. Undo what we've done so far. */
2784 outOfShmem = true;
2785 goto exit;
2786 }
2787 if (!found)
2788 {
2789 dlist_push_tail(&(newtarget->predicateLocks),
2790 &(newpredlock->targetLink));
2791 dlist_push_tail(&(newpredlocktag.myXact->predicateLocks),
2792 &(newpredlock->xactLink));
2793 newpredlock->commitSeqNo = oldCommitSeqNo;
2794 }
2795 else
2796 {
2797 if (newpredlock->commitSeqNo < oldCommitSeqNo)
2798 newpredlock->commitSeqNo = oldCommitSeqNo;
2799 }
2800
2801 Assert(newpredlock->commitSeqNo != 0);
2802 Assert((newpredlock->commitSeqNo == InvalidSerCommitSeqNo)
2803 || (newpredlock->tag.myXact == OldCommittedSxact));
2804 }
2806
2807 if (removeOld)
2808 {
2809 Assert(dlist_is_empty(&oldtarget->predicateLocks));
2811 }
2812 }
2813
2814
2815exit:
2816 /* Release partition locks in reverse order of acquisition. */
2818 {
2821 }
2823 {
2826 }
2827 else
2829
2830 if (removeOld)
2831 {
2832 /* We shouldn't run out of memory if we're moving locks */
2834
2835 /* Put the scratch entry back */
2836 RestoreScratchTarget(false);
2837 }
2838
2839 return !outOfShmem;
2840}
2841
2842/*
2843 * Drop all predicate locks of any granularity from the specified relation,
2844 * which can be a heap relation or an index relation. If 'transfer' is true,
2845 * acquire a relation lock on the heap for any transactions with any lock(s)
2846 * on the specified relation.
2847 *
2848 * This requires grabbing a lot of LW locks and scanning the entire lock
2849 * target table for matches. That makes this more expensive than most
2850 * predicate lock management functions, but it will only be called for DDL
2851 * type commands that are expensive anyway, and there are fast returns when
2852 * no serializable transactions are active or the relation is temporary.
2853 *
2854 * We don't use the TransferPredicateLocksToNewTarget function because it
2855 * acquires its own locks on the partitions of the two targets involved,
2856 * and we'll already be holding all partition locks.
2857 *
2858 * We can't throw an error from here, because the call could be from a
2859 * transaction which is not serializable.
2860 *
2861 * NOTE: This is currently only called with transfer set to true, but that may
2862 * change. If we decide to clean up the locks from a table on commit of a
2863 * transaction which executed DROP TABLE, the false condition will be useful.
2864 */
2865static void
2867{
2871 Oid dbId;
2872 Oid relId;
2873 Oid heapId;
2874 int i;
2875 bool isIndex;
2876 bool found;
2878
2879 /*
2880 * Bail out quickly if there are no serializable transactions running.
2881 * It's safe to check this without taking locks because the caller is
2882 * holding an ACCESS EXCLUSIVE lock on the relation. No new locks which
2883 * would matter here can be acquired while that is held.
2884 */
2886 return;
2887
2888 if (!PredicateLockingNeededForRelation(relation))
2889 return;
2890
2891 dbId = relation->rd_locator.dbOid;
2892 relId = relation->rd_id;
2893 if (relation->rd_index == NULL)
2894 {
2895 isIndex = false;
2896 heapId = relId;
2897 }
2898 else
2899 {
2900 isIndex = true;
2901 heapId = relation->rd_index->indrelid;
2902 }
2904 Assert(transfer || !isIndex); /* index OID only makes sense with
2905 * transfer */
2906
2907 /* Retrieve first time needed, then keep. */
2909 heaptarget = NULL;
2910
2911 /* Acquire locks on all lock partitions */
2913 for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
2916
2917 /*
2918 * Remove the dummy entry to give us scratch space, so we know we'll be
2919 * able to create the new lock target.
2920 */
2921 if (transfer)
2922 RemoveScratchTarget(true);
2923
2924 /* Scan through target map */
2926
2928 {
2929 dlist_mutable_iter iter;
2930
2931 /*
2932 * Check whether this is a target which needs attention.
2933 */
2935 continue; /* wrong relation id */
2936 if (GET_PREDICATELOCKTARGETTAG_DB(oldtarget->tag) != dbId)
2937 continue; /* wrong database id */
2938 if (transfer && !isIndex
2940 continue; /* already the right lock */
2941
2942 /*
2943 * If we made it here, we have work to do. We make sure the heap
2944 * relation lock exists, then we walk the list of predicate locks for
2945 * the old target we found, moving all locks to the heap relation lock
2946 * -- unless they already hold that.
2947 */
2948
2949 /*
2950 * First make sure we have the heap relation target. We only need to
2951 * do this once.
2952 */
2953 if (transfer && heaptarget == NULL)
2954 {
2956
2962 HASH_ENTER, &found);
2963 if (!found)
2964 dlist_init(&heaptarget->predicateLocks);
2965 }
2966
2967 /*
2968 * Loop through all the locks on the old target, replacing them with
2969 * locks on the new target.
2970 */
2971 dlist_foreach_modify(iter, &oldtarget->predicateLocks)
2972 {
2974 dlist_container(PREDICATELOCK, targetLink, iter.cur);
2978
2979 /*
2980 * Remove the old lock first. This avoids the chance of running
2981 * out of lock structure entries for the hash table.
2982 */
2984 oldXact = oldpredlock->tag.myXact;
2985
2986 dlist_delete(&(oldpredlock->xactLink));
2987
2988 /*
2989 * No need for retail delete from oldtarget list, we're removing
2990 * the whole target anyway.
2991 */
2993 &oldpredlock->tag,
2994 HASH_REMOVE, &found);
2995 Assert(found);
2996
2997 if (transfer)
2998 {
3000
3002 newpredlocktag.myXact = oldXact;
3008 HASH_ENTER,
3009 &found);
3010 if (!found)
3011 {
3012 dlist_push_tail(&(heaptarget->predicateLocks),
3013 &(newpredlock->targetLink));
3014 dlist_push_tail(&(newpredlocktag.myXact->predicateLocks),
3015 &(newpredlock->xactLink));
3016 newpredlock->commitSeqNo = oldCommitSeqNo;
3017 }
3018 else
3019 {
3020 if (newpredlock->commitSeqNo < oldCommitSeqNo)
3021 newpredlock->commitSeqNo = oldCommitSeqNo;
3022 }
3023
3024 Assert(newpredlock->commitSeqNo != 0);
3025 Assert((newpredlock->commitSeqNo == InvalidSerCommitSeqNo)
3026 || (newpredlock->tag.myXact == OldCommittedSxact));
3027 }
3028 }
3029
3031 &found);
3032 Assert(found);
3033 }
3034
3035 /* Put the scratch entry back */
3036 if (transfer)
3038
3039 /* Release locks in reverse order */
3041 for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
3044}
3045
3046/*
3047 * TransferPredicateLocksToHeapRelation
3048 * For all transactions, transfer all predicate locks for the given
3049 * relation to a single relation lock on the heap.
3050 */
3051void
3053{
3054 DropAllPredicateLocksFromTable(relation, true);
3055}
3056
3057
3058/*
3059 * PredicateLockPageSplit
3060 *
3061 * Copies any predicate locks for the old page to the new page.
3062 * Skip if this is a temporary table or toast table.
3063 *
3064 * NOTE: A page split (or overflow) affects all serializable transactions,
3065 * even if it occurs in the context of another transaction isolation level.
3066 *
3067 * NOTE: This currently leaves the local copy of the locks without
3068 * information on the new lock which is in shared memory. This could cause
3069 * problems if enough page splits occur on locked pages without the processes
3070 * which hold the locks getting in and noticing.
3071 */
3072void
3075{
3078 bool success;
3079
3080 /*
3081 * Bail out quickly if there are no serializable transactions running.
3082 *
3083 * It's safe to do this check without taking any additional locks. Even if
3084 * a serializable transaction starts concurrently, we know it can't take
3085 * any SIREAD locks on the page being split because the caller is holding
3086 * the associated buffer page lock. Memory reordering isn't an issue; the
3087 * memory barrier in the LWLock acquisition guarantees that this read
3088 * occurs while the buffer page lock is held.
3089 */
3091 return;
3092
3093 if (!PredicateLockingNeededForRelation(relation))
3094 return;
3095
3099
3101 relation->rd_locator.dbOid,
3102 relation->rd_id,
3103 oldblkno);
3105 relation->rd_locator.dbOid,
3106 relation->rd_id,
3107 newblkno);
3108
3110
3111 /*
3112 * Try copying the locks over to the new page's tag, creating it if
3113 * necessary.
3114 */
3117 false);
3118
3119 if (!success)
3120 {
3121 /*
3122 * No more predicate lock entries are available. Failure isn't an
3123 * option here, so promote the page lock to a relation lock.
3124 */
3125
3126 /* Get the parent relation lock's lock tag */
3128 &newtargettag);
3129 Assert(success);
3130
3131 /*
3132 * Move the locks to the parent. This shouldn't fail.
3133 *
3134 * Note that here we are removing locks held by other backends,
3135 * leading to a possible inconsistency in their local lock hash table.
3136 * This is OK because we're replacing it with a lock that covers the
3137 * old one.
3138 */
3141 true);
3142 Assert(success);
3143 }
3144
3146}
3147
3148/*
3149 * PredicateLockPageCombine
3150 *
3151 * Combines predicate locks for two existing pages.
3152 * Skip if this is a temporary table or toast table.
3153 *
3154 * NOTE: A page combine affects all serializable transactions, even if it
3155 * occurs in the context of another transaction isolation level.
3156 */
3157void
3160{
3161 /*
3162 * Page combines differ from page splits in that we ought to be able to
3163 * remove the locks on the old page after transferring them to the new
3164 * page, instead of duplicating them. However, because we can't edit other
3165 * backends' local lock tables, removing the old lock would leave them
3166 * with an entry in their LocalPredicateLockHash for a lock they're not
3167 * holding, which isn't acceptable. So we wind up having to do the same
3168 * work as a page split, acquiring a lock on the new page and keeping the
3169 * old page locked too. That can lead to some false positives, but should
3170 * be rare in practice.
3171 */
3173}
3174
3175/*
3176 * Walk the list of in-progress serializable transactions and find the new
3177 * xmin.
3178 */
3179static void
3181{
3182 dlist_iter iter;
3183
3185
3188
3190 {
3192 dlist_container(SERIALIZABLEXACT, xactLink, iter.cur);
3193
3197 {
3202 {
3205 }
3206 else if (TransactionIdEquals(sxact->xmin,
3209 }
3210 }
3211
3213}
3214
3215/*
3216 * ReleasePredicateLocks
3217 *
3218 * Releases predicate locks based on completion of the current transaction,
3219 * whether committed or rolled back. It can also be called for a read only
3220 * transaction when it becomes impossible for the transaction to become
3221 * part of a dangerous structure.
3222 *
3223 * We do nothing unless this is a serializable transaction.
3224 *
3225 * This method must ensure that shared memory hash tables are cleaned
3226 * up in some relatively timely fashion.
3227 *
3228 * If this transaction is committing and is holding any predicate locks,
3229 * it must be added to a list of completed serializable transactions still
3230 * holding locks.
3231 *
3232 * If isReadOnlySafe is true, then predicate locks are being released before
3233 * the end of the transaction because MySerializableXact has been determined
3234 * to be RO_SAFE. In non-parallel mode we can release it completely, but
3235 * in parallel mode we partially release the SERIALIZABLEXACT and keep it
3236 * around until the end of the transaction, allowing each backend to clear its
3237 * MySerializableXact variable and benefit from the optimization in its own
3238 * time.
3239 */
3240void
3242{
3243 bool partiallyReleasing = false;
3244 bool needToClear;
3246 dlist_mutable_iter iter;
3247
3248 /*
3249 * We can't trust XactReadOnly here, because a transaction which started
3250 * as READ WRITE can show as READ ONLY later, e.g., within
3251 * subtransactions. We want to flag a transaction as READ ONLY if it
3252 * commits without writing so that de facto READ ONLY transactions get the
3253 * benefit of some RO optimizations, so we will use this local variable to
3254 * get some cleanup logic right which is based on whether the transaction
3255 * was declared READ ONLY at the top level.
3256 */
3258
3259 /* We can't be both committing and releasing early due to RO_SAFE. */
3261
3262 /* Are we at the end of a transaction, that is, a commit or abort? */
3263 if (!isReadOnlySafe)
3264 {
3265 /*
3266 * Parallel workers mustn't release predicate locks at the end of
3267 * their transaction. The leader will do that at the end of its
3268 * transaction.
3269 */
3270 if (IsParallelWorker())
3271 {
3273 return;
3274 }
3275
3276 /*
3277 * By the time the leader in a parallel query reaches end of
3278 * transaction, it has waited for all workers to exit.
3279 */
3281
3282 /*
3283 * If the leader in a parallel query earlier stashed a partially
3284 * released SERIALIZABLEXACT for final clean-up at end of transaction
3285 * (because workers might still have been accessing it), then it's
3286 * time to restore it.
3287 */
3289 {
3294 }
3295 }
3296
3298 {
3300 return;
3301 }
3302
3304
3305 /*
3306 * If the transaction is committing, but it has been partially released
3307 * already, then treat this as a roll back. It was marked as rolled back.
3308 */
3310 isCommit = false;
3311
3312 /*
3313 * If we're called in the middle of a transaction because we discovered
3314 * that the SXACT_FLAG_RO_SAFE flag was set, then we'll partially release
3315 * it (that is, release the predicate locks and conflicts, but not the
3316 * SERIALIZABLEXACT itself) if we're the first backend to have noticed.
3317 */
3319 {
3320 /*
3321 * The leader needs to stash a pointer to it, so that it can
3322 * completely release it at end-of-transaction.
3323 */
3324 if (!IsParallelWorker())
3326
3327 /*
3328 * The first backend to reach this condition will partially release
3329 * the SERIALIZABLEXACT. All others will just clear their
3330 * backend-local state so that they stop doing SSI checks for the rest
3331 * of the transaction.
3332 */
3334 {
3337 return;
3338 }
3339 else
3340 {
3342 partiallyReleasing = true;
3343 /* ... and proceed to perform the partial release below. */
3344 }
3345 }
3351
3352 /* may not be serializable during COMMIT/ROLLBACK PREPARED */
3354
3355 /* We'd better not already be on the cleanup list. */
3357
3359
3360 /*
3361 * We don't hold XidGenLock here, assuming that TransactionId is
3362 * atomic!
3363 *
3364 * If this value is changing, we don't care that much whether we get the
3365 * old or new value -- it is just used to determine how far
3366 * SxactGlobalXmin must advance before this transaction can be fully
3367 * cleaned up. The worst that could happen is we wait for one more
3368 * transaction to complete before freeing some RAM; correctness of visible
3369 * behavior is not affected.
3370 */
3372
3373 /*
3374 * If it's not a commit it's either a rollback or a read-only transaction
3375 * flagged SXACT_FLAG_RO_SAFE, and we can clear our locks immediately.
3376 */
3377 if (isCommit)
3378 {
3381 /* Recognize implicit read-only transaction (commit without write). */
3382 if (!MyXactDidWrite)
3384 }
3385 else
3386 {
3387 /*
3388 * The DOOMED flag indicates that we intend to roll back this
3389 * transaction and so it should not cause serialization failures for
3390 * other transactions that conflict with it. Note that this flag might
3391 * already be set, if another backend marked this transaction for
3392 * abort.
3393 *
3394 * The ROLLED_BACK flag further indicates that ReleasePredicateLocks
3395 * has been called, and so the SerializableXact is eligible for
3396 * cleanup. This means it should not be considered when calculating
3397 * SxactGlobalXmin.
3398 */
3401
3402 /*
3403 * If the transaction was previously prepared, but is now failing due
3404 * to a ROLLBACK PREPARED or (hopefully very rare) error after the
3405 * prepare, clear the prepared flag. This simplifies conflict
3406 * checking.
3407 */
3409 }
3410
3412 {
3414 if (--(PredXact->WritableSxactCount) == 0)
3415 {
3416 /*
3417 * Release predicate locks and rw-conflicts in for all committed
3418 * transactions. There are no longer any transactions which might
3419 * conflict with the locks and no chance for new transactions to
3420 * overlap. Similarly, existing conflicts in can't cause pivots,
3421 * and any conflicts in which could have completed a dangerous
3422 * structure would already have caused a rollback, so any
3423 * remaining ones must be benign.
3424 */
3426 }
3427 }
3428 else
3429 {
3430 /*
3431 * Read-only transactions: clear the list of transactions that might
3432 * make us unsafe. Note that we use 'inLink' for the iteration as
3433 * opposed to 'outLink' for the r/w xacts.
3434 */
3436 {
3438 dlist_container(RWConflictData, inLink, iter.cur);
3439
3442
3444 }
3445 }
3446
3447 /* Check for conflict out to old committed transactions. */
3448 if (isCommit
3451 {
3452 /*
3453 * we don't know which old committed transaction we conflicted with,
3454 * so be conservative and use FirstNormalSerCommitSeqNo here
3455 */
3459 }
3460
3461 /*
3462 * Release all outConflicts to committed transactions. If we're rolling
3463 * back clear them all. Set SXACT_FLAG_CONFLICT_OUT if any point to
3464 * previously committed transactions.
3465 */
3467 {
3469 dlist_container(RWConflictData, outLink, iter.cur);
3470
3471 if (isCommit
3473 && SxactIsCommitted(conflict->sxactIn))
3474 {
3476 || conflict->sxactIn->prepareSeqNo < MySerializableXact->SeqNo.earliestOutConflictCommit)
3479 }
3480
3481 if (!isCommit
3482 || SxactIsCommitted(conflict->sxactIn)
3483 || (conflict->sxactIn->SeqNo.lastCommitBeforeSnapshot >= PredXact->LastSxactCommitSeqNo))
3485 }
3486
3487 /*
3488 * Release all inConflicts from committed and read-only transactions. If
3489 * we're rolling back, clear them all.
3490 */
3492 {
3494 dlist_container(RWConflictData, inLink, iter.cur);
3495
3496 if (!isCommit
3497 || SxactIsCommitted(conflict->sxactOut)
3498 || SxactIsReadOnly(conflict->sxactOut))
3500 }
3501
3503 {
3504 /*
3505 * Remove ourselves from the list of possible conflicts for concurrent
3506 * READ ONLY transactions, flagging them as unsafe if we have a
3507 * conflict out. If any are waiting DEFERRABLE transactions, wake them
3508 * up if they are known safe or known unsafe.
3509 */
3511 {
3513 dlist_container(RWConflictData, outLink, iter.cur);
3514
3515 roXact = possibleUnsafeConflict->sxactIn;
3518
3519 /* Mark conflicted if necessary. */
3520 if (isCommit
3524 <= roXact->SeqNo.lastCommitBeforeSnapshot))
3525 {
3526 /*
3527 * This releases possibleUnsafeConflict (as well as all other
3528 * possible conflicts for roXact)
3529 */
3531 }
3532 else
3533 {
3535
3536 /*
3537 * If we were the last possible conflict, flag it safe. The
3538 * transaction can now safely release its predicate locks (but
3539 * that transaction's backend has to do that itself).
3540 */
3541 if (dlist_is_empty(&roXact->possibleUnsafeConflicts))
3542 roXact->flags |= SXACT_FLAG_RO_SAFE;
3543 }
3544
3545 /*
3546 * Wake up the process for a waiting DEFERRABLE transaction if we
3547 * now know it's either safe or conflicted.
3548 */
3551 ProcSendSignal(roXact->pgprocno);
3552 }
3553 }
3554
3555 /*
3556 * Check whether it's time to clean up old transactions. This can only be
3557 * done when the last serializable transaction with the oldest xmin among
3558 * serializable transactions completes. We then find the "new oldest"
3559 * xmin and purge any transactions which finished before this transaction
3560 * was launched.
3561 *
3562 * For parallel queries in read-only transactions, it might run twice. We
3563 * only release the reference on the first call.
3564 */
3565 needToClear = false;
3566 if ((partiallyReleasing ||
3570 {
3572 if (--(PredXact->SxactGlobalXminCount) == 0)
3573 {
3575 needToClear = true;
3576 }
3577 }
3578
3580
3582
3583 /* Add this to the list of transactions to check for later cleanup. */
3584 if (isCommit)
3587
3588 /*
3589 * If we're releasing a RO_SAFE transaction in parallel mode, we'll only
3590 * partially release it. That's necessary because other backends may have
3591 * a reference to it. The leader will release the SERIALIZABLEXACT itself
3592 * at the end of the transaction after workers have stopped running.
3593 */
3594 if (!isCommit)
3597 false);
3598
3600
3601 if (needToClear)
3603
3605}
3606
3607static void
3609{
3611 MyXactDidWrite = false;
3612
3613 /* Delete per-transaction lock table */
3615 {
3618 }
3619}
3620
3621/*
3622 * Clear old predicate locks, belonging to committed transactions that are no
3623 * longer interesting to any in-progress transaction.
3624 */
3625static void
3627{
3628 dlist_mutable_iter iter;
3629
3630 /*
3631 * Loop through finished transactions. They are in commit order, so we can
3632 * stop as soon as we find one that's still interesting.
3633 */
3637 {
3639 dlist_container(SERIALIZABLEXACT, finishedLink, iter.cur);
3640
3644 {
3645 /*
3646 * This transaction committed before any in-progress transaction
3647 * took its snapshot. It's no longer interesting.
3648 */
3650 dlist_delete_thoroughly(&finishedSxact->finishedLink);
3653 }
3654 else if (finishedSxact->commitSeqNo > PredXact->HavePartialClearedThrough
3655 && finishedSxact->commitSeqNo <= PredXact->CanPartialClearThrough)
3656 {
3657 /*
3658 * Any active transactions that took their snapshot before this
3659 * transaction committed are read-only, so we can clear part of
3660 * its state.
3661 */
3663
3665 {
3666 /* A read-only transaction can be removed entirely */
3667 dlist_delete_thoroughly(&(finishedSxact->finishedLink));
3669 }
3670 else
3671 {
3672 /*
3673 * A read-write transaction can only be partially cleared. We
3674 * need to keep the SERIALIZABLEXACT but can release the
3675 * SIREAD locks and conflicts in.
3676 */
3678 }
3679
3682 }
3683 else
3684 {
3685 /* Still interesting. */
3686 break;
3687 }
3688 }
3690
3691 /*
3692 * Loop through predicate locks on dummy transaction for summarized data.
3693 */
3696 {
3698 dlist_container(PREDICATELOCK, xactLink, iter.cur);
3700
3702 Assert(predlock->commitSeqNo != 0);
3703 Assert(predlock->commitSeqNo != InvalidSerCommitSeqNo);
3706
3707 /*
3708 * If this lock originally belonged to an old enough transaction, we
3709 * can release it.
3710 */
3712 {
3713 PREDICATELOCKTAG tag;
3714 PREDICATELOCKTARGET *target;
3718
3719 tag = predlock->tag;
3720 target = tag.myTarget;
3721 targettag = target->tag;
3724
3726
3727 dlist_delete(&(predlock->targetLink));
3728 dlist_delete(&(predlock->xactLink));
3729
3733 HASH_REMOVE, NULL);
3735
3737 }
3738 }
3739
3742}
3743
3744/*
3745 * This is the normal way to delete anything from any of the predicate
3746 * locking hash tables. Given a transaction which we know can be deleted:
3747 * delete all predicate locks held by that transaction and any predicate
3748 * lock targets which are now unreferenced by a lock; delete all conflicts
3749 * for the transaction; delete all xid values for the transaction; then
3750 * delete the transaction.
3751 *
3752 * When the partial flag is set, we can release all predicate locks and
3753 * in-conflict information -- we've established that there are no longer
3754 * any overlapping read write transactions for which this transaction could
3755 * matter -- but keep the transaction entry itself and any outConflicts.
3756 *
3757 * When the summarize flag is set, we've run short of room for sxact data
3758 * and must summarize to the SLRU. Predicate locks are transferred to a
3759 * dummy "old" transaction, with duplicate locks on a single target
3760 * collapsing to a single lock with the "latest" commitSeqNo from among
3761 * the conflicting locks.
3762 */
3763static void
3765 bool summarize)
3766{
3768 dlist_mutable_iter iter;
3769
3770 Assert(sxact != NULL);
3772 Assert(partial || !SxactIsOnFinishedList(sxact));
3774
3775 /*
3776 * First release all the predicate locks held by this xact (or transfer
3777 * them to OldCommittedSxact if summarize is true)
3778 */
3780 if (IsInParallelMode())
3781 LWLockAcquire(&sxact->perXactPredicateListLock, LW_EXCLUSIVE);
3782 dlist_foreach_modify(iter, &sxact->predicateLocks)
3783 {
3785 dlist_container(PREDICATELOCK, xactLink, iter.cur);
3786 PREDICATELOCKTAG tag;
3787 PREDICATELOCKTARGET *target;
3791
3792 tag = predlock->tag;
3793 target = tag.myTarget;
3794 targettag = target->tag;
3797
3799
3800 dlist_delete(&predlock->targetLink);
3801
3805 HASH_REMOVE, NULL);
3806 if (summarize)
3807 {
3808 bool found;
3809
3810 /* Fold into dummy transaction list. */
3815 HASH_ENTER_NULL, &found);
3816 if (!predlock)
3817 ereport(ERROR,
3819 errmsg("out of shared memory"),
3820 errhint("You might need to increase \"%s\".", "max_pred_locks_per_transaction")));
3821 if (found)
3822 {
3823 Assert(predlock->commitSeqNo != 0);
3824 Assert(predlock->commitSeqNo != InvalidSerCommitSeqNo);
3825 if (predlock->commitSeqNo < sxact->commitSeqNo)
3826 predlock->commitSeqNo = sxact->commitSeqNo;
3827 }
3828 else
3829 {
3831 &predlock->targetLink);
3833 &predlock->xactLink);
3834 predlock->commitSeqNo = sxact->commitSeqNo;
3835 }
3836 }
3837 else
3839
3841 }
3842
3843 /*
3844 * Rather than retail removal, just re-init the head after we've run
3845 * through the list.
3846 */
3847 dlist_init(&sxact->predicateLocks);
3848
3849 if (IsInParallelMode())
3850 LWLockRelease(&sxact->perXactPredicateListLock);
3852
3853 sxidtag.xid = sxact->topXid;
3855
3856 /* Release all outConflicts (unless 'partial' is true) */
3857 if (!partial)
3858 {
3859 dlist_foreach_modify(iter, &sxact->outConflicts)
3860 {
3862 dlist_container(RWConflictData, outLink, iter.cur);
3863
3864 if (summarize)
3865 conflict->sxactIn->flags |= SXACT_FLAG_SUMMARY_CONFLICT_IN;
3867 }
3868 }
3869
3870 /* Release all inConflicts. */
3871 dlist_foreach_modify(iter, &sxact->inConflicts)
3872 {
3874 dlist_container(RWConflictData, inLink, iter.cur);
3875
3876 if (summarize)
3877 conflict->sxactOut->flags |= SXACT_FLAG_SUMMARY_CONFLICT_OUT;
3879 }
3880
3881 /* Finally, get rid of the xid and the record of the transaction itself. */
3882 if (!partial)
3883 {
3884 if (sxidtag.xid != InvalidTransactionId)
3887 }
3888
3890}
3891
3892/*
3893 * Tests whether the given top level transaction is concurrent with
3894 * (overlaps) our current transaction.
3895 *
3896 * We need to identify the top level transaction for SSI, anyway, so pass
3897 * that to this function to save the overhead of checking the snapshot's
3898 * subxip array.
3899 */
3900static bool
3902{
3903 Snapshot snap;
3904
3907
3909
3910 if (TransactionIdPrecedes(xid, snap->xmin))
3911 return false;
3912
3913 if (TransactionIdFollowsOrEquals(xid, snap->xmax))
3914 return true;
3915
3916 return pg_lfind32(xid, snap->xip, snap->xcnt);
3917}
3918
3919bool
3921{
3922 if (!SerializationNeededForRead(relation, snapshot))
3923 return false;
3924
3925 /* Check if someone else has already decided that we need to die */
3927 {
3928 ereport(ERROR,
3930 errmsg("could not serialize access due to read/write dependencies among transactions"),
3931 errdetail_internal("Reason code: Canceled on identification as a pivot, during conflict out checking."),
3932 errhint("The transaction might succeed if retried.")));
3933 }
3934
3935 return true;
3936}
3937
3938/*
3939 * CheckForSerializableConflictOut
3940 * A table AM is reading a tuple that has been modified. If it determines
3941 * that the tuple version it is reading is not visible to us, it should
3942 * pass in the top level xid of the transaction that created it.
3943 * Otherwise, if it determines that it is visible to us but it has been
3944 * deleted or there is a newer version available due to an update, it
3945 * should pass in the top level xid of the modifying transaction.
3946 *
3947 * This function will check for overlap with our own transaction. If the given
3948 * xid is also serializable and the transactions overlap (i.e., they cannot see
3949 * each other's writes), then we have a conflict out.
3950 */
3951void
3953{
3957
3958 if (!SerializationNeededForRead(relation, snapshot))
3959 return;
3960
3961 /* Check if someone else has already decided that we need to die */
3963 {
3964 ereport(ERROR,
3966 errmsg("could not serialize access due to read/write dependencies among transactions"),
3967 errdetail_internal("Reason code: Canceled on identification as a pivot, during conflict out checking."),
3968 errhint("The transaction might succeed if retried.")));
3969 }
3971
3973 return;
3974
3975 /*
3976 * Find sxact or summarized info for the top level xid.
3977 */
3978 sxidtag.xid = xid;
3980 sxid = (SERIALIZABLEXID *)
3982 if (!sxid)
3983 {
3984 /*
3985 * Transaction not found in "normal" SSI structures. Check whether it
3986 * got pushed out to SLRU storage for "old committed" transactions.
3987 */
3989
3991 if (conflictCommitSeqNo != 0)
3992 {
3997 ereport(ERROR,
3999 errmsg("could not serialize access due to read/write dependencies among transactions"),
4000 errdetail_internal("Reason code: Canceled on conflict out to old pivot %u.", xid),
4001 errhint("The transaction might succeed if retried.")));
4002
4005 ereport(ERROR,
4007 errmsg("could not serialize access due to read/write dependencies among transactions"),
4008 errdetail_internal("Reason code: Canceled on identification as a pivot, with conflict out to old committed transaction %u.", xid),
4009 errhint("The transaction might succeed if retried.")));
4010
4012 }
4013
4014 /* It's not serializable or otherwise not important. */
4016 return;
4017 }
4018 sxact = sxid->myXact;
4019 Assert(TransactionIdEquals(sxact->topXid, xid));
4021 {
4022 /* Can't conflict with ourselves or a transaction that will roll back. */
4024 return;
4025 }
4026
4027 /*
4028 * We have a conflict out to a transaction which has a conflict out to a
4029 * summarized transaction. That summarized transaction must have
4030 * committed first, and we can't tell when it committed in relation to our
4031 * snapshot acquisition, so something needs to be canceled.
4032 */
4034 {
4035 if (!SxactIsPrepared(sxact))
4036 {
4037 sxact->flags |= SXACT_FLAG_DOOMED;
4039 return;
4040 }
4041 else
4042 {
4044 ereport(ERROR,
4046 errmsg("could not serialize access due to read/write dependencies among transactions"),
4047 errdetail_internal("Reason code: Canceled on conflict out to old pivot."),
4048 errhint("The transaction might succeed if retried.")));
4049 }
4050 }
4051
4052 /*
4053 * If this is a read-only transaction and the writing transaction has
4054 * committed, and it doesn't have a rw-conflict to a transaction which
4055 * committed before it, no conflict.
4056 */
4061 || MySerializableXact->SeqNo.lastCommitBeforeSnapshot < sxact->SeqNo.earliestOutConflictCommit))
4062 {
4063 /* Read-only transaction will appear to run first. No conflict. */
4065 return;
4066 }
4067
4068 if (!XidIsConcurrent(xid))
4069 {
4070 /* This write was already in our snapshot; no conflict. */
4072 return;
4073 }
4074
4076 {
4077 /* We don't want duplicate conflict records in the list. */
4079 return;
4080 }
4081
4082 /*
4083 * Flag the conflict. But first, if this conflict creates a dangerous
4084 * structure, ereport an error.
4085 */
4088}
4089
4090/*
4091 * Check a particular target for rw-dependency conflict in. A subroutine of
4092 * CheckForSerializableConflictIn().
4093 */
4094static void
4096{
4099 PREDICATELOCKTARGET *target;
4102 dlist_mutable_iter iter;
4103
4105
4106 /*
4107 * The same hash and LW lock apply to the lock target and the lock itself.
4108 */
4112 target = (PREDICATELOCKTARGET *)
4115 HASH_FIND, NULL);
4116 if (!target)
4117 {
4118 /* Nothing has this target locked; we're done here. */
4120 return;
4121 }
4122
4123 /*
4124 * Each lock for an overlapping transaction represents a conflict: a
4125 * rw-dependency in to this transaction.
4126 */
4128
4129 dlist_foreach_modify(iter, &target->predicateLocks)
4130 {
4132 dlist_container(PREDICATELOCK, targetLink, iter.cur);
4133 SERIALIZABLEXACT *sxact = predlock->tag.myXact;
4134
4136 {
4137 /*
4138 * If we're getting a write lock on a tuple, we don't need a
4139 * predicate (SIREAD) lock on the same tuple. We can safely remove
4140 * our SIREAD lock, but we'll defer doing so until after the loop
4141 * because that requires upgrading to an exclusive partition lock.
4142 *
4143 * We can't use this optimization within a subtransaction because
4144 * the subtransaction could roll back, and we would be left
4145 * without any lock at the top level.
4146 */
4147 if (!IsSubTransaction()
4149 {
4151 mypredlocktag = predlock->tag;
4152 }
4153 }
4154 else if (!SxactIsDoomed(sxact)
4157 sxact->finishedBefore))
4159 {
4162
4163 /*
4164 * Re-check after getting exclusive lock because the other
4165 * transaction may have flagged a conflict.
4166 */
4167 if (!SxactIsDoomed(sxact)
4170 sxact->finishedBefore))
4172 {
4174 }
4175
4178 }
4179 }
4182
4183 /*
4184 * If we found one of our own SIREAD locks to remove, remove it now.
4185 *
4186 * At this point our transaction already has a RowExclusiveLock on the
4187 * relation, so we are OK to drop the predicate lock on the tuple, if
4188 * found, without fearing that another write against the tuple will occur
4189 * before the MVCC information makes it to the buffer.
4190 */
4191 if (mypredlock != NULL)
4192 {
4195
4197 if (IsInParallelMode())
4201
4202 /*
4203 * Remove the predicate lock from shared memory, if it wasn't removed
4204 * while the locks were released. One way that could happen is from
4205 * autovacuum cleaning up an index.
4206 */
4213 HASH_FIND, NULL);
4214 if (rmpredlock != NULL)
4215 {
4217
4218 dlist_delete(&(mypredlock->targetLink));
4219 dlist_delete(&(mypredlock->xactLink));
4220
4225 HASH_REMOVE, NULL);
4227
4229 }
4230
4233 if (IsInParallelMode())
4236
4237 if (rmpredlock != NULL)
4238 {
4239 /*
4240 * Remove entry in local lock table if it exists. It's OK if it
4241 * doesn't exist; that means the lock was transferred to a new
4242 * target by a different backend.
4243 */
4246 HASH_REMOVE, NULL);
4247
4249 }
4250 }
4251}
4252
4253/*
4254 * CheckForSerializableConflictIn
4255 * We are writing the given tuple. If that indicates a rw-conflict
4256 * in from another serializable transaction, take appropriate action.
4257 *
4258 * Skip checking for any granularity for which a parameter is missing.
4259 *
4260 * A tuple update or delete is in conflict if we have a predicate lock
4261 * against the relation or page in which the tuple exists, or against the
4262 * tuple itself.
4263 */
4264void
4266{
4268
4269 if (!SerializationNeededForWrite(relation))
4270 return;
4271
4272 /* Check if someone else has already decided that we need to die */
4274 ereport(ERROR,
4276 errmsg("could not serialize access due to read/write dependencies among transactions"),
4277 errdetail_internal("Reason code: Canceled on identification as a pivot, during conflict in checking."),
4278 errhint("The transaction might succeed if retried.")));
4279
4280 /*
4281 * We're doing a write which might cause rw-conflicts now or later.
4282 * Memorize that fact.
4283 */
4284 MyXactDidWrite = true;
4285
4286 /*
4287 * It is important that we check for locks from the finest granularity to
4288 * the coarsest granularity, so that granularity promotion doesn't cause
4289 * us to miss a lock. The new (coarser) lock will be acquired before the
4290 * old (finer) locks are released.
4291 *
4292 * It is not possible to take and hold a lock across the checks for all
4293 * granularities because each target could be in a separate partition.
4294 */
4295 if (tid != NULL)
4296 {
4298 relation->rd_locator.dbOid,
4299 relation->rd_id,
4303 }
4304
4305 if (blkno != InvalidBlockNumber)
4306 {
4308 relation->rd_locator.dbOid,
4309 relation->rd_id,
4310 blkno);
4312 }
4313
4315 relation->rd_locator.dbOid,
4316 relation->rd_id);
4318}
4319
4320/*
4321 * CheckTableForSerializableConflictIn
4322 * The entire table is going through a DDL-style logical mass delete
4323 * like TRUNCATE or DROP TABLE. If that causes a rw-conflict in from
4324 * another serializable transaction, take appropriate action.
4325 *
4326 * While these operations do not operate entirely within the bounds of
4327 * snapshot isolation, they can occur inside a serializable transaction, and
4328 * will logically occur after any reads which saw rows which were destroyed
4329 * by these operations, so we do what we can to serialize properly under
4330 * SSI.
4331 *
4332 * The relation passed in must be a heap relation. Any predicate lock of any
4333 * granularity on the heap will cause a rw-conflict in to this transaction.
4334 * Predicate locks on indexes do not matter because they only exist to guard
4335 * against conflicting inserts into the index, and this is a mass *delete*.
4336 * When a table is truncated or dropped, the index will also be truncated
4337 * or dropped, and we'll deal with locks on the index when that happens.
4338 *
4339 * Dropping or truncating a table also needs to drop any existing predicate
4340 * locks on heap tuples or pages, because they're about to go away. This
4341 * should be done before altering the predicate locks because the transaction
4342 * could be rolled back because of a conflict, in which case the lock changes
4343 * are not needed. (At the moment, we don't actually bother to drop the
4344 * existing locks on a dropped or truncated table. That might
4345 * lead to some false positives, but it doesn't seem worth the trouble.)
4346 */
4347void
4349{
4351 PREDICATELOCKTARGET *target;
4352 Oid dbId;
4353 Oid heapId;
4354 int i;
4355
4356 /*
4357 * Bail out quickly if there are no serializable transactions running.
4358 * It's safe to check this without taking locks because the caller is
4359 * holding an ACCESS EXCLUSIVE lock on the relation. No new locks which
4360 * would matter here can be acquired while that is held.
4361 */
4363 return;
4364
4365 if (!SerializationNeededForWrite(relation))
4366 return;
4367
4368 /*
4369 * We're doing a write which might cause rw-conflicts now or later.
4370 * Memorize that fact.
4371 */
4372 MyXactDidWrite = true;
4373
4374 Assert(relation->rd_index == NULL); /* not an index relation */
4375
4376 dbId = relation->rd_locator.dbOid;
4377 heapId = relation->rd_id;
4378
4380 for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
4383
4384 /* Scan through target list */
4386
4387 while ((target = (PREDICATELOCKTARGET *) hash_seq_search(&seqstat)))
4388 {
4389 dlist_mutable_iter iter;
4390
4391 /*
4392 * Check whether this is a target which needs attention.
4393 */
4395 continue; /* wrong relation id */
4396 if (GET_PREDICATELOCKTARGETTAG_DB(target->tag) != dbId)
4397 continue; /* wrong database id */
4398
4399 /*
4400 * Loop through locks for this target and flag conflicts.
4401 */
4402 dlist_foreach_modify(iter, &target->predicateLocks)
4403 {
4405 dlist_container(PREDICATELOCK, targetLink, iter.cur);
4406
4407 if (predlock->tag.myXact != MySerializableXact
4409 {
4411 }
4412 }
4413 }
4414
4415 /* Release locks in reverse order */
4417 for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
4420}
4421
4422
4423/*
4424 * Flag a rw-dependency between two serializable transactions.
4425 *
4426 * The caller is responsible for ensuring that we have a LW lock on
4427 * the transaction hash table.
4428 */
4429static void
4431{
4432 Assert(reader != writer);
4433
4434 /* First, see if this conflict causes failure. */
4436
4437 /* Actually do the conflict flagging. */
4438 if (reader == OldCommittedSxact)
4440 else if (writer == OldCommittedSxact)
4442 else
4443 SetRWConflict(reader, writer);
4444}
4445
4446/*----------------------------------------------------------------------------
4447 * We are about to add a RW-edge to the dependency graph - check that we don't
4448 * introduce a dangerous structure by doing so, and abort one of the
4449 * transactions if so.
4450 *
4451 * A serialization failure can only occur if there is a dangerous structure
4452 * in the dependency graph:
4453 *
4454 * Tin ------> Tpivot ------> Tout
4455 * rw rw
4456 *
4457 * Furthermore, Tout must commit first.
4458 *
4459 * One more optimization is that if Tin is declared READ ONLY (or commits
4460 * without writing), we can only have a problem if Tout committed before Tin
4461 * acquired its snapshot.
4462 *----------------------------------------------------------------------------
4463 */
4464static void
4467{
4468 bool failure;
4469
4471
4472 failure = false;
4473
4474 /*------------------------------------------------------------------------
4475 * Check for already-committed writer with rw-conflict out flagged
4476 * (conflict-flag on W means that T2 committed before W):
4477 *
4478 * R ------> W ------> T2
4479 * rw rw
4480 *
4481 * That is a dangerous structure, so we must abort. (Since the writer
4482 * has already committed, we must be the reader)
4483 *------------------------------------------------------------------------
4484 */
4487 failure = true;
4488
4489 /*------------------------------------------------------------------------
4490 * Check whether the writer has become a pivot with an out-conflict
4491 * committed transaction (T2), and T2 committed first:
4492 *
4493 * R ------> W ------> T2
4494 * rw rw
4495 *
4496 * Because T2 must've committed first, there is no anomaly if:
4497 * - the reader committed before T2
4498 * - the writer committed before T2
4499 * - the reader is a READ ONLY transaction and the reader was concurrent
4500 * with T2 (= reader acquired its snapshot before T2 committed)
4501 *
4502 * We also handle the case that T2 is prepared but not yet committed
4503 * here. In that case T2 has already checked for conflicts, so if it
4504 * commits first, making the above conflict real, it's too late for it
4505 * to abort.
4506 *------------------------------------------------------------------------
4507 */
4509 failure = true;
4510 else if (!failure)
4511 {
4512 dlist_iter iter;
4513
4514 dlist_foreach(iter, &writer->outConflicts)
4515 {
4517 dlist_container(RWConflictData, outLink, iter.cur);
4518 SERIALIZABLEXACT *t2 = conflict->sxactIn;
4519
4520 if (SxactIsPrepared(t2)
4521 && (!SxactIsCommitted(reader)
4522 || t2->prepareSeqNo <= reader->commitSeqNo)
4524 || t2->prepareSeqNo <= writer->commitSeqNo)
4525 && (!SxactIsReadOnly(reader)
4526 || t2->prepareSeqNo <= reader->SeqNo.lastCommitBeforeSnapshot))
4527 {
4528 failure = true;
4529 break;
4530 }
4531 }
4532 }
4533
4534 /*------------------------------------------------------------------------
4535 * Check whether the reader has become a pivot with a writer
4536 * that's committed (or prepared):
4537 *
4538 * T0 ------> R ------> W
4539 * rw rw
4540 *
4541 * Because W must've committed first for an anomaly to occur, there is no
4542 * anomaly if:
4543 * - T0 committed before the writer
4544 * - T0 is READ ONLY, and overlaps the writer
4545 *------------------------------------------------------------------------
4546 */
4547 if (!failure && SxactIsPrepared(writer) && !SxactIsReadOnly(reader))
4548 {
4549 if (SxactHasSummaryConflictIn(reader))
4550 {
4551 failure = true;
4552 }
4553 else
4554 {
4555 dlist_iter iter;
4556
4557 /*
4558 * The unconstify is needed as we have no const version of
4559 * dlist_foreach().
4560 */
4561 dlist_foreach(iter, &unconstify(SERIALIZABLEXACT *, reader)->inConflicts)
4562 {
4563 const RWConflict conflict =
4564 dlist_container(RWConflictData, inLink, iter.cur);
4565 const SERIALIZABLEXACT *t0 = conflict->sxactOut;
4566
4567 if (!SxactIsDoomed(t0)
4568 && (!SxactIsCommitted(t0)
4569 || t0->commitSeqNo >= writer->prepareSeqNo)
4570 && (!SxactIsReadOnly(t0)
4571 || t0->SeqNo.lastCommitBeforeSnapshot >= writer->prepareSeqNo))
4572 {
4573 failure = true;
4574 break;
4575 }
4576 }
4577 }
4578 }
4579
4580 if (failure)
4581 {
4582 /*
4583 * We have to kill a transaction to avoid a possible anomaly from
4584 * occurring. If the writer is us, we can just ereport() to cause a
4585 * transaction abort. Otherwise we flag the writer for termination,
4586 * causing it to abort when it tries to commit. However, if the writer
4587 * has already been prepared (two-phase commit), we can't abort it
4588 * anymore, so we have to kill the reader instead.
4589 */
4591 {
4593 ereport(ERROR,
4595 errmsg("could not serialize access due to read/write dependencies among transactions"),
4596 errdetail_internal("Reason code: Canceled on identification as a pivot, during write."),
4597 errhint("The transaction might succeed if retried.")));
4598 }
4599 else if (SxactIsPrepared(writer))
4600 {
4602
4603 /* if we're not the writer, we have to be the reader */
4604 Assert(MySerializableXact == reader);
4605 ereport(ERROR,
4607 errmsg("could not serialize access due to read/write dependencies among transactions"),
4608 errdetail_internal("Reason code: Canceled on conflict out to pivot %u, during read.", writer->topXid),
4609 errhint("The transaction might succeed if retried.")));
4610 }
4611 writer->flags |= SXACT_FLAG_DOOMED;
4612 }
4613}
4614
4615/*
4616 * PreCommit_CheckForSerializationFailure
4617 * Check for dangerous structures in a serializable transaction
4618 * at commit.
4619 *
4620 * We're checking for a dangerous structure as each conflict is recorded.
4621 * The only way we could have a problem at commit is if this is the "out"
4622 * side of a pivot, and neither the "in" side nor the pivot has yet
4623 * committed.
4624 *
4625 * If a dangerous structure is found, the pivot (the near conflict) is
4626 * marked for death, because rolling back another transaction might mean
4627 * that we fail without ever making progress. This transaction is
4628 * committing writes, so letting it commit ensures progress. If we
4629 * canceled the far conflict, it might immediately fail again on retry.
4630 */
4631void
4633{
4635
4637 return;
4638
4640
4642
4643 /*
4644 * Check if someone else has already decided that we need to die. Since
4645 * we set our own DOOMED flag when partially releasing, ignore in that
4646 * case.
4647 */
4650 {
4652 ereport(ERROR,
4654 errmsg("could not serialize access due to read/write dependencies among transactions"),
4655 errdetail_internal("Reason code: Canceled on identification as a pivot, during commit attempt."),
4656 errhint("The transaction might succeed if retried.")));
4657 }
4658
4660 {
4663
4664 if (!SxactIsCommitted(nearConflict->sxactOut)
4665 && !SxactIsDoomed(nearConflict->sxactOut))
4666 {
4668
4669 dlist_foreach(far_iter, &nearConflict->sxactOut->inConflicts)
4670 {
4673
4674 if (farConflict->sxactOut == MySerializableXact
4675 || (!SxactIsCommitted(farConflict->sxactOut)
4676 && !SxactIsReadOnly(farConflict->sxactOut)
4677 && !SxactIsDoomed(farConflict->sxactOut)))
4678 {
4679 /*
4680 * Normally, we kill the pivot transaction to make sure we
4681 * make progress if the failing transaction is retried.
4682 * However, we can't kill it if it's already prepared, so
4683 * in that case we commit suicide instead.
4684 */
4685 if (SxactIsPrepared(nearConflict->sxactOut))
4686 {
4688 ereport(ERROR,
4690 errmsg("could not serialize access due to read/write dependencies among transactions"),
4691 errdetail_internal("Reason code: Canceled on commit attempt with conflict in from prepared pivot."),
4692 errhint("The transaction might succeed if retried.")));
4693 }
4694 nearConflict->sxactOut->flags |= SXACT_FLAG_DOOMED;
4695 break;
4696 }
4697 }
4698 }
4699 }
4700
4703
4705}
4706
4707/*------------------------------------------------------------------------*/
4708
4709/*
4710 * Two-phase commit support
4711 */
4712
4713/*
4714 * AtPrepare_Locks
4715 * Do the preparatory work for a PREPARE: make 2PC state file
4716 * records for all predicate locks currently held.
4717 */
4718void
4720{
4723 TwoPhasePredicateXactRecord *xactRecord;
4724 TwoPhasePredicateLockRecord *lockRecord;
4725 dlist_iter iter;
4726
4728 xactRecord = &(record.data.xactRecord);
4729 lockRecord = &(record.data.lockRecord);
4730
4732 return;
4733
4734 /* Generate an xact record for our SERIALIZABLEXACT */
4736 xactRecord->xmin = MySerializableXact->xmin;
4737 xactRecord->flags = MySerializableXact->flags;
4738
4739 /*
4740 * Note that we don't include the list of conflicts in or out in the
4741 * statefile, because new conflicts can be added even after the
4742 * transaction prepares. We'll just make a conservative assumption during
4743 * recovery instead.
4744 */
4745
4747 &record, sizeof(record));
4748
4749 /*
4750 * Generate a lock record for each lock.
4751 *
4752 * To do this, we need to walk the predicate lock list in our sxact rather
4753 * than using the local predicate lock table because the latter is not
4754 * guaranteed to be accurate.
4755 */
4757
4758 /*
4759 * No need to take sxact->perXactPredicateListLock in parallel mode
4760 * because there cannot be any parallel workers running while we are
4761 * preparing a transaction.
4762 */
4764
4765 dlist_foreach(iter, &sxact->predicateLocks)
4766 {
4768 dlist_container(PREDICATELOCK, xactLink, iter.cur);
4769
4771 lockRecord->target = predlock->tag.myTarget->tag;
4772
4774 &record, sizeof(record));
4775 }
4776
4778}
4779
4780/*
4781 * PostPrepare_Locks
4782 * Clean up after successful PREPARE. Unlike the non-predicate
4783 * lock manager, we do not need to transfer locks to a dummy
4784 * PGPROC because our SERIALIZABLEXACT will stay around
4785 * anyway. We only need to clean up our local state.
4786 */
4787void
4789{
4791 return;
4792
4794
4797
4800
4802 MyXactDidWrite = false;
4803}
4804
4805/*
4806 * PredicateLockTwoPhaseFinish
4807 * Release a prepared transaction's predicate locks once it
4808 * commits or aborts.
4809 */
4810void
4812{
4815
4817
4819 sxid = (SERIALIZABLEXID *)
4822
4823 /* xid will not be found if it wasn't a serializable transaction */
4824 if (sxid == NULL)
4825 return;
4826
4827 /* Release its locks */
4828 MySerializableXact = sxid->myXact;
4829 MyXactDidWrite = true; /* conservatively assume that we wrote
4830 * something */
4832}
4833
4834/*
4835 * Re-acquire a predicate lock belonging to a transaction that was prepared.
4836 */
4837void
4839 void *recdata, uint32 len)
4840{
4843
4845
4846 record = (TwoPhasePredicateRecord *) recdata;
4847
4849 (record->type == TWOPHASEPREDICATERECORD_LOCK));
4850
4851 if (record->type == TWOPHASEPREDICATERECORD_XACT)
4852 {
4853 /* Per-transaction record. Set up a SERIALIZABLEXACT. */
4854 TwoPhasePredicateXactRecord *xactRecord;
4858 bool found;
4859
4860 xactRecord = (TwoPhasePredicateXactRecord *) &record->data.xactRecord;
4861
4864 if (!sxact)
4865 ereport(ERROR,
4867 errmsg("out of shared memory")));
4868
4869 /* vxid for a prepared xact is INVALID_PROC_NUMBER/xid; no pid */
4870 sxact->vxid.procNumber = INVALID_PROC_NUMBER;
4871 sxact->vxid.localTransactionId = (LocalTransactionId) xid;
4872 sxact->pid = 0;
4873 sxact->pgprocno = INVALID_PROC_NUMBER;
4874
4875 /* a prepared xact hasn't committed yet */
4876 sxact->prepareSeqNo = RecoverySerCommitSeqNo;
4877 sxact->commitSeqNo = InvalidSerCommitSeqNo;
4878 sxact->finishedBefore = InvalidTransactionId;
4879
4880 sxact->SeqNo.lastCommitBeforeSnapshot = RecoverySerCommitSeqNo;
4881
4882 /*
4883 * Don't need to track this; no transactions running at the time the
4884 * recovered xact started are still active, except possibly other
4885 * prepared xacts and we don't care whether those are RO_SAFE or not.
4886 */
4887 dlist_init(&(sxact->possibleUnsafeConflicts));
4888
4889 dlist_init(&(sxact->predicateLocks));
4890 dlist_node_init(&sxact->finishedLink);
4891
4892 sxact->topXid = xid;
4893 sxact->xmin = xactRecord->xmin;
4894 sxact->flags = xactRecord->flags;
4896 if (!SxactIsReadOnly(sxact))
4897 {
4901 }
4902
4903 /*
4904 * We don't know whether the transaction had any conflicts or not, so
4905 * we'll conservatively assume that it had both a conflict in and a
4906 * conflict out, and represent that with the summary conflict flags.
4907 */
4908 dlist_init(&(sxact->outConflicts));
4909 dlist_init(&(sxact->inConflicts));
4912
4913 /* Register the transaction's xid */
4914 sxidtag.xid = xid;
4916 &sxidtag,
4917 HASH_ENTER, &found);
4918 Assert(sxid != NULL);
4919 Assert(!found);
4920 sxid->myXact = sxact;
4921
4922 /*
4923 * Update global xmin. Note that this is a special case compared to
4924 * registering a normal transaction, because the global xmin might go
4925 * backwards. That's OK, because until recovery is over we're not
4926 * going to complete any transactions or create any non-prepared
4927 * transactions, so there's no danger of throwing away anything that is still needed.
4928 */
4931 {
4935 }
4937 {
4940 }
4941
4943 }
4944 else if (record->type == TWOPHASEPREDICATERECORD_LOCK)
4945 {
4946 /* Lock record. Recreate the PREDICATELOCK */
4947 TwoPhasePredicateLockRecord *lockRecord;
4952
4953 lockRecord = (TwoPhasePredicateLockRecord *) &record->data.lockRecord;
4955
4957 sxidtag.xid = xid;
4958 sxid = (SERIALIZABLEXID *)
4961
4962 Assert(sxid != NULL);
4963 sxact = sxid->myXact;
4965
4967 }
4968}
4969
4970/*
4971 * Prepare to share the current SERIALIZABLEXACT with parallel workers.
4972 * Return a handle object that can be used by AttachSerializableXact() in a
4973 * parallel worker.
4974 */
4977{
4978 return MySerializableXact;
4979}
4980
4981/*
4982 * Allow parallel workers to import the leader's SERIALIZABLEXACT.
4983 */
4984void
4986{
4987
4989
4993}
bool ParallelContextActive(void)
Definition parallel.c:1033
uint32 BlockNumber
Definition block.h:31
#define InvalidBlockNumber
Definition block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition block.h:71
#define unconstify(underlying_type, expr)
Definition c.h:1325
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:249
#define Assert(condition)
Definition c.h:943
int64_t int64
Definition c.h:621
uint16_t uint16
Definition c.h:623
uint32_t uint32
Definition c.h:624
uint32 LocalTransactionId
Definition c.h:738
uint32 TransactionId
Definition c.h:736
size_t Size
Definition c.h:689
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:889
HTAB * hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
Definition dynahash.c:360
void hash_destroy(HTAB *hashp)
Definition dynahash.c:802
void * hash_search_with_hash_value(HTAB *hashp, const void *keyPtr, uint32 hashvalue, HASHACTION action, bool *foundPtr)
Definition dynahash.c:902
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition dynahash.c:1352
int64 hash_get_num_entries(HTAB *hashp)
Definition dynahash.c:1273
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition dynahash.c:1317
Datum arg
Definition elog.c:1322
int errcode(int sqlerrcode)
Definition elog.c:874
int int errdetail_internal(const char *fmt,...) pg_attribute_printf(1
int errhint(const char *fmt,...) pg_attribute_printf(1
int errdetail(const char *fmt,...) pg_attribute_printf(1
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define DEBUG2
Definition elog.h:30
#define ERROR
Definition elog.h:40
#define elog(elevel,...)
Definition elog.h:228
#define ereport(elevel,...)
Definition elog.h:152
#define palloc_object(type)
Definition fe_memutils.h:74
#define palloc_array(type, count)
Definition fe_memutils.h:76
int MyProcPid
Definition globals.c:49
ProcNumber MyProcNumber
Definition globals.c:92
int MaxBackends
Definition globals.c:149
int serializable_buffers
Definition globals.c:168
#define newval
GucSource
Definition guc.h:112
@ HASH_FIND
Definition hsearch.h:108
@ HASH_REMOVE
Definition hsearch.h:110
@ HASH_ENTER
Definition hsearch.h:109
@ HASH_ENTER_NULL
Definition hsearch.h:111
#define HASH_ELEM
Definition hsearch.h:90
#define HASH_FUNCTION
Definition hsearch.h:93
#define HASH_BLOBS
Definition hsearch.h:92
#define HASH_FIXED_SIZE
Definition hsearch.h:100
#define HASH_PARTITION
Definition hsearch.h:87
static dlist_node * dlist_pop_head_node(dlist_head *head)
Definition ilist.h:450
#define dlist_foreach(iter, lhead)
Definition ilist.h:623
static void dlist_init(dlist_head *head)
Definition ilist.h:314
#define dlist_head_element(type, membername, lhead)
Definition ilist.h:603
static void dlist_delete_thoroughly(dlist_node *node)
Definition ilist.h:416
static void dlist_delete(dlist_node *node)
Definition ilist.h:405
#define dlist_foreach_modify(iter, lhead)
Definition ilist.h:640
static bool dlist_is_empty(const dlist_head *head)
Definition ilist.h:336
static void dlist_push_tail(dlist_head *head, dlist_node *node)
Definition ilist.h:364
static void dlist_node_init(dlist_node *node)
Definition ilist.h:325
#define dlist_container(type, membername, ptr)
Definition ilist.h:593
#define IsParallelWorker()
Definition parallel.h:62
FILE * output
long val
Definition informix.c:689
static bool success
Definition initdb.c:188
int i
Definition isn.c:77
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition itemptr.h:103
#define GET_VXID_FROM_PGPROC(vxid_dst, proc)
Definition lock.h:80
#define SetInvalidVirtualTransactionId(vxid)
Definition lock.h:77
bool LWLockHeldByMe(LWLock *lock)
Definition lwlock.c:1885
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1150
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1929
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1767
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition lwlock.c:670
@ LW_SHARED
Definition lwlock.h:105
@ LW_EXCLUSIVE
Definition lwlock.h:104
#define NUM_PREDICATELOCK_PARTITIONS
Definition lwlock.h:91
#define InvalidPid
Definition miscadmin.h:32
static char * errmsg
#define SLRU_PAGES_PER_SEGMENT
const void size_t len
const void * data
static bool pg_lfind32(uint32 key, const uint32 *base, uint32 nelem)
Definition pg_lfind.h:153
static rewind_source * source
Definition pg_rewind.c:89
#define ERRCODE_T_R_SERIALIZATION_FAILURE
Definition pgbench.c:77
#define InvalidOid
unsigned int Oid
PredicateLockData * GetPredicateLockStatusData(void)
Definition predicate.c:1377
void CheckPointPredicate(void)
Definition predicate.c:1022
void PredicateLockPageSplit(Relation relation, BlockNumber oldblkno, BlockNumber newblkno)
Definition predicate.c:3074
static void DecrementParentLocks(const PREDICATELOCKTARGETTAG *targettag)
Definition predicate.c:2321
static HTAB * PredicateLockHash
Definition predicate.c:411
static void SetPossibleUnsafeConflict(SERIALIZABLEXACT *roXact, SERIALIZABLEXACT *activeXact)
Definition predicate.c:680
#define PredicateLockTargetTagHashCode(predicatelocktargettag)
Definition predicate.c:302
static void SetNewSxactGlobalXmin(void)
Definition predicate.c:3181
void CheckForSerializableConflictIn(Relation relation, const ItemPointerData *tid, BlockNumber blkno)
Definition predicate.c:4266
#define SerialPage(xid)
Definition predicate.c:345
static void ReleasePredXact(SERIALIZABLEXACT *sxact)
Definition predicate.c:610
static void PredicateLockShmemInit(void *arg)
Definition predicate.c:1244
void SetSerializableTransactionSnapshot(Snapshot snapshot, VirtualTransactionId *sourcevxid, int sourcepid)
Definition predicate.c:1652
static bool RWConflictExists(const SERIALIZABLEXACT *reader, const SERIALIZABLEXACT *writer)
Definition predicate.c:624
static bool PredicateLockingNeededForRelation(Relation relation)
Definition predicate.c:512
static bool SerializationNeededForRead(Relation relation, Snapshot snapshot)
Definition predicate.c:530
static Snapshot GetSafeSnapshot(Snapshot origSnapshot)
Definition predicate.c:1488
#define SxactIsCommitted(sxact)
Definition predicate.c:276
static SerialControl serialControl
Definition predicate.c:356
void PredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot)
Definition predicate.c:2529
#define SxactIsROUnsafe(sxact)
Definition predicate.c:291
static Snapshot GetSerializableTransactionSnapshotInt(Snapshot snapshot, VirtualTransactionId *sourcevxid, int sourcepid)
Definition predicate.c:1694
static LWLock * ScratchPartitionLock
Definition predicate.c:421
static void PredicateLockAcquire(const PREDICATELOCKTARGETTAG *targettag)
Definition predicate.c:2447
#define SxactIsDeferrableWaiting(sxact)
Definition predicate.c:289
static void ReleasePredicateLocksLocal(void)
Definition predicate.c:3609
static HTAB * LocalPredicateLockHash
Definition predicate.c:427
int max_predicate_locks_per_page
Definition predicate.c:375
struct SerialControlData * SerialControl
Definition predicate.c:354
static PredXactList PredXact
Definition predicate.c:386
static void SetRWConflict(SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer)
Definition predicate.c:657
int GetSafeSnapshotBlockingPids(int blocked_pid, int *output, int output_size)
Definition predicate.c:1558
static uint32 ScratchTargetTagHash
Definition predicate.c:420
static void RemoveTargetIfNoLongerUsed(PREDICATELOCKTARGET *target, uint32 targettaghash)
Definition predicate.c:2113
static uint32 predicatelock_hash(const void *key, Size keysize)
Definition predicate.c:1351
void CheckForSerializableConflictOut(Relation relation, TransactionId xid, Snapshot snapshot)
Definition predicate.c:3953
static int64 max_serializable_xacts
Definition predicate.c:446
#define SxactIsReadOnly(sxact)
Definition predicate.c:280
#define SerialNextPage(page)
Definition predicate.c:339
static void DropAllPredicateLocksFromTable(Relation relation, bool transfer)
Definition predicate.c:2867
bool PageIsPredicateLocked(Relation relation, BlockNumber blkno)
Definition predicate.c:1938
static int serial_errdetail_for_io_error(const void *opaque_data)
Definition predicate.c:760
static void CreatePredicateLock(const PREDICATELOCKTARGETTAG *targettag, uint32 targettaghash, SERIALIZABLEXACT *sxact)
Definition predicate.c:2383
static void SerialAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo)
Definition predicate.c:839
static void ClearOldPredicateLocks(void)
Definition predicate.c:3627
#define SxactHasSummaryConflictIn(sxact)
Definition predicate.c:281
static SERIALIZABLEXACT * CreatePredXact(void)
Definition predicate.c:596
static bool GetParentPredicateLockTag(const PREDICATELOCKTARGETTAG *tag, PREDICATELOCKTARGETTAG *parent)
Definition predicate.c:2002
#define PredicateLockHashCodeFromTargetHashCode(predicatelocktag, targethash)
Definition predicate.c:315
static void RestoreScratchTarget(bool lockheld)
Definition predicate.c:2091
#define SerialValue(slotno, xid)
Definition predicate.c:341
static void DeleteChildTargetLocks(const PREDICATELOCKTARGETTAG *newtargettag)
Definition predicate.c:2144
static void DeleteLockTarget(PREDICATELOCKTARGET *target, uint32 targettaghash)
Definition predicate.c:2599
void PredicateLockTwoPhaseFinish(FullTransactionId fxid, bool isCommit)
Definition predicate.c:4812
void predicatelock_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len)
Definition predicate.c:4839
static SlruDesc SerialSlruDesc
Definition predicate.c:326
static SERIALIZABLEXACT * OldCommittedSxact
Definition predicate.c:364
#define SxactHasConflictOut(sxact)
Definition predicate.c:288
static bool MyXactDidWrite
Definition predicate.c:435
static int MaxPredicateChildLocks(const PREDICATELOCKTARGETTAG *tag)
Definition predicate.c:2219
static void FlagSxactUnsafe(SERIALIZABLEXACT *sxact)
Definition predicate.c:713
static void PredicateLockShmemRequest(void *arg)
Definition predicate.c:1119
void CheckTableForSerializableConflictIn(Relation relation)
Definition predicate.c:4349
#define SxactIsPrepared(sxact)
Definition predicate.c:277
void AttachSerializableXact(SerializableXactHandle handle)
Definition predicate.c:4986
SerializableXactHandle ShareSerializableXact(void)
Definition predicate.c:4977
static bool PredicateLockExists(const PREDICATELOCKTARGETTAG *targettag)
Definition predicate.c:1975
static void RemoveScratchTarget(bool lockheld)
Definition predicate.c:2070
#define SxactIsOnFinishedList(sxact)
Definition predicate.c:266
#define SxactIsPartiallyReleased(sxact)
Definition predicate.c:292
static void SerialSetActiveSerXmin(TransactionId xid)
Definition predicate.c:971
static dlist_head * FinishedSerializableTransactions
Definition predicate.c:412
static bool SerializationNeededForWrite(Relation relation)
Definition predicate.c:574
static HTAB * SerializableXidHash
Definition predicate.c:409
static bool CheckAndPromotePredicateLockRequest(const PREDICATELOCKTARGETTAG *reqtag)
Definition predicate.c:2256
void PredicateLockPageCombine(Relation relation, BlockNumber oldblkno, BlockNumber newblkno)
Definition predicate.c:3159
static bool SerialPagePrecedesLogically(int64 page1, int64 page2)
Definition predicate.c:745
static void CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag)
Definition predicate.c:4096
int max_predicate_locks_per_relation
Definition predicate.c:374
#define SxactIsROSafe(sxact)
Definition predicate.c:290
void PreCommit_CheckForSerializationFailure(void)
Definition predicate.c:4633
void ReleasePredicateLocks(bool isCommit, bool isReadOnlySafe)
Definition predicate.c:3242
static void FlagRWConflict(SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer)
Definition predicate.c:4431
static const PREDICATELOCKTARGETTAG ScratchTargetTag
Definition predicate.c:419
#define PredicateLockHashPartitionLockByIndex(i)
Definition predicate.c:260
static void OnConflict_CheckForSerializationFailure(const SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer)
Definition predicate.c:4466
static bool CoarserLockCovers(const PREDICATELOCKTARGETTAG *newtargettag)
Definition predicate.c:2041
void PredicateLockRelation(Relation relation, Snapshot snapshot)
Definition predicate.c:2506
static SERIALIZABLEXACT * MySerializableXact
Definition predicate.c:434
void PredicateLockTID(Relation relation, const ItemPointerData *tid, Snapshot snapshot, TransactionId tuple_xid)
Definition predicate.c:2551
#define SxactIsDoomed(sxact)
Definition predicate.c:279
#define NPREDICATELOCKTARGETENTS()
Definition predicate.c:263
static SerCommitSeqNo SerialGetMinConflictCommitSeqNo(TransactionId xid)
Definition predicate.c:930
static void SummarizeOldestCommittedSxact(void)
Definition predicate.c:1433
const ShmemCallbacks PredicateLockShmemCallbacks
Definition predicate.c:392
bool check_serial_buffers(int *newval, void **extra, GucSource source)
Definition predicate.c:828
void PostPrepare_PredicateLocks(FullTransactionId fxid)
Definition predicate.c:4789
#define TargetTagIsCoveredBy(covered_target, covering_target)
Definition predicate.c:232
static RWConflictPoolHeader RWConflictPool
Definition predicate.c:403
static void ReleaseRWConflict(RWConflict conflict)
Definition predicate.c:705
static bool TransferPredicateLocksToNewTarget(PREDICATELOCKTARGETTAG oldtargettag, PREDICATELOCKTARGETTAG newtargettag, bool removeOld)
Definition predicate.c:2660
void AtPrepare_PredicateLocks(void)
Definition predicate.c:4720
void RegisterPredicateLockingXid(TransactionId xid)
Definition predicate.c:1889
#define PredicateLockHashPartitionLock(hashcode)
Definition predicate.c:257
#define SERIAL_ENTRIESPERPAGE
Definition predicate.c:332
static bool XidIsConcurrent(TransactionId xid)
Definition predicate.c:3902
static void ReleaseOneSerializableXact(SERIALIZABLEXACT *sxact, bool partial, bool summarize)
Definition predicate.c:3765
static HTAB * PredicateLockTargetHash
Definition predicate.c:410
bool CheckForSerializableConflictOutNeeded(Relation relation, Snapshot snapshot)
Definition predicate.c:3921
#define SxactIsRolledBack(sxact)
Definition predicate.c:278
static SERIALIZABLEXACT * SavedSerializableXact
Definition predicate.c:444
#define SxactHasSummaryConflictOut(sxact)
Definition predicate.c:282
static void PredicateLockShmemAttach(void *arg)
Definition predicate.c:1328
void TransferPredicateLocksToHeapRelation(Relation relation)
Definition predicate.c:3053
static void CreateLocalPredicateLockHash(void)
Definition predicate.c:1870
#define SerialSlruCtl
Definition predicate.c:328
int max_predicate_locks_per_xact
Definition predicate.c:373
Snapshot GetSerializableTransactionSnapshot(Snapshot snapshot)
Definition predicate.c:1612
void * SerializableXactHandle
Definition predicate.h:39
#define RWConflictDataSize
#define SXACT_FLAG_DEFERRABLE_WAITING
#define SXACT_FLAG_SUMMARY_CONFLICT_IN
@ TWOPHASEPREDICATERECORD_XACT
@ TWOPHASEPREDICATERECORD_LOCK
#define FirstNormalSerCommitSeqNo
#define InvalidSerCommitSeqNo
@ PREDLOCKTAG_RELATION
@ PREDLOCKTAG_PAGE
@ PREDLOCKTAG_TUPLE
#define SXACT_FLAG_CONFLICT_OUT
#define PredXactListDataSize
#define SXACT_FLAG_READ_ONLY
#define SXACT_FLAG_DOOMED
#define GET_PREDICATELOCKTARGETTAG_DB(locktag)
#define GET_PREDICATELOCKTARGETTAG_RELATION(locktag)
#define RWConflictPoolHeaderDataSize
#define InvalidSerializableXact
#define SET_PREDICATELOCKTARGETTAG_PAGE(locktag, dboid, reloid, blocknum)
#define RecoverySerCommitSeqNo
struct RWConflictData * RWConflict
#define GET_PREDICATELOCKTARGETTAG_TYPE(locktag)
#define SET_PREDICATELOCKTARGETTAG_RELATION(locktag, dboid, reloid)
uint64 SerCommitSeqNo
#define SXACT_FLAG_ROLLED_BACK
#define SXACT_FLAG_COMMITTED
#define SXACT_FLAG_RO_UNSAFE
#define SXACT_FLAG_PREPARED
#define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag, dboid, reloid, blocknum, offnum)
#define SXACT_FLAG_PARTIALLY_RELEASED
#define GET_PREDICATELOCKTARGETTAG_PAGE(locktag)
#define SXACT_FLAG_RO_SAFE
#define SXACT_FLAG_SUMMARY_CONFLICT_OUT
#define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag)
static int fb(int x)
Snapshot GetSnapshotData(Snapshot snapshot)
Definition procarray.c:2114
bool ProcArrayInstallImportedXmin(TransactionId xmin, VirtualTransactionId *sourcevxid)
Definition procarray.c:2471
#define INVALID_PROC_NUMBER
Definition procnumber.h:26
#define RelationUsesLocalBuffers(relation)
Definition rel.h:648
bool ShmemAddrIsValid(const void *addr)
Definition shmem.c:850
Size add_size(Size s1, Size s2)
Definition shmem.c:1048
Size mul_size(Size s1, Size s2)
Definition shmem.c:1063
#define ShmemRequestHash(...)
Definition shmem.h:179
#define ShmemRequestStruct(...)
Definition shmem.h:176
int SimpleLruReadPage_ReadOnly(SlruDesc *ctl, int64 pageno, const void *opaque_data)
Definition slru.c:654
void SimpleLruTruncate(SlruDesc *ctl, int64 cutoffPage)
Definition slru.c:1458
int SimpleLruZeroPage(SlruDesc *ctl, int64 pageno)
Definition slru.c:397
void SimpleLruWriteAll(SlruDesc *ctl, bool allow_redirtied)
Definition slru.c:1372
int SimpleLruReadPage(SlruDesc *ctl, int64 pageno, bool write_ok, const void *opaque_data)
Definition slru.c:550
bool check_slru_buffers(const char *name, int *newval)
Definition slru.c:377
#define SlruPagePrecedesUnitTests(ctl, per_page)
Definition slru.h:233
#define SimpleLruRequest(...)
Definition slru.h:218
static LWLock * SimpleLruGetBankLock(SlruDesc *ctl, int64 pageno)
Definition slru.h:207
Snapshot GetTransactionSnapshot(void)
Definition snapmgr.c:272
#define IsMVCCSnapshot(snapshot)
Definition snapmgr.h:59
void ProcSendSignal(ProcNumber procNumber)
Definition proc.c:2027
PGPROC * MyProc
Definition proc.c:71
void ProcWaitForSignal(uint32 wait_event_info)
Definition proc.c:2015
Size keysize
Definition dynahash.c:241
Definition proc.h:179
SERIALIZABLEXACT * myXact
PREDICATELOCKTARGET * myTarget
PREDICATELOCKTARGETTAG tag
SerCommitSeqNo commitSeqNo
SERIALIZABLEXACT * element
SerCommitSeqNo LastSxactCommitSeqNo
SerCommitSeqNo CanPartialClearThrough
SERIALIZABLEXACT * OldCommittedSxact
SerCommitSeqNo HavePartialClearedThrough
TransactionId SxactGlobalXmin
Form_pg_index rd_index
Definition rel.h:192
Oid rd_id
Definition rel.h:113
RelFileLocator rd_locator
Definition rel.h:57
VirtualTransactionId vxid
SerCommitSeqNo lastCommitBeforeSnapshot
dlist_head possibleUnsafeConflicts
SerCommitSeqNo prepareSeqNo
SerCommitSeqNo commitSeqNo
union SERIALIZABLEXACT::@134 SeqNo
TransactionId finishedBefore
SerCommitSeqNo earliestOutConflictCommit
TransactionId headXid
Definition predicate.c:350
TransactionId tailXid
Definition predicate.c:351
ShmemRequestCallback request_fn
Definition shmem.h:133
TransactionId xmin
Definition snapshot.h:153
FullTransactionId nextXid
Definition transam.h:220
PREDICATELOCKTARGETTAG target
TwoPhasePredicateRecordType type
TwoPhasePredicateLockRecord lockRecord
union TwoPhasePredicateRecord::@135 data
TwoPhasePredicateXactRecord xactRecord
dlist_node * cur
Definition ilist.h:179
dlist_node * cur
Definition ilist.h:200
@ SYNC_HANDLER_NONE
Definition sync.h:42
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition transam.h:297
#define FirstUnpinnedObjectId
Definition transam.h:196
#define InvalidTransactionId
Definition transam.h:31
static bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:282
static bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:312
#define TransactionIdEquals(id1, id2)
Definition transam.h:43
#define XidFromFullTransactionId(x)
Definition transam.h:48
#define FirstNormalTransactionId
Definition transam.h:34
#define TransactionIdIsValid(xid)
Definition transam.h:41
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
void RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info, const void *data, uint32 len)
Definition twophase.c:1277
int max_prepared_xacts
Definition twophase.c:118
#define TWOPHASE_RM_PREDICATELOCK_ID
TransamVariablesData * TransamVariables
Definition varsup.c:37
const char * name
bool XactDeferrable
Definition xact.c:87
bool XactReadOnly
Definition xact.c:84
TransactionId GetTopTransactionIdIfAny(void)
Definition xact.c:443
bool IsSubTransaction(void)
Definition xact.c:5095
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition xact.c:943
bool IsInParallelMode(void)
Definition xact.c:1119
#define IsolationIsSerializable()
Definition xact.h:53
bool RecoveryInProgress(void)
Definition xlog.c:6830

◆ PredicateLockHashCodeFromTargetHashCode

#define PredicateLockHashCodeFromTargetHashCode (   predicatelocktag,
  targethash 
)
Value:
#define LOG2_NUM_PREDICATELOCK_PARTITIONS
Definition lwlock.h:90
static Datum PointerGetDatum(const void *X)
Definition postgres.h:342

Definition at line 315 of file predicate.c.

◆ PredicateLockHashPartition

#define PredicateLockHashPartition (   hashcode)     ((hashcode) % NUM_PREDICATELOCK_PARTITIONS)

Definition at line 255 of file predicate.c.

◆ PredicateLockHashPartitionLock

#define PredicateLockHashPartitionLock (   hashcode)
Value:
LWLockPadded * MainLWLockArray
Definition lwlock.c:150
#define PREDICATELOCK_MANAGER_LWLOCK_OFFSET
Definition lwlock.h:97
LWLock lock
Definition lwlock.h:70

Definition at line 257 of file predicate.c.

◆ PredicateLockHashPartitionLockByIndex

#define PredicateLockHashPartitionLockByIndex (   i)     (&MainLWLockArray[PREDICATELOCK_MANAGER_LWLOCK_OFFSET + (i)].lock)

Definition at line 260 of file predicate.c.

◆ PredicateLockTargetTagHashCode

Definition at line 302 of file predicate.c.

◆ SERIAL_ENTRIESPERPAGE

#define SERIAL_ENTRIESPERPAGE   (SERIAL_PAGESIZE / SERIAL_ENTRYSIZE)

Definition at line 332 of file predicate.c.

◆ SERIAL_ENTRYSIZE

#define SERIAL_ENTRYSIZE   sizeof(SerCommitSeqNo)

Definition at line 331 of file predicate.c.

◆ SERIAL_MAX_PAGE

#define SERIAL_MAX_PAGE   (MaxTransactionId / SERIAL_ENTRIESPERPAGE)

Definition at line 337 of file predicate.c.

◆ SERIAL_PAGESIZE

#define SERIAL_PAGESIZE   BLCKSZ

Definition at line 330 of file predicate.c.

◆ SerialNextPage

#define SerialNextPage (   page)    (((page) >= SERIAL_MAX_PAGE) ? 0 : (page) + 1)

Definition at line 339 of file predicate.c.

◆ SerialPage

#define SerialPage (   xid)    (((uint32) (xid)) / SERIAL_ENTRIESPERPAGE)

Definition at line 345 of file predicate.c.

◆ SerialSlruCtl

#define SerialSlruCtl   (&SerialSlruDesc)

Definition at line 328 of file predicate.c.

◆ SerialValue

#define SerialValue (   slotno,
  xid 
)
Value:
(*((SerCommitSeqNo *) \
(SerialSlruCtl->shared->page_buffer[slotno] + \
#define SERIAL_ENTRYSIZE
Definition predicate.c:331

Definition at line 341 of file predicate.c.

◆ SxactHasConflictOut

#define SxactHasConflictOut (   sxact)    (((sxact)->flags & SXACT_FLAG_CONFLICT_OUT) != 0)

Definition at line 288 of file predicate.c.

◆ SxactHasSummaryConflictIn

#define SxactHasSummaryConflictIn (   sxact)    (((sxact)->flags & SXACT_FLAG_SUMMARY_CONFLICT_IN) != 0)

Definition at line 281 of file predicate.c.

◆ SxactHasSummaryConflictOut

#define SxactHasSummaryConflictOut (   sxact)    (((sxact)->flags & SXACT_FLAG_SUMMARY_CONFLICT_OUT) != 0)

Definition at line 282 of file predicate.c.

◆ SxactIsCommitted

#define SxactIsCommitted (   sxact)    (((sxact)->flags & SXACT_FLAG_COMMITTED) != 0)

Definition at line 276 of file predicate.c.

◆ SxactIsDeferrableWaiting

#define SxactIsDeferrableWaiting (   sxact)    (((sxact)->flags & SXACT_FLAG_DEFERRABLE_WAITING) != 0)

Definition at line 289 of file predicate.c.

◆ SxactIsDoomed

#define SxactIsDoomed (   sxact)    (((sxact)->flags & SXACT_FLAG_DOOMED) != 0)

Definition at line 279 of file predicate.c.

◆ SxactIsOnFinishedList

#define SxactIsOnFinishedList (   sxact)    (!dlist_node_is_detached(&(sxact)->finishedLink))

Definition at line 266 of file predicate.c.

◆ SxactIsPartiallyReleased

#define SxactIsPartiallyReleased (   sxact)    (((sxact)->flags & SXACT_FLAG_PARTIALLY_RELEASED) != 0)

Definition at line 292 of file predicate.c.

◆ SxactIsPrepared

#define SxactIsPrepared (   sxact)    (((sxact)->flags & SXACT_FLAG_PREPARED) != 0)

Definition at line 277 of file predicate.c.

◆ SxactIsReadOnly

#define SxactIsReadOnly (   sxact)    (((sxact)->flags & SXACT_FLAG_READ_ONLY) != 0)

Definition at line 280 of file predicate.c.

◆ SxactIsRolledBack

#define SxactIsRolledBack (   sxact)    (((sxact)->flags & SXACT_FLAG_ROLLED_BACK) != 0)

Definition at line 278 of file predicate.c.

◆ SxactIsROSafe

#define SxactIsROSafe (   sxact)    (((sxact)->flags & SXACT_FLAG_RO_SAFE) != 0)

Definition at line 290 of file predicate.c.

◆ SxactIsROUnsafe

#define SxactIsROUnsafe (   sxact)    (((sxact)->flags & SXACT_FLAG_RO_UNSAFE) != 0)

Definition at line 291 of file predicate.c.

◆ TargetTagIsCoveredBy

Typedef Documentation

◆ SerialControl

Definition at line 354 of file predicate.c.

◆ SerialControlData

Function Documentation

◆ AtPrepare_PredicateLocks()

void AtPrepare_PredicateLocks ( void  )

Definition at line 4720 of file predicate.c.

4721{
4724 TwoPhasePredicateXactRecord *xactRecord;
4725 TwoPhasePredicateLockRecord *lockRecord;
4726 dlist_iter iter;
4727
4729 xactRecord = &(record.data.xactRecord);
4730 lockRecord = &(record.data.lockRecord);
4731
4733 return;
4734
4735 /* Generate an xact record for our SERIALIZABLEXACT */
4737 xactRecord->xmin = MySerializableXact->xmin;
4738 xactRecord->flags = MySerializableXact->flags;
4739
4740 /*
4741 * Note that we don't include the list of conflicts in or out in the
4742 * statefile, because new conflicts can be added even after the
4743 * transaction prepares. We'll just make a conservative assumption during
4744 * recovery instead.
4745 */
4746
4748 &record, sizeof(record));
4749
4750 /*
4751 * Generate a lock record for each lock.
4752 *
4753 * To do this, we need to walk the predicate lock list in our sxact rather
4754 * than using the local predicate lock table because the latter is not
4755 * guaranteed to be accurate.
4756 */
4758
4759 /*
4760 * No need to take sxact->perXactPredicateListLock in parallel mode
4761 * because there cannot be any parallel workers running while we are
4762 * preparing a transaction.
4763 */
4765
4766 dlist_foreach(iter, &sxact->predicateLocks)
4767 {
4769 dlist_container(PREDICATELOCK, xactLink, iter.cur);
4770
4772 lockRecord->target = predlock->tag.myTarget->tag;
4773
4775 &record, sizeof(record));
4776 }
4777
4779}

References Assert, dlist_iter::cur, TwoPhasePredicateRecord::data, dlist_container, dlist_foreach, fb(), SERIALIZABLEXACT::flags, TwoPhasePredicateXactRecord::flags, InvalidSerializableXact, IsParallelWorker, TwoPhasePredicateRecord::lockRecord, LW_SHARED, LWLockAcquire(), LWLockRelease(), MySerializableXact, ParallelContextActive(), RegisterTwoPhaseRecord(), TwoPhasePredicateLockRecord::target, TWOPHASE_RM_PREDICATELOCK_ID, TWOPHASEPREDICATERECORD_LOCK, TWOPHASEPREDICATERECORD_XACT, TwoPhasePredicateRecord::type, TwoPhasePredicateRecord::xactRecord, SERIALIZABLEXACT::xmin, and TwoPhasePredicateXactRecord::xmin.

Referenced by PrepareTransaction().

◆ AttachSerializableXact()

◆ check_serial_buffers()

bool check_serial_buffers ( int newval,
void **  extra,
GucSource  source 
)

Definition at line 828 of file predicate.c.

829{
830 return check_slru_buffers("serializable_buffers", newval);
831}

References check_slru_buffers(), and newval.

◆ CheckAndPromotePredicateLockRequest()

static bool CheckAndPromotePredicateLockRequest ( const PREDICATELOCKTARGETTAG reqtag)
static

Definition at line 2256 of file predicate.c.

2257{
2259 nexttag,
2262 bool found,
2263 promote;
2264
2265 promote = false;
2266
2267 targettag = *reqtag;
2268
2269 /* check parents iteratively */
2271 {
2274 &targettag,
2275 HASH_ENTER,
2276 &found);
2277 if (!found)
2278 {
2279 parentlock->held = false;
2280 parentlock->childLocks = 1;
2281 }
2282 else
2283 parentlock->childLocks++;
2284
2285 if (parentlock->childLocks >
2287 {
2288 /*
2289 * We should promote to this parent lock. Continue to check its
2290 * ancestors, however, both to get their child counts right and to
2291 * check whether we should just go ahead and promote to one of
2292 * them.
2293 */
2295 promote = true;
2296 }
2297 }
2298
2299 if (promote)
2300 {
2301 /* acquire coarsest ancestor eligible for promotion */
2303 return true;
2304 }
2305 else
2306 return false;
2307}

References fb(), GetParentPredicateLockTag(), HASH_ENTER, hash_search(), LOCALPREDICATELOCK::held, LocalPredicateLockHash, MaxPredicateChildLocks(), and PredicateLockAcquire().

Referenced by PredicateLockAcquire().

◆ CheckForSerializableConflictIn()

void CheckForSerializableConflictIn ( Relation  relation,
const ItemPointerData tid,
BlockNumber  blkno 
)

Definition at line 4266 of file predicate.c.

4267{
4269
4270 if (!SerializationNeededForWrite(relation))
4271 return;
4272
4273 /* Check if someone else has already decided that we need to die */
4275 ereport(ERROR,
4277 errmsg("could not serialize access due to read/write dependencies among transactions"),
4278 errdetail_internal("Reason code: Canceled on identification as a pivot, during conflict in checking."),
4279 errhint("The transaction might succeed if retried.")));
4280
4281 /*
4282 * We're doing a write which might cause rw-conflicts now or later.
4283 * Memorize that fact.
4284 */
4285 MyXactDidWrite = true;
4286
4287 /*
4288 * It is important that we check for locks from the finest granularity to
4289 * the coarsest granularity, so that granularity promotion doesn't cause
4290 * us to miss a lock. The new (coarser) lock will be acquired before the
4291 * old (finer) locks are released.
4292 *
4293 * It is not possible to take and hold a lock across the checks for all
4294 * granularities because each target could be in a separate partition.
4295 */
4296 if (tid != NULL)
4297 {
4299 relation->rd_locator.dbOid,
4300 relation->rd_id,
4304 }
4305
4306 if (blkno != InvalidBlockNumber)
4307 {
4309 relation->rd_locator.dbOid,
4310 relation->rd_id,
4311 blkno);
4313 }
4314
4316 relation->rd_locator.dbOid,
4317 relation->rd_id);
4319}

References CheckTargetForConflictsIn(), RelFileLocator::dbOid, ereport, errcode(), ERRCODE_T_R_SERIALIZATION_FAILURE, errdetail_internal(), errhint(), errmsg, ERROR, fb(), InvalidBlockNumber, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), MySerializableXact, MyXactDidWrite, RelationData::rd_id, RelationData::rd_locator, SerializationNeededForWrite(), SET_PREDICATELOCKTARGETTAG_PAGE, SET_PREDICATELOCKTARGETTAG_RELATION, SET_PREDICATELOCKTARGETTAG_TUPLE, and SxactIsDoomed.

Referenced by _bt_check_unique(), _bt_doinsert(), _hash_doinsert(), ginEntryInsert(), ginFindLeafPage(), ginHeapTupleFastInsert(), gistinserttuples(), heap_delete(), heap_insert(), heap_multi_insert(), heap_update(), and index_insert().

◆ CheckForSerializableConflictOut()

void CheckForSerializableConflictOut ( Relation  relation,
TransactionId  xid,
Snapshot  snapshot 
)

Definition at line 3953 of file predicate.c.

3954{
3958
3959 if (!SerializationNeededForRead(relation, snapshot))
3960 return;
3961
3962 /* Check if someone else has already decided that we need to die */
3964 {
3965 ereport(ERROR,
3967 errmsg("could not serialize access due to read/write dependencies among transactions"),
3968 errdetail_internal("Reason code: Canceled on identification as a pivot, during conflict out checking."),
3969 errhint("The transaction might succeed if retried.")));
3970 }
3972
3974 return;
3975
3976 /*
3977 * Find sxact or summarized info for the top level xid.
3978 */
3979 sxidtag.xid = xid;
3981 sxid = (SERIALIZABLEXID *)
3983 if (!sxid)
3984 {
3985 /*
3986 * Transaction not found in "normal" SSI structures. Check whether it
3987 * got pushed out to SLRU storage for "old committed" transactions.
3988 */
3990
3992 if (conflictCommitSeqNo != 0)
3993 {
3998 ereport(ERROR,
4000 errmsg("could not serialize access due to read/write dependencies among transactions"),
4001 errdetail_internal("Reason code: Canceled on conflict out to old pivot %u.", xid),
4002 errhint("The transaction might succeed if retried.")));
4003
4006 ereport(ERROR,
4008 errmsg("could not serialize access due to read/write dependencies among transactions"),
4009 errdetail_internal("Reason code: Canceled on identification as a pivot, with conflict out to old committed transaction %u.", xid),
4010 errhint("The transaction might succeed if retried.")));
4011
4013 }
4014
4015 /* It's not serializable or otherwise not important. */
4017 return;
4018 }
4019 sxact = sxid->myXact;
4020 Assert(TransactionIdEquals(sxact->topXid, xid));
4022 {
4023 /* Can't conflict with ourselves or a transaction that will roll back. */
4025 return;
4026 }
4027
4028 /*
4029 * We have a conflict out to a transaction which has a conflict out to a
4030 * summarized transaction. That summarized transaction must have
4031 * committed first, and we can't tell when it committed in relation to our
4032 * snapshot acquisition, so something needs to be canceled.
4033 */
4035 {
4036 if (!SxactIsPrepared(sxact))
4037 {
4038 sxact->flags |= SXACT_FLAG_DOOMED;
4040 return;
4041 }
4042 else
4043 {
4045 ereport(ERROR,
4047 errmsg("could not serialize access due to read/write dependencies among transactions"),
4048 errdetail_internal("Reason code: Canceled on conflict out to old pivot."),
4049 errhint("The transaction might succeed if retried.")));
4050 }
4051 }
4052
4053 /*
4054 * If this is a read-only transaction and the writing transaction has
4055 * committed, and it doesn't have a rw-conflict to a transaction which
4056 * committed before it, no conflict.
4057 */
4062 || MySerializableXact->SeqNo.lastCommitBeforeSnapshot < sxact->SeqNo.earliestOutConflictCommit))
4063 {
4064 /* Read-only transaction will appear to run first. No conflict. */
4066 return;
4067 }
4068
4069 if (!XidIsConcurrent(xid))
4070 {
4071 /* This write was already in our snapshot; no conflict. */
4073 return;
4074 }
4075
4077 {
4078 /* We don't want duplicate conflict records in the list. */
4080 return;
4081 }
4082
4083 /*
4084 * Flag the conflict. But first, if this conflict creates a dangerous
4085 * structure, ereport an error.
4086 */
4089}

References Assert, dlist_is_empty(), ereport, errcode(), ERRCODE_T_R_SERIALIZATION_FAILURE, errdetail_internal(), errhint(), errmsg, ERROR, fb(), FlagRWConflict(), SERIALIZABLEXACT::flags, GetTopTransactionIdIfAny(), HASH_FIND, hash_search(), SERIALIZABLEXACT::inConflicts, InvalidSerCommitSeqNo, SERIALIZABLEXACT::lastCommitBeforeSnapshot, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MySerializableXact, RWConflictExists(), SERIALIZABLEXACT::SeqNo, SerialGetMinConflictCommitSeqNo(), SerializableXidHash, SerializationNeededForRead(), SXACT_FLAG_DOOMED, SXACT_FLAG_SUMMARY_CONFLICT_OUT, SxactHasConflictOut, SxactHasSummaryConflictIn, SxactHasSummaryConflictOut, SxactIsCommitted, SxactIsDoomed, SxactIsPrepared, SxactIsReadOnly, TransactionIdEquals, TransactionIdIsValid, and XidIsConcurrent().

Referenced by HeapCheckForSerializableConflictOut().

◆ CheckForSerializableConflictOutNeeded()

bool CheckForSerializableConflictOutNeeded ( Relation  relation,
Snapshot  snapshot 
)

Definition at line 3921 of file predicate.c.

3922{
3923 if (!SerializationNeededForRead(relation, snapshot))
3924 return false;
3925
3926 /* Check if someone else has already decided that we need to die */
3928 {
3929 ereport(ERROR,
3931 errmsg("could not serialize access due to read/write dependencies among transactions"),
3932 errdetail_internal("Reason code: Canceled on identification as a pivot, during conflict out checking."),
3933 errhint("The transaction might succeed if retried.")));
3934 }
3935
3936 return true;
3937}

References ereport, errcode(), ERRCODE_T_R_SERIALIZATION_FAILURE, errdetail_internal(), errhint(), errmsg, ERROR, MySerializableXact, SerializationNeededForRead(), and SxactIsDoomed.

Referenced by heap_prepare_pagescan(), and HeapCheckForSerializableConflictOut().

◆ CheckPointPredicate()

void CheckPointPredicate ( void  )

Definition at line 1022 of file predicate.c.

1023{
1025
1027
1028 /* Exit quickly if the SLRU is currently not in use. */
1029 if (serialControl->headPage < 0)
1030 {
1032 return;
1033 }
1034
1036 {
1038
1040
1041 /*
1042 * It is possible for the tailXid to be ahead of the headXid. This
1043 * occurs if we checkpoint while there are in-progress serializable
1044 * transaction(s) advancing the tail but we are yet to summarize the
1045 * transactions. In this case, we cut off up to the headPage and the
1046 * next summary will advance the headXid.
1047 */
1049 {
1050 /* We can truncate the SLRU up to the page containing tailXid */
1052 }
1053 else
1055 }
1056 else
1057 {
1058 /*----------
1059 * The SLRU is no longer needed. Truncate to head before we set head
1060 * invalid.
1061 *
1062 * XXX: It's possible that the SLRU is not needed again until XID
1063 * wrap-around has happened, so that the segment containing headPage
1064 * that we leave behind will appear to be new again. In that case it
1065 * won't be removed until XID horizon advances enough to make it
1066 * current again.
1067 *
1068 * XXX: This should happen in vac_truncate_clog(), not in checkpoints.
1069 * Consider this scenario, starting from a system with no in-progress
1070 * transactions and VACUUM FREEZE having maximized oldestXact:
1071 * - Start a SERIALIZABLE transaction.
1072 * - Start, finish, and summarize a SERIALIZABLE transaction, creating
1073 * one SLRU page.
1074 * - Consume XIDs to reach xidStopLimit.
1075 * - Finish all transactions. Due to the long-running SERIALIZABLE
1076 * transaction, earlier checkpoints did not touch headPage. The
1077 * next checkpoint will change it, but that checkpoint happens after
1078 * the end of the scenario.
1079 * - VACUUM to advance XID limits.
1080 * - Consume ~2M XIDs, crossing the former xidWrapLimit.
1081 * - Start, finish, and summarize a SERIALIZABLE transaction.
1082 * SerialAdd() declines to create the targetPage, because headPage
1083 * is not regarded as in the past relative to that targetPage. The
1084 * transaction instigating the summarize fails in
1085 * SimpleLruReadPage().
1086 */
1088 serialControl->headPage = -1;
1089 }
1090
1092
1093 /*
1094 * Truncate away pages that are no longer required. Note that no
1095 * additional locking is required, because this is only called as part of
1096 * a checkpoint, and the validity limits have already been determined.
1097 */
1099
1100 /*
1101 * Write dirty SLRU pages to disk
1102 *
1103 * This is not actually necessary from a correctness point of view. We do
1104 * it merely as a debugging aid.
1105 *
1106 * We're doing this after the truncation to avoid writing pages right
1107 * before deleting the file in which they sit, which would be completely
1108 * pointless.
1109 */
1111}

References fb(), SerialControlData::headPage, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), serialControl, SerialPage, SerialPagePrecedesLogically(), SerialSlruCtl, SimpleLruTruncate(), SimpleLruWriteAll(), SerialControlData::tailXid, and TransactionIdIsValid.

Referenced by CheckPointGuts().

◆ CheckTableForSerializableConflictIn()

void CheckTableForSerializableConflictIn ( Relation  relation)

Definition at line 4349 of file predicate.c.

4350{
4352 PREDICATELOCKTARGET *target;
4353 Oid dbId;
4354 Oid heapId;
4355 int i;
4356
4357 /*
4358 * Bail out quickly if there are no serializable transactions running.
4359 * It's safe to check this without taking locks because the caller is
4360 * holding an ACCESS EXCLUSIVE lock on the relation. No new locks which
4361 * would matter here can be acquired while that is held.
4362 */
4364 return;
4365
4366 if (!SerializationNeededForWrite(relation))
4367 return;
4368
4369 /*
4370 * We're doing a write which might cause rw-conflicts now or later.
4371 * Memorize that fact.
4372 */
4373 MyXactDidWrite = true;
4374
4375 Assert(relation->rd_index == NULL); /* not an index relation */
4376
4377 dbId = relation->rd_locator.dbOid;
4378 heapId = relation->rd_id;
4379
4381 for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
4384
4385 /* Scan through target list */
4387
4388 while ((target = (PREDICATELOCKTARGET *) hash_seq_search(&seqstat)))
4389 {
4390 dlist_mutable_iter iter;
4391
4392 /*
4393 * Check whether this is a target which needs attention.
4394 */
4396 continue; /* wrong relation id */
4397 if (GET_PREDICATELOCKTARGETTAG_DB(target->tag) != dbId)
4398 continue; /* wrong database id */
4399
4400 /*
4401 * Loop through locks for this target and flag conflicts.
4402 */
4403 dlist_foreach_modify(iter, &target->predicateLocks)
4404 {
4406 dlist_container(PREDICATELOCK, targetLink, iter.cur);
4407
4408 if (predlock->tag.myXact != MySerializableXact
4410 {
4412 }
4413 }
4414 }
4415
4416 /* Release locks in reverse order */
4418 for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
4421}

References Assert, dlist_mutable_iter::cur, RelFileLocator::dbOid, dlist_container, dlist_foreach_modify, fb(), FlagRWConflict(), GET_PREDICATELOCKTARGETTAG_DB, GET_PREDICATELOCKTARGETTAG_RELATION, hash_seq_init(), hash_seq_search(), i, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MySerializableXact, MyXactDidWrite, NUM_PREDICATELOCK_PARTITIONS, PredicateLockHashPartitionLockByIndex, PREDICATELOCKTARGET::predicateLocks, PredicateLockTargetHash, PredXact, RelationData::rd_id, RelationData::rd_index, RelationData::rd_locator, RWConflictExists(), SerializationNeededForWrite(), PredXactListData::SxactGlobalXmin, PREDICATELOCKTARGET::tag, and TransactionIdIsValid.

Referenced by ExecuteTruncateGuts(), and heap_drop_with_catalog().

◆ CheckTargetForConflictsIn()

static void CheckTargetForConflictsIn ( PREDICATELOCKTARGETTAG targettag)
static

Definition at line 4096 of file predicate.c.

4097{
4100 PREDICATELOCKTARGET *target;
4103 dlist_mutable_iter iter;
4104
4106
4107 /*
4108 * The same hash and LW lock apply to the lock target and the lock itself.
4109 */
4113 target = (PREDICATELOCKTARGET *)
4116 HASH_FIND, NULL);
4117 if (!target)
4118 {
4119 /* Nothing has this target locked; we're done here. */
4121 return;
4122 }
4123
4124 /*
4125 * Each lock for an overlapping transaction represents a conflict: a
4126 * rw-dependency in to this transaction.
4127 */
4129
4130 dlist_foreach_modify(iter, &target->predicateLocks)
4131 {
4133 dlist_container(PREDICATELOCK, targetLink, iter.cur);
4134 SERIALIZABLEXACT *sxact = predlock->tag.myXact;
4135
4137 {
4138 /*
4139 * If we're getting a write lock on a tuple, we don't need a
4140 * predicate (SIREAD) lock on the same tuple. We can safely remove
4141 * our SIREAD lock, but we'll defer doing so until after the loop
4142 * because that requires upgrading to an exclusive partition lock.
4143 *
4144 * We can't use this optimization within a subtransaction because
4145 * the subtransaction could roll back, and we would be left
4146 * without any lock at the top level.
4147 */
4148 if (!IsSubTransaction()
4150 {
4152 mypredlocktag = predlock->tag;
4153 }
4154 }
4155 else if (!SxactIsDoomed(sxact)
4158 sxact->finishedBefore))
4160 {
4163
4164 /*
4165 * Re-check after getting exclusive lock because the other
4166 * transaction may have flagged a conflict.
4167 */
4168 if (!SxactIsDoomed(sxact)
4171 sxact->finishedBefore))
4173 {
4175 }
4176
4179 }
4180 }
4183
4184 /*
4185 * If we found one of our own SIREAD locks to remove, remove it now.
4186 *
4187 * At this point our transaction already has a RowExclusiveLock on the
4188 * relation, so we are OK to drop the predicate lock on the tuple, if
4189 * found, without fearing that another write against the tuple will occur
4190 * before the MVCC information makes it to the buffer.
4191 */
4192 if (mypredlock != NULL)
4193 {
4196
4198 if (IsInParallelMode())
4202
4203 /*
4204 * Remove the predicate lock from shared memory, if it wasn't removed
4205 * while the locks were released. One way that could happen is from
4206 * autovacuum cleaning up an index.
4207 */
4214 HASH_FIND, NULL);
4215 if (rmpredlock != NULL)
4216 {
4218
4219 dlist_delete(&(mypredlock->targetLink));
4220 dlist_delete(&(mypredlock->xactLink));
4221
4226 HASH_REMOVE, NULL);
4228
4230 }
4231
4234 if (IsInParallelMode())
4237
4238 if (rmpredlock != NULL)
4239 {
4240 /*
4241 * Remove entry in local lock table if it exists. It's OK if it
4242 * doesn't exist; that means the lock was transferred to a new
4243 * target by a different backend.
4244 */
4247 HASH_REMOVE, NULL);
4248
4250 }
4251 }
4252}

References Assert, dlist_mutable_iter::cur, DecrementParentLocks(), dlist_container, dlist_delete(), dlist_foreach_modify, fb(), FlagRWConflict(), GET_PREDICATELOCKTARGETTAG_OFFSET, GetTransactionSnapshot(), HASH_FIND, HASH_REMOVE, hash_search_with_hash_value(), InvalidSerializableXact, IsInParallelMode(), IsSubTransaction(), LocalPredicateLockHash, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MySerializableXact, SERIALIZABLEXACT::perXactPredicateListLock, PredicateLockHash, PredicateLockHashCodeFromTargetHashCode, PredicateLockHashPartitionLock, PREDICATELOCKTARGET::predicateLocks, PredicateLockTargetHash, PredicateLockTargetTagHashCode, RemoveTargetIfNoLongerUsed(), RWConflictExists(), SxactIsCommitted, SxactIsDoomed, and TransactionIdPrecedes().

Referenced by CheckForSerializableConflictIn().

◆ ClearOldPredicateLocks()

static void ClearOldPredicateLocks ( void  )
static

Definition at line 3627 of file predicate.c.

3628{
3629 dlist_mutable_iter iter;
3630
3631 /*
3632 * Loop through finished transactions. They are in commit order, so we can
3633 * stop as soon as we find one that's still interesting.
3634 */
3638 {
3640 dlist_container(SERIALIZABLEXACT, finishedLink, iter.cur);
3641
3645 {
3646 /*
3647 * This transaction committed before any in-progress transaction
3648 * took its snapshot. It's no longer interesting.
3649 */
3651 dlist_delete_thoroughly(&finishedSxact->finishedLink);
3654 }
3655 else if (finishedSxact->commitSeqNo > PredXact->HavePartialClearedThrough
3656 && finishedSxact->commitSeqNo <= PredXact->CanPartialClearThrough)
3657 {
3658 /*
3659 * Any active transactions that took their snapshot before this
3660 * transaction committed are read-only, so we can clear part of
3661 * its state.
3662 */
3664
3666 {
3667 /* A read-only transaction can be removed entirely */
3668 dlist_delete_thoroughly(&(finishedSxact->finishedLink));
3670 }
3671 else
3672 {
3673 /*
3674 * A read-write transaction can only be partially cleared. We
3675 * need to keep the SERIALIZABLEXACT but can release the
3676 * SIREAD locks and conflicts in.
3677 */
3679 }
3680
3683 }
3684 else
3685 {
3686 /* Still interesting. */
3687 break;
3688 }
3689 }
3691
3692 /*
3693 * Loop through predicate locks on dummy transaction for summarized data.
3694 */
3697 {
3699 dlist_container(PREDICATELOCK, xactLink, iter.cur);
3701
3703 Assert(predlock->commitSeqNo != 0);
3704 Assert(predlock->commitSeqNo != InvalidSerCommitSeqNo);
3707
3708 /*
3709 * If this lock originally belonged to an old enough transaction, we
3710 * can release it.
3711 */
3713 {
3714 PREDICATELOCKTAG tag;
3715 PREDICATELOCKTARGET *target;
3719
3720 tag = predlock->tag;
3721 target = tag.myTarget;
3722 targettag = target->tag;
3725
3727
3728 dlist_delete(&(predlock->targetLink));
3729 dlist_delete(&(predlock->xactLink));
3730
3734 HASH_REMOVE, NULL);
3736
3738 }
3739 }
3740
3743}

References Assert, PredXactListData::CanPartialClearThrough, dlist_mutable_iter::cur, dlist_container, dlist_delete(), dlist_delete_thoroughly(), dlist_foreach_modify, fb(), FinishedSerializableTransactions, HASH_REMOVE, hash_search_with_hash_value(), PredXactListData::HavePartialClearedThrough, InvalidSerCommitSeqNo, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), PREDICATELOCKTAG::myTarget, OldCommittedSxact, PredicateLockHash, PredicateLockHashCodeFromTargetHashCode, PredicateLockHashPartitionLock, SERIALIZABLEXACT::predicateLocks, PredicateLockTargetTagHashCode, PredXact, ReleaseOneSerializableXact(), RemoveTargetIfNoLongerUsed(), PredXactListData::SxactGlobalXmin, SxactIsReadOnly, PREDICATELOCKTARGET::tag, TransactionIdIsValid, and TransactionIdPrecedesOrEquals().

Referenced by ReleasePredicateLocks().

◆ CoarserLockCovers()

static bool CoarserLockCovers ( const PREDICATELOCKTARGETTAG newtargettag)
static

Definition at line 2041 of file predicate.c.

2042{
2044 parenttag;
2045
2047
2048 /* check parents iteratively until no more */
2050 {
2053 return true;
2054 }
2055
2056 /* no more parents to check; lock is not covered */
2057 return false;
2058}

References fb(), GetParentPredicateLockTag(), and PredicateLockExists().

Referenced by PredicateLockAcquire().

◆ CreateLocalPredicateLockHash()

static void CreateLocalPredicateLockHash ( void  )
static

Definition at line 1870 of file predicate.c.

1871{
1873
1874 /* Initialize the backend-local hash table of parent locks */
1876 hash_ctl.keysize = sizeof(PREDICATELOCKTARGETTAG);
1877 hash_ctl.entrysize = sizeof(LOCALPREDICATELOCK);
1878 LocalPredicateLockHash = hash_create("Local predicate lock",
1880 &hash_ctl,
1882}

References Assert, fb(), HASH_BLOBS, hash_create(), HASH_ELEM, LocalPredicateLockHash, and max_predicate_locks_per_xact.

Referenced by AttachSerializableXact(), and GetSerializableTransactionSnapshotInt().

◆ CreatePredicateLock()

static void CreatePredicateLock ( const PREDICATELOCKTARGETTAG targettag,
uint32  targettaghash,
SERIALIZABLEXACT sxact 
)
static

Definition at line 2383 of file predicate.c.

2386{
2387 PREDICATELOCKTARGET *target;
2388 PREDICATELOCKTAG locktag;
2389 PREDICATELOCK *lock;
2391 bool found;
2392
2394
2396 if (IsInParallelMode())
2397 LWLockAcquire(&sxact->perXactPredicateListLock, LW_EXCLUSIVE);
2399
2400 /* Make sure that the target is represented. */
2401 target = (PREDICATELOCKTARGET *)
2404 HASH_ENTER_NULL, &found);
2405 if (!target)
2406 ereport(ERROR,
2408 errmsg("out of shared memory"),
2409 errhint("You might need to increase \"%s\".", "max_pred_locks_per_transaction")));
2410 if (!found)
2411 dlist_init(&target->predicateLocks);
2412
2413 /* We've got the sxact and target, make sure they're joined. */
2414 locktag.myTarget = target;
2415 locktag.myXact = sxact;
2416 lock = (PREDICATELOCK *)
2419 HASH_ENTER_NULL, &found);
2420 if (!lock)
2421 ereport(ERROR,
2423 errmsg("out of shared memory"),
2424 errhint("You might need to increase \"%s\".", "max_pred_locks_per_transaction")));
2425
2426 if (!found)
2427 {
2428 dlist_push_tail(&target->predicateLocks, &lock->targetLink);
2429 dlist_push_tail(&sxact->predicateLocks, &lock->xactLink);
2431 }
2432
2434 if (IsInParallelMode())
2435 LWLockRelease(&sxact->perXactPredicateListLock);
2437}

References PREDICATELOCK::commitSeqNo, dlist_init(), dlist_push_tail(), ereport, errcode(), errhint(), errmsg, ERROR, fb(), HASH_ENTER_NULL, hash_search_with_hash_value(), InvalidSerCommitSeqNo, IsInParallelMode(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), PREDICATELOCKTAG::myTarget, PREDICATELOCKTAG::myXact, PredicateLockHash, PredicateLockHashCodeFromTargetHashCode, PredicateLockHashPartitionLock, PREDICATELOCKTARGET::predicateLocks, PredicateLockTargetHash, PREDICATELOCK::targetLink, and PREDICATELOCK::xactLink.

Referenced by predicatelock_twophase_recover(), and PredicateLockAcquire().

◆ CreatePredXact()

◆ DecrementParentLocks()

static void DecrementParentLocks ( const PREDICATELOCKTARGETTAG targettag)
static

Definition at line 2321 of file predicate.c.

2322{
2324 nexttag;
2325
2327
2329 {
2333
2339 HASH_FIND, NULL);
2340
2341 /*
2342 * There's a small chance the parent lock doesn't exist in the lock
2343 * table. This can happen if we prematurely removed it because an
2344 * index split caused the child refcount to be off.
2345 */
2346 if (parentlock == NULL)
2347 continue;
2348
2349 parentlock->childLocks--;
2350
2351 /*
2352 * Under similar circumstances the parent lock's refcount might be
2353 * zero. This only happens if we're holding that lock (otherwise we
2354 * would have removed the entry).
2355 */
2356 if (parentlock->childLocks < 0)
2357 {
2358 Assert(parentlock->held);
2359 parentlock->childLocks = 0;
2360 }
2361
2362 if ((parentlock->childLocks == 0) && (!parentlock->held))
2363 {
2367 HASH_REMOVE, NULL);
2369 }
2370 }
2371}

References Assert, fb(), GetParentPredicateLockTag(), HASH_FIND, HASH_REMOVE, hash_search_with_hash_value(), LocalPredicateLockHash, PG_USED_FOR_ASSERTS_ONLY, and PredicateLockTargetTagHashCode.

Referenced by CheckTargetForConflictsIn(), and DeleteChildTargetLocks().

◆ DeleteChildTargetLocks()

static void DeleteChildTargetLocks ( const PREDICATELOCKTARGETTAG newtargettag)
static

Definition at line 2144 of file predicate.c.

2145{
2148 dlist_mutable_iter iter;
2149
2152 if (IsInParallelMode())
2153 LWLockAcquire(&sxact->perXactPredicateListLock, LW_EXCLUSIVE);
2154
2155 dlist_foreach_modify(iter, &sxact->predicateLocks)
2156 {
2160
2161 predlock = dlist_container(PREDICATELOCK, xactLink, iter.cur);
2162
2163 oldlocktag = predlock->tag;
2164 Assert(oldlocktag.myXact == sxact);
2165 oldtarget = oldlocktag.myTarget;
2166 oldtargettag = oldtarget->tag;
2167
2169 {
2173
2176
2178
2179 dlist_delete(&predlock->xactLink);
2180 dlist_delete(&predlock->targetLink);
2183 &oldlocktag,
2186 HASH_REMOVE, NULL);
2188
2190
2192
2194 }
2195 }
2196 if (IsInParallelMode())
2197 LWLockRelease(&sxact->perXactPredicateListLock);
2199}

References Assert, dlist_mutable_iter::cur, DecrementParentLocks(), dlist_container, dlist_delete(), dlist_foreach_modify, fb(), HASH_REMOVE, hash_search_with_hash_value(), IsInParallelMode(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MySerializableXact, PG_USED_FOR_ASSERTS_ONLY, PredicateLockHash, PredicateLockHashCodeFromTargetHashCode, PredicateLockHashPartitionLock, PredicateLockTargetTagHashCode, RemoveTargetIfNoLongerUsed(), and TargetTagIsCoveredBy.

Referenced by PredicateLockAcquire().

◆ DeleteLockTarget()

static void DeleteLockTarget ( PREDICATELOCKTARGET target,
uint32  targettaghash 
)
static

◆ DropAllPredicateLocksFromTable()

static void DropAllPredicateLocksFromTable ( Relation  relation,
bool  transfer 
)
static

Definition at line 2867 of file predicate.c.

2868{
2872 Oid dbId;
2873 Oid relId;
2874 Oid heapId;
2875 int i;
2876 bool isIndex;
2877 bool found;
2879
2880 /*
2881 * Bail out quickly if there are no serializable transactions running.
2882 * It's safe to check this without taking locks because the caller is
2883 * holding an ACCESS EXCLUSIVE lock on the relation. No new locks which
2884 * would matter here can be acquired while that is held.
2885 */
2887 return;
2888
2889 if (!PredicateLockingNeededForRelation(relation))
2890 return;
2891
2892 dbId = relation->rd_locator.dbOid;
2893 relId = relation->rd_id;
2894 if (relation->rd_index == NULL)
2895 {
2896 isIndex = false;
2897 heapId = relId;
2898 }
2899 else
2900 {
2901 isIndex = true;
2902 heapId = relation->rd_index->indrelid;
2903 }
2905 Assert(transfer || !isIndex); /* index OID only makes sense with
2906 * transfer */
2907
2908 /* Retrieve first time needed, then keep. */
2910 heaptarget = NULL;
2911
2912 /* Acquire locks on all lock partitions */
2914 for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
2917
2918 /*
2919 * Remove the dummy entry to give us scratch space, so we know we'll be
2920 * able to create the new lock target.
2921 */
2922 if (transfer)
2923 RemoveScratchTarget(true);
2924
2925 /* Scan through target map */
2927
2929 {
2930 dlist_mutable_iter iter;
2931
2932 /*
2933 * Check whether this is a target which needs attention.
2934 */
2936 continue; /* wrong relation id */
2937 if (GET_PREDICATELOCKTARGETTAG_DB(oldtarget->tag) != dbId)
2938 continue; /* wrong database id */
2939 if (transfer && !isIndex
2941 continue; /* already the right lock */
2942
2943 /*
2944 * If we made it here, we have work to do. We make sure the heap
2945 * relation lock exists, then we walk the list of predicate locks for
2946 * the old target we found, moving all locks to the heap relation lock
2947 * -- unless they already hold that.
2948 */
2949
2950 /*
2951 * First make sure we have the heap relation target. We only need to
2952 * do this once.
2953 */
2954 if (transfer && heaptarget == NULL)
2955 {
2957
2963 HASH_ENTER, &found);
2964 if (!found)
2965 dlist_init(&heaptarget->predicateLocks);
2966 }
2967
2968 /*
2969 * Loop through all the locks on the old target, replacing them with
2970 * locks on the new target.
2971 */
2972 dlist_foreach_modify(iter, &oldtarget->predicateLocks)
2973 {
2975 dlist_container(PREDICATELOCK, targetLink, iter.cur);
2979
2980 /*
2981 * Remove the old lock first. This avoids the chance of running
2982 * out of lock structure entries for the hash table.
2983 */
2985 oldXact = oldpredlock->tag.myXact;
2986
2987 dlist_delete(&(oldpredlock->xactLink));
2988
2989 /*
2990 * No need for retail delete from oldtarget list, we're removing
2991 * the whole target anyway.
2992 */
2994 &oldpredlock->tag,
2995 HASH_REMOVE, &found);
2996 Assert(found);
2997
2998 if (transfer)
2999 {
3001
3003 newpredlocktag.myXact = oldXact;
3009 HASH_ENTER,
3010 &found);
3011 if (!found)
3012 {
3013 dlist_push_tail(&(heaptarget->predicateLocks),
3014 &(newpredlock->targetLink));
3015 dlist_push_tail(&(newpredlocktag.myXact->predicateLocks),
3016 &(newpredlock->xactLink));
3017 newpredlock->commitSeqNo = oldCommitSeqNo;
3018 }
3019 else
3020 {
3021 if (newpredlock->commitSeqNo < oldCommitSeqNo)
3022 newpredlock->commitSeqNo = oldCommitSeqNo;
3023 }
3024
3025 Assert(newpredlock->commitSeqNo != 0);
3026 Assert((newpredlock->commitSeqNo == InvalidSerCommitSeqNo)
3027 || (newpredlock->tag.myXact == OldCommittedSxact));
3028 }
3029 }
3030
3032 &found);
3033 Assert(found);
3034 }
3035
3036 /* Put the scratch entry back */
3037 if (transfer)
3039
3040 /* Release locks in reverse order */
3042 for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
3045}

References Assert, SERIALIZABLEXACT::commitSeqNo, dlist_mutable_iter::cur, RelFileLocator::dbOid, dlist_container, dlist_delete(), dlist_foreach_modify, dlist_init(), dlist_push_tail(), fb(), GET_PREDICATELOCKTARGETTAG_DB, GET_PREDICATELOCKTARGETTAG_RELATION, GET_PREDICATELOCKTARGETTAG_TYPE, HASH_ENTER, HASH_REMOVE, hash_search(), hash_search_with_hash_value(), hash_seq_init(), hash_seq_search(), i, InvalidOid, InvalidSerCommitSeqNo, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), PREDICATELOCKTAG::myTarget, NUM_PREDICATELOCK_PARTITIONS, OldCommittedSxact, PredicateLockHash, PredicateLockHashCodeFromTargetHashCode, PredicateLockHashPartitionLockByIndex, PredicateLockingNeededForRelation(), PredicateLockTargetHash, PredicateLockTargetTagHashCode, PREDLOCKTAG_RELATION, PredXact, RelationData::rd_id, RelationData::rd_index, RelationData::rd_locator, RemoveScratchTarget(), RestoreScratchTarget(), SET_PREDICATELOCKTARGETTAG_RELATION, PredXactListData::SxactGlobalXmin, and TransactionIdIsValid.

Referenced by TransferPredicateLocksToHeapRelation().

◆ FlagRWConflict()

static void FlagRWConflict ( SERIALIZABLEXACT reader,
SERIALIZABLEXACT writer 
)
static

Definition at line 4431 of file predicate.c.

4432{
4433 Assert(reader != writer);
4434
4435 /* First, see if this conflict causes failure. */
4437
4438 /* Actually do the conflict flagging. */
4439 if (reader == OldCommittedSxact)
4441 else if (writer == OldCommittedSxact)
4443 else
4444 SetRWConflict(reader, writer);
4445}

References Assert, fb(), SERIALIZABLEXACT::flags, OldCommittedSxact, OnConflict_CheckForSerializationFailure(), SetRWConflict(), SXACT_FLAG_SUMMARY_CONFLICT_IN, and SXACT_FLAG_SUMMARY_CONFLICT_OUT.

Referenced by CheckForSerializableConflictOut(), CheckTableForSerializableConflictIn(), and CheckTargetForConflictsIn().

◆ FlagSxactUnsafe()

static void FlagSxactUnsafe ( SERIALIZABLEXACT sxact)
static

Definition at line 713 of file predicate.c.

714{
716
719
720 sxact->flags |= SXACT_FLAG_RO_UNSAFE;
721
722 /*
723 * We know this isn't a safe snapshot, so we can stop looking for other
724 * potential conflicts.
725 */
726 dlist_foreach_modify(iter, &sxact->possibleUnsafeConflicts)
727 {
729 dlist_container(RWConflictData, inLink, iter.cur);
730
731 Assert(!SxactIsReadOnly(conflict->sxactOut));
732 Assert(sxact == conflict->sxactIn);
733
735 }
736}

References Assert, dlist_mutable_iter::cur, dlist_container, dlist_foreach_modify, fb(), ReleaseRWConflict(), SXACT_FLAG_RO_UNSAFE, SxactIsReadOnly, and SxactIsROSafe.

Referenced by ReleasePredicateLocks().

◆ GetParentPredicateLockTag()

static bool GetParentPredicateLockTag ( const PREDICATELOCKTARGETTAG tag,
PREDICATELOCKTARGETTAG parent 
)
static

Definition at line 2002 of file predicate.c.

2004{
2005 switch (GET_PREDICATELOCKTARGETTAG_TYPE(*tag))
2006 {
2008 /* relation locks have no parent lock */
2009 return false;
2010
2011 case PREDLOCKTAG_PAGE:
2012 /* parent lock is relation lock */
2016
2017 return true;
2018
2019 case PREDLOCKTAG_TUPLE:
2020 /* parent lock is page lock */
2025 return true;
2026 }
2027
2028 /* not reachable */
2029 Assert(false);
2030 return false;
2031}

References Assert, GET_PREDICATELOCKTARGETTAG_DB, GET_PREDICATELOCKTARGETTAG_PAGE, GET_PREDICATELOCKTARGETTAG_RELATION, GET_PREDICATELOCKTARGETTAG_TYPE, PREDLOCKTAG_PAGE, PREDLOCKTAG_RELATION, PREDLOCKTAG_TUPLE, SET_PREDICATELOCKTARGETTAG_PAGE, and SET_PREDICATELOCKTARGETTAG_RELATION.

Referenced by CheckAndPromotePredicateLockRequest(), CoarserLockCovers(), DecrementParentLocks(), and PredicateLockPageSplit().

◆ GetPredicateLockStatusData()

PredicateLockData * GetPredicateLockStatusData ( void  )

Definition at line 1377 of file predicate.c.

1378{
1380 int i;
1381 int els,
1382 el;
1385
1387
1388 /*
1389 * To ensure consistency, take simultaneous locks on all partition locks
1390 * in ascending order, then SerializableXactHashLock.
1391 */
1392 for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
1395
1396 /* Get number of locks and allocate appropriately-sized arrays. */
1398 data->nelements = els;
1401
1402
1403 /* Scan through PredicateLockHash and copy contents */
1405
1406 el = 0;
1407
1409 {
1410 data->locktags[el] = predlock->tag.myTarget->tag;
1411 data->xacts[el] = *predlock->tag.myXact;
1412 el++;
1413 }
1414
1415 Assert(el == els);
1416
1417 /* Release locks in reverse order */
1419 for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
1421
1422 return data;
1423}

References Assert, data, fb(), hash_get_num_entries(), hash_seq_init(), hash_seq_search(), i, LW_SHARED, LWLockAcquire(), LWLockRelease(), NUM_PREDICATELOCK_PARTITIONS, palloc_array, palloc_object, PredicateLockHash, and PredicateLockHashPartitionLockByIndex.

Referenced by pg_lock_status().

◆ GetSafeSnapshot()

static Snapshot GetSafeSnapshot ( Snapshot  origSnapshot)
static

Definition at line 1488 of file predicate.c.

1489{
1490 Snapshot snapshot;
1491
1493
1494 while (true)
1495 {
1496 /*
1497 * GetSerializableTransactionSnapshotInt is going to call
1498 * GetSnapshotData, so we need to provide it the static snapshot area
1499 * our caller passed to us. The pointer returned is actually the same
1500 * one passed to it, but we avoid assuming that here.
1501 */
1503 NULL, InvalidPid);
1504
1506 return snapshot; /* no concurrent r/w xacts; it's safe */
1507
1509
1510 /*
1511 * Wait for concurrent transactions to finish. Stop early if one of
1512 * them marked us as conflicted.
1513 */
1517 {
1521 }
1523
1525 {
1527 break; /* success */
1528 }
1529
1531
1532 /* else, need to retry... */
1535 errmsg_internal("deferrable snapshot was unsafe; trying a new one")));
1536 ReleasePredicateLocks(false, false);
1537 }
1538
1539 /*
1540 * Now we have a safe snapshot, so we don't need to do any further checks.
1541 */
1543 ReleasePredicateLocks(false, true);
1544
1545 return snapshot;
1546}

References Assert, DEBUG2, dlist_is_empty(), ereport, errcode(), ERRCODE_T_R_SERIALIZATION_FAILURE, errmsg_internal(), fb(), SERIALIZABLEXACT::flags, GetSerializableTransactionSnapshotInt(), InvalidPid, InvalidSerializableXact, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MySerializableXact, SERIALIZABLEXACT::possibleUnsafeConflicts, ProcWaitForSignal(), ReleasePredicateLocks(), SXACT_FLAG_DEFERRABLE_WAITING, SxactIsROSafe, SxactIsROUnsafe, XactDeferrable, and XactReadOnly.

Referenced by GetSerializableTransactionSnapshot().

◆ GetSafeSnapshotBlockingPids()

int GetSafeSnapshotBlockingPids ( int  blocked_pid,
int output,
int  output_size 
)

Definition at line 1558 of file predicate.c.

1559{
1560 int num_written = 0;
1561 dlist_iter iter;
1563
1565
1566 /* Find blocked_pid's SERIALIZABLEXACT by linear search. */
1568 {
1570 dlist_container(SERIALIZABLEXACT, xactLink, iter.cur);
1571
1572 if (sxact->pid == blocked_pid)
1573 {
1575 break;
1576 }
1577 }
1578
1579 /* Did we find it, and is it currently waiting in GetSafeSnapshot? */
1581 {
1582 /* Traverse the list of possible unsafe conflicts collecting PIDs. */
1583 dlist_foreach(iter, &blocking_sxact->possibleUnsafeConflicts)
1584 {
1586 dlist_container(RWConflictData, inLink, iter.cur);
1587
1588 output[num_written++] = possibleUnsafeConflict->sxactOut->pid;
1589
1590 if (num_written >= output_size)
1591 break;
1592 }
1593 }
1594
1596
1597 return num_written;
1598}

References PredXactListData::activeList, dlist_iter::cur, dlist_container, dlist_foreach, fb(), LW_SHARED, LWLockAcquire(), LWLockRelease(), output, PredXact, and SxactIsDeferrableWaiting.

Referenced by pg_isolation_test_session_is_blocked(), and pg_safe_snapshot_blocking_pids().

◆ GetSerializableTransactionSnapshot()

Snapshot GetSerializableTransactionSnapshot ( Snapshot  snapshot)

Definition at line 1612 of file predicate.c.

1613{
1615
1616 /*
1617 * Can't use serializable mode while recovery is still active, as it is,
1618 * for example, on a hot standby. We could get here despite the check in
1619 * check_transaction_isolation() if default_transaction_isolation is set
1620 * to serializable, so phrase the hint accordingly.
1621 */
1622 if (RecoveryInProgress())
1623 ereport(ERROR,
1625 errmsg("cannot use serializable mode in a hot standby"),
1626 errdetail("\"default_transaction_isolation\" is set to \"serializable\"."),
1627 errhint("You can use \"SET default_transaction_isolation = 'repeatable read'\" to change the default.")));
1628
1629 /*
1630 * A special optimization is available for SERIALIZABLE READ ONLY
1631 * DEFERRABLE transactions -- we can wait for a suitable snapshot and
1632 * thereby avoid all SSI overhead once it's running.
1633 */
1635 return GetSafeSnapshot(snapshot);
1636
1638 NULL, InvalidPid);
1639}

References Assert, ereport, errcode(), errdetail(), errhint(), errmsg, ERROR, fb(), GetSafeSnapshot(), GetSerializableTransactionSnapshotInt(), InvalidPid, IsolationIsSerializable, RecoveryInProgress(), XactDeferrable, and XactReadOnly.

Referenced by GetTransactionSnapshot().

◆ GetSerializableTransactionSnapshotInt()

static Snapshot GetSerializableTransactionSnapshotInt ( Snapshot  snapshot,
VirtualTransactionId sourcevxid,
int  sourcepid 
)
static

Definition at line 1694 of file predicate.c.

1697{
1698 PGPROC *proc;
1701 *othersxact;
1702
1703 /* We only do this for serializable transactions. Once. */
1705
1707
1708 /*
1709 * Since all parts of a serializable transaction must use the same
1710 * snapshot, it is too late to establish one after a parallel operation
1711 * has begun.
1712 */
1713 if (IsInParallelMode())
1714 elog(ERROR, "cannot establish serializable snapshot during a parallel operation");
1715
1716 proc = MyProc;
1717 Assert(proc != NULL);
1718 GET_VXID_FROM_PGPROC(vxid, *proc);
1719
1720 /*
1721 * First we get the sxact structure, which may involve looping and access
1722 * to the "finished" list to free a structure for use.
1723 *
1724 * We must hold SerializableXactHashLock when taking/checking the snapshot
1725 * to avoid race conditions, for much the same reasons that
1726 * GetSnapshotData takes the ProcArrayLock. Since we might have to
1727 * release SerializableXactHashLock to call SummarizeOldestCommittedSxact,
1728 * this means we have to create the sxact first, which is a bit annoying
1729 * (in particular, an elog(ERROR) in procarray.c would cause us to leak
1730 * the sxact). Consider refactoring to avoid this.
1731 */
1732#ifdef TEST_SUMMARIZE_SERIAL
1734#endif
1736 do
1737 {
1739 /* If null, push out committed sxact to SLRU summary & retry. */
1740 if (!sxact)
1741 {
1745 }
1746 } while (!sxact);
1747
1748 /* Get the snapshot, or check that it's safe to use */
1749 if (!sourcevxid)
1750 snapshot = GetSnapshotData(snapshot);
1751 else if (!ProcArrayInstallImportedXmin(snapshot->xmin, sourcevxid))
1752 {
1755 ereport(ERROR,
1757 errmsg("could not import the requested snapshot"),
1758 errdetail("The source process with PID %d is not running anymore.",
1759 sourcepid)));
1760 }
1761
1762 /*
1763 * If there are no serializable transactions which are not read-only, we
1764 * can "opt out" of predicate locking and conflict checking for a
1765 * read-only transaction.
1766 *
1767 * The reason this is safe is that a read-only transaction can only become
1768 * part of a dangerous structure if it overlaps a writable transaction
1769 * which in turn overlaps a writable transaction which committed before
1770 * the read-only transaction started. A new writable transaction can
1771 * overlap this one, but it can't meet the other condition of overlapping
1772 * a transaction which committed before this one started.
1773 */
1775 {
1778 return snapshot;
1779 }
1780
1781 /* Initialize the structure. */
1782 sxact->vxid = vxid;
1783 sxact->SeqNo.lastCommitBeforeSnapshot = PredXact->LastSxactCommitSeqNo;
1784 sxact->prepareSeqNo = InvalidSerCommitSeqNo;
1785 sxact->commitSeqNo = InvalidSerCommitSeqNo;
1786 dlist_init(&(sxact->outConflicts));
1787 dlist_init(&(sxact->inConflicts));
1788 dlist_init(&(sxact->possibleUnsafeConflicts));
1789 sxact->topXid = GetTopTransactionIdIfAny();
1790 sxact->finishedBefore = InvalidTransactionId;
1791 sxact->xmin = snapshot->xmin;
1792 sxact->pid = MyProcPid;
1793 sxact->pgprocno = MyProcNumber;
1794 dlist_init(&sxact->predicateLocks);
1795 dlist_node_init(&sxact->finishedLink);
1796 sxact->flags = 0;
1797 if (XactReadOnly)
1798 {
1799 dlist_iter iter;
1800
1801 sxact->flags |= SXACT_FLAG_READ_ONLY;
1802
1803 /*
1804 * Register all concurrent r/w transactions as possible conflicts; if
1805 * all of them commit without any outgoing conflicts to earlier
1806 * transactions then this snapshot can be deemed safe (and we can run
1807 * without tracking predicate locks).
1808 */
1810 {
1812
1816 {
1818 }
1819 }
1820
1821 /*
1822 * If we didn't find any possibly unsafe conflicts because every
1823 * uncommitted writable transaction turned out to be doomed, then we
1824 * can "opt out" immediately. See comments above the earlier check
1825 * for PredXact->WritableSxactCount == 0.
1826 */
1827 if (dlist_is_empty(&sxact->possibleUnsafeConflicts))
1828 {
1831 return snapshot;
1832 }
1833 }
1834 else
1835 {
1839 }
1840
1841 /* Maintain serializable global xmin info. */
1843 {
1845 PredXact->SxactGlobalXmin = snapshot->xmin;
1847 SerialSetActiveSerXmin(snapshot->xmin);
1848 }
1849 else if (TransactionIdEquals(snapshot->xmin, PredXact->SxactGlobalXmin))
1850 {
1853 }
1854 else
1855 {
1857 }
1858
1860 MyXactDidWrite = false; /* haven't written anything yet */
1861
1863
1865
1866 return snapshot;
1867}

References PredXactListData::activeList, Assert, CreateLocalPredicateLockHash(), CreatePredXact(), dlist_iter::cur, dlist_container, dlist_foreach, dlist_init(), dlist_is_empty(), dlist_node_init(), elog, ereport, errcode(), errdetail(), errmsg, ERROR, fb(), GET_VXID_FROM_PGPROC, GetSnapshotData(), GetTopTransactionIdIfAny(), InvalidSerCommitSeqNo, InvalidSerializableXact, InvalidTransactionId, IsInParallelMode(), PredXactListData::LastSxactCommitSeqNo, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), max_prepared_xacts, MaxBackends, MyProc, MyProcNumber, MyProcPid, MySerializableXact, MyXactDidWrite, PredXact, ProcArrayInstallImportedXmin(), RecoveryInProgress(), ReleasePredXact(), SerialSetActiveSerXmin(), SetPossibleUnsafeConflict(), SummarizeOldestCommittedSxact(), SXACT_FLAG_READ_ONLY, PredXactListData::SxactGlobalXmin, PredXactListData::SxactGlobalXminCount, SxactIsCommitted, SxactIsDoomed, SxactIsReadOnly, TransactionIdEquals, TransactionIdFollows(), TransactionIdIsValid, PredXactListData::WritableSxactCount, XactReadOnly, and SnapshotData::xmin.

Referenced by GetSafeSnapshot(), GetSerializableTransactionSnapshot(), and SetSerializableTransactionSnapshot().

◆ MaxPredicateChildLocks()

static int MaxPredicateChildLocks ( const PREDICATELOCKTARGETTAG tag)
static

Definition at line 2219 of file predicate.c.

2220{
2221 switch (GET_PREDICATELOCKTARGETTAG_TYPE(*tag))
2222 {
2228
2229 case PREDLOCKTAG_PAGE:
2231
2232 case PREDLOCKTAG_TUPLE:
2233
2234 /*
2235 * not reachable: nothing is finer-granularity than a tuple, so we
2236 * should never try to promote to it.
2237 */
2238 Assert(false);
2239 return 0;
2240 }
2241
2242 /* not reachable */
2243 Assert(false);
2244 return 0;
2245}

References Assert, GET_PREDICATELOCKTARGETTAG_TYPE, max_predicate_locks_per_page, max_predicate_locks_per_relation, max_predicate_locks_per_xact, PREDLOCKTAG_PAGE, PREDLOCKTAG_RELATION, and PREDLOCKTAG_TUPLE.

Referenced by CheckAndPromotePredicateLockRequest().

◆ OnConflict_CheckForSerializationFailure()

static void OnConflict_CheckForSerializationFailure ( const SERIALIZABLEXACT *reader,
SERIALIZABLEXACT *writer 
)
static

Definition at line 4466 of file predicate.c.

4468{
4469 bool failure;
4470
4472
4473 failure = false;
4474
4475 /*------------------------------------------------------------------------
4476 * Check for already-committed writer with rw-conflict out flagged
4477 * (conflict-flag on W means that T2 committed before W):
4478 *
4479 * R ------> W ------> T2
4480 * rw rw
4481 *
4482 * That is a dangerous structure, so we must abort. (Since the writer
4483 * has already committed, we must be the reader)
4484 *------------------------------------------------------------------------
4485 */
4488 failure = true;
4489
4490 /*------------------------------------------------------------------------
4491 * Check whether the writer has become a pivot with an out-conflict
4492 * committed transaction (T2), and T2 committed first:
4493 *
4494 * R ------> W ------> T2
4495 * rw rw
4496 *
4497 * Because T2 must've committed first, there is no anomaly if:
4498 * - the reader committed before T2
4499 * - the writer committed before T2
4500 * - the reader is a READ ONLY transaction and the reader was concurrent
4501 * with T2 (= reader acquired its snapshot before T2 committed)
4502 *
4503 * We also handle the case that T2 is prepared but not yet committed
4504 * here. In that case T2 has already checked for conflicts, so if it
4505 * commits first, making the above conflict real, it's too late for it
4506 * to abort.
4507 *------------------------------------------------------------------------
4508 */
4510 failure = true;
4511 else if (!failure)
4512 {
4513 dlist_iter iter;
4514
4515 dlist_foreach(iter, &writer->outConflicts)
4516 {
4518 dlist_container(RWConflictData, outLink, iter.cur);
4519 SERIALIZABLEXACT *t2 = conflict->sxactIn;
4520
4521 if (SxactIsPrepared(t2)
4522 && (!SxactIsCommitted(reader)
4523 || t2->prepareSeqNo <= reader->commitSeqNo)
4525 || t2->prepareSeqNo <= writer->commitSeqNo)
4526 && (!SxactIsReadOnly(reader)
4527 || t2->prepareSeqNo <= reader->SeqNo.lastCommitBeforeSnapshot))
4528 {
4529 failure = true;
4530 break;
4531 }
4532 }
4533 }
4534
4535 /*------------------------------------------------------------------------
4536 * Check whether the reader has become a pivot with a writer
4537 * that's committed (or prepared):
4538 *
4539 * T0 ------> R ------> W
4540 * rw rw
4541 *
4542 * Because W must've committed first for an anomaly to occur, there is no
4543 * anomaly if:
4544 * - T0 committed before the writer
4545 * - T0 is READ ONLY, and overlaps the writer
4546 *------------------------------------------------------------------------
4547 */
4548 if (!failure && SxactIsPrepared(writer) && !SxactIsReadOnly(reader))
4549 {
4550 if (SxactHasSummaryConflictIn(reader))
4551 {
4552 failure = true;
4553 }
4554 else
4555 {
4556 dlist_iter iter;
4557
4558 /*
4559 * The unconstify is needed as we have no const version of
4560 * dlist_foreach().
4561 */
4562 dlist_foreach(iter, &unconstify(SERIALIZABLEXACT *, reader)->inConflicts)
4563 {
4564 const RWConflict conflict =
4565 dlist_container(RWConflictData, inLink, iter.cur);
4566 const SERIALIZABLEXACT *t0 = conflict->sxactOut;
4567
4568 if (!SxactIsDoomed(t0)
4569 && (!SxactIsCommitted(t0)
4570 || t0->commitSeqNo >= writer->prepareSeqNo)
4571 && (!SxactIsReadOnly(t0)
4572 || t0->SeqNo.lastCommitBeforeSnapshot >= writer->prepareSeqNo))
4573 {
4574 failure = true;
4575 break;
4576 }
4577 }
4578 }
4579 }
4580
4581 if (failure)
4582 {
4583 /*
4584 * We have to kill a transaction to avoid a possible anomaly from
4585 * occurring. If the writer is us, we can just ereport() to cause a
4586 * transaction abort. Otherwise we flag the writer for termination,
4587 * causing it to abort when it tries to commit. However, if the writer
4588 * is a prepared transaction, already prepared, we can't abort it
4589 * anymore, so we have to kill the reader instead.
4590 */
4592 {
4594 ereport(ERROR,
4596 errmsg("could not serialize access due to read/write dependencies among transactions"),
4597 errdetail_internal("Reason code: Canceled on identification as a pivot, during write."),
4598 errhint("The transaction might succeed if retried.")));
4599 }
4600 else if (SxactIsPrepared(writer))
4601 {
4603
4604 /* if we're not the writer, we have to be the reader */
4605 Assert(MySerializableXact == reader);
4606 ereport(ERROR,
4608 errmsg("could not serialize access due to read/write dependencies among transactions"),
4609 errdetail_internal("Reason code: Canceled on conflict out to pivot %u, during read.", writer->topXid),
4610 errhint("The transaction might succeed if retried.")));
4611 }
4612 writer->flags |= SXACT_FLAG_DOOMED;
4613 }
4614}

References Assert, SERIALIZABLEXACT::commitSeqNo, dlist_iter::cur, dlist_container, dlist_foreach, ereport, errcode(), ERRCODE_T_R_SERIALIZATION_FAILURE, errdetail_internal(), errhint(), errmsg, ERROR, fb(), SERIALIZABLEXACT::lastCommitBeforeSnapshot, LWLockHeldByMe(), LWLockRelease(), MySerializableXact, SERIALIZABLEXACT::SeqNo, SXACT_FLAG_DOOMED, SxactHasConflictOut, SxactHasSummaryConflictIn, SxactHasSummaryConflictOut, SxactIsCommitted, SxactIsDoomed, SxactIsPrepared, SxactIsReadOnly, and unconstify.

Referenced by FlagRWConflict().

◆ PageIsPredicateLocked()

◆ PostPrepare_PredicateLocks()

◆ PreCommit_CheckForSerializationFailure()

void PreCommit_CheckForSerializationFailure ( void  )

Definition at line 4633 of file predicate.c.

4634{
4636
4638 return;
4639
4641
4643
4644 /*
4645 * Check if someone else has already decided that we need to die. Since
4646 * we set our own DOOMED flag when partially releasing, ignore in that
4647 * case.
4648 */
4651 {
4653 ereport(ERROR,
4655 errmsg("could not serialize access due to read/write dependencies among transactions"),
4656 errdetail_internal("Reason code: Canceled on identification as a pivot, during commit attempt."),
4657 errhint("The transaction might succeed if retried.")));
4658 }
4659
4661 {
4664
4665 if (!SxactIsCommitted(nearConflict->sxactOut)
4666 && !SxactIsDoomed(nearConflict->sxactOut))
4667 {
4669
4670 dlist_foreach(far_iter, &nearConflict->sxactOut->inConflicts)
4671 {
4674
4675 if (farConflict->sxactOut == MySerializableXact
4676 || (!SxactIsCommitted(farConflict->sxactOut)
4677 && !SxactIsReadOnly(farConflict->sxactOut)
4678 && !SxactIsDoomed(farConflict->sxactOut)))
4679 {
4680 /*
4681 * Normally, we kill the pivot transaction to make sure we
4682 * make progress if the failing transaction is retried.
4683 * However, we can't kill it if it's already prepared, so
4684 * in that case we commit suicide instead.
4685 */
4686 if (SxactIsPrepared(nearConflict->sxactOut))
4687 {
4689 ereport(ERROR,
4691 errmsg("could not serialize access due to read/write dependencies among transactions"),
4692 errdetail_internal("Reason code: Canceled on commit attempt with conflict in from prepared pivot."),
4693 errhint("The transaction might succeed if retried.")));
4694 }
4695 nearConflict->sxactOut->flags |= SXACT_FLAG_DOOMED;
4696 break;
4697 }
4698 }
4699 }
4700 }
4701
4704
4706}

References Assert, dlist_container, dlist_foreach, ereport, errcode(), ERRCODE_T_R_SERIALIZATION_FAILURE, errdetail_internal(), errhint(), errmsg, ERROR, fb(), SERIALIZABLEXACT::flags, SERIALIZABLEXACT::inConflicts, InvalidSerializableXact, IsolationIsSerializable, PredXactListData::LastSxactCommitSeqNo, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MySerializableXact, PredXact, SERIALIZABLEXACT::prepareSeqNo, SXACT_FLAG_DOOMED, SXACT_FLAG_PREPARED, SxactIsCommitted, SxactIsDoomed, SxactIsPartiallyReleased, SxactIsPrepared, and SxactIsReadOnly.

Referenced by CommitTransaction(), and PrepareTransaction().

◆ predicatelock_hash()

static uint32 predicatelock_hash ( const void *key,
Size  keysize 
)
static

Definition at line 1351 of file predicate.c.

1352{
1353 const PREDICATELOCKTAG *predicatelocktag = (const PREDICATELOCKTAG *) key;
1355
1356 Assert(keysize == sizeof(PREDICATELOCKTAG));
1357
1358 /* Look into the associated target object, and compute its hash code */
1360
1362}

References Assert, fb(), PredicateLockHashCodeFromTargetHashCode, and PredicateLockTargetTagHashCode.

Referenced by PredicateLockShmemRequest().

◆ predicatelock_twophase_recover()

void predicatelock_twophase_recover ( FullTransactionId  fxid,
uint16  info,
void *recdata,
uint32  len 
)

Definition at line 4839 of file predicate.c.

4841{
4844
4846
4847 record = (TwoPhasePredicateRecord *) recdata;
4848
4850 (record->type == TWOPHASEPREDICATERECORD_LOCK));
4851
4852 if (record->type == TWOPHASEPREDICATERECORD_XACT)
4853 {
4854 /* Per-transaction record. Set up a SERIALIZABLEXACT. */
4855 TwoPhasePredicateXactRecord *xactRecord;
4859 bool found;
4860
4861 xactRecord = (TwoPhasePredicateXactRecord *) &record->data.xactRecord;
4862
4865 if (!sxact)
4866 ereport(ERROR,
4868 errmsg("out of shared memory")));
4869
4870 /* vxid for a prepared xact is INVALID_PROC_NUMBER/xid; no pid */
4871 sxact->vxid.procNumber = INVALID_PROC_NUMBER;
4872 sxact->vxid.localTransactionId = (LocalTransactionId) xid;
4873 sxact->pid = 0;
4874 sxact->pgprocno = INVALID_PROC_NUMBER;
4875
4876 /* a prepared xact hasn't committed yet */
4877 sxact->prepareSeqNo = RecoverySerCommitSeqNo;
4878 sxact->commitSeqNo = InvalidSerCommitSeqNo;
4879 sxact->finishedBefore = InvalidTransactionId;
4880
4881 sxact->SeqNo.lastCommitBeforeSnapshot = RecoverySerCommitSeqNo;
4882
4883 /*
4884 * Don't need to track this; no transactions running at the time the
4885 * recovered xact started are still active, except possibly other
4886 * prepared xacts and we don't care whether those are RO_SAFE or not.
4887 */
4888 dlist_init(&(sxact->possibleUnsafeConflicts));
4889
4890 dlist_init(&(sxact->predicateLocks));
4891 dlist_node_init(&sxact->finishedLink);
4892
4893 sxact->topXid = xid;
4894 sxact->xmin = xactRecord->xmin;
4895 sxact->flags = xactRecord->flags;
4897 if (!SxactIsReadOnly(sxact))
4898 {
4902 }
4903
4904 /*
4905 * We don't know whether the transaction had any conflicts or not, so
4906 * we'll conservatively assume that it had both a conflict in and a
4907 * conflict out, and represent that with the summary conflict flags.
4908 */
4909 dlist_init(&(sxact->outConflicts));
4910 dlist_init(&(sxact->inConflicts));
4913
4914 /* Register the transaction's xid */
4915 sxidtag.xid = xid;
4917 &sxidtag,
4918 HASH_ENTER, &found);
4919 Assert(sxid != NULL);
4920 Assert(!found);
4921 sxid->myXact = sxact;
4922
4923 /*
4924 * Update global xmin. Note that this is a special case compared to
4925 * registering a normal transaction, because the global xmin might go
4926 * backwards. That's OK, because until recovery is over we're not
4927 * going to complete any transactions or create any non-prepared
4928 * transactions, so there's no danger of throwing away anything still needed.
4929 */
4932 {
4936 }
4938 {
4941 }
4942
4944 }
4945 else if (record->type == TWOPHASEPREDICATERECORD_LOCK)
4946 {
4947 /* Lock record. Recreate the PREDICATELOCK */
4948 TwoPhasePredicateLockRecord *lockRecord;
4953
4954 lockRecord = (TwoPhasePredicateLockRecord *) &record->data.lockRecord;
4955 targettaghash = PredicateLockTargetTagHashCode(&lockRecord->target);
4956
4958 sxidtag.xid = xid;
4959 sxid = (SERIALIZABLEXID *)
4962
4963 Assert(sxid != NULL);
4964 sxact = sxid->myXact;
4966
4967 CreatePredicateLock(&lockRecord->target, targettaghash, sxact);
4968 }
4969}

References Assert, CreatePredicateLock(), CreatePredXact(), TwoPhasePredicateRecord::data, dlist_init(), dlist_node_init(), ereport, errcode(), errmsg, ERROR, fb(), TwoPhasePredicateXactRecord::flags, HASH_ENTER, HASH_FIND, hash_search(), INVALID_PROC_NUMBER, InvalidSerCommitSeqNo, InvalidSerializableXact, InvalidTransactionId, len, TwoPhasePredicateRecord::lockRecord, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), max_prepared_xacts, MaxBackends, PredicateLockTargetTagHashCode, PredXact, RecoverySerCommitSeqNo, SerializableXidHash, SerialSetActiveSerXmin(), SXACT_FLAG_SUMMARY_CONFLICT_IN, SXACT_FLAG_SUMMARY_CONFLICT_OUT, PredXactListData::SxactGlobalXmin, PredXactListData::SxactGlobalXminCount, SxactIsPrepared, SxactIsReadOnly, TwoPhasePredicateLockRecord::target, TransactionIdEquals, TransactionIdFollows(), TransactionIdIsValid, TWOPHASEPREDICATERECORD_LOCK, TWOPHASEPREDICATERECORD_XACT, TwoPhasePredicateRecord::type, PredXactListData::WritableSxactCount, TwoPhasePredicateRecord::xactRecord, XidFromFullTransactionId, and TwoPhasePredicateXactRecord::xmin.

◆ PredicateLockAcquire()

static void PredicateLockAcquire ( const PREDICATELOCKTARGETTAG *targettag)
static

Definition at line 2447 of file predicate.c.

2448{
2450 bool found;
2452
2453 /* Do we have the lock already, or a covering lock? */
2455 return;
2456
2458 return;
2459
2460 /* the same hash and LW lock apply to the lock target and the local lock. */
2462
2463 /* Acquire lock in local table */
2467 HASH_ENTER, &found);
2468 locallock->held = true;
2469 if (!found)
2470 locallock->childLocks = 0;
2471
2472 /* Actually create the lock */
2474
2475 /*
2476 * Lock has been acquired. Check whether it should be promoted to a
2477 * coarser granularity, or whether there are finer-granularity locks to
2478 * clean up.
2479 */
2481 {
2482 /*
2483 * Lock request was promoted to a coarser-granularity lock, and that
2484 * lock was acquired. It will delete this lock and any of its
2485 * children, so we're done.
2486 */
2487 }
2488 else
2489 {
2490 /* Clean up any finer-granularity locks */
2493 }
2494}

References CheckAndPromotePredicateLockRequest(), CoarserLockCovers(), CreatePredicateLock(), DeleteChildTargetLocks(), fb(), GET_PREDICATELOCKTARGETTAG_TYPE, HASH_ENTER, hash_search_with_hash_value(), LocalPredicateLockHash, MySerializableXact, PredicateLockExists(), PredicateLockTargetTagHashCode, and PREDLOCKTAG_TUPLE.

Referenced by CheckAndPromotePredicateLockRequest(), PredicateLockPage(), PredicateLockRelation(), and PredicateLockTID().

◆ PredicateLockExists()

static bool PredicateLockExists ( const PREDICATELOCKTARGETTAG *targettag)
static

Definition at line 1975 of file predicate.c.

1976{
1977 LOCALPREDICATELOCK *lock;
1978
1979 /* check local hash table */
1981 targettag,
1982 HASH_FIND, NULL);
1983
1984 if (!lock)
1985 return false;
1986
1987 /*
1988 * Found entry in the table, but still need to check whether it's actually
1989 * held -- it could just be a parent of some held lock.
1990 */
1991 return lock->held;
1992}

References fb(), HASH_FIND, hash_search(), LOCALPREDICATELOCK::held, and LocalPredicateLockHash.

Referenced by CoarserLockCovers(), PredicateLockAcquire(), and PredicateLockTID().

◆ PredicateLockingNeededForRelation()

static bool PredicateLockingNeededForRelation ( Relation  relation)
inline static

◆ PredicateLockPage()

◆ PredicateLockPageCombine()

void PredicateLockPageCombine ( Relation  relation,
BlockNumber  oldblkno,
BlockNumber  newblkno 
)

Definition at line 3159 of file predicate.c.

3161{
3162 /*
3163 * Page combines differ from page splits in that we ought to be able to
3164 * remove the locks on the old page after transferring them to the new
3165 * page, instead of duplicating them. However, because we can't edit other
3166 * backends' local lock tables, removing the old lock would leave them
3167 * with an entry in their LocalPredicateLockHash for a lock they're not
3168 * holding, which isn't acceptable. So we wind up having to do the same
3169 * work as a page split, acquiring a lock on the new page and keeping the
3170 * old page locked too. That can lead to some false positives, but should
3171 * be rare in practice.
3172 */
3174}

References fb(), and PredicateLockPageSplit().

Referenced by _bt_mark_page_halfdead(), and ginDeletePostingPage().

◆ PredicateLockPageSplit()

void PredicateLockPageSplit ( Relation  relation,
BlockNumber  oldblkno,
BlockNumber  newblkno 
)

Definition at line 3074 of file predicate.c.

3076{
3079 bool success;
3080
3081 /*
3082 * Bail out quickly if there are no serializable transactions running.
3083 *
3084 * It's safe to do this check without taking any additional locks. Even if
3085 * a serializable transaction starts concurrently, we know it can't take
3086 * any SIREAD locks on the page being split because the caller is holding
3087 * the associated buffer page lock. Memory reordering isn't an issue; the
3088 * memory barrier in the LWLock acquisition guarantees that this read
3089 * occurs while the buffer page lock is held.
3090 */
3092 return;
3093
3094 if (!PredicateLockingNeededForRelation(relation))
3095 return;
3096
3100
3102 relation->rd_locator.dbOid,
3103 relation->rd_id,
3104 oldblkno);
3106 relation->rd_locator.dbOid,
3107 relation->rd_id,
3108 newblkno);
3109
3111
3112 /*
3113 * Try copying the locks over to the new page's tag, creating it if
3114 * necessary.
3115 */
3118 false);
3119
3120 if (!success)
3121 {
3122 /*
3123 * No more predicate lock entries are available. Failure isn't an
3124 * option here, so promote the page lock to a relation lock.
3125 */
3126
3127 /* Get the parent relation lock's lock tag */
3129 &newtargettag);
3130 Assert(success);
3131
3132 /*
3133 * Move the locks to the parent. This shouldn't fail.
3134 *
3135 * Note that here we are removing locks held by other backends,
3136 * leading to a possible inconsistency in their local lock hash table.
3137 * This is OK because we're replacing it with a lock that covers the
3138 * old one.
3139 */
3142 true);
3143 Assert(success);
3144 }
3145
3147}

References Assert, BlockNumberIsValid(), RelFileLocator::dbOid, fb(), GetParentPredicateLockTag(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), PredicateLockingNeededForRelation(), PredXact, RelationData::rd_id, RelationData::rd_locator, SET_PREDICATELOCKTARGETTAG_PAGE, success, PredXactListData::SxactGlobalXmin, TransactionIdIsValid, and TransferPredicateLocksToNewTarget().

Referenced by _bt_insertonpg(), _hash_splitbucket(), createPostingTree(), ginPlaceToPage(), gistplacetopage(), and PredicateLockPageCombine().

◆ PredicateLockRelation()

void PredicateLockRelation ( Relation  relation,
Snapshot  snapshot 
)

◆ PredicateLockShmemAttach()

static void PredicateLockShmemAttach ( void *arg)
static

Definition at line 1328 of file predicate.c.

1329{
1330 /* This never changes, so let's keep a local copy. */
1332
1333 /* Pre-calculate the hash and partition lock of the scratch entry */
1336}

References OldCommittedSxact, PredXactListData::OldCommittedSxact, PredicateLockHashPartitionLock, PredicateLockTargetTagHashCode, PredXact, ScratchPartitionLock, ScratchTargetTag, and ScratchTargetTagHash.

◆ PredicateLockShmemInit()

static void PredicateLockShmemInit ( void *arg)
static

Definition at line 1244 of file predicate.c.

1245{
1246 int max_rw_conflicts;
1247 bool found;
1248
1249 /*
1250 * Reserve a dummy entry in the hash table; we use it to make sure there's
1251 * always one entry available when we need to split or combine a page,
1252 * because running out of space there could mean aborting a
1253 * non-serializable transaction.
1254 */
1256 HASH_ENTER, &found);
1257 Assert(!found);
1258
1269 /* Add all elements to available list, clean. */
1270 for (int i = 0; i < max_serializable_xacts; i++)
1271 {
1275 }
1292
1293 /* Initialize the rw-conflict pool */
1297
1299
1300 /* Add all elements to available list, clean. */
1301 for (int i = 0; i < max_rw_conflicts; i++)
1302 {
1305 }
1306
1307 /* Initialize the list of finished serializable transactions */
1309
1310 /* Initialize SerialControl to reflect empty SLRU. */
1312 serialControl->headPage = -1;
1316
1318
1319 /* This never changes, so let's keep a local copy. */
1321
1322 /* Pre-calculate the hash and partition lock of the scratch entry */
1325}

References PredXactListData::activeList, Assert, PredXactListData::availableList, RWConflictPoolHeaderData::availableList, PredXactListData::CanPartialClearThrough, SERIALIZABLEXACT::commitSeqNo, CreatePredXact(), dlist_init(), dlist_node_init(), dlist_push_tail(), PredXactListData::element, RWConflictPoolHeaderData::element, fb(), SERIALIZABLEXACT::finishedBefore, SERIALIZABLEXACT::finishedLink, FinishedSerializableTransactions, FirstNormalSerCommitSeqNo, SERIALIZABLEXACT::flags, HASH_ENTER, hash_search(), PredXactListData::HavePartialClearedThrough, SerialControlData::headPage, SerialControlData::headXid, i, SERIALIZABLEXACT::inConflicts, INVALID_PROC_NUMBER, InvalidTransactionId, SERIALIZABLEXACT::lastCommitBeforeSnapshot, PredXactListData::LastSxactCommitSeqNo, LW_EXCLUSIVE, LWLockAcquire(), LWLockInitialize(), LWLockRelease(), max_serializable_xacts, OldCommittedSxact, PredXactListData::OldCommittedSxact, SERIALIZABLEXACT::outConflicts, RWConflictData::outLink, SERIALIZABLEXACT::perXactPredicateListLock, SERIALIZABLEXACT::pgprocno, SERIALIZABLEXACT::pid, SERIALIZABLEXACT::possibleUnsafeConflicts, PredicateLockHashPartitionLock, SERIALIZABLEXACT::predicateLocks, PredicateLockTargetHash, PredicateLockTargetTagHashCode, PredXact, PredXactListDataSize, SERIALIZABLEXACT::prepareSeqNo, RWConflictPool, RWConflictPoolHeaderDataSize, ScratchPartitionLock, ScratchTargetTag, ScratchTargetTagHash, SERIALIZABLEXACT::SeqNo, SERIAL_ENTRIESPERPAGE, serialControl, SerialSlruCtl, SetInvalidVirtualTransactionId, SlruPagePrecedesUnitTests, SXACT_FLAG_COMMITTED, PredXactListData::SxactGlobalXmin, PredXactListData::SxactGlobalXminCount, SerialControlData::tailXid, SERIALIZABLEXACT::topXid, SERIALIZABLEXACT::vxid, PredXactListData::WritableSxactCount, SERIALIZABLEXACT::xactLink, and SERIALIZABLEXACT::xmin.

◆ PredicateLockShmemRequest()

static void PredicateLockShmemRequest ( void *arg)
static

Definition at line 1119 of file predicate.c.

1120{
1124
1125 /*
1126 * Register hash table for PREDICATELOCKTARGET structs. This stores
1127 * per-predicate-lock-target information.
1128 */
1130
1131 ShmemRequestHash(.name = "PREDICATELOCKTARGET hash",
1134 .hash_info.keysize = sizeof(PREDICATELOCKTARGETTAG),
1135 .hash_info.entrysize = sizeof(PREDICATELOCKTARGET),
1136 .hash_info.num_partitions = NUM_PREDICATELOCK_PARTITIONS,
1138 );
1139
1140 /*
1141 * Allocate hash table for PREDICATELOCK structs. This stores per
1142 * xact-lock-of-a-target information.
1143 *
1144 * Assume an average of 2 xacts per target.
1145 */
1147
1148 ShmemRequestHash(.name = "PREDICATELOCK hash",
1149 .nelems = max_predicate_locks,
1150 .ptr = &PredicateLockHash,
1151 .hash_info.keysize = sizeof(PREDICATELOCKTAG),
1152 .hash_info.entrysize = sizeof(PREDICATELOCK),
1153 .hash_info.hash = predicatelock_hash,
1154 .hash_info.num_partitions = NUM_PREDICATELOCK_PARTITIONS,
1156 );
1157
1158 /*
1159 * Compute size for serializable transaction hashtable. Note these
1160 * calculations must agree with PredicateLockShmemSize!
1161 *
1162 * Assume an average of 10 predicate locking transactions per backend.
1163 * This allows aggressive cleanup while detail is present before data must
1164 * be summarized for storage in SLRU and the "dummy" transaction.
1165 */
1167
1168 /*
1169 * Register a list to hold information on transactions participating in
1170 * predicate locking.
1171 */
1172 ShmemRequestStruct(.name = "PredXactList",
1175 sizeof(SERIALIZABLEXACT)))),
1176 .ptr = (void **) &PredXact,
1177 );
1178
1179 /*
1180 * Register hash table for SERIALIZABLEXID structs. This stores per-xid
1181 * information for serializable transactions which have accessed data.
1182 */
1183 ShmemRequestHash(.name = "SERIALIZABLEXID hash",
1184 .nelems = max_serializable_xacts,
1185 .ptr = &SerializableXidHash,
1186 .hash_info.keysize = sizeof(SERIALIZABLEXIDTAG),
1187 .hash_info.entrysize = sizeof(SERIALIZABLEXID),
1188 .hash_flags = HASH_ELEM | HASH_BLOBS | HASH_FIXED_SIZE,
1189 );
1190
1191 /*
1192 * Allocate space for tracking rw-conflicts in lists attached to the
1193 * transactions.
1194 *
1195 * Assume an average of 5 conflicts per transaction. Calculations suggest
1196 * that this will prevent resource exhaustion in even the most pessimal
1197 * loads up to max_connections = 200 with all 200 connections pounding the
1198 * database with serializable transactions. Beyond that, there may be
1199 * occasional transactions canceled when trying to flag conflicts. That's
1200 * probably OK.
1201 */
1203
1204 ShmemRequestStruct(.name = "RWConflictPool",
1207 .ptr = (void **) &RWConflictPool,
1208 );
1209
1210 ShmemRequestStruct(.name = "FinishedSerializableTransactions",
1211 .size = sizeof(dlist_head),
1212 .ptr = (void **) &FinishedSerializableTransactions,
1213 );
1214
1215 /*
1216 * Initialize the SLRU storage for old committed serializable
1217 * transactions.
1218 */
1220 .name = "serializable",
1221 .Dir = "pg_serial",
1222 .long_segment_names = false,
1223
1224 .nslots = serializable_buffers,
1225
1226 .sync_handler = SYNC_HANDLER_NONE,
1227 .PagePrecedes = SerialPagePrecedesLogically,
1228 .errdetail_for_io_error = serial_errdetail_for_io_error,
1229
1230 .buffer_tranche_id = LWTRANCHE_SERIAL_BUFFER,
1231 .bank_tranche_id = LWTRANCHE_SERIAL_SLRU,
1232 );
1233#ifdef USE_ASSERT_CHECKING
1235#endif
1236
1237 ShmemRequestStruct(.name = "SerialControlData",
1238 .size = sizeof(SerialControlData),
1239 .ptr = (void **) &serialControl,
1240 );
1241}

References add_size(), fb(), FinishedSerializableTransactions, HASH_BLOBS, HASH_ELEM, HASH_FIXED_SIZE, HASH_FUNCTION, HASH_PARTITION, HTAB::keysize, max_prepared_xacts, max_serializable_xacts, MaxBackends, mul_size(), name, NPREDICATELOCKTARGETENTS, NUM_PREDICATELOCK_PARTITIONS, predicatelock_hash(), PredicateLockHash, PredicateLockTargetHash, PredXact, PredXactListDataSize, RWConflictDataSize, RWConflictPool, RWConflictPoolHeaderDataSize, serial_errdetail_for_io_error(), serialControl, serializable_buffers, SerializableXidHash, SerialPagePrecedesLogically(), SerialSlruDesc, ShmemRequestHash, ShmemRequestStruct, SimpleLruRequest, and SYNC_HANDLER_NONE.

◆ PredicateLockTID()

void PredicateLockTID ( Relation  relation,
const ItemPointerData *tid,
Snapshot  snapshot,
TransactionId  tuple_xid 
)

Definition at line 2551 of file predicate.c.

2553{
2555
2556 if (!SerializationNeededForRead(relation, snapshot))
2557 return;
2558
2559 /*
2560 * Return if this xact wrote it.
2561 */
2562 if (relation->rd_index == NULL)
2563 {
2564 /* If we wrote it, we already have a write lock. */
2566 return;
2567 }
2568
2569 /*
2570 * Do quick-but-not-definitive test for a relation lock first. This will
2571 * never cause a return when the relation is *not* locked, but will
2572 * occasionally let the check continue when there really *is* a relation
2573 * level lock.
2574 */
2576 relation->rd_locator.dbOid,
2577 relation->rd_id);
2578 if (PredicateLockExists(&tag))
2579 return;
2580
2582 relation->rd_locator.dbOid,
2583 relation->rd_id,
2587}

References RelFileLocator::dbOid, fb(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), PredicateLockAcquire(), PredicateLockExists(), RelationData::rd_id, RelationData::rd_index, RelationData::rd_locator, SerializationNeededForRead(), SET_PREDICATELOCKTARGETTAG_RELATION, SET_PREDICATELOCKTARGETTAG_TUPLE, and TransactionIdIsCurrentTransactionId().

Referenced by BitmapHeapScanNextBlock(), heap_fetch(), and heap_hot_search_buffer().

◆ PredicateLockTwoPhaseFinish()

void PredicateLockTwoPhaseFinish ( FullTransactionId  fxid,
bool  isCommit 
)

Definition at line 4812 of file predicate.c.

4813{
4816
4818
4820 sxid = (SERIALIZABLEXID *)
4823
4824 /* xid will not be found if it wasn't a serializable transaction */
4825 if (sxid == NULL)
4826 return;
4827
4828 /* Release its locks */
4829 MySerializableXact = sxid->myXact;
4830 MyXactDidWrite = true; /* conservatively assume that we wrote
4831 * something */
4833}

References fb(), HASH_FIND, hash_search(), LW_SHARED, LWLockAcquire(), LWLockRelease(), MySerializableXact, MyXactDidWrite, ReleasePredicateLocks(), SerializableXidHash, SERIALIZABLEXIDTAG::xid, and XidFromFullTransactionId.

Referenced by FinishPreparedTransaction().

◆ RegisterPredicateLockingXid()

void RegisterPredicateLockingXid ( TransactionId  xid)

Definition at line 1889 of file predicate.c.

1890{
1893 bool found;
1894
1895 /*
1896 * If we're not tracking predicate lock data for this transaction, we
1897 * should ignore the request and return quickly.
1898 */
1900 return;
1901
1902 /* We should have a valid XID and be at the top level. */
1904
1906
1907 /* This should only be done once per transaction. */
1909
1911
1912 sxidtag.xid = xid;
1914 &sxidtag,
1915 HASH_ENTER, &found);
1916 Assert(!found);
1917
1918 /* Initialize the structure. */
1919 sxid->myXact = MySerializableXact;
1921}

References Assert, fb(), HASH_ENTER, hash_search(), InvalidSerializableXact, InvalidTransactionId, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MySerializableXact, SerializableXidHash, SERIALIZABLEXACT::topXid, and TransactionIdIsValid.

Referenced by AssignTransactionId().

◆ ReleaseOneSerializableXact()

static void ReleaseOneSerializableXact ( SERIALIZABLEXACT *sxact,
bool  partial,
bool  summarize 
)
static

Definition at line 3765 of file predicate.c.

3767{
3769 dlist_mutable_iter iter;
3770
3771 Assert(sxact != NULL);
3773 Assert(partial || !SxactIsOnFinishedList(sxact));
3775
3776 /*
3777 * First release all the predicate locks held by this xact (or transfer
3778 * them to OldCommittedSxact if summarize is true)
3779 */
3781 if (IsInParallelMode())
3782 LWLockAcquire(&sxact->perXactPredicateListLock, LW_EXCLUSIVE);
3783 dlist_foreach_modify(iter, &sxact->predicateLocks)
3784 {
3786 dlist_container(PREDICATELOCK, xactLink, iter.cur);
3787 PREDICATELOCKTAG tag;
3788 PREDICATELOCKTARGET *target;
3792
3793 tag = predlock->tag;
3794 target = tag.myTarget;
3795 targettag = target->tag;
3798
3800
3801 dlist_delete(&predlock->targetLink);
3802
3806 HASH_REMOVE, NULL);
3807 if (summarize)
3808 {
3809 bool found;
3810
3811 /* Fold into dummy transaction list. */
3816 HASH_ENTER_NULL, &found);
3817 if (!predlock)
3818 ereport(ERROR,
3820 errmsg("out of shared memory"),
3821 errhint("You might need to increase \"%s\".", "max_pred_locks_per_transaction")));
3822 if (found)
3823 {
3824 Assert(predlock->commitSeqNo != 0);
3825 Assert(predlock->commitSeqNo != InvalidSerCommitSeqNo);
3826 if (predlock->commitSeqNo < sxact->commitSeqNo)
3827 predlock->commitSeqNo = sxact->commitSeqNo;
3828 }
3829 else
3830 {
3832 &predlock->targetLink);
3834 &predlock->xactLink);
3835 predlock->commitSeqNo = sxact->commitSeqNo;
3836 }
3837 }
3838 else
3840
3842 }
3843
3844 /*
3845 * Rather than retail removal, just re-init the head after we've run
3846 * through the list.
3847 */
3848 dlist_init(&sxact->predicateLocks);
3849
3850 if (IsInParallelMode())
3851 LWLockRelease(&sxact->perXactPredicateListLock);
3853
3854 sxidtag.xid = sxact->topXid;
3856
3857 /* Release all outConflicts (unless 'partial' is true) */
3858 if (!partial)
3859 {
3860 dlist_foreach_modify(iter, &sxact->outConflicts)
3861 {
3863 dlist_container(RWConflictData, outLink, iter.cur);
3864
3865 if (summarize)
3866 conflict->sxactIn->flags |= SXACT_FLAG_SUMMARY_CONFLICT_IN;
3868 }
3869 }
3870
3871 /* Release all inConflicts. */
3872 dlist_foreach_modify(iter, &sxact->inConflicts)
3873 {
3875 dlist_container(RWConflictData, inLink, iter.cur);
3876
3877 if (summarize)
3878 conflict->sxactOut->flags |= SXACT_FLAG_SUMMARY_CONFLICT_OUT;
3880 }
3881
3882 /* Finally, get rid of the xid and the record of the transaction itself. */
3883 if (!partial)
3884 {
3885 if (sxidtag.xid != InvalidTransactionId)
3888 }
3889
3891}

References Assert, dlist_mutable_iter::cur, dlist_container, dlist_delete(), dlist_foreach_modify, dlist_init(), dlist_push_tail(), ereport, errcode(), errhint(), errmsg, ERROR, fb(), HASH_ENTER_NULL, HASH_REMOVE, hash_search(), hash_search_with_hash_value(), InvalidSerCommitSeqNo, InvalidTransactionId, IsInParallelMode(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockHeldByMe(), LWLockRelease(), PREDICATELOCKTAG::myTarget, PREDICATELOCKTAG::myXact, OldCommittedSxact, PredicateLockHash, PredicateLockHashCodeFromTargetHashCode, PredicateLockHashPartitionLock, SERIALIZABLEXACT::predicateLocks, PREDICATELOCKTARGET::predicateLocks, PredicateLockTargetTagHashCode, ReleasePredXact(), ReleaseRWConflict(), RemoveTargetIfNoLongerUsed(), SerializableXidHash, SXACT_FLAG_SUMMARY_CONFLICT_IN, SXACT_FLAG_SUMMARY_CONFLICT_OUT, SxactIsCommitted, SxactIsOnFinishedList, SxactIsRolledBack, and PREDICATELOCKTARGET::tag.

Referenced by ClearOldPredicateLocks(), ReleasePredicateLocks(), and SummarizeOldestCommittedSxact().

◆ ReleasePredicateLocks()

void ReleasePredicateLocks ( bool  isCommit,
bool  isReadOnlySafe 
)

Definition at line 3242 of file predicate.c.

3243{
3244 bool partiallyReleasing = false;
3245 bool needToClear;
3247 dlist_mutable_iter iter;
3248
3249 /*
3250 * We can't trust XactReadOnly here, because a transaction which started
3251 * as READ WRITE can show as READ ONLY later, e.g., within
3252 * subtransactions. We want to flag a transaction as READ ONLY if it
3253 * commits without writing so that de facto READ ONLY transactions get the
3254 * benefit of some RO optimizations, so we will use this local variable to
3255 * get some cleanup logic right which is based on whether the transaction
3256 * was declared READ ONLY at the top level.
3257 */
3259
3260 /* We can't be both committing and releasing early due to RO_SAFE. */
3262
3263 /* Are we at the end of a transaction, that is, a commit or abort? */
3264 if (!isReadOnlySafe)
3265 {
3266 /*
3267 * Parallel workers mustn't release predicate locks at the end of
3268 * their transaction. The leader will do that at the end of its
3269 * transaction.
3270 */
3271 if (IsParallelWorker())
3272 {
3274 return;
3275 }
3276
3277 /*
3278 * By the time the leader in a parallel query reaches end of
3279 * transaction, it has waited for all workers to exit.
3280 */
3282
3283 /*
3284 * If the leader in a parallel query earlier stashed a partially
3285 * released SERIALIZABLEXACT for final clean-up at end of transaction
3286 * (because workers might still have been accessing it), then it's
3287 * time to restore it.
3288 */
3290 {
3295 }
3296 }
3297
3299 {
3301 return;
3302 }
3303
3305
3306 /*
3307 * If the transaction is committing, but it has been partially released
3308 * already, then treat this as a roll back. It was marked as rolled back.
3309 */
3311 isCommit = false;
3312
3313 /*
3314 * If we're called in the middle of a transaction because we discovered
3315 * that the SXACT_FLAG_RO_SAFE flag was set, then we'll partially release
3316 * it (that is, release the predicate locks and conflicts, but not the
3317 * SERIALIZABLEXACT itself) if we're the first backend to have noticed.
3318 */
3320 {
3321 /*
3322 * The leader needs to stash a pointer to it, so that it can
3323 * completely release it at end-of-transaction.
3324 */
3325 if (!IsParallelWorker())
3327
3328 /*
3329 * The first backend to reach this condition will partially release
3330 * the SERIALIZABLEXACT. All others will just clear their
3331 * backend-local state so that they stop doing SSI checks for the rest
3332 * of the transaction.
3333 */
3335 {
3338 return;
3339 }
3340 else
3341 {
3343 partiallyReleasing = true;
3344 /* ... and proceed to perform the partial release below. */
3345 }
3346 }
3352
3353 /* may not be serializable during COMMIT/ROLLBACK PREPARED */
3355
3356 /* We'd better not already be on the cleanup list. */
3358
3360
3361 /*
3362 * We don't hold XidGenLock lock here, assuming that TransactionId is
3363 * atomic!
3364 *
3365 * If this value is changing, we don't care that much whether we get the
3366 * old or new value -- it is just used to determine how far
3367 * SxactGlobalXmin must advance before this transaction can be fully
3368 * cleaned up. The worst that could happen is we wait for one more
3369 * transaction to complete before freeing some RAM; correctness of visible
3370 * behavior is not affected.
3371 */
3373
3374 /*
3375 * If it's not a commit it's either a rollback or a read-only transaction
3376 * flagged SXACT_FLAG_RO_SAFE, and we can clear our locks immediately.
3377 */
3378 if (isCommit)
3379 {
3382 /* Recognize implicit read-only transaction (commit without write). */
3383 if (!MyXactDidWrite)
3385 }
3386 else
3387 {
3388 /*
3389 * The DOOMED flag indicates that we intend to roll back this
3390 * transaction and so it should not cause serialization failures for
3391 * other transactions that conflict with it. Note that this flag might
3392 * already be set, if another backend marked this transaction for
3393 * abort.
3394 *
3395 * The ROLLED_BACK flag further indicates that ReleasePredicateLocks
3396 * has been called, and so the SerializableXact is eligible for
3397 * cleanup. This means it should not be considered when calculating
3398 * SxactGlobalXmin.
3399 */
3402
3403 /*
3404 * If the transaction was previously prepared, but is now failing due
3405 * to a ROLLBACK PREPARED or (hopefully very rare) error after the
3406 * prepare, clear the prepared flag. This simplifies conflict
3407 * checking.
3408 */
3410 }
3411
3413 {
3415 if (--(PredXact->WritableSxactCount) == 0)
3416 {
3417 /*
3418 * Release predicate locks and rw-conflicts in for all committed
3419 * transactions. There are no longer any transactions which might
3420 * conflict with the locks and no chance for new transactions to
3421 * overlap. Similarly, existing conflicts in can't cause pivots,
3422 * and any conflicts in which could have completed a dangerous
3423 * structure would already have caused a rollback, so any
3424 * remaining ones must be benign.
3425 */
3427 }
3428 }
3429 else
3430 {
3431 /*
3432 * Read-only transactions: clear the list of transactions that might
3433 * make us unsafe. Note that we use 'inLink' for the iteration as
3434 * opposed to 'outLink' for the r/w xacts.
3435 */
3437 {
3439 dlist_container(RWConflictData, inLink, iter.cur);
3440
3443
3445 }
3446 }
3447
3448 /* Check for conflict out to old committed transactions. */
3449 if (isCommit
3452 {
3453 /*
3454 * we don't know which old committed transaction we conflicted with,
3455 * so be conservative and use FirstNormalSerCommitSeqNo here
3456 */
3460 }
3461
3462 /*
3463 * Release all outConflicts to committed transactions. If we're rolling
3464 * back clear them all. Set SXACT_FLAG_CONFLICT_OUT if any point to
3465 * previously committed transactions.
3466 */
3468 {
3470 dlist_container(RWConflictData, outLink, iter.cur);
3471
3472 if (isCommit
3474 && SxactIsCommitted(conflict->sxactIn))
3475 {
3477 || conflict->sxactIn->prepareSeqNo < MySerializableXact->SeqNo.earliestOutConflictCommit)
3480 }
3481
3482 if (!isCommit
3483 || SxactIsCommitted(conflict->sxactIn)
3484 || (conflict->sxactIn->SeqNo.lastCommitBeforeSnapshot >= PredXact->LastSxactCommitSeqNo))
3486 }
3487
3488 /*
3489 * Release all inConflicts from committed and read-only transactions. If
3490 * we're rolling back, clear them all.
3491 */
3493 {
3495 dlist_container(RWConflictData, inLink, iter.cur);
3496
3497 if (!isCommit
3498 || SxactIsCommitted(conflict->sxactOut)
3499 || SxactIsReadOnly(conflict->sxactOut))
3501 }
3502
3504 {
3505 /*
3506 * Remove ourselves from the list of possible conflicts for concurrent
3507 * READ ONLY transactions, flagging them as unsafe if we have a
3508 * conflict out. If any are waiting DEFERRABLE transactions, wake them
3509 * up if they are known safe or known unsafe.
3510 */
3512 {
3514 dlist_container(RWConflictData, outLink, iter.cur);
3515
3516 roXact = possibleUnsafeConflict->sxactIn;
3519
3520 /* Mark conflicted if necessary. */
3521 if (isCommit
3525 <= roXact->SeqNo.lastCommitBeforeSnapshot))
3526 {
3527 /*
3528 * This releases possibleUnsafeConflict (as well as all other
3529 * possible conflicts for roXact)
3530 */
3532 }
3533 else
3534 {
3536
3537 /*
3538 * If we were the last possible conflict, flag it safe. The
3539 * transaction can now safely release its predicate locks (but
3540 * that transaction's backend has to do that itself).
3541 */
3542 if (dlist_is_empty(&roXact->possibleUnsafeConflicts))
3543 roXact->flags |= SXACT_FLAG_RO_SAFE;
3544 }
3545
3546 /*
3547 * Wake up the process for a waiting DEFERRABLE transaction if we
3548 * now know it's either safe or conflicted.
3549 */
3552 ProcSendSignal(roXact->pgprocno);
3553 }
3554 }
3555
3556 /*
3557 * Check whether it's time to clean up old transactions. This can only be
3558 * done when the last serializable transaction with the oldest xmin among
3559 * serializable transactions completes. We then find the "new oldest"
3560 * xmin and purge any transactions which finished before this transaction
3561 * was launched.
3562 *
3563 * For parallel queries in read-only transactions, it might run twice. We
3564 * only release the reference on the first call.
3565 */
3566 needToClear = false;
3567 if ((partiallyReleasing ||
3571 {
3573 if (--(PredXact->SxactGlobalXminCount) == 0)
3574 {
3576 needToClear = true;
3577 }
3578 }
3579
3581
3583
3584 /* Add this to the list of transactions to check for later cleanup. */
3585 if (isCommit)
3588
3589 /*
3590 * If we're releasing a RO_SAFE transaction in parallel mode, we'll only
3591 * partially release it. That's necessary because other backends may have
3592 * a reference to it. The leader will release the SERIALIZABLEXACT itself
3593 * at the end of the transaction after workers have stopped running.
3594 */
3595 if (!isCommit)
3598 false);
3599
3601
3602 if (needToClear)
3604
3606}

References Assert, PredXactListData::CanPartialClearThrough, ClearOldPredicateLocks(), SERIALIZABLEXACT::commitSeqNo, dlist_mutable_iter::cur, dlist_container, dlist_foreach_modify, dlist_is_empty(), dlist_push_tail(), SERIALIZABLEXACT::earliestOutConflictCommit, fb(), SERIALIZABLEXACT::finishedBefore, SERIALIZABLEXACT::finishedLink, FinishedSerializableTransactions, FirstNormalSerCommitSeqNo, SERIALIZABLEXACT::flags, FlagSxactUnsafe(), SERIALIZABLEXACT::inConflicts, InvalidSerializableXact, IsInParallelMode(), IsolationIsSerializable, IsParallelWorker, PredXactListData::LastSxactCommitSeqNo, LocalPredicateLockHash, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MySerializableXact, MyXactDidWrite, TransamVariablesData::nextXid, SERIALIZABLEXACT::outConflicts, ParallelContextActive(), SERIALIZABLEXACT::pid, SERIALIZABLEXACT::possibleUnsafeConflicts, PredXact, ProcSendSignal(), ReleaseOneSerializableXact(), ReleasePredicateLocksLocal(), ReleaseRWConflict(), SavedSerializableXact, SERIALIZABLEXACT::SeqNo, SetNewSxactGlobalXmin(), SXACT_FLAG_COMMITTED, SXACT_FLAG_CONFLICT_OUT, SXACT_FLAG_DOOMED, SXACT_FLAG_PARTIALLY_RELEASED, SXACT_FLAG_READ_ONLY, SXACT_FLAG_RO_SAFE, SXACT_FLAG_ROLLED_BACK, PredXactListData::SxactGlobalXmin, PredXactListData::SxactGlobalXminCount, SxactHasConflictOut, SxactHasSummaryConflictOut, SxactIsCommitted, SxactIsDeferrableWaiting, SxactIsDoomed, SxactIsOnFinishedList, SxactIsPartiallyReleased, SxactIsPrepared, SxactIsReadOnly, SxactIsRolledBack, SxactIsROSafe, SxactIsROUnsafe, TransactionIdEquals, TransamVariables, PredXactListData::WritableSxactCount, XidFromFullTransactionId, and SERIALIZABLEXACT::xmin.

Referenced by GetSafeSnapshot(), PredicateLockTwoPhaseFinish(), ResourceOwnerReleaseInternal(), and SerializationNeededForRead().

◆ ReleasePredicateLocksLocal()

static void ReleasePredicateLocksLocal ( void  )
static

Definition at line 3609 of file predicate.c.

3610{
3612 MyXactDidWrite = false;
3613
3614 /* Delete per-transaction lock table */
3616 {
3619 }
3620}

References fb(), hash_destroy(), InvalidSerializableXact, LocalPredicateLockHash, MySerializableXact, and MyXactDidWrite.

Referenced by ReleasePredicateLocks().

◆ ReleasePredXact()

◆ ReleaseRWConflict()

static void ReleaseRWConflict ( RWConflict  conflict)
static

◆ RemoveScratchTarget()

◆ RemoveTargetIfNoLongerUsed()

◆ RestoreScratchTarget()

◆ RWConflictExists()

static bool RWConflictExists ( const SERIALIZABLEXACT * reader,
const SERIALIZABLEXACT * writer 
)
static

Definition at line 624 of file predicate.c.

625{
626 dlist_iter iter;
627
628 Assert(reader != writer);
629
630 /* Check the ends of the purported conflict first. */
631 if (SxactIsDoomed(reader)
633 || dlist_is_empty(&reader->outConflicts)
634 || dlist_is_empty(&writer->inConflicts))
635 return false;
636
637 /*
638 * A conflict is possible; walk the list to find out.
639 *
640 * The unconstify is needed as we have no const version of
641 * dlist_foreach().
642 */
643 dlist_foreach(iter, &unconstify(SERIALIZABLEXACT *, reader)->outConflicts)
644 {
646 dlist_container(RWConflictData, outLink, iter.cur);
647
648 if (conflict->sxactIn == writer)
649 return true;
650 }
651
652 /* No conflict found. */
653 return false;
654}

References Assert, dlist_iter::cur, dlist_container, dlist_foreach, dlist_is_empty(), fb(), SERIALIZABLEXACT::outConflicts, SxactIsDoomed, and unconstify.

Referenced by CheckForSerializableConflictOut(), CheckTableForSerializableConflictIn(), CheckTargetForConflictsIn(), and SetRWConflict().

◆ serial_errdetail_for_io_error()

static int serial_errdetail_for_io_error ( const void * opaque_data)
static

Definition at line 760 of file predicate.c.

761{
762 TransactionId xid = *(const TransactionId *) opaque_data;
763
764 return errdetail("Could not access serializable CSN of transaction %u.", xid);
765}

References errdetail(), and fb().

Referenced by PredicateLockShmemRequest().

◆ SerialAdd()

static void SerialAdd ( TransactionId  xid,
SerCommitSeqNo  minConflictCommitSeqNo 
)
static

Definition at line 839 of file predicate.c.

840{
841 TransactionId tailXid;
843 int slotno;
845 bool isNewPage;
846 LWLock *lock;
847
849
850 targetPage = SerialPage(xid);
852
853 /*
854 * In this routine, we must hold both SerialControlLock and the SLRU bank
855 * lock simultaneously while making the SLRU data catch up with the new
856 * state that we determine.
857 */
859
860 /*
861 * If 'xid' is older than the global xmin (== tailXid), there's no need to
862 * store it, after all. This can happen if the oldest transaction holding
863 * back the global xmin just finished, making 'xid' uninteresting, but
864 * ClearOldPredicateLocks() has not yet run.
865 */
866 tailXid = serialControl->tailXid;
867 if (!TransactionIdIsValid(tailXid) || TransactionIdPrecedes(xid, tailXid))
868 {
870 return;
871 }
872
873 /*
874 * If the SLRU is currently unused, zero out the whole active region from
875 * tailXid to headXid before taking it into use. Otherwise zero out only
876 * any new pages that enter the tailXid-headXid range as we advance
877 * headXid.
878 */
879 if (serialControl->headPage < 0)
880 {
881 firstZeroPage = SerialPage(tailXid);
882 isNewPage = true;
883 }
884 else
885 {
888 targetPage);
889 }
890
893 serialControl->headXid = xid;
894 if (isNewPage)
896
897 if (isNewPage)
898 {
899 /* Initialize intervening pages; might involve trading locks */
900 for (;;)
901 {
906 break;
908 LWLockRelease(lock);
909 }
910 }
911 else
912 {
915 }
916
918 SerialSlruCtl->shared->page_dirty[slotno] = true;
919
920 LWLockRelease(lock);
922}

References Assert, fb(), SerialControlData::headPage, SerialControlData::headXid, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), serialControl, SerialNextPage, SerialPage, SerialPagePrecedesLogically(), SerialSlruCtl, SerialValue, SimpleLruGetBankLock(), SimpleLruReadPage(), SimpleLruZeroPage(), SerialControlData::tailXid, TransactionIdFollows(), TransactionIdIsValid, and TransactionIdPrecedes().

Referenced by SummarizeOldestCommittedSxact().

◆ SerialGetMinConflictCommitSeqNo()

static SerCommitSeqNo SerialGetMinConflictCommitSeqNo ( TransactionId  xid)
static

Definition at line 930 of file predicate.c.

931{
932 TransactionId headXid;
933 TransactionId tailXid;
935 int slotno;
936
938
940 headXid = serialControl->headXid;
941 tailXid = serialControl->tailXid;
943
944 if (!TransactionIdIsValid(headXid))
945 return 0;
946
948
949 if (TransactionIdPrecedes(xid, tailXid)
950 || TransactionIdFollows(xid, headXid))
951 return 0;
952
953 /*
954 * The following function must be called without holding SLRU bank lock,
955 * but will return with that lock held, which must then be released.
956 */
958 SerialPage(xid), &xid);
959 val = SerialValue(slotno, xid);
961 return val;
962}

References Assert, fb(), SerialControlData::headXid, LW_SHARED, LWLockAcquire(), LWLockRelease(), serialControl, SerialPage, SerialSlruCtl, SerialValue, SimpleLruGetBankLock(), SimpleLruReadPage_ReadOnly(), SerialControlData::tailXid, TransactionIdFollows(), TransactionIdIsValid, TransactionIdPrecedes(), and val.

Referenced by CheckForSerializableConflictOut().

◆ SerializationNeededForRead()

static bool SerializationNeededForRead ( Relation  relation,
Snapshot  snapshot 
)
inline static

Definition at line 530 of file predicate.c.

531{
532 /* Nothing to do if this is not a serializable transaction */
534 return false;
535
536 /*
537 * Don't acquire locks or conflict when scanning with a special snapshot.
538 * This excludes things like CLUSTER and REINDEX. They use the wholesale
539 * functions TransferPredicateLocksToHeapRelation() and
540 * CheckTableForSerializableConflictIn() to participate in serialization,
541 * but the scans involved don't need serialization.
542 */
543 if (!IsMVCCSnapshot(snapshot))
544 return false;
545
546 /*
547 * Check if we have just become "RO-safe". If we have, immediately release
548 * all locks as they're not needed anymore. This also resets
549 * MySerializableXact, so that subsequent calls to this function can exit
550 * quickly.
551 *
552 * A transaction is flagged as RO_SAFE if all concurrent R/W transactions
553 * commit without having conflicts out to an earlier snapshot, thus
554 * ensuring that no conflicts are possible for this transaction.
555 */
557 {
558 ReleasePredicateLocks(false, true);
559 return false;
560 }
561
562 /* Check if the relation doesn't participate in predicate locking */
564 return false;
565
566 return true; /* no excuse to skip predicate locking */
567}

References InvalidSerializableXact, IsMVCCSnapshot, MySerializableXact, PredicateLockingNeededForRelation(), ReleasePredicateLocks(), and SxactIsROSafe.

Referenced by CheckForSerializableConflictOut(), CheckForSerializableConflictOutNeeded(), PredicateLockPage(), PredicateLockRelation(), and PredicateLockTID().

◆ SerializationNeededForWrite()

static bool SerializationNeededForWrite ( Relation  relation)
inline static

Definition at line 574 of file predicate.c.

575{
576 /* Nothing to do if this is not a serializable transaction */
578 return false;
579
580 /* Check if the relation doesn't participate in predicate locking */
582 return false;
583
584 return true; /* no excuse to skip predicate locking */
585}

References InvalidSerializableXact, MySerializableXact, and PredicateLockingNeededForRelation().

Referenced by CheckForSerializableConflictIn(), and CheckTableForSerializableConflictIn().

◆ SerialPagePrecedesLogically()

◆ SerialSetActiveSerXmin()

static void SerialSetActiveSerXmin ( TransactionId  xid)
static

Definition at line 971 of file predicate.c.

972{
974
975 /*
976 * When no sxacts are active, nothing overlaps, set the xid values to
977 * invalid to show that there are no valid entries. Don't clear headPage,
978 * though. A new xmin might still land on that page, and we don't want to
979 * repeatedly zero out the same page.
980 */
981 if (!TransactionIdIsValid(xid))
982 {
986 return;
987 }
988
989 /*
990 * When we're recovering prepared transactions, the global xmin might move
991 * backwards depending on the order they're recovered. Normally that's not
992 * OK, but during recovery no serializable transactions will commit, so
993 * the SLRU is empty and we can get away with it.
994 */
995 if (RecoveryInProgress())
996 {
1000 {
1001 serialControl->tailXid = xid;
1002 }
1004 return;
1005 }
1006
1009
1010 serialControl->tailXid = xid;
1011
1013}

References Assert, fb(), SerialControlData::headPage, SerialControlData::headXid, InvalidTransactionId, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), RecoveryInProgress(), serialControl, SerialControlData::tailXid, TransactionIdFollows(), TransactionIdIsValid, and TransactionIdPrecedes().

Referenced by GetSerializableTransactionSnapshotInt(), predicatelock_twophase_recover(), and SetNewSxactGlobalXmin().

◆ SetNewSxactGlobalXmin()

◆ SetPossibleUnsafeConflict()

static void SetPossibleUnsafeConflict ( SERIALIZABLEXACT * roXact,
SERIALIZABLEXACT * activeXact 
)
static

Definition at line 680 of file predicate.c.

682{
684
688
692 errmsg("not enough elements in RWConflictPool to record a potential read/write conflict"),
693 errhint("You might need to run fewer transactions at a time or increase \"max_connections\".")));
694
696 dlist_delete(&conflict->outLink);
697
698 conflict->sxactOut = activeXact;
699 conflict->sxactIn = roXact;
700 dlist_push_tail(&activeXact->possibleUnsafeConflicts, &conflict->outLink);
701 dlist_push_tail(&roXact->possibleUnsafeConflicts, &conflict->inLink);
702}

References Assert, RWConflictPoolHeaderData::availableList, dlist_delete(), dlist_head_element, dlist_is_empty(), dlist_push_tail(), ereport, errcode(), errhint(), errmsg, ERROR, fb(), RWConflictPool, and SxactIsReadOnly.

Referenced by GetSerializableTransactionSnapshotInt().

◆ SetRWConflict()

static void SetRWConflict ( SERIALIZABLEXACT * reader,
SERIALIZABLEXACT * writer 
)
static

Definition at line 657 of file predicate.c.

658{
660
661 Assert(reader != writer);
662 Assert(!RWConflictExists(reader, writer));
663
667 errmsg("not enough elements in RWConflictPool to record a read/write conflict"),
668 errhint("You might need to run fewer transactions at a time or increase \"max_connections\".")));
669
671 dlist_delete(&conflict->outLink);
672
673 conflict->sxactOut = reader;
674 conflict->sxactIn = writer;
675 dlist_push_tail(&reader->outConflicts, &conflict->outLink);
676 dlist_push_tail(&writer->inConflicts, &conflict->inLink);
677}

References Assert, RWConflictPoolHeaderData::availableList, dlist_delete(), dlist_head_element, dlist_is_empty(), dlist_push_tail(), ereport, errcode(), errhint(), errmsg, ERROR, fb(), SERIALIZABLEXACT::outConflicts, RWConflictExists(), and RWConflictPool.

Referenced by FlagRWConflict().

◆ SetSerializableTransactionSnapshot()

void SetSerializableTransactionSnapshot ( Snapshot  snapshot,
VirtualTransactionId sourcevxid,
int  sourcepid 
)

Definition at line 1652 of file predicate.c.

1655{
1657
1658 /*
1659 * If this is called by parallel.c in a parallel worker, we don't want to
1660 * create a SERIALIZABLEXACT just yet because the leader's
1661 * SERIALIZABLEXACT will be installed with AttachSerializableXact(). We
1662 * also don't want to reject SERIALIZABLE READ ONLY DEFERRABLE in this
1663 * case, because the leader has already determined that the snapshot it
1664 * has passed us is safe. So there is nothing for us to do.
1665 */
1666 if (IsParallelWorker())
1667 return;
1668
1669 /*
1670 * We do not allow SERIALIZABLE READ ONLY DEFERRABLE transactions to
1671 * import snapshots, since there's no way to wait for a safe snapshot when
1672 * we're using the snap we're told to. (XXX instead of throwing an error,
1673 * we could just ignore the XactDeferrable flag?)
1674 */
1676 ereport(ERROR,
1678 errmsg("a snapshot-importing transaction must not be READ ONLY DEFERRABLE")));
1679
1681 sourcepid);
1682}

References Assert, ereport, errcode(), errmsg, ERROR, fb(), GetSerializableTransactionSnapshotInt(), IsolationIsSerializable, IsParallelWorker, XactDeferrable, and XactReadOnly.

Referenced by SetTransactionSnapshot().

◆ ShareSerializableXact()

SerializableXactHandle ShareSerializableXact ( void  )

Definition at line 4977 of file predicate.c.

4978{
4979 return MySerializableXact;
4980}

References MySerializableXact.

Referenced by InitializeParallelDSM().

◆ SummarizeOldestCommittedSxact()

static void SummarizeOldestCommittedSxact ( void  )
static

Definition at line 1433 of file predicate.c.

1434{
1436
1438
1439 /*
1440 * This function is only called if there are no sxact slots available.
1441 * Some of them must belong to old, already-finished transactions, so
1442 * there should be something in FinishedSerializableTransactions list that
1443 * we can summarize. However, there's a race condition: while we were not
1444 * holding any locks, a transaction might have ended and cleaned up all
1445 * the finished sxact entries already, freeing up their sxact slots. In
1446 * that case, we have nothing to do here. The caller will find one of the
1447 * slots released by the other backend when it retries.
1448 */
1450 {
1452 return;
1453 }
1454
1455 /*
1456 * Grab the first sxact off the finished list -- this will be the earliest
1457 * commit. Remove it from the list.
1458 */
1461 dlist_delete_thoroughly(&sxact->finishedLink);
1462
1463 /* Add to SLRU summary information. */
1466 ? sxact->SeqNo.earliestOutConflictCommit : InvalidSerCommitSeqNo);
1467
1468 /* Summarize and release the detail. */
1469 ReleaseOneSerializableXact(sxact, false, true);
1470
1472}

References dlist_delete_thoroughly(), dlist_head_element, dlist_is_empty(), fb(), FinishedSerializableTransactions, InvalidSerCommitSeqNo, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ReleaseOneSerializableXact(), SerialAdd(), SxactHasConflictOut, SxactIsReadOnly, and TransactionIdIsValid.

Referenced by GetSerializableTransactionSnapshotInt().

◆ TransferPredicateLocksToHeapRelation()

void TransferPredicateLocksToHeapRelation ( Relation  relation)

◆ TransferPredicateLocksToNewTarget()

static bool TransferPredicateLocksToNewTarget ( PREDICATELOCKTARGETTAG  oldtargettag,
PREDICATELOCKTARGETTAG  newtargettag,
bool  removeOld 
)
static

Definition at line 2660 of file predicate.c.

2663{
2669 bool found;
2670 bool outOfShmem = false;
2671
2673 LW_EXCLUSIVE));
2674
2679
2680 if (removeOld)
2681 {
2682 /*
2683 * Remove the dummy entry to give us scratch space, so we know we'll
2684 * be able to create the new lock target.
2685 */
2686 RemoveScratchTarget(false);
2687 }
2688
2689 /*
2690 * We must get the partition locks in ascending sequence to avoid
2691 * deadlocks. If old and new partitions are the same, we must request the
2692 * lock only once.
2693 */
2695 {
2699 }
2701 {
2705 }
2706 else
2708
2709 /*
2710 * Look for the old target. If not found, that's OK; no predicate locks
2711 * are affected, so we can just clean up and return. If it does exist,
2712 * walk its list of predicate locks and move or copy them to the new
2713 * target.
2714 */
2716 &oldtargettag,
2718 HASH_FIND, NULL);
2719
2720 if (oldtarget)
2721 {
2724 dlist_mutable_iter iter;
2725
2727 &newtargettag,
2729 HASH_ENTER_NULL, &found);
2730
2731 if (!newtarget)
2732 {
2733 /* Failed to allocate due to insufficient shmem */
2734 outOfShmem = true;
2735 goto exit;
2736 }
2737
2738 /* If we created a new entry, initialize it */
2739 if (!found)
2740 dlist_init(&newtarget->predicateLocks);
2741
2742 newpredlocktag.myTarget = newtarget;
2743
2744 /*
2745 * Loop through all the locks on the old target, replacing them with
2746 * locks on the new target.
2747 */
2749
2750 dlist_foreach_modify(iter, &oldtarget->predicateLocks)
2751 {
2753 dlist_container(PREDICATELOCK, targetLink, iter.cur);
2756
2757 newpredlocktag.myXact = oldpredlock->tag.myXact;
2758
2759 if (removeOld)
2760 {
2761 dlist_delete(&(oldpredlock->xactLink));
2762 dlist_delete(&(oldpredlock->targetLink));
2763
2766 &oldpredlock->tag,
2769 HASH_REMOVE, &found);
2770 Assert(found);
2771 }
2772
2779 &found);
2780 if (!newpredlock)
2781 {
2782 /* Out of shared memory. Undo what we've done so far. */
2785 outOfShmem = true;
2786 goto exit;
2787 }
2788 if (!found)
2789 {
2790 dlist_push_tail(&(newtarget->predicateLocks),
2791 &(newpredlock->targetLink));
2792 dlist_push_tail(&(newpredlocktag.myXact->predicateLocks),
2793 &(newpredlock->xactLink));
2794 newpredlock->commitSeqNo = oldCommitSeqNo;
2795 }
2796 else
2797 {
2798 if (newpredlock->commitSeqNo < oldCommitSeqNo)
2799 newpredlock->commitSeqNo = oldCommitSeqNo;
2800 }
2801
2802 Assert(newpredlock->commitSeqNo != 0);
2803 Assert((newpredlock->commitSeqNo == InvalidSerCommitSeqNo)
2804 || (newpredlock->tag.myXact == OldCommittedSxact));
2805 }
2807
2808 if (removeOld)
2809 {
2810 Assert(dlist_is_empty(&oldtarget->predicateLocks));
2812 }
2813 }
2814
2815
2816exit:
2817 /* Release partition locks in reverse order of acquisition. */
2819 {
2822 }
2824 {
2827 }
2828 else
2830
2831 if (removeOld)
2832 {
2833 /* We shouldn't run out of memory if we're moving locks */
2835
2836 /* Put the scratch entry back */
2837 RestoreScratchTarget(false);
2838 }
2839
2840 return !outOfShmem;
2841}

References Assert, PREDICATELOCK::commitSeqNo, dlist_mutable_iter::cur, DeleteLockTarget(), dlist_container, dlist_delete(), dlist_foreach_modify, dlist_init(), dlist_is_empty(), dlist_push_tail(), fb(), HASH_ENTER_NULL, HASH_FIND, HASH_REMOVE, hash_search_with_hash_value(), InvalidSerCommitSeqNo, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockHeldByMeInMode(), LWLockRelease(), OldCommittedSxact, PredicateLockHash, PredicateLockHashCodeFromTargetHashCode, PredicateLockHashPartitionLock, PredicateLockTargetHash, PredicateLockTargetTagHashCode, RemoveScratchTarget(), RemoveTargetIfNoLongerUsed(), and RestoreScratchTarget().

Referenced by PredicateLockPageSplit().

◆ XidIsConcurrent()

static bool XidIsConcurrent ( TransactionId  xid)
static

Definition at line 3902 of file predicate.c.

3903{
3904 Snapshot snap;
3905
3908
3910
3911 if (TransactionIdPrecedes(xid, snap->xmin))
3912 return false;
3913
3914 if (TransactionIdFollowsOrEquals(xid, snap->xmax))
3915 return true;
3916
3917 return pg_lfind32(xid, snap->xip, snap->xcnt);
3918}

References Assert, fb(), GetTopTransactionIdIfAny(), GetTransactionSnapshot(), pg_lfind32(), TransactionIdEquals, TransactionIdFollowsOrEquals(), TransactionIdIsValid, and TransactionIdPrecedes().

Referenced by CheckForSerializableConflictOut().

Variable Documentation

◆ FinishedSerializableTransactions

◆ LocalPredicateLockHash

◆ max_predicate_locks_per_page

int max_predicate_locks_per_page

Definition at line 375 of file predicate.c.

Referenced by MaxPredicateChildLocks().

◆ max_predicate_locks_per_relation

int max_predicate_locks_per_relation

Definition at line 374 of file predicate.c.

Referenced by MaxPredicateChildLocks().

◆ max_predicate_locks_per_xact

int max_predicate_locks_per_xact

Definition at line 373 of file predicate.c.

Referenced by CreateLocalPredicateLockHash(), and MaxPredicateChildLocks().

◆ max_serializable_xacts

int64 max_serializable_xacts
static

Definition at line 446 of file predicate.c.

Referenced by PredicateLockShmemInit(), and PredicateLockShmemRequest().

◆ MySerializableXact

◆ MyXactDidWrite

◆ OldCommittedSxact

◆ PredicateLockHash

◆ PredicateLockShmemCallbacks

const ShmemCallbacks PredicateLockShmemCallbacks
Initial value:
= {
}

Definition at line 392 of file predicate.c.

392 {
393 .request_fn = PredicateLockShmemRequest,
394 .init_fn = PredicateLockShmemInit,
395 .attach_fn = PredicateLockShmemAttach,
396};

◆ PredicateLockTargetHash

◆ PredXact

◆ RWConflictPool

◆ SavedSerializableXact

SERIALIZABLEXACT* SavedSerializableXact = InvalidSerializableXact
static

Definition at line 444 of file predicate.c.

Referenced by ReleasePredicateLocks().

◆ ScratchPartitionLock

LWLock* ScratchPartitionLock
static

◆ ScratchTargetTag

const PREDICATELOCKTARGETTAG ScratchTargetTag = {0, 0, 0, 0}
static

Definition at line 419 of file predicate.c.

419{0, 0, 0, 0};

Referenced by PredicateLockShmemAttach(), PredicateLockShmemInit(), RemoveScratchTarget(), and RestoreScratchTarget().

◆ ScratchTargetTagHash

uint32 ScratchTargetTagHash
static

◆ serialControl

◆ SerializableXidHash

◆ SerialSlruDesc

SlruDesc SerialSlruDesc
static

Definition at line 326 of file predicate.c.

Referenced by PredicateLockShmemRequest().