PostgreSQL Source Code git master
Loading...
Searching...
No Matches
predicate.c File Reference
#include "postgres.h"
#include "access/parallel.h"
#include "access/slru.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "access/twophase_rmgr.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "port/pg_lfind.h"
#include "storage/predicate.h"
#include "storage/predicate_internals.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/guc_hooks.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
#include "utils/wait_event.h"
Include dependency graph for predicate.c:

Go to the source code of this file.

Data Structures

struct  SerialControlData
 

Macros

#define TargetTagIsCoveredBy(covered_target, covering_target)
 
#define PredicateLockHashPartition(hashcode)    ((hashcode) % NUM_PREDICATELOCK_PARTITIONS)
 
#define PredicateLockHashPartitionLock(hashcode)
 
#define PredicateLockHashPartitionLockByIndex(i)    (&MainLWLockArray[PREDICATELOCK_MANAGER_LWLOCK_OFFSET + (i)].lock)
 
#define NPREDICATELOCKTARGETENTS()    mul_size(max_predicate_locks_per_xact, add_size(MaxBackends, max_prepared_xacts))
 
#define SxactIsOnFinishedList(sxact)   (!dlist_node_is_detached(&(sxact)->finishedLink))
 
#define SxactIsCommitted(sxact)   (((sxact)->flags & SXACT_FLAG_COMMITTED) != 0)
 
#define SxactIsPrepared(sxact)   (((sxact)->flags & SXACT_FLAG_PREPARED) != 0)
 
#define SxactIsRolledBack(sxact)   (((sxact)->flags & SXACT_FLAG_ROLLED_BACK) != 0)
 
#define SxactIsDoomed(sxact)   (((sxact)->flags & SXACT_FLAG_DOOMED) != 0)
 
#define SxactIsReadOnly(sxact)   (((sxact)->flags & SXACT_FLAG_READ_ONLY) != 0)
 
#define SxactHasSummaryConflictIn(sxact)   (((sxact)->flags & SXACT_FLAG_SUMMARY_CONFLICT_IN) != 0)
 
#define SxactHasSummaryConflictOut(sxact)   (((sxact)->flags & SXACT_FLAG_SUMMARY_CONFLICT_OUT) != 0)
 
#define SxactHasConflictOut(sxact)   (((sxact)->flags & SXACT_FLAG_CONFLICT_OUT) != 0)
 
#define SxactIsDeferrableWaiting(sxact)   (((sxact)->flags & SXACT_FLAG_DEFERRABLE_WAITING) != 0)
 
#define SxactIsROSafe(sxact)   (((sxact)->flags & SXACT_FLAG_RO_SAFE) != 0)
 
#define SxactIsROUnsafe(sxact)   (((sxact)->flags & SXACT_FLAG_RO_UNSAFE) != 0)
 
#define SxactIsPartiallyReleased(sxact)   (((sxact)->flags & SXACT_FLAG_PARTIALLY_RELEASED) != 0)
 
#define PredicateLockTargetTagHashCode(predicatelocktargettag)    get_hash_value(PredicateLockTargetHash, predicatelocktargettag)
 
#define PredicateLockHashCodeFromTargetHashCode(predicatelocktag, targethash)
 
#define SerialSlruCtl   (&SerialSlruCtlData)
 
#define SERIAL_PAGESIZE   BLCKSZ
 
#define SERIAL_ENTRYSIZE   sizeof(SerCommitSeqNo)
 
#define SERIAL_ENTRIESPERPAGE   (SERIAL_PAGESIZE / SERIAL_ENTRYSIZE)
 
#define SERIAL_MAX_PAGE   (MaxTransactionId / SERIAL_ENTRIESPERPAGE)
 
#define SerialNextPage(page)   (((page) >= SERIAL_MAX_PAGE) ? 0 : (page) + 1)
 
#define SerialValue(slotno, xid)
 
#define SerialPage(xid)   (((uint32) (xid)) / SERIAL_ENTRIESPERPAGE)
 

Typedefs

typedef struct SerialControlData SerialControlData
 
typedef struct SerialControlData * SerialControl
 

Functions

static SERIALIZABLEXACT * CreatePredXact (void)
 
static void ReleasePredXact (SERIALIZABLEXACT *sxact)
 
static bool RWConflictExists (const SERIALIZABLEXACT *reader, const SERIALIZABLEXACT *writer)
 
static void SetRWConflict (SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer)
 
static void SetPossibleUnsafeConflict (SERIALIZABLEXACT *roXact, SERIALIZABLEXACT *activeXact)
 
static void ReleaseRWConflict (RWConflict conflict)
 
static void FlagSxactUnsafe (SERIALIZABLEXACT *sxact)
 
static bool SerialPagePrecedesLogically (int64 page1, int64 page2)
 
static int serial_errdetail_for_io_error (const void *opaque_data)
 
static void SerialInit (void)
 
static void SerialAdd (TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo)
 
static SerCommitSeqNo SerialGetMinConflictCommitSeqNo (TransactionId xid)
 
static void SerialSetActiveSerXmin (TransactionId xid)
 
static uint32 predicatelock_hash (const void *key, Size keysize)
 
static void SummarizeOldestCommittedSxact (void)
 
static Snapshot GetSafeSnapshot (Snapshot origSnapshot)
 
static Snapshot GetSerializableTransactionSnapshotInt (Snapshot snapshot, VirtualTransactionId *sourcevxid, int sourcepid)
 
static bool PredicateLockExists (const PREDICATELOCKTARGETTAG *targettag)
 
static bool GetParentPredicateLockTag (const PREDICATELOCKTARGETTAG *tag, PREDICATELOCKTARGETTAG *parent)
 
static bool CoarserLockCovers (const PREDICATELOCKTARGETTAG *newtargettag)
 
static void RemoveScratchTarget (bool lockheld)
 
static void RestoreScratchTarget (bool lockheld)
 
static void RemoveTargetIfNoLongerUsed (PREDICATELOCKTARGET *target, uint32 targettaghash)
 
static void DeleteChildTargetLocks (const PREDICATELOCKTARGETTAG *newtargettag)
 
static int MaxPredicateChildLocks (const PREDICATELOCKTARGETTAG *tag)
 
static bool CheckAndPromotePredicateLockRequest (const PREDICATELOCKTARGETTAG *reqtag)
 
static void DecrementParentLocks (const PREDICATELOCKTARGETTAG *targettag)
 
static void CreatePredicateLock (const PREDICATELOCKTARGETTAG *targettag, uint32 targettaghash, SERIALIZABLEXACT *sxact)
 
static void DeleteLockTarget (PREDICATELOCKTARGET *target, uint32 targettaghash)
 
static bool TransferPredicateLocksToNewTarget (PREDICATELOCKTARGETTAG oldtargettag, PREDICATELOCKTARGETTAG newtargettag, bool removeOld)
 
static void PredicateLockAcquire (const PREDICATELOCKTARGETTAG *targettag)
 
static void DropAllPredicateLocksFromTable (Relation relation, bool transfer)
 
static void SetNewSxactGlobalXmin (void)
 
static void ClearOldPredicateLocks (void)
 
static void ReleaseOneSerializableXact (SERIALIZABLEXACT *sxact, bool partial, bool summarize)
 
static bool XidIsConcurrent (TransactionId xid)
 
static void CheckTargetForConflictsIn (PREDICATELOCKTARGETTAG *targettag)
 
static void FlagRWConflict (SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer)
 
static void OnConflict_CheckForSerializationFailure (const SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer)
 
static void CreateLocalPredicateLockHash (void)
 
static void ReleasePredicateLocksLocal (void)
 
static bool PredicateLockingNeededForRelation (Relation relation)
 
static bool SerializationNeededForRead (Relation relation, Snapshot snapshot)
 
static bool SerializationNeededForWrite (Relation relation)
 
bool check_serial_buffers (int *newval, void **extra, GucSource source)
 
void CheckPointPredicate (void)
 
void PredicateLockShmemInit (void)
 
Size PredicateLockShmemSize (void)
 
PredicateLockData * GetPredicateLockStatusData (void)
 
int GetSafeSnapshotBlockingPids (int blocked_pid, int *output, int output_size)
 
Snapshot GetSerializableTransactionSnapshot (Snapshot snapshot)
 
void SetSerializableTransactionSnapshot (Snapshot snapshot, VirtualTransactionId *sourcevxid, int sourcepid)
 
void RegisterPredicateLockingXid (TransactionId xid)
 
bool PageIsPredicateLocked (Relation relation, BlockNumber blkno)
 
void PredicateLockRelation (Relation relation, Snapshot snapshot)
 
void PredicateLockPage (Relation relation, BlockNumber blkno, Snapshot snapshot)
 
void PredicateLockTID (Relation relation, const ItemPointerData *tid, Snapshot snapshot, TransactionId tuple_xid)
 
void TransferPredicateLocksToHeapRelation (Relation relation)
 
void PredicateLockPageSplit (Relation relation, BlockNumber oldblkno, BlockNumber newblkno)
 
void PredicateLockPageCombine (Relation relation, BlockNumber oldblkno, BlockNumber newblkno)
 
void ReleasePredicateLocks (bool isCommit, bool isReadOnlySafe)
 
bool CheckForSerializableConflictOutNeeded (Relation relation, Snapshot snapshot)
 
void CheckForSerializableConflictOut (Relation relation, TransactionId xid, Snapshot snapshot)
 
void CheckForSerializableConflictIn (Relation relation, const ItemPointerData *tid, BlockNumber blkno)
 
void CheckTableForSerializableConflictIn (Relation relation)
 
void PreCommit_CheckForSerializationFailure (void)
 
void AtPrepare_PredicateLocks (void)
 
void PostPrepare_PredicateLocks (FullTransactionId fxid)
 
void PredicateLockTwoPhaseFinish (FullTransactionId fxid, bool isCommit)
 
void predicatelock_twophase_recover (FullTransactionId fxid, uint16 info, void *recdata, uint32 len)
 
SerializableXactHandle ShareSerializableXact (void)
 
void AttachSerializableXact (SerializableXactHandle handle)
 

Variables

static SlruCtlData SerialSlruCtlData
 
static SerialControl serialControl
 
static SERIALIZABLEXACT * OldCommittedSxact
 
int max_predicate_locks_per_xact
 
int max_predicate_locks_per_relation
 
int max_predicate_locks_per_page
 
static PredXactList PredXact
 
static RWConflictPoolHeader RWConflictPool
 
static HTAB * SerializableXidHash
 
static HTAB * PredicateLockTargetHash
 
static HTAB * PredicateLockHash
 
static dlist_head * FinishedSerializableTransactions
 
static const PREDICATELOCKTARGETTAG ScratchTargetTag = {0, 0, 0, 0}
 
static uint32 ScratchTargetTagHash
 
static LWLock * ScratchPartitionLock
 
static HTAB * LocalPredicateLockHash = NULL
 
static SERIALIZABLEXACT * MySerializableXact = InvalidSerializableXact
 
static bool MyXactDidWrite = false
 
static SERIALIZABLEXACT * SavedSerializableXact = InvalidSerializableXact
 

Macro Definition Documentation

◆ NPREDICATELOCKTARGETENTS

Definition at line 265 of file predicate.c.

337 : (page) + 1)
338
339#define SerialValue(slotno, xid) (*((SerCommitSeqNo *) \
340 (SerialSlruCtl->shared->page_buffer[slotno] + \
341 ((((uint32) (xid)) % SERIAL_ENTRIESPERPAGE) * SERIAL_ENTRYSIZE))))
342
343#define SerialPage(xid) (((uint32) (xid)) / SERIAL_ENTRIESPERPAGE)
344
345typedef struct SerialControlData
346{
347 int64 headPage; /* newest initialized page */
348 TransactionId headXid; /* newest valid Xid in the SLRU */
349 TransactionId tailXid; /* oldest xmin we might be interested in */
351
352typedef struct SerialControlData *SerialControl;
353
355
356/*
357 * When the oldest committed transaction on the "finished" list is moved to
358 * SLRU, its predicate locks will be moved to this "dummy" transaction,
359 * collapsing duplicate targets. When a duplicate is found, the later
360 * commitSeqNo is used.
361 */
363
364
365/*
366 * These configuration variables are used to set the predicate lock table size
367 * and to control promotion of predicate locks to coarser granularity in an
368 * attempt to degrade performance (mostly as false positive serialization
369 * failure) gracefully in the face of memory pressure.
370 */
371int max_predicate_locks_per_xact; /* in guc_tables.c */
372int max_predicate_locks_per_relation; /* in guc_tables.c */
373int max_predicate_locks_per_page; /* in guc_tables.c */
374
375/*
376 * This provides a list of objects in order to track transactions
377 * participating in predicate locking. Entries in the list are fixed size,
378 * and reside in shared memory. The memory address of an entry must remain
379 * fixed during its lifetime. The list will be protected from concurrent
380 * update externally; no provision is made in this code to manage that. The
381 * number of entries in the list, and the size allowed for each entry is
382 * fixed upon creation.
383 */
385
386/*
387 * This provides a pool of RWConflict data elements to use in conflict lists
388 * between transactions.
389 */
391
392/*
393 * The predicate locking hash tables are in shared memory.
394 * Each backend keeps pointers to them.
395 */
398static HTAB *PredicateLockHash;
400
401/*
402 * Tag for a dummy entry in PredicateLockTargetHash. By temporarily removing
403 * this entry, you can ensure that there's enough scratch space available for
404 * inserting one entry in the hash table. This is an otherwise-invalid tag.
405 */
406static const PREDICATELOCKTARGETTAG ScratchTargetTag = {0, 0, 0, 0};
409
410/*
411 * The local hash table used to determine when to combine multiple fine-
412 * grained locks into a single coarser-grained lock.
413 */
415
416/*
417 * Keep a pointer to the currently-running serializable transaction (if any)
418 * for quick reference. Also, remember if we have written anything that could
419 * cause a rw-conflict.
420 */
422static bool MyXactDidWrite = false;
423
424/*
425 * The SXACT_FLAG_RO_UNSAFE optimization might lead us to release
426 * MySerializableXact early. If that happens in a parallel query, the leader
427 * needs to defer the destruction of the SERIALIZABLEXACT until end of
428 * transaction, because the workers still have a reference to it. In that
429 * case, the leader stores it here.
430 */
432
433/* local functions */
434
435static SERIALIZABLEXACT *CreatePredXact(void);
437
438static bool RWConflictExists(const SERIALIZABLEXACT *reader, const SERIALIZABLEXACT *writer);
443
445static int serial_errdetail_for_io_error(const void *opaque_data);
446static void SerialInit(void);
450
451static uint32 predicatelock_hash(const void *key, Size keysize);
452static void SummarizeOldestCommittedSxact(void);
456 int sourcepid);
459 PREDICATELOCKTARGETTAG *parent);
461static void RemoveScratchTarget(bool lockheld);
462static void RestoreScratchTarget(bool lockheld);
475 bool removeOld);
477static void DropAllPredicateLocksFromTable(Relation relation,
478 bool transfer);
479static void SetNewSxactGlobalXmin(void);
480static void ClearOldPredicateLocks(void);
481static void ReleaseOneSerializableXact(SERIALIZABLEXACT *sxact, bool partial,
482 bool summarize);
483static bool XidIsConcurrent(TransactionId xid);
488static void CreateLocalPredicateLockHash(void);
489static void ReleasePredicateLocksLocal(void);
490
491
492/*------------------------------------------------------------------------*/
493
494/*
495 * Does this relation participate in predicate locking? Temporary and system
496 * relations are exempt.
497 */
498static inline bool
500{
501 return !(relation->rd_id < FirstUnpinnedObjectId ||
502 RelationUsesLocalBuffers(relation));
503}
504
505/*
506 * When a public interface method is called for a read, this is the test to
507 * see if we should do a quick return.
508 *
509 * Note: this function has side-effects! If this transaction has been flagged
510 * as RO-safe since the last call, we release all predicate locks and reset
511 * MySerializableXact. That makes subsequent calls return quickly.
512 *
513 * This is marked as 'inline' to eliminate the function call overhead in the
514 * common case that serialization is not needed.
515 */
516static inline bool
518{
519 /* Nothing to do if this is not a serializable transaction */
521 return false;
522
523 /*
524 * Don't acquire locks or conflict when scanning with a special snapshot.
525 * This excludes things like CLUSTER and REINDEX. They use the wholesale
526 * functions TransferPredicateLocksToHeapRelation() and
527 * CheckTableForSerializableConflictIn() to participate in serialization,
528 * but the scans involved don't need serialization.
529 */
530 if (!IsMVCCSnapshot(snapshot))
531 return false;
532
533 /*
534 * Check if we have just become "RO-safe". If we have, immediately release
535 * all locks as they're not needed anymore. This also resets
536 * MySerializableXact, so that subsequent calls to this function can exit
537 * quickly.
538 *
539 * A transaction is flagged as RO_SAFE if all concurrent R/W transactions
540 * commit without having conflicts out to an earlier snapshot, thus
541 * ensuring that no conflicts are possible for this transaction.
542 */
544 {
545 ReleasePredicateLocks(false, true);
546 return false;
547 }
548
549 /* Check if the relation doesn't participate in predicate locking */
551 return false;
552
553 return true; /* no excuse to skip predicate locking */
554}
555
556/*
557 * Like SerializationNeededForRead(), but called on writes.
558 * The logic is the same, but there is no snapshot and we can't be RO-safe.
559 */
560static inline bool
562{
563 /* Nothing to do if this is not a serializable transaction */
565 return false;
566
567 /* Check if the relation doesn't participate in predicate locking */
569 return false;
570
571 return true; /* no excuse to skip predicate locking */
572}
573
574
575/*------------------------------------------------------------------------*/
576
577/*
578 * These functions are a simple implementation of a list for this specific
579 * type of struct. If there is ever a generalized shared memory list, we
580 * should probably switch to that.
581 */
582static SERIALIZABLEXACT *
583CreatePredXact(void)
584{
586
588 return NULL;
589
593 return sxact;
594}
595
596static void
598{
600
601 dlist_delete(&sxact->xactLink);
603}
604
605/*------------------------------------------------------------------------*/
606
607/*
608 * These functions manage primitive access to the RWConflict pool and lists.
609 */
610static bool
612{
613 dlist_iter iter;
614
615 Assert(reader != writer);
616
617 /* Check the ends of the purported conflict first. */
618 if (SxactIsDoomed(reader)
620 || dlist_is_empty(&reader->outConflicts)
621 || dlist_is_empty(&writer->inConflicts))
622 return false;
623
624 /*
625 * A conflict is possible; walk the list to find out.
626 *
627 * The unconstify is needed as we have no const version of
628 * dlist_foreach().
629 */
630 dlist_foreach(iter, &unconstify(SERIALIZABLEXACT *, reader)->outConflicts)
631 {
633 dlist_container(RWConflictData, outLink, iter.cur);
634
635 if (conflict->sxactIn == writer)
636 return true;
637 }
638
639 /* No conflict found. */
640 return false;
641}
642
643static void
645{
647
648 Assert(reader != writer);
649 Assert(!RWConflictExists(reader, writer));
650
654 errmsg("not enough elements in RWConflictPool to record a read/write conflict"),
655 errhint("You might need to run fewer transactions at a time or increase \"max_connections\".")));
656
658 dlist_delete(&conflict->outLink);
659
660 conflict->sxactOut = reader;
661 conflict->sxactIn = writer;
662 dlist_push_tail(&reader->outConflicts, &conflict->outLink);
663 dlist_push_tail(&writer->inConflicts, &conflict->inLink);
664}
665
666static void
669{
671
675
679 errmsg("not enough elements in RWConflictPool to record a potential read/write conflict"),
680 errhint("You might need to run fewer transactions at a time or increase \"max_connections\".")));
681
683 dlist_delete(&conflict->outLink);
684
685 conflict->sxactOut = activeXact;
686 conflict->sxactIn = roXact;
687 dlist_push_tail(&activeXact->possibleUnsafeConflicts, &conflict->outLink);
688 dlist_push_tail(&roXact->possibleUnsafeConflicts, &conflict->inLink);
689}
690
691static void
693{
694 dlist_delete(&conflict->inLink);
695 dlist_delete(&conflict->outLink);
697}
698
699static void
701{
703
706
707 sxact->flags |= SXACT_FLAG_RO_UNSAFE;
708
709 /*
710 * We know this isn't a safe snapshot, so we can stop looking for other
711 * potential conflicts.
712 */
713 dlist_foreach_modify(iter, &sxact->possibleUnsafeConflicts)
714 {
716 dlist_container(RWConflictData, inLink, iter.cur);
717
718 Assert(!SxactIsReadOnly(conflict->sxactOut));
719 Assert(sxact == conflict->sxactIn);
720
722 }
723}
724
725/*------------------------------------------------------------------------*/
726
727/*
728 * Decide whether a Serial page number is "older" for truncation purposes.
729 * Analogous to CLOGPagePrecedes().
730 */
731static bool
733{
736
741
742 return (TransactionIdPrecedes(xid1, xid2) &&
744}
745
746static int
748{
749 TransactionId xid = *(const TransactionId *) opaque_data;
750
751 return errdetail("Could not access serializable CSN of transaction %u.", xid);
752}
753
754#ifdef USE_ASSERT_CHECKING
755static void
757{
759 offset = per_page / 2;
762 headPage,
765 oldestXact;
766
767 /* GetNewTransactionId() has assigned the last XID it can safely use. */
768 newestPage = 2 * SLRU_PAGES_PER_SEGMENT - 1; /* nothing special */
769 newestXact = newestPage * per_page + offset;
771 oldestXact = newestXact + 1;
772 oldestXact -= 1U << 31;
773 oldestPage = oldestXact / per_page;
774
775 /*
776 * In this scenario, the SLRU headPage pertains to the last ~1000 XIDs
777 * assigned. oldestXact finishes, ~2B XIDs having elapsed since it
778 * started. Further transactions cause us to summarize oldestXact to
779 * tailPage. Function must return false so SerialAdd() doesn't zero
780 * tailPage (which may contain entries for other old, recently-finished
781 * XIDs) and half the SLRU. Reaching this requires burning ~2B XIDs in
782 * single-user mode, a negligible possibility.
783 */
787
788 /*
789 * In this scenario, the SLRU headPage pertains to oldestXact. We're
790 * summarizing an XID near newestXact. (Assume few other XIDs used
791 * SERIALIZABLE, hence the minimal headPage advancement. Assume
792 * oldestXact was long-running and only recently reached the SLRU.)
793 * Function must return true to make SerialAdd() create targetPage.
794 *
795 * Today's implementation mishandles this case, but it doesn't matter
796 * enough to fix. Verify that the defect affects just one page by
797 * asserting correct treatment of its prior page. Reaching this case
798 * requires burning ~2B XIDs in single-user mode, a negligible
799 * possibility. Moreover, if it does happen, the consequence would be
800 * mild, namely a new transaction failing in SimpleLruReadPage().
801 */
805#if 0
807#endif
808}
809#endif
810
811/*
812 * Initialize for the tracking of old serializable committed xids.
813 */
814static void
815SerialInit(void)
816{
817 bool found;
818
819 /*
820 * Set up SLRU management of the pg_serial data.
821 */
823 SerialSlruCtl->errdetail_for_io_error = serial_errdetail_for_io_error;
824 SimpleLruInit(SerialSlruCtl, "serializable",
825 serializable_buffers, 0, "pg_serial",
827 SYNC_HANDLER_NONE, false);
828#ifdef USE_ASSERT_CHECKING
830#endif
832
833 /*
834 * Create or attach to the SerialControl structure.
835 */
837 ShmemInitStruct("SerialControlData", sizeof(SerialControlData), &found);
838
839 Assert(found == IsUnderPostmaster);
840 if (!found)
841 {
842 /*
843 * Set control information to reflect empty SLRU.
844 */
850 }
851}
852
853/*
854 * GUC check_hook for serializable_buffers
855 */
856bool
857check_serial_buffers(int *newval, void **extra, GucSource source)
858{
859 return check_slru_buffers("serializable_buffers", newval);
860}
861
862/*
863 * Record a committed read write serializable xid and the minimum
864 * commitSeqNo of any transactions to which this xid had a rw-conflict out.
865 * An invalid commitSeqNo means that there were no conflicts out from xid.
866 */
867static void
869{
872 int slotno;
874 bool isNewPage;
875 LWLock *lock;
876
878
879 targetPage = SerialPage(xid);
881
882 /*
883 * In this routine, we must hold both SerialControlLock and the SLRU bank
884 * lock simultaneously while making the SLRU data catch up with the new
885 * state that we determine.
886 */
888
889 /*
890 * If 'xid' is older than the global xmin (== tailXid), there's no need to
891 * store it, after all. This can happen if the oldest transaction holding
892 * back the global xmin just finished, making 'xid' uninteresting, but
893 * ClearOldPredicateLocks() has not yet run.
894 */
897 {
899 return;
900 }
901
902 /*
903 * If the SLRU is currently unused, zero out the whole active region from
904 * tailXid to headXid before taking it into use. Otherwise zero out only
905 * any new pages that enter the tailXid-headXid range as we advance
906 * headXid.
907 */
908 if (serialControl->headPage < 0)
909 {
911 isNewPage = true;
912 }
913 else
914 {
917 targetPage);
918 }
919
922 serialControl->headXid = xid;
923 if (isNewPage)
925
926 if (isNewPage)
927 {
928 /* Initialize intervening pages; might involve trading locks */
929 for (;;)
930 {
935 break;
937 LWLockRelease(lock);
938 }
939 }
940 else
941 {
944 }
945
947 SerialSlruCtl->shared->page_dirty[slotno] = true;
948
949 LWLockRelease(lock);
951}
952
953/*
954 * Get the minimum commitSeqNo for any conflict out for the given xid. For
955 * a transaction which exists but has no conflict out, InvalidSerCommitSeqNo
956 * will be returned.
957 */
958static SerCommitSeqNo
960{
964 int slotno;
965
967
972
974 return 0;
975
977
980 return 0;
981
982 /*
983 * The following function must be called without holding SLRU bank lock,
984 * but will return with that lock held, which must then be released.
985 */
987 SerialPage(xid), &xid);
988 val = SerialValue(slotno, xid);
990 return val;
991}
992
993/*
994 * Call this whenever there is a new xmin for active serializable
995 * transactions. We don't need to keep information on transactions which
996 * precede that. InvalidTransactionId means none active, so everything in
997 * the SLRU can be discarded.
998 */
999static void
1001{
1003
1004 /*
1005 * When no sxacts are active, nothing overlaps, set the xid values to
1006 * invalid to show that there are no valid entries. Don't clear headPage,
1007 * though. A new xmin might still land on that page, and we don't want to
1008 * repeatedly zero out the same page.
1009 */
1010 if (!TransactionIdIsValid(xid))
1011 {
1015 return;
1016 }
1017
1018 /*
1019 * When we're recovering prepared transactions, the global xmin might move
1020 * backwards depending on the order they're recovered. Normally that's not
1021 * OK, but during recovery no serializable transactions will commit, so
1022 * the SLRU is empty and we can get away with it.
1023 */
1024 if (RecoveryInProgress())
1025 {
1029 {
1030 serialControl->tailXid = xid;
1031 }
1033 return;
1034 }
1035
1038
1039 serialControl->tailXid = xid;
1040
1042}
1043
1044/*
1045 * Perform a checkpoint --- either during shutdown, or on-the-fly
1046 *
1047 * We don't have any data that needs to survive a restart, but this is a
1048 * convenient place to truncate the SLRU.
1049 */
1050void
1052{
1054
1056
1057 /* Exit quickly if the SLRU is currently not in use. */
1058 if (serialControl->headPage < 0)
1059 {
1061 return;
1062 }
1063
1065 {
1067
1069
1070 /*
1071 * It is possible for the tailXid to be ahead of the headXid. This
1072 * occurs if we checkpoint while there are in-progress serializable
1073 * transaction(s) advancing the tail but we are yet to summarize the
1074 * transactions. In this case, we cutoff up to the headPage and the
1075 * next summary will advance the headXid.
1076 */
1078 {
1079 /* We can truncate the SLRU up to the page containing tailXid */
1081 }
1082 else
1084 }
1085 else
1086 {
1087 /*----------
1088 * The SLRU is no longer needed. Truncate to head before we set head
1089 * invalid.
1090 *
1091 * XXX: It's possible that the SLRU is not needed again until XID
1092 * wrap-around has happened, so that the segment containing headPage
1093 * that we leave behind will appear to be new again. In that case it
1094 * won't be removed until XID horizon advances enough to make it
1095 * current again.
1096 *
1097 * XXX: This should happen in vac_truncate_clog(), not in checkpoints.
1098 * Consider this scenario, starting from a system with no in-progress
1099 * transactions and VACUUM FREEZE having maximized oldestXact:
1100 * - Start a SERIALIZABLE transaction.
1101 * - Start, finish, and summarize a SERIALIZABLE transaction, creating
1102 * one SLRU page.
1103 * - Consume XIDs to reach xidStopLimit.
1104 * - Finish all transactions. Due to the long-running SERIALIZABLE
1105 * transaction, earlier checkpoints did not touch headPage. The
1106 * next checkpoint will change it, but that checkpoint happens after
1107 * the end of the scenario.
1108 * - VACUUM to advance XID limits.
1109 * - Consume ~2M XIDs, crossing the former xidWrapLimit.
1110 * - Start, finish, and summarize a SERIALIZABLE transaction.
1111 * SerialAdd() declines to create the targetPage, because headPage
1112 * is not regarded as in the past relative to that targetPage. The
1113 * transaction instigating the summarize fails in
1114 * SimpleLruReadPage().
1115 */
1117 serialControl->headPage = -1;
1118 }
1119
1121
1122 /*
1123 * Truncate away pages that are no longer required. Note that no
1124 * additional locking is required, because this is only called as part of
1125 * a checkpoint, and the validity limits have already been determined.
1126 */
1128
1129 /*
1130 * Write dirty SLRU pages to disk
1131 *
1132 * This is not actually necessary from a correctness point of view. We do
1133 * it merely as a debugging aid.
1134 *
1135 * We're doing this after the truncation to avoid writing pages right
1136 * before deleting the file in which they sit, which would be completely
1137 * pointless.
1138 */
1140}
1141
1142/*------------------------------------------------------------------------*/
1143
1144/*
1145 * PredicateLockShmemInit -- Initialize the predicate locking data structures.
1146 *
1147 * This is called from CreateSharedMemoryAndSemaphores(), which see for
1148 * more comments. In the normal postmaster case, the shared hash tables
1149 * are created here. Backends inherit the pointers
1150 * to the shared tables via fork(). In the EXEC_BACKEND case, each
1151 * backend re-executes this code to obtain pointers to the already existing
1152 * shared hash tables.
1153 */
1154void
1156{
1157 HASHCTL info;
1160 bool found;
1161
1162#ifndef EXEC_BACKEND
1164#endif
1165
1166 /*
1167 * Compute size of predicate lock target hashtable. Note these
1168 * calculations must agree with PredicateLockShmemSize!
1169 */
1171
1172 /*
1173 * Allocate hash table for PREDICATELOCKTARGET structs. This stores
1174 * per-predicate-lock-target information.
1175 */
1176 info.keysize = sizeof(PREDICATELOCKTARGETTAG);
1177 info.entrysize = sizeof(PREDICATELOCKTARGET);
1179
1180 PredicateLockTargetHash = ShmemInitHash("PREDICATELOCKTARGET hash",
1183 &info,
1186
1187 /*
1188 * Reserve a dummy entry in the hash table; we use it to make sure there's
1189 * always one entry available when we need to split or combine a page,
1190 * because running out of space there could mean aborting a
1191 * non-serializable transaction.
1192 */
1193 if (!IsUnderPostmaster)
1194 {
1196 HASH_ENTER, &found);
1197 Assert(!found);
1198 }
1199
1200 /* Pre-calculate the hash and partition lock of the scratch entry */
1203
1204 /*
1205 * Allocate hash table for PREDICATELOCK structs. This stores per
1206 * xact-lock-of-a-target information.
1207 */
1208 info.keysize = sizeof(PREDICATELOCKTAG);
1209 info.entrysize = sizeof(PREDICATELOCK);
1210 info.hash = predicatelock_hash;
1212
1213 /* Assume an average of 2 xacts per target */
1214 max_table_size *= 2;
1215
1216 PredicateLockHash = ShmemInitHash("PREDICATELOCK hash",
1219 &info,
1222
1223 /*
1224 * Compute size for serializable transaction hashtable. Note these
1225 * calculations must agree with PredicateLockShmemSize!
1226 */
1228
1229 /*
1230 * Allocate a list to hold information on transactions participating in
1231 * predicate locking.
1232 *
1233 * Assume an average of 10 predicate locking transactions per backend.
1234 * This allows aggressive cleanup while detail is present before data must
1235 * be summarized for storage in SLRU and the "dummy" transaction.
1236 */
1237 max_table_size *= 10;
1238
1241 sizeof(SERIALIZABLEXACT))));
1242
1243 PredXact = ShmemInitStruct("PredXactList",
1245 &found);
1246 Assert(found == IsUnderPostmaster);
1247 if (!found)
1248 {
1249 int i;
1250
1251 /* clean everything, both the header and the element */
1253
1264 /* Add all elements to available list, clean. */
1265 for (i = 0; i < max_table_size; i++)
1266 {
1270 }
1287 }
1288 /* This never changes, so let's keep a local copy. */
1290
1291 /*
1292 * Allocate hash table for SERIALIZABLEXID structs. This stores per-xid
1293 * information for serializable transactions which have accessed data.
1294 */
1295 info.keysize = sizeof(SERIALIZABLEXIDTAG);
1296 info.entrysize = sizeof(SERIALIZABLEXID);
1297
1298 SerializableXidHash = ShmemInitHash("SERIALIZABLEXID hash",
1301 &info,
1304
1305 /*
1306 * Allocate space for tracking rw-conflicts in lists attached to the
1307 * transactions.
1308 *
1309 * Assume an average of 5 conflicts per transaction. Calculations suggest
1310 * that this will prevent resource exhaustion in even the most pessimal
1311 * loads up to max_connections = 200 with all 200 connections pounding the
1312 * database with serializable transactions. Beyond that, there may be
1313 * occasional transactions canceled when trying to flag conflicts. That's
1314 * probably OK.
1315 */
1316 max_table_size *= 5;
1317
1321
1322 RWConflictPool = ShmemInitStruct("RWConflictPool",
1324 &found);
1325 Assert(found == IsUnderPostmaster);
1326 if (!found)
1327 {
1328 int i;
1329
1330 /* clean everything, including the elements */
1332
1336 /* Add all elements to available list, clean. */
1337 for (i = 0; i < max_table_size; i++)
1338 {
1341 }
1342 }
1343
1344 /*
1345 * Create or attach to the header for the list of finished serializable
1346 * transactions.
1347 */
1349 ShmemInitStruct("FinishedSerializableTransactions",
1350 sizeof(dlist_head),
1351 &found);
1352 Assert(found == IsUnderPostmaster);
1353 if (!found)
1355
1356 /*
1357 * Initialize the SLRU storage for old committed serializable
1358 * transactions.
1359 */
1360 SerialInit();
1361}
1362
1363/*
1364 * Estimate shared-memory space used for predicate lock table
1365 */
1366Size
1368{
1369 Size size = 0;
1370 long max_table_size;
1371
1372 /* predicate lock target hash table */
1375 sizeof(PREDICATELOCKTARGET)));
1376
1377 /* predicate lock hash table */
1378 max_table_size *= 2;
1380 sizeof(PREDICATELOCK)));
1381
1382 /*
1383 * Since NPREDICATELOCKTARGETENTS is only an estimate, add 10% safety
1384 * margin.
1385 */
1386 size = add_size(size, size / 10);
1387
1388 /* transaction list */
1390 max_table_size *= 10;
1391 size = add_size(size, PredXactListDataSize);
1392 size = add_size(size, mul_size((Size) max_table_size,
1393 sizeof(SERIALIZABLEXACT)));
1394
1395 /* transaction xid table */
1397 sizeof(SERIALIZABLEXID)));
1398
1399 /* rw-conflict pool */
1400 max_table_size *= 5;
1402 size = add_size(size, mul_size((Size) max_table_size,
1404
1405 /* Head for list of finished serializable transactions. */
1406 size = add_size(size, sizeof(dlist_head));
1407
1408 /* Shared memory structures for SLRU tracking of old committed xids. */
1409 size = add_size(size, sizeof(SerialControlData));
1411
1412 return size;
1413}
1414
1415
1416/*
1417 * Compute the hash code associated with a PREDICATELOCKTAG.
1418 *
1419 * Because we want to use just one set of partition locks for both the
1420 * PREDICATELOCKTARGET and PREDICATELOCK hash tables, we have to make sure
1421 * that PREDICATELOCKs fall into the same partition number as their
1422 * associated PREDICATELOCKTARGETs. dynahash.c expects the partition number
1423 * to be the low-order bits of the hash code, and therefore a
1424 * PREDICATELOCKTAG's hash code must have the same low-order bits as the
1425 * associated PREDICATELOCKTARGETTAG's hash code. We achieve this with this
1426 * specialized hash function.
1427 */
1428static uint32
1429predicatelock_hash(const void *key, Size keysize)
1430{
1431 const PREDICATELOCKTAG *predicatelocktag = (const PREDICATELOCKTAG *) key;
1433
1434 Assert(keysize == sizeof(PREDICATELOCKTAG));
1435
1436 /* Look into the associated target object, and compute its hash code */
1438
1440}
1441
1442
1443/*
1444 * GetPredicateLockStatusData
1445 * Return a table containing the internal state of the predicate
1446 * lock manager for use in pg_lock_status.
1447 *
1448 * Like GetLockStatusData, this function tries to hold the partition LWLocks
1449 * for as short a time as possible by returning two arrays that simply
1450 * contain the PREDICATELOCKTARGETTAG and SERIALIZABLEXACT for each lock
1451 * table entry. Multiple copies of the same PREDICATELOCKTARGETTAG and
1452 * SERIALIZABLEXACT will likely appear.
1453 */
1456{
1458 int i;
1459 int els,
1460 el;
1463
1465
1466 /*
1467 * To ensure consistency, acquire all of the partition locks in ascending
1468 * order and hold them simultaneously, then take SerializableXactHashLock.
1469 */
1470 for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
1473
1474 /* Get number of locks and allocate appropriately-sized arrays. */
1476 data->nelements = els;
1479
1480
1481 /* Scan through PredicateLockHash and copy contents */
1483
1484 el = 0;
1485
1487 {
1488 data->locktags[el] = predlock->tag.myTarget->tag;
1489 data->xacts[el] = *predlock->tag.myXact;
1490 el++;
1491 }
1492
1493 Assert(el == els);
1494
1495 /* Release locks in reverse order */
1497 for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
1499
1500 return data;
1501}
1502
1503/*
1504 * Free up shared memory structures by pushing the oldest sxact (the one at
1505 * the front of the FinishedSerializableTransactions list) into summary form.
1506 * Each call will free exactly one SERIALIZABLEXACT structure and may also
1507 * free one or more of these structures: SERIALIZABLEXID, PREDICATELOCK,
1508 * PREDICATELOCKTARGET, RWConflictData.
1509 */
1510static void
1512{
1514
1516
1517 /*
1518 * This function is only called if there are no sxact slots available.
1519 * Some of them must belong to old, already-finished transactions, so
1520 * there should be something in FinishedSerializableTransactions list that
1521 * we can summarize. However, there's a race condition: while we were not
1522 * holding any locks, a transaction might have ended and cleaned up all
1523 * the finished sxact entries already, freeing up their sxact slots. In
1524 * that case, we have nothing to do here. The caller will find one of the
1525 * slots released by the other backend when it retries.
1526 */
1528 {
1530 return;
1531 }
1532
1533 /*
1534 * Grab the first sxact off the finished list -- this will be the earliest
1535 * commit. Remove it from the list.
1536 */
1539 dlist_delete_thoroughly(&sxact->finishedLink);
1540
1541 /* Add to SLRU summary information. */
1544 ? sxact->SeqNo.earliestOutConflictCommit : InvalidSerCommitSeqNo);
1545
1546 /* Summarize and release the detail. */
1547 ReleaseOneSerializableXact(sxact, false, true);
1548
1550}
1551
1552/*
1553 * GetSafeSnapshot
1554 * Obtain and register a snapshot for a READ ONLY DEFERRABLE
1555 * transaction. Ensures that the snapshot is "safe", i.e. a
1556 * read-only transaction running on it can execute serializably
1557 * without further checks. This requires waiting for concurrent
1558 * transactions to complete, and retrying with a new snapshot if
1559 * one of them could possibly create a conflict.
1560 *
1561 * As with GetSerializableTransactionSnapshot (which this is a subroutine
1562 * for), the passed-in Snapshot pointer should reference a static data
1563 * area that can safely be passed to GetSnapshotData.
1564 */
1565static Snapshot
1567{
1568 Snapshot snapshot;
1569
1571
1572 while (true)
1573 {
1574 /*
1575 * GetSerializableTransactionSnapshotInt is going to call
1576 * GetSnapshotData, so we need to provide it the static snapshot area
1577 * our caller passed to us. The pointer returned is actually the same
1578 * one passed to it, but we avoid assuming that here.
1579 */
1581 NULL, InvalidPid);
1582
1584 return snapshot; /* no concurrent r/w xacts; it's safe */
1585
1587
1588 /*
1589 * Wait for concurrent transactions to finish. Stop early if one of
1590 * them marked us as conflicted.
1591 */
1595 {
1599 }
1601
1603 {
1605 break; /* success */
1606 }
1607
1609
1610 /* else, need to retry... */
1613 errmsg_internal("deferrable snapshot was unsafe; trying a new one")));
1614 ReleasePredicateLocks(false, false);
1615 }
1616
1617 /*
1618 * Now we have a safe snapshot, so we don't need to do any further checks.
1619 */
1621 ReleasePredicateLocks(false, true);
1622
1623 return snapshot;
1624}
1625
1626/*
1627 * GetSafeSnapshotBlockingPids
1628 * If the specified process is currently blocked in GetSafeSnapshot,
1629 * write the process IDs of all processes that it is blocked by
1630 * into the caller-supplied buffer output[]. The list is truncated at
1631 * output_size, and the number of PIDs written into the buffer is
1632 * returned. Returns zero if the given PID is not currently blocked
1633 * in GetSafeSnapshot.
1634 */
1635int
1637{
1638 int num_written = 0;
1639 dlist_iter iter;
1641
1643
1644 /* Find blocked_pid's SERIALIZABLEXACT by linear search. */
1646 {
1648 dlist_container(SERIALIZABLEXACT, xactLink, iter.cur);
1649
1650 if (sxact->pid == blocked_pid)
1651 {
1653 break;
1654 }
1655 }
1656
1657 /* Did we find it, and is it currently waiting in GetSafeSnapshot? */
1659 {
1660 /* Traverse the list of possible unsafe conflicts collecting PIDs. */
1661 dlist_foreach(iter, &blocking_sxact->possibleUnsafeConflicts)
1662 {
1664 dlist_container(RWConflictData, inLink, iter.cur);
1665
1666 output[num_written++] = possibleUnsafeConflict->sxactOut->pid;
1667
1668 if (num_written >= output_size)
1669 break;
1670 }
1671 }
1672
1674
1675 return num_written;
1676}
1677
1678/*
1679 * Acquire a snapshot that can be used for the current transaction.
1680 *
1681 * Make sure we have a SERIALIZABLEXACT reference in MySerializableXact.
1682 * It should be current for this process and be contained in PredXact.
1683 *
1684 * The passed-in Snapshot pointer should reference a static data area that
1685 * can safely be passed to GetSnapshotData. The return value is actually
1686 * always this same pointer; no new snapshot data structure is allocated
1687 * within this function.
1688 */
1691{
1693
1694 /*
1695 * Can't use serializable mode while recovery is still active, as it is,
1696 * for example, on a hot standby. We could get here despite the check in
1697 * check_transaction_isolation() if default_transaction_isolation is set
1698 * to serializable, so phrase the hint accordingly.
1699 */
1700 if (RecoveryInProgress())
1701 ereport(ERROR,
1703 errmsg("cannot use serializable mode in a hot standby"),
1704 errdetail("\"default_transaction_isolation\" is set to \"serializable\"."),
1705 errhint("You can use \"SET default_transaction_isolation = 'repeatable read'\" to change the default.")));
1706
1707 /*
1708 * A special optimization is available for SERIALIZABLE READ ONLY
1709 * DEFERRABLE transactions -- we can wait for a suitable snapshot and
1710 * thereby avoid all SSI overhead once it's running.
1711 */
1713 return GetSafeSnapshot(snapshot);
1714
1716 NULL, InvalidPid);
1717}
1718
1719/*
1720 * Import a snapshot to be used for the current transaction.
1721 *
1722 * This is nearly the same as GetSerializableTransactionSnapshot, except that
1723 * we don't take a new snapshot, but rather use the data we're handed.
1724 *
1725 * The caller must have verified that the snapshot came from a serializable
1726 * transaction; and if we're read-write, the source transaction must not be
1727 * read-only.
1728 */
1729void
1732 int sourcepid)
1733{
1735
1736 /*
1737 * If this is called by parallel.c in a parallel worker, we don't want to
1738 * create a SERIALIZABLEXACT just yet because the leader's
1739 * SERIALIZABLEXACT will be installed with AttachSerializableXact(). We
1740 * also don't want to reject SERIALIZABLE READ ONLY DEFERRABLE in this
1741 * case, because the leader has already determined that the snapshot it
1742 * has passed us is safe. So there is nothing for us to do.
1743 */
1744 if (IsParallelWorker())
1745 return;
1746
1747 /*
1748 * We do not allow SERIALIZABLE READ ONLY DEFERRABLE transactions to
1749 * import snapshots, since there's no way to wait for a safe snapshot when
1750 * we're using the snap we're told to. (XXX instead of throwing an error,
1751 * we could just ignore the XactDeferrable flag?)
1752 */
1754 ereport(ERROR,
1756 errmsg("a snapshot-importing transaction must not be READ ONLY DEFERRABLE")));
1757
1759 sourcepid);
1760}
1761
1762/*
1763 * Guts of GetSerializableTransactionSnapshot
1764 *
1765 * If sourcevxid is valid, this is actually an import operation and we should
1766 * skip calling GetSnapshotData, because the snapshot contents are already
1767 * loaded up. HOWEVER: to avoid race conditions, we must check that the
1768 * source xact is still running after we acquire SerializableXactHashLock.
1769 * We do that by calling ProcArrayInstallImportedXmin.
1770 */
1771static Snapshot
1774 int sourcepid)
1775{
1776 PGPROC *proc;
1779 *othersxact;
1780
1781 /* We only do this for serializable transactions. Once. */
1783
1785
1786 /*
1787 * Since all parts of a serializable transaction must use the same
1788 * snapshot, it is too late to establish one after a parallel operation
1789 * has begun.
1790 */
1791 if (IsInParallelMode())
1792 elog(ERROR, "cannot establish serializable snapshot during a parallel operation");
1793
1794 proc = MyProc;
1795 Assert(proc != NULL);
1796 GET_VXID_FROM_PGPROC(vxid, *proc);
1797
1798 /*
1799 * First we get the sxact structure, which may involve looping and access
1800 * to the "finished" list to free a structure for use.
1801 *
1802 * We must hold SerializableXactHashLock when taking/checking the snapshot
1803 * to avoid race conditions, for much the same reasons that
1804 * GetSnapshotData takes the ProcArrayLock. Since we might have to
1805 * release SerializableXactHashLock to call SummarizeOldestCommittedSxact,
1806 * this means we have to create the sxact first, which is a bit annoying
1807 * (in particular, an elog(ERROR) in procarray.c would cause us to leak
1808 * the sxact). Consider refactoring to avoid this.
1809 */
1810#ifdef TEST_SUMMARIZE_SERIAL
1812#endif
1814 do
1815 {
1817 /* If null, push out committed sxact to SLRU summary & retry. */
1818 if (!sxact)
1819 {
1823 }
1824 } while (!sxact);
1825
1826 /* Get the snapshot, or check that it's safe to use */
1827 if (!sourcevxid)
1828 snapshot = GetSnapshotData(snapshot);
1829 else if (!ProcArrayInstallImportedXmin(snapshot->xmin, sourcevxid))
1830 {
1833 ereport(ERROR,
1835 errmsg("could not import the requested snapshot"),
1836 errdetail("The source process with PID %d is not running anymore.",
1837 sourcepid)));
1838 }
1839
1840 /*
1841 * If there are no serializable transactions which are not read-only, we
1842 * can "opt out" of predicate locking and conflict checking for a
1843 * read-only transaction.
1844 *
1845 * The reason this is safe is that a read-only transaction can only become
1846 * part of a dangerous structure if it overlaps a writable transaction
1847 * which in turn overlaps a writable transaction which committed before
1848 * the read-only transaction started. A new writable transaction can
1849 * overlap this one, but it can't meet the other condition of overlapping
1850 * a transaction which committed before this one started.
1851 */
1853 {
1856 return snapshot;
1857 }
1858
1859 /* Initialize the structure. */
1860 sxact->vxid = vxid;
1861 sxact->SeqNo.lastCommitBeforeSnapshot = PredXact->LastSxactCommitSeqNo;
1862 sxact->prepareSeqNo = InvalidSerCommitSeqNo;
1863 sxact->commitSeqNo = InvalidSerCommitSeqNo;
1864 dlist_init(&(sxact->outConflicts));
1865 dlist_init(&(sxact->inConflicts));
1866 dlist_init(&(sxact->possibleUnsafeConflicts));
1867 sxact->topXid = GetTopTransactionIdIfAny();
1868 sxact->finishedBefore = InvalidTransactionId;
1869 sxact->xmin = snapshot->xmin;
1870 sxact->pid = MyProcPid;
1871 sxact->pgprocno = MyProcNumber;
1872 dlist_init(&sxact->predicateLocks);
1873 dlist_node_init(&sxact->finishedLink);
1874 sxact->flags = 0;
1875 if (XactReadOnly)
1876 {
1877 dlist_iter iter;
1878
1879 sxact->flags |= SXACT_FLAG_READ_ONLY;
1880
1881 /*
1882 * Register all concurrent r/w transactions as possible conflicts; if
1883 * all of them commit without any outgoing conflicts to earlier
1884 * transactions then this snapshot can be deemed safe (and we can run
1885 * without tracking predicate locks).
1886 */
1888 {
1890
1894 {
1896 }
1897 }
1898
1899 /*
1900 * If we didn't find any possibly unsafe conflicts because every
1901 * uncommitted writable transaction turned out to be doomed, then we
1902 * can "opt out" immediately. See comments above the earlier check
1903 * for PredXact->WritableSxactCount == 0.
1904 */
1905 if (dlist_is_empty(&sxact->possibleUnsafeConflicts))
1906 {
1909 return snapshot;
1910 }
1911 }
1912 else
1913 {
1917 }
1918
1919 /* Maintain serializable global xmin info. */
1921 {
1923 PredXact->SxactGlobalXmin = snapshot->xmin;
1925 SerialSetActiveSerXmin(snapshot->xmin);
1926 }
1927 else if (TransactionIdEquals(snapshot->xmin, PredXact->SxactGlobalXmin))
1928 {
1931 }
1932 else
1933 {
1935 }
1936
1938 MyXactDidWrite = false; /* haven't written anything yet */
1939
1941
1943
1944 return snapshot;
1945}
1946
1947static void
1949{
1951
1952 /* Initialize the backend-local hash table of parent locks */
1954 hash_ctl.keysize = sizeof(PREDICATELOCKTARGETTAG);
1955 hash_ctl.entrysize = sizeof(LOCALPREDICATELOCK);
1956 LocalPredicateLockHash = hash_create("Local predicate lock",
1958 &hash_ctl,
1960}
1961
1962/*
1963 * Register the top level XID in SerializableXidHash.
1964 * Also store it for easy reference in MySerializableXact.
1965 */
1966void
1968{
1971 bool found;
1972
1973 /*
1974 * If we're not tracking predicate lock data for this transaction, we
1975 * should ignore the request and return quickly.
1976 */
1978 return;
1979
1980 /* We should have a valid XID and be at the top level. */
1982
1984
1985 /* This should only be done once per transaction. */
1987
1989
1990 sxidtag.xid = xid;
1992 &sxidtag,
1993 HASH_ENTER, &found);
1994 Assert(!found);
1995
1996 /* Initialize the structure. */
1997 sxid->myXact = MySerializableXact;
1999}
2000
2001
2002/*
2003 * Check whether there are any predicate locks held by any transaction
2004 * for the page at the given block number.
2005 *
2006 * Note that the transaction may be completed but not yet subject to
2007 * cleanup due to overlapping serializable transactions. This must
2008 * return valid information regardless of transaction isolation level.
2009 *
2010 * Also note that this doesn't check for a conflicting relation lock,
2011 * just a lock specifically on the given page.
2012 *
2013 * One use is to support proper behavior during GiST index vacuum.
2014 */
2015bool
2017{
2021 PREDICATELOCKTARGET *target;
2022
2024 relation->rd_locator.dbOid,
2025 relation->rd_id,
2026 blkno);
2027
2031 target = (PREDICATELOCKTARGET *)
2034 HASH_FIND, NULL);
2036
2037 return (target != NULL);
2038}
2039
2040
2041/*
2042 * Check whether a particular lock is held by this transaction.
2043 *
2044 * Important note: this function may return false even if the lock is
2045 * being held, because it uses the local lock table which is not
2046 * updated if another transaction modifies our lock list (e.g. to
2047 * split an index page). It can also return true when a coarser
2048 * granularity lock that covers this target is being held. Be careful
2049 * to only use this function in circumstances where such errors are
2050 * acceptable!
2051 */
2052static bool
2054{
2055 LOCALPREDICATELOCK *lock;
2056
2057 /* check local hash table */
2059 targettag,
2060 HASH_FIND, NULL);
2061
2062 if (!lock)
2063 return false;
2064
2065 /*
2066 * Found entry in the table, but still need to check whether it's actually
2067 * held -- it could just be a parent of some held lock.
2068 */
2069 return lock->held;
2070}
2071
2072/*
2073 * Return the parent lock tag in the lock hierarchy: the next coarser
2074 * lock that covers the provided tag.
2075 *
2076 * Returns true and sets *parent to the parent tag if one exists,
2077 * returns false if none exists.
2078 */
2079static bool
2081 PREDICATELOCKTARGETTAG *parent)
2082{
2083 switch (GET_PREDICATELOCKTARGETTAG_TYPE(*tag))
2084 {
2086 /* relation locks have no parent lock */
2087 return false;
2088
2089 case PREDLOCKTAG_PAGE:
2090 /* parent lock is relation lock */
2094
2095 return true;
2096
2097 case PREDLOCKTAG_TUPLE:
2098 /* parent lock is page lock */
2103 return true;
2104 }
2105
2106 /* not reachable */
2107 Assert(false);
2108 return false;
2109}
2110
2111/*
2112 * Check whether the lock we are considering is already covered by a
2113 * coarser lock for our transaction.
2114 *
2115 * Like PredicateLockExists, this function might return a false
2116 * negative, but it will never return a false positive.
2117 */
2118static bool
2120{
2122 parenttag;
2123
2125
2126 /* check parents iteratively until no more */
2128 {
2131 return true;
2132 }
2133
2134 /* no more parents to check; lock is not covered */
2135 return false;
2136}
2137
2138/*
2139 * Remove the dummy entry from the predicate lock target hash, to free up some
2140 * scratch space. The caller must be holding SerializablePredicateListLock,
2141 * and must restore the entry with RestoreScratchTarget() before releasing the
2142 * lock.
2143 *
2144 * If lockheld is true, the caller is already holding the partition lock
2145 * of the partition containing the scratch entry.
2146 */
2147static void
2149{
2150 bool found;
2151
2153
2154 if (!lockheld)
2159 HASH_REMOVE, &found);
2160 Assert(found);
2161 if (!lockheld)
2163}
2164
2165/*
2166 * Re-insert the dummy entry in predicate lock target hash.
2167 */
2168static void
2170{
2171 bool found;
2172
2174
2175 if (!lockheld)
2180 HASH_ENTER, &found);
2181 Assert(!found);
2182 if (!lockheld)
2184}
2185
2186/*
2187 * Check whether the list of related predicate locks is empty for a
2188 * predicate lock target, and remove the target if it is.
2189 */
2190static void
2192{
2194
2196
2197 /* Can't remove it until no locks at this target. */
2198 if (!dlist_is_empty(&target->predicateLocks))
2199 return;
2200
2201 /* Actually remove the target. */
2203 &target->tag,
2205 HASH_REMOVE, NULL);
2206 Assert(rmtarget == target);
2207}
2208
2209/*
2210 * Delete child target locks owned by this process.
2211 * This implementation is assuming that the usage of each target tag field
2212 * is uniform. No need to make this hard if we don't have to.
2213 *
2214 * We acquire an LWLock in the case of parallel mode, because worker
2215 * backends have access to the leader's SERIALIZABLEXACT. Otherwise,
2216 * we aren't acquiring LWLocks for the predicate lock or lock
2217 * target structures associated with this transaction unless we're going
2218 * to modify them, because no other process is permitted to modify our
2219 * locks.
2220 */
2221static void
2223{
2226 dlist_mutable_iter iter;
2227
2230 if (IsInParallelMode())
2231 LWLockAcquire(&sxact->perXactPredicateListLock, LW_EXCLUSIVE);
2232
2233 dlist_foreach_modify(iter, &sxact->predicateLocks)
2234 {
2238
2239 predlock = dlist_container(PREDICATELOCK, xactLink, iter.cur);
2240
2241 oldlocktag = predlock->tag;
2242 Assert(oldlocktag.myXact == sxact);
2243 oldtarget = oldlocktag.myTarget;
2244 oldtargettag = oldtarget->tag;
2245
2247 {
2251
2254
2256
2257 dlist_delete(&predlock->xactLink);
2258 dlist_delete(&predlock->targetLink);
2261 &oldlocktag,
2264 HASH_REMOVE, NULL);
2266
2268
2270
2272 }
2273 }
2274 if (IsInParallelMode())
2275 LWLockRelease(&sxact->perXactPredicateListLock);
2277}
2278
2279/*
2280 * Returns the promotion limit for a given predicate lock target. This is the
2281 * max number of descendant locks allowed before promoting to the specified
2282 * tag. Note that the limit includes non-direct descendants (e.g., both tuples
2283 * and pages for a relation lock).
2284 *
2285 * Currently the default limit is 2 for a page lock, and half of the value of
2286 * max_pred_locks_per_transaction - 1 for a relation lock, to match behavior
2287 * of earlier releases when upgrading.
2288 *
2289 * TODO SSI: We should probably add additional GUCs to allow a maximum ratio
2290 * of page and tuple locks based on the pages in a relation, and the maximum
2291 * ratio of tuple locks to tuples in a page. This would provide more
2292 * generally "balanced" allocation of locks to where they are most useful,
2293 * while still allowing the absolute numbers to prevent one relation from
2294 * tying up all predicate lock resources.
2295 */
2296static int
2298{
2299 switch (GET_PREDICATELOCKTARGETTAG_TYPE(*tag))
2300 {
2306
2307 case PREDLOCKTAG_PAGE:
2309
2310 case PREDLOCKTAG_TUPLE:
2311
2312 /*
2313 * not reachable: nothing is finer-granularity than a tuple, so we
2314 * should never try to promote to it.
2315 */
2316 Assert(false);
2317 return 0;
2318 }
2319
2320 /* not reachable */
2321 Assert(false);
2322 return 0;
2323}
2324
2325/*
2326 * For all ancestors of a newly-acquired predicate lock, increment
2327 * their child count in the parent hash table. If any of them have
2328 * more descendants than their promotion threshold, acquire the
2329 * coarsest such lock.
2330 *
2331 * Returns true if a parent lock was acquired and false otherwise.
2332 */
2333static bool
2335{
2337 nexttag,
2340 bool found,
2341 promote;
2342
2343 promote = false;
2344
2345 targettag = *reqtag;
2346
2347 /* check parents iteratively */
2349 {
2352 &targettag,
2353 HASH_ENTER,
2354 &found);
2355 if (!found)
2356 {
2357 parentlock->held = false;
2358 parentlock->childLocks = 1;
2359 }
2360 else
2361 parentlock->childLocks++;
2362
2363 if (parentlock->childLocks >
2365 {
2366 /*
2367 * We should promote to this parent lock. Continue to check its
2368 * ancestors, however, both to get their child counts right and to
2369 * check whether we should just go ahead and promote to one of
2370 * them.
2371 */
2373 promote = true;
2374 }
2375 }
2376
2377 if (promote)
2378 {
2379 /* acquire coarsest ancestor eligible for promotion */
2381 return true;
2382 }
2383 else
2384 return false;
2385}
2386
2387/*
2388 * When releasing a lock, decrement the child count on all ancestor
2389 * locks.
2390 *
2391 * This is called only when releasing a lock via
2392 * DeleteChildTargetLocks (i.e. when a lock becomes redundant because
2393 * we've acquired its parent, possibly due to promotion) or when a new
2394 * MVCC write lock makes the predicate lock unnecessary. There's no
2395 * point in calling it when locks are released at transaction end, as
2396 * this information is no longer needed.
2397 */
2398static void
2400{
2402 nexttag;
2403
2405
2407 {
2411
2417 HASH_FIND, NULL);
2418
2419 /*
2420 * There's a small chance the parent lock doesn't exist in the lock
2421 * table. This can happen if we prematurely removed it because an
2422 * index split caused the child refcount to be off.
2423 */
2424 if (parentlock == NULL)
2425 continue;
2426
2427 parentlock->childLocks--;
2428
2429 /*
2430 * Under similar circumstances the parent lock's refcount might be
2431 * zero. This only happens if we're holding that lock (otherwise we
2432 * would have removed the entry).
2433 */
2434 if (parentlock->childLocks < 0)
2435 {
2436 Assert(parentlock->held);
2437 parentlock->childLocks = 0;
2438 }
2439
2440 if ((parentlock->childLocks == 0) && (!parentlock->held))
2441 {
2445 HASH_REMOVE, NULL);
2447 }
2448 }
2449}
2450
2451/*
2452 * Indicate that a predicate lock on the given target is held by the
2453 * specified transaction. Has no effect if the lock is already held.
2454 *
2455 * This updates the lock table and the sxact's lock list, and creates
2456 * the lock target if necessary, but does *not* do anything related to
2457 * granularity promotion or the local lock table. See
2458 * PredicateLockAcquire for that.
2459 */
2460static void
2464{
2465 PREDICATELOCKTARGET *target;
2466 PREDICATELOCKTAG locktag;
2467 PREDICATELOCK *lock;
2469 bool found;
2470
2472
2474 if (IsInParallelMode())
2475 LWLockAcquire(&sxact->perXactPredicateListLock, LW_EXCLUSIVE);
2477
2478 /* Make sure that the target is represented. */
2479 target = (PREDICATELOCKTARGET *)
2482 HASH_ENTER_NULL, &found);
2483 if (!target)
2484 ereport(ERROR,
2486 errmsg("out of shared memory"),
2487 errhint("You might need to increase \"%s\".", "max_pred_locks_per_transaction")));
2488 if (!found)
2489 dlist_init(&target->predicateLocks);
2490
2491 /* We've got the sxact and target, make sure they're joined. */
2492 locktag.myTarget = target;
2493 locktag.myXact = sxact;
2494 lock = (PREDICATELOCK *)
2497 HASH_ENTER_NULL, &found);
2498 if (!lock)
2499 ereport(ERROR,
2501 errmsg("out of shared memory"),
2502 errhint("You might need to increase \"%s\".", "max_pred_locks_per_transaction")));
2503
2504 if (!found)
2505 {
2506 dlist_push_tail(&target->predicateLocks, &lock->targetLink);
2507 dlist_push_tail(&sxact->predicateLocks, &lock->xactLink);
2509 }
2510
2512 if (IsInParallelMode())
2513 LWLockRelease(&sxact->perXactPredicateListLock);
2515}
2516
2517/*
2518 * Acquire a predicate lock on the specified target for the current
2519 * connection if not already held. This updates the local lock table
2520 * and uses it to implement granularity promotion. It will consolidate
2521 * multiple locks into a coarser lock if warranted, and will release
2522 * any finer-grained locks covered by the new one.
2523 */
2524static void
2526{
2528 bool found;
2530
2531 /* Do we have the lock already, or a covering lock? */
2533 return;
2534
2536 return;
2537
2538 /* the same hash and LW lock apply to the lock target and the local lock. */
2540
2541 /* Acquire lock in local table */
2545 HASH_ENTER, &found);
2546 locallock->held = true;
2547 if (!found)
2548 locallock->childLocks = 0;
2549
2550 /* Actually create the lock */
2552
2553 /*
2554 * Lock has been acquired. Check whether it should be promoted to a
2555 * coarser granularity, or whether there are finer-granularity locks to
2556 * clean up.
2557 */
2559 {
2560 /*
2561 * Lock request was promoted to a coarser-granularity lock, and that
2562 * lock was acquired. It will delete this lock and any of its
2563 * children, so we're done.
2564 */
2565 }
2566 else
2567 {
2568 /* Clean up any finer-granularity locks */
2571 }
2572}
2573
2574
2575/*
2576 * PredicateLockRelation
2577 *
2578 * Gets a predicate lock at the relation level.
2579 * Skip if not in full serializable transaction isolation level.
2580 * Skip if this is a temporary table.
2581 * Clear any finer-grained predicate locks this session has on the relation.
2582 */
2583void
2584PredicateLockRelation(Relation relation, Snapshot snapshot)
2585{
2587
2588 if (!SerializationNeededForRead(relation, snapshot))
2589 return;
2590
2592 relation->rd_locator.dbOid,
2593 relation->rd_id);
2595}
2596
2597/*
2598 * PredicateLockPage
2599 *
2600 * Gets a predicate lock at the page level.
2601 * Skip if not in full serializable transaction isolation level.
2602 * Skip if this is a temporary table.
2603 * Skip if a coarser predicate lock already covers this page.
2604 * Clear any finer-grained predicate locks this session has on the page.
2605 */
2606void
2607PredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot)
2608{
2610
2611 if (!SerializationNeededForRead(relation, snapshot))
2612 return;
2613
2615 relation->rd_locator.dbOid,
2616 relation->rd_id,
2617 blkno);
2619}
2620
2621/*
2622 * PredicateLockTID
2623 *
2624 * Gets a predicate lock at the tuple level.
2625 * Skip if not in full serializable transaction isolation level.
2626 * Skip if this is a temporary table.
2627 */
2628void
2629PredicateLockTID(Relation relation, const ItemPointerData *tid, Snapshot snapshot,
2631{
2633
2634 if (!SerializationNeededForRead(relation, snapshot))
2635 return;
2636
2637 /*
2638 * Return if this xact wrote it.
2639 */
2640 if (relation->rd_index == NULL)
2641 {
2642 /* If we wrote it, we already have a write lock. */
2644 return;
2645 }
2646
2647 /*
2648 * Do quick-but-not-definitive test for a relation lock first. This will
2649 * never cause a return when the relation is *not* locked, but will
2650 * occasionally let the check continue when there really *is* a relation
2651 * level lock.
2652 */
2654 relation->rd_locator.dbOid,
2655 relation->rd_id);
2656 if (PredicateLockExists(&tag))
2657 return;
2658
2660 relation->rd_locator.dbOid,
2661 relation->rd_id,
2665}
2666
2667
2668/*
2669 * DeleteLockTarget
2670 *
2671 * Remove a predicate lock target along with any locks held for it.
2672 *
2673 * Caller must hold SerializablePredicateListLock and the
2674 * appropriate hash partition lock for the target.
2675 */
2676static void
2678{
2679 dlist_mutable_iter iter;
2680
2682 LW_EXCLUSIVE));
2684
2686
2687 dlist_foreach_modify(iter, &target->predicateLocks)
2688 {
2690 dlist_container(PREDICATELOCK, targetLink, iter.cur);
2691 bool found;
2692
2693 dlist_delete(&(predlock->xactLink));
2694 dlist_delete(&(predlock->targetLink));
2695
2698 &predlock->tag,
2701 HASH_REMOVE, &found);
2702 Assert(found);
2703 }
2705
2706 /* Remove the target itself, if possible. */
2708}
2709
2710
2711/*
2712 * TransferPredicateLocksToNewTarget
2713 *
2714 * Move or copy all the predicate locks for a lock target, for use by
2715 * index page splits/combines and other things that create or replace
2716 * lock targets. If 'removeOld' is true, the old locks and the target
2717 * will be removed.
2718 *
2719 * Returns true on success, or false if we ran out of shared memory to
2720 * allocate the new target or locks. Guaranteed to always succeed if
2721 * removeOld is set (by using the scratch entry in PredicateLockTargetHash
2722 * for scratch space).
2723 *
2724 * Warning: the "removeOld" option should be used only with care,
2725 * because this function does not (indeed, can not) update other
2726 * backends' LocalPredicateLockHash. If we are only adding new
2727 * entries, this is not a problem: the local lock table is used only
2728 * as a hint, so missing entries for locks that are held are
2729 * OK. Having entries for locks that are no longer held, as can happen
2730 * when using "removeOld", is not in general OK. We can only use it
2731 * safely when replacing a lock with a coarser-granularity lock that
2732 * covers it, or if we are absolutely certain that no one will need to
2733 * refer to that lock in the future.
2734 *
2735 * Caller must hold SerializablePredicateListLock exclusively.
2736 */
2737static bool
2740 bool removeOld)
2741{
2747 bool found;
2748 bool outOfShmem = false;
2749
2751 LW_EXCLUSIVE));
2752
2757
2758 if (removeOld)
2759 {
2760 /*
2761 * Remove the dummy entry to give us scratch space, so we know we'll
2762 * be able to create the new lock target.
2763 */
2764 RemoveScratchTarget(false);
2765 }
2766
2767 /*
2768 * We must get the partition locks in ascending sequence to avoid
2769 * deadlocks. If old and new partitions are the same, we must request the
2770 * lock only once.
2771 */
2773 {
2777 }
2779 {
2783 }
2784 else
2786
2787 /*
2788 * Look for the old target. If not found, that's OK; no predicate locks
2789 * are affected, so we can just clean up and return. If it does exist,
2790 * walk its list of predicate locks and move or copy them to the new
2791 * target.
2792 */
2794 &oldtargettag,
2796 HASH_FIND, NULL);
2797
2798 if (oldtarget)
2799 {
2802 dlist_mutable_iter iter;
2803
2805 &newtargettag,
2807 HASH_ENTER_NULL, &found);
2808
2809 if (!newtarget)
2810 {
2811 /* Failed to allocate due to insufficient shmem */
2812 outOfShmem = true;
2813 goto exit;
2814 }
2815
2816 /* If we created a new entry, initialize it */
2817 if (!found)
2818 dlist_init(&newtarget->predicateLocks);
2819
2820 newpredlocktag.myTarget = newtarget;
2821
2822 /*
2823 * Loop through all the locks on the old target, replacing them with
2824 * locks on the new target.
2825 */
2827
2828 dlist_foreach_modify(iter, &oldtarget->predicateLocks)
2829 {
2831 dlist_container(PREDICATELOCK, targetLink, iter.cur);
2834
2835 newpredlocktag.myXact = oldpredlock->tag.myXact;
2836
2837 if (removeOld)
2838 {
2839 dlist_delete(&(oldpredlock->xactLink));
2840 dlist_delete(&(oldpredlock->targetLink));
2841
2844 &oldpredlock->tag,
2847 HASH_REMOVE, &found);
2848 Assert(found);
2849 }
2850
2857 &found);
2858 if (!newpredlock)
2859 {
2860 /* Out of shared memory. Undo what we've done so far. */
2863 outOfShmem = true;
2864 goto exit;
2865 }
2866 if (!found)
2867 {
2868 dlist_push_tail(&(newtarget->predicateLocks),
2869 &(newpredlock->targetLink));
2870 dlist_push_tail(&(newpredlocktag.myXact->predicateLocks),
2871 &(newpredlock->xactLink));
2872 newpredlock->commitSeqNo = oldCommitSeqNo;
2873 }
2874 else
2875 {
2876 if (newpredlock->commitSeqNo < oldCommitSeqNo)
2877 newpredlock->commitSeqNo = oldCommitSeqNo;
2878 }
2879
2880 Assert(newpredlock->commitSeqNo != 0);
2881 Assert((newpredlock->commitSeqNo == InvalidSerCommitSeqNo)
2882 || (newpredlock->tag.myXact == OldCommittedSxact));
2883 }
2885
2886 if (removeOld)
2887 {
2888 Assert(dlist_is_empty(&oldtarget->predicateLocks));
2890 }
2891 }
2892
2893
2894exit:
2895 /* Release partition locks in reverse order of acquisition. */
2897 {
2900 }
2902 {
2905 }
2906 else
2908
2909 if (removeOld)
2910 {
2911 /* We shouldn't run out of memory if we're moving locks */
2913
2914 /* Put the scratch entry back */
2915 RestoreScratchTarget(false);
2916 }
2917
2918 return !outOfShmem;
2919}
2920
2921/*
2922 * Drop all predicate locks of any granularity from the specified relation,
2923 * which can be a heap relation or an index relation. If 'transfer' is true,
2924 * acquire a relation lock on the heap for any transactions with any lock(s)
2925 * on the specified relation.
2926 *
2927 * This requires grabbing a lot of LW locks and scanning the entire lock
2928 * target table for matches. That makes this more expensive than most
2929 * predicate lock management functions, but it will only be called for DDL
2930 * type commands that are expensive anyway, and there are fast returns when
2931 * no serializable transactions are active or the relation is temporary.
2932 *
2933 * We don't use the TransferPredicateLocksToNewTarget function because it
2934 * acquires its own locks on the partitions of the two targets involved,
2935 * and we'll already be holding all partition locks.
2936 *
2937 * We can't throw an error from here, because the call could be from a
2938 * transaction which is not serializable.
2939 *
2940 * NOTE: This is currently only called with transfer set to true, but that may
2941 * change. If we decide to clean up the locks from a table on commit of a
2942 * transaction which executed DROP TABLE, the false condition will be useful.
2943 */
2944static void
2946{
2950 Oid dbId;
2951 Oid relId;
2952 Oid heapId;
2953 int i;
2954 bool isIndex;
2955 bool found;
2957
2958 /*
2959 * Bail out quickly if there are no serializable transactions running.
2960 * It's safe to check this without taking locks because the caller is
2961 * holding an ACCESS EXCLUSIVE lock on the relation. No new locks which
2962 * would matter here can be acquired while that is held.
2963 */
2965 return;
2966
2967 if (!PredicateLockingNeededForRelation(relation))
2968 return;
2969
2970 dbId = relation->rd_locator.dbOid;
2971 relId = relation->rd_id;
2972 if (relation->rd_index == NULL)
2973 {
2974 isIndex = false;
2975 heapId = relId;
2976 }
2977 else
2978 {
2979 isIndex = true;
2980 heapId = relation->rd_index->indrelid;
2981 }
2983 Assert(transfer || !isIndex); /* index OID only makes sense with
2984 * transfer */
2985
2986 /* Retrieve first time needed, then keep. */
2988 heaptarget = NULL;
2989
2990 /* Acquire locks on all lock partitions */
2992 for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
2995
2996 /*
2997 * Remove the dummy entry to give us scratch space, so we know we'll be
2998 * able to create the new lock target.
2999 */
3000 if (transfer)
3001 RemoveScratchTarget(true);
3002
3003 /* Scan through target map */
3005
3007 {
3008 dlist_mutable_iter iter;
3009
3010 /*
3011 * Check whether this is a target which needs attention.
3012 */
3014 continue; /* wrong relation id */
3015 if (GET_PREDICATELOCKTARGETTAG_DB(oldtarget->tag) != dbId)
3016 continue; /* wrong database id */
3017 if (transfer && !isIndex
3019 continue; /* already the right lock */
3020
3021 /*
3022 * If we made it here, we have work to do. We make sure the heap
3023 * relation lock exists, then we walk the list of predicate locks for
3024 * the old target we found, moving all locks to the heap relation lock
3025 * -- unless they already hold that.
3026 */
3027
3028 /*
3029 * First make sure we have the heap relation target. We only need to
3030 * do this once.
3031 */
3032 if (transfer && heaptarget == NULL)
3033 {
3035
3041 HASH_ENTER, &found);
3042 if (!found)
3043 dlist_init(&heaptarget->predicateLocks);
3044 }
3045
3046 /*
3047 * Loop through all the locks on the old target, replacing them with
3048 * locks on the new target.
3049 */
3050 dlist_foreach_modify(iter, &oldtarget->predicateLocks)
3051 {
3053 dlist_container(PREDICATELOCK, targetLink, iter.cur);
3057
3058 /*
3059 * Remove the old lock first. This avoids the chance of running
3060 * out of lock structure entries for the hash table.
3061 */
3063 oldXact = oldpredlock->tag.myXact;
3064
3065 dlist_delete(&(oldpredlock->xactLink));
3066
3067 /*
3068 * No need for retail delete from oldtarget list, we're removing
3069 * the whole target anyway.
3070 */
3072 &oldpredlock->tag,
3073 HASH_REMOVE, &found);
3074 Assert(found);
3075
3076 if (transfer)
3077 {
3079
3081 newpredlocktag.myXact = oldXact;
3087 HASH_ENTER,
3088 &found);
3089 if (!found)
3090 {
3091 dlist_push_tail(&(heaptarget->predicateLocks),
3092 &(newpredlock->targetLink));
3093 dlist_push_tail(&(newpredlocktag.myXact->predicateLocks),
3094 &(newpredlock->xactLink));
3095 newpredlock->commitSeqNo = oldCommitSeqNo;
3096 }
3097 else
3098 {
3099 if (newpredlock->commitSeqNo < oldCommitSeqNo)
3100 newpredlock->commitSeqNo = oldCommitSeqNo;
3101 }
3102
3103 Assert(newpredlock->commitSeqNo != 0);
3104 Assert((newpredlock->commitSeqNo == InvalidSerCommitSeqNo)
3105 || (newpredlock->tag.myXact == OldCommittedSxact));
3106 }
3107 }
3108
3110 &found);
3111 Assert(found);
3112 }
3113
3114 /* Put the scratch entry back */
3115 if (transfer)
3117
3118 /* Release locks in reverse order */
3120 for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
3123}
3124
3125/*
3126 * TransferPredicateLocksToHeapRelation
3127 * For all transactions, transfer all predicate locks for the given
3128 * relation to a single relation lock on the heap.
3129 */
3130void
3132{
3133 DropAllPredicateLocksFromTable(relation, true);
3134}
3135
3136
3137/*
3138 * PredicateLockPageSplit
3139 *
3140 * Copies any predicate locks for the old page to the new page.
3141 * Skip if this is a temporary table or toast table.
3142 *
3143 * NOTE: A page split (or overflow) affects all serializable transactions,
3144 * even if it occurs in the context of another transaction isolation level.
3145 *
3146 * NOTE: This currently leaves the local copy of the locks without
3147 * information on the new lock which is in shared memory. This could cause
3148 * problems if enough page splits occur on locked pages without the processes
3149 * which hold the locks getting in and noticing.
3150 */
3151void
3154{
3157 bool success;
3158
3159 /*
3160 * Bail out quickly if there are no serializable transactions running.
3161 *
3162 * It's safe to do this check without taking any additional locks. Even if
3163 * a serializable transaction starts concurrently, we know it can't take
3164 * any SIREAD locks on the page being split because the caller is holding
3165 * the associated buffer page lock. Memory reordering isn't an issue; the
3166 * memory barrier in the LWLock acquisition guarantees that this read
3167 * occurs while the buffer page lock is held.
3168 */
3170 return;
3171
3172 if (!PredicateLockingNeededForRelation(relation))
3173 return;
3174
3178
3180 relation->rd_locator.dbOid,
3181 relation->rd_id,
3182 oldblkno);
3184 relation->rd_locator.dbOid,
3185 relation->rd_id,
3186 newblkno);
3187
3189
3190 /*
3191 * Try copying the locks over to the new page's tag, creating it if
3192 * necessary.
3193 */
3196 false);
3197
3198 if (!success)
3199 {
3200 /*
3201 * No more predicate lock entries are available. Failure isn't an
3202 * option here, so promote the page lock to a relation lock.
3203 */
3204
3205 /* Get the parent relation lock's lock tag */
3207 &newtargettag);
3208 Assert(success);
3209
3210 /*
3211 * Move the locks to the parent. This shouldn't fail.
3212 *
3213 * Note that here we are removing locks held by other backends,
3214 * leading to a possible inconsistency in their local lock hash table.
3215 * This is OK because we're replacing it with a lock that covers the
3216 * old one.
3217 */
3220 true);
3221 Assert(success);
3222 }
3223
3225}
3226
3227/*
3228 * PredicateLockPageCombine
3229 *
3230 * Combines predicate locks for two existing pages.
3231 * Skip if this is a temporary table or toast table.
3232 *
3233 * NOTE: A page combine affects all serializable transactions, even if it
3234 * occurs in the context of another transaction isolation level.
3235 */
3236void
3239{
3240 /*
3241 * Page combines differ from page splits in that we ought to be able to
3242 * remove the locks on the old page after transferring them to the new
3243 * page, instead of duplicating them. However, because we can't edit other
3244 * backends' local lock tables, removing the old lock would leave them
3245 * with an entry in their LocalPredicateLockHash for a lock they're not
3246 * holding, which isn't acceptable. So we wind up having to do the same
3247 * work as a page split, acquiring a lock on the new page and keeping the
3248 * old page locked too. That can lead to some false positives, but should
3249 * be rare in practice.
3250 */
3252}
3253
3254/*
3255 * Walk the list of in-progress serializable transactions and find the new
3256 * xmin.
3257 */
3258static void
3260{
3261 dlist_iter iter;
3262
3264
3267
3269 {
3271 dlist_container(SERIALIZABLEXACT, xactLink, iter.cur);
3272
3276 {
3281 {
3284 }
3285 else if (TransactionIdEquals(sxact->xmin,
3288 }
3289 }
3290
3292}
3293
3294/*
3295 * ReleasePredicateLocks
3296 *
3297 * Releases predicate locks based on completion of the current transaction,
3298 * whether committed or rolled back. It can also be called for a read-only
3299 * transaction when it becomes impossible for the transaction to become
3300 * part of a dangerous structure.
3301 *
3302 * We do nothing unless this is a serializable transaction.
3303 *
3304 * This method must ensure that shared memory hash tables are cleaned
3305 * up in some relatively timely fashion.
3306 *
3307 * If this transaction is committing and is holding any predicate locks,
3308 * it must be added to a list of completed serializable transactions still
3309 * holding locks.
3310 *
3311 * If isReadOnlySafe is true, then predicate locks are being released before
3312 * the end of the transaction because MySerializableXact has been determined
3313 * to be RO_SAFE. In non-parallel mode we can release it completely, but
3314 * in parallel mode we partially release the SERIALIZABLEXACT and keep it
3315 * around until the end of the transaction, allowing each backend to clear its
3316 * MySerializableXact variable and benefit from the optimization in its own
3317 * time.
3318 */
3319void
3321{
3322 bool partiallyReleasing = false;
3323 bool needToClear;
3325 dlist_mutable_iter iter;
3326
3327 /*
3328 * We can't trust XactReadOnly here, because a transaction which started
3329 * as READ WRITE can show as READ ONLY later, e.g., within
3330 * subtransactions. We want to flag a transaction as READ ONLY if it
3331 * commits without writing so that de facto READ ONLY transactions get the
3332 * benefit of some RO optimizations, so we will use this local variable to
3333 * get some cleanup logic right which is based on whether the transaction
3334 * was declared READ ONLY at the top level.
3335 */
3337
3338 /* We can't be both committing and releasing early due to RO_SAFE. */
3340
3341 /* Are we at the end of a transaction, that is, a commit or abort? */
3342 if (!isReadOnlySafe)
3343 {
3344 /*
3345 * Parallel workers mustn't release predicate locks at the end of
3346 * their transaction. The leader will do that at the end of its
3347 * transaction.
3348 */
3349 if (IsParallelWorker())
3350 {
3352 return;
3353 }
3354
3355 /*
3356 * By the time the leader in a parallel query reaches end of
3357 * transaction, it has waited for all workers to exit.
3358 */
3360
3361 /*
3362 * If the leader in a parallel query earlier stashed a partially
3363 * released SERIALIZABLEXACT for final clean-up at end of transaction
3364 * (because workers might still have been accessing it), then it's
3365 * time to restore it.
3366 */
3368 {
3373 }
3374 }
3375
3377 {
3379 return;
3380 }
3381
3383
3384 /*
3385 * If the transaction is committing, but it has been partially released
3386 * already, then treat this as a roll back. It was marked as rolled back.
3387 */
3389 isCommit = false;
3390
3391 /*
3392 * If we're called in the middle of a transaction because we discovered
3393 * that the SXACT_FLAG_RO_SAFE flag was set, then we'll partially release
3394 * it (that is, release the predicate locks and conflicts, but not the
3395 * SERIALIZABLEXACT itself) if we're the first backend to have noticed.
3396 */
3398 {
3399 /*
3400 * The leader needs to stash a pointer to it, so that it can
3401 * completely release it at end-of-transaction.
3402 */
3403 if (!IsParallelWorker())
3405
3406 /*
3407 * The first backend to reach this condition will partially release
3408 * the SERIALIZABLEXACT. All others will just clear their
3409 * backend-local state so that they stop doing SSI checks for the rest
3410 * of the transaction.
3411 */
3413 {
3416 return;
3417 }
3418 else
3419 {
3421 partiallyReleasing = true;
3422 /* ... and proceed to perform the partial release below. */
3423 }
3424 }
3430
3431 /* may not be serializable during COMMIT/ROLLBACK PREPARED */
3433
3434 /* We'd better not already be on the cleanup list. */
3436
3438
3439 /*
3440 * We don't hold XidGenLock lock here, assuming that TransactionId is
3441 * atomic!
3442 *
3443 * If this value is changing, we don't care that much whether we get the
3444 * old or new value -- it is just used to determine how far
3445 * SxactGlobalXmin must advance before this transaction can be fully
3446 * cleaned up. The worst that could happen is we wait for one more
3447 * transaction to complete before freeing some RAM; correctness of visible
3448 * behavior is not affected.
3449 */
3451
3452 /*
3453 * If it's not a commit it's either a rollback or a read-only transaction
3454 * flagged SXACT_FLAG_RO_SAFE, and we can clear our locks immediately.
3455 */
3456 if (isCommit)
3457 {
3460 /* Recognize implicit read-only transaction (commit without write). */
3461 if (!MyXactDidWrite)
3463 }
3464 else
3465 {
3466 /*
3467 * The DOOMED flag indicates that we intend to roll back this
3468 * transaction and so it should not cause serialization failures for
3469 * other transactions that conflict with it. Note that this flag might
3470 * already be set, if another backend marked this transaction for
3471 * abort.
3472 *
3473 * The ROLLED_BACK flag further indicates that ReleasePredicateLocks
3474 * has been called, and so the SerializableXact is eligible for
3475 * cleanup. This means it should not be considered when calculating
3476 * SxactGlobalXmin.
3477 */
3480
3481 /*
3482 * If the transaction was previously prepared, but is now failing due
3483 * to a ROLLBACK PREPARED or (hopefully very rare) error after the
3484 * prepare, clear the prepared flag. This simplifies conflict
3485 * checking.
3486 */
3488 }
3489
3491 {
3493 if (--(PredXact->WritableSxactCount) == 0)
3494 {
3495 /*
3496 * Release predicate locks and rw-conflicts in for all committed
3497 * transactions. There are no longer any transactions which might
3498 * conflict with the locks and no chance for new transactions to
3499 * overlap. Similarly, existing conflicts in can't cause pivots,
3500 * and any conflicts in which could have completed a dangerous
3501 * structure would already have caused a rollback, so any
3502 * remaining ones must be benign.
3503 */
3505 }
3506 }
3507 else
3508 {
3509 /*
3510 * Read-only transactions: clear the list of transactions that might
3511 * make us unsafe. Note that we use 'inLink' for the iteration as
3512 * opposed to 'outLink' for the r/w xacts.
3513 */
3515 {
3517 dlist_container(RWConflictData, inLink, iter.cur);
3518
3521
3523 }
3524 }
3525
3526 /* Check for conflict out to old committed transactions. */
3527 if (isCommit
3530 {
3531 /*
3532 * we don't know which old committed transaction we conflicted with,
3533 * so be conservative and use FirstNormalSerCommitSeqNo here
3534 */
3538 }
3539
3540 /*
3541 * Release all outConflicts to committed transactions. If we're rolling
3542 * back clear them all. Set SXACT_FLAG_CONFLICT_OUT if any point to
3543 * previously committed transactions.
3544 */
3546 {
3548 dlist_container(RWConflictData, outLink, iter.cur);
3549
3550 if (isCommit
3552 && SxactIsCommitted(conflict->sxactIn))
3553 {
3555 || conflict->sxactIn->prepareSeqNo < MySerializableXact->SeqNo.earliestOutConflictCommit)
3558 }
3559
3560 if (!isCommit
3561 || SxactIsCommitted(conflict->sxactIn)
3562 || (conflict->sxactIn->SeqNo.lastCommitBeforeSnapshot >= PredXact->LastSxactCommitSeqNo))
3564 }
3565
3566 /*
3567 * Release all inConflicts from committed and read-only transactions. If
3568 * we're rolling back, clear them all.
3569 */
3571 {
3573 dlist_container(RWConflictData, inLink, iter.cur);
3574
3575 if (!isCommit
3576 || SxactIsCommitted(conflict->sxactOut)
3577 || SxactIsReadOnly(conflict->sxactOut))
3579 }
3580
3582 {
3583 /*
3584 * Remove ourselves from the list of possible conflicts for concurrent
3585 * READ ONLY transactions, flagging them as unsafe if we have a
3586 * conflict out. If any are waiting DEFERRABLE transactions, wake them
3587 * up if they are known safe or known unsafe.
3588 */
3590 {
3592 dlist_container(RWConflictData, outLink, iter.cur);
3593
3594 roXact = possibleUnsafeConflict->sxactIn;
3597
3598 /* Mark conflicted if necessary. */
3599 if (isCommit
3603 <= roXact->SeqNo.lastCommitBeforeSnapshot))
3604 {
3605 /*
3606 * This releases possibleUnsafeConflict (as well as all other
3607 * possible conflicts for roXact)
3608 */
3610 }
3611 else
3612 {
3614
3615 /*
3616 * If we were the last possible conflict, flag it safe. The
3617 * transaction can now safely release its predicate locks (but
3618 * that transaction's backend has to do that itself).
3619 */
3620 if (dlist_is_empty(&roXact->possibleUnsafeConflicts))
3621 roXact->flags |= SXACT_FLAG_RO_SAFE;
3622 }
3623
3624 /*
3625 * Wake up the process for a waiting DEFERRABLE transaction if we
3626 * now know it's either safe or conflicted.
3627 */
3630 ProcSendSignal(roXact->pgprocno);
3631 }
3632 }
3633
3634 /*
3635 * Check whether it's time to clean up old transactions. This can only be
3636 * done when the last serializable transaction with the oldest xmin among
3637 * serializable transactions completes. We then find the "new oldest"
3638 * xmin and purge any transactions which finished before this transaction
3639 * was launched.
3640 *
3641 * For parallel queries in read-only transactions, it might run twice. We
3642 * only release the reference on the first call.
3643 */
3644 needToClear = false;
3645 if ((partiallyReleasing ||
3649 {
3651 if (--(PredXact->SxactGlobalXminCount) == 0)
3652 {
3654 needToClear = true;
3655 }
3656 }
3657
3659
3661
3662 /* Add this to the list of transactions to check for later cleanup. */
3663 if (isCommit)
3666
3667 /*
3668 * If we're releasing a RO_SAFE transaction in parallel mode, we'll only
3669 * partially release it. That's necessary because other backends may have
3670 * a reference to it. The leader will release the SERIALIZABLEXACT itself
3671 * at the end of the transaction after workers have stopped running.
3672 */
3673 if (!isCommit)
3676 false);
3677
3679
3680 if (needToClear)
3682
3684}
3685
3686static void
3688{
3690 MyXactDidWrite = false;
3691
3692 /* Delete per-transaction lock table */
3694 {
3697 }
3698}
3699
3700/*
3701 * Clear old predicate locks, belonging to committed transactions that are no
3702 * longer interesting to any in-progress transaction.
3703 */
3704static void
3706{
3707 dlist_mutable_iter iter;
3708
3709 /*
3710 * Loop through finished transactions. They are in commit order, so we can
3711 * stop as soon as we find one that's still interesting.
3712 */
3716 {
3718 dlist_container(SERIALIZABLEXACT, finishedLink, iter.cur);
3719
3723 {
3724 /*
3725 * This transaction committed before any in-progress transaction
3726 * took its snapshot. It's no longer interesting.
3727 */
3729 dlist_delete_thoroughly(&finishedSxact->finishedLink);
3732 }
3733 else if (finishedSxact->commitSeqNo > PredXact->HavePartialClearedThrough
3734 && finishedSxact->commitSeqNo <= PredXact->CanPartialClearThrough)
3735 {
3736 /*
3737 * Any active transactions that took their snapshot before this
3738 * transaction committed are read-only, so we can clear part of
3739 * its state.
3740 */
3742
3744 {
3745 /* A read-only transaction can be removed entirely */
3746 dlist_delete_thoroughly(&(finishedSxact->finishedLink));
3748 }
3749 else
3750 {
3751 /*
3752 * A read-write transaction can only be partially cleared. We
3753 * need to keep the SERIALIZABLEXACT but can release the
3754 * SIREAD locks and conflicts in.
3755 */
3757 }
3758
3761 }
3762 else
3763 {
3764 /* Still interesting. */
3765 break;
3766 }
3767 }
3769
3770 /*
3771 * Loop through predicate locks on dummy transaction for summarized data.
3772 */
3775 {
3777 dlist_container(PREDICATELOCK, xactLink, iter.cur);
3779
3781 Assert(predlock->commitSeqNo != 0);
3782 Assert(predlock->commitSeqNo != InvalidSerCommitSeqNo);
3785
3786 /*
3787 * If this lock originally belonged to an old enough transaction, we
3788 * can release it.
3789 */
3791 {
3792 PREDICATELOCKTAG tag;
3793 PREDICATELOCKTARGET *target;
3797
3798 tag = predlock->tag;
3799 target = tag.myTarget;
3800 targettag = target->tag;
3803
3805
3806 dlist_delete(&(predlock->targetLink));
3807 dlist_delete(&(predlock->xactLink));
3808
3812 HASH_REMOVE, NULL);
3814
3816 }
3817 }
3818
3821}
3822
3823/*
3824 * This is the normal way to delete anything from any of the predicate
3825 * locking hash tables. Given a transaction which we know can be deleted:
3826 * delete all predicate locks held by that transaction and any predicate
3827 * lock targets which are now unreferenced by a lock; delete all conflicts
3828 * for the transaction; delete all xid values for the transaction; then
3829 * delete the transaction.
3830 *
3831 * When the partial flag is set, we can release all predicate locks and
3832 * in-conflict information -- we've established that there are no longer
3833 * any overlapping read-write transactions for which this transaction could
3834 * matter -- but keep the transaction entry itself and any outConflicts.
3835 *
3836 * When the summarize flag is set, we've run short of room for sxact data
3837 * and must summarize to the SLRU. Predicate locks are transferred to a
3838 * dummy "old" transaction, with duplicate locks on a single target
3839 * collapsing to a single lock with the "latest" commitSeqNo from among
3840 * the conflicting locks.
3841 */
3842static void
3844 bool summarize)
3845{
3847 dlist_mutable_iter iter;
3848
3849 Assert(sxact != NULL);
3851 Assert(partial || !SxactIsOnFinishedList(sxact));
3853
3854 /*
3855 * First release all the predicate locks held by this xact (or transfer
3856 * them to OldCommittedSxact if summarize is true)
3857 */
3859 if (IsInParallelMode())
3860 LWLockAcquire(&sxact->perXactPredicateListLock, LW_EXCLUSIVE);
3861 dlist_foreach_modify(iter, &sxact->predicateLocks)
3862 {
3864 dlist_container(PREDICATELOCK, xactLink, iter.cur);
3865 PREDICATELOCKTAG tag;
3866 PREDICATELOCKTARGET *target;
3870
3871 tag = predlock->tag;
3872 target = tag.myTarget;
3873 targettag = target->tag;
3876
3878
3879 dlist_delete(&predlock->targetLink);
3880
3884 HASH_REMOVE, NULL);
3885 if (summarize)
3886 {
3887 bool found;
3888
3889 /* Fold into dummy transaction list. */
3894 HASH_ENTER_NULL, &found);
3895 if (!predlock)
3896 ereport(ERROR,
3898 errmsg("out of shared memory"),
3899 errhint("You might need to increase \"%s\".", "max_pred_locks_per_transaction")));
3900 if (found)
3901 {
3902 Assert(predlock->commitSeqNo != 0);
3903 Assert(predlock->commitSeqNo != InvalidSerCommitSeqNo);
3904 if (predlock->commitSeqNo < sxact->commitSeqNo)
3905 predlock->commitSeqNo = sxact->commitSeqNo;
3906 }
3907 else
3908 {
3910 &predlock->targetLink);
3912 &predlock->xactLink);
3913 predlock->commitSeqNo = sxact->commitSeqNo;
3914 }
3915 }
3916 else
3918
3920 }
3921
3922 /*
3923 * Rather than retail removal, just re-init the head after we've run
3924 * through the list.
3925 */
3926 dlist_init(&sxact->predicateLocks);
3927
3928 if (IsInParallelMode())
3929 LWLockRelease(&sxact->perXactPredicateListLock);
3931
3932 sxidtag.xid = sxact->topXid;
3934
3935 /* Release all outConflicts (unless 'partial' is true) */
3936 if (!partial)
3937 {
3938 dlist_foreach_modify(iter, &sxact->outConflicts)
3939 {
3941 dlist_container(RWConflictData, outLink, iter.cur);
3942
3943 if (summarize)
3944 conflict->sxactIn->flags |= SXACT_FLAG_SUMMARY_CONFLICT_IN;
3946 }
3947 }
3948
3949 /* Release all inConflicts. */
3950 dlist_foreach_modify(iter, &sxact->inConflicts)
3951 {
3953 dlist_container(RWConflictData, inLink, iter.cur);
3954
3955 if (summarize)
3956 conflict->sxactOut->flags |= SXACT_FLAG_SUMMARY_CONFLICT_OUT;
3958 }
3959
3960 /* Finally, get rid of the xid and the record of the transaction itself. */
3961 if (!partial)
3962 {
3963 if (sxidtag.xid != InvalidTransactionId)
3966 }
3967
3969}
3970
3971/*
3972 * Tests whether the given top level transaction is concurrent with
3973 * (overlaps) our current transaction.
3974 *
3975 * We need to identify the top level transaction for SSI, anyway, so pass
3976 * that to this function to save the overhead of checking the snapshot's
3977 * subxip array.
3978 */
3979static bool
3981{
3982 Snapshot snap;
3983
3986
3988
3989 if (TransactionIdPrecedes(xid, snap->xmin))
3990 return false;
3991
3992 if (TransactionIdFollowsOrEquals(xid, snap->xmax))
3993 return true;
3994
3995 return pg_lfind32(xid, snap->xip, snap->xcnt);
3996}
3997
3998bool
4000{
4001 if (!SerializationNeededForRead(relation, snapshot))
4002 return false;
4003
4004 /* Check if someone else has already decided that we need to die */
4006 {
4007 ereport(ERROR,
4009 errmsg("could not serialize access due to read/write dependencies among transactions"),
4010 errdetail_internal("Reason code: Canceled on identification as a pivot, during conflict out checking."),
4011 errhint("The transaction might succeed if retried.")));
4012 }
4013
4014 return true;
4015}
4016
4017/*
4018 * CheckForSerializableConflictOut
4019 * A table AM is reading a tuple that has been modified. If it determines
4020 * that the tuple version it is reading is not visible to us, it should
4021 * pass in the top level xid of the transaction that created it.
4022 * Otherwise, if it determines that it is visible to us but it has been
4023 * deleted or there is a newer version available due to an update, it
4024 * should pass in the top level xid of the modifying transaction.
4025 *
4026 * This function will check for overlap with our own transaction. If the given
4027 * xid is also serializable and the transactions overlap (i.e., they cannot see
4028 * each other's writes), then we have a conflict out.
4029 */
4030void
4032{
4036
4037 if (!SerializationNeededForRead(relation, snapshot))
4038 return;
4039
4040 /* Check if someone else has already decided that we need to die */
4042 {
4043 ereport(ERROR,
4045 errmsg("could not serialize access due to read/write dependencies among transactions"),
4046 errdetail_internal("Reason code: Canceled on identification as a pivot, during conflict out checking."),
4047 errhint("The transaction might succeed if retried.")));
4048 }
4050
4052 return;
4053
4054 /*
4055 * Find sxact or summarized info for the top level xid.
4056 */
4057 sxidtag.xid = xid;
4059 sxid = (SERIALIZABLEXID *)
4061 if (!sxid)
4062 {
4063 /*
4064 * Transaction not found in "normal" SSI structures. Check whether it
4065 * got pushed out to SLRU storage for "old committed" transactions.
4066 */
4068
4070 if (conflictCommitSeqNo != 0)
4071 {
4076 ereport(ERROR,
4078 errmsg("could not serialize access due to read/write dependencies among transactions"),
4079 errdetail_internal("Reason code: Canceled on conflict out to old pivot %u.", xid),
4080 errhint("The transaction might succeed if retried.")));
4081
4084 ereport(ERROR,
4086 errmsg("could not serialize access due to read/write dependencies among transactions"),
4087 errdetail_internal("Reason code: Canceled on identification as a pivot, with conflict out to old committed transaction %u.", xid),
4088 errhint("The transaction might succeed if retried.")));
4089
4091 }
4092
4093 /* It's not serializable or otherwise not important. */
4095 return;
4096 }
4097 sxact = sxid->myXact;
4098 Assert(TransactionIdEquals(sxact->topXid, xid));
4100 {
4101 /* Can't conflict with ourselves or a transaction that will roll back. */
4103 return;
4104 }
4105
4106 /*
4107 * We have a conflict out to a transaction which has a conflict out to a
4108 * summarized transaction. That summarized transaction must have
4109 * committed first, and we can't tell when it committed in relation to our
4110 * snapshot acquisition, so something needs to be canceled.
4111 */
4113 {
4114 if (!SxactIsPrepared(sxact))
4115 {
4116 sxact->flags |= SXACT_FLAG_DOOMED;
4118 return;
4119 }
4120 else
4121 {
4123 ereport(ERROR,
4125 errmsg("could not serialize access due to read/write dependencies among transactions"),
4126 errdetail_internal("Reason code: Canceled on conflict out to old pivot."),
4127 errhint("The transaction might succeed if retried.")));
4128 }
4129 }
4130
4131 /*
4132 * If this is a read-only transaction and the writing transaction has
4133 * committed, and it doesn't have a rw-conflict to a transaction which
4134 * committed before it, no conflict.
4135 */
4140 || MySerializableXact->SeqNo.lastCommitBeforeSnapshot < sxact->SeqNo.earliestOutConflictCommit))
4141 {
4142 /* Read-only transaction will appear to run first. No conflict. */
4144 return;
4145 }
4146
4147 if (!XidIsConcurrent(xid))
4148 {
4149 /* This write was already in our snapshot; no conflict. */
4151 return;
4152 }
4153
4155 {
4156 /* We don't want duplicate conflict records in the list. */
4158 return;
4159 }
4160
4161 /*
4162 * Flag the conflict. But first, if this conflict creates a dangerous
4163 * structure, ereport an error.
4164 */
4167}
4168
4169/*
4170 * Check a particular target for rw-dependency conflict in. A subroutine of
4171 * CheckForSerializableConflictIn().
4172 */
4173static void
4175{
4178 PREDICATELOCKTARGET *target;
4181 dlist_mutable_iter iter;
4182
4184
4185 /*
4186 * The same hash and LW lock apply to the lock target and the lock itself.
4187 */
4191 target = (PREDICATELOCKTARGET *)
4194 HASH_FIND, NULL);
4195 if (!target)
4196 {
4197 /* Nothing has this target locked; we're done here. */
4199 return;
4200 }
4201
4202 /*
4203 * Each lock for an overlapping transaction represents a conflict: a
4204 * rw-dependency in to this transaction.
4205 */
4207
4208 dlist_foreach_modify(iter, &target->predicateLocks)
4209 {
4211 dlist_container(PREDICATELOCK, targetLink, iter.cur);
4212 SERIALIZABLEXACT *sxact = predlock->tag.myXact;
4213
4215 {
4216 /*
4217 * If we're getting a write lock on a tuple, we don't need a
4218 * predicate (SIREAD) lock on the same tuple. We can safely remove
4219 * our SIREAD lock, but we'll defer doing so until after the loop
4220 * because that requires upgrading to an exclusive partition lock.
4221 *
4222 * We can't use this optimization within a subtransaction because
4223 * the subtransaction could roll back, and we would be left
4224 * without any lock at the top level.
4225 */
4226 if (!IsSubTransaction()
4228 {
4230 mypredlocktag = predlock->tag;
4231 }
4232 }
4233 else if (!SxactIsDoomed(sxact)
4236 sxact->finishedBefore))
4238 {
4241
4242 /*
4243 * Re-check after getting exclusive lock because the other
4244 * transaction may have flagged a conflict.
4245 */
4246 if (!SxactIsDoomed(sxact)
4249 sxact->finishedBefore))
4251 {
4253 }
4254
4257 }
4258 }
4261
4262 /*
4263 * If we found one of our own SIREAD locks to remove, remove it now.
4264 *
4265 * At this point our transaction already has a RowExclusiveLock on the
4266 * relation, so we are OK to drop the predicate lock on the tuple, if
4267 * found, without fearing that another write against the tuple will occur
4268 * before the MVCC information makes it to the buffer.
4269 */
4270 if (mypredlock != NULL)
4271 {
4274
4276 if (IsInParallelMode())
4280
4281 /*
4282 * Remove the predicate lock from shared memory, if it wasn't removed
4283 * while the locks were released. One way that could happen is from
4284 * autovacuum cleaning up an index.
4285 */
4292 HASH_FIND, NULL);
4293 if (rmpredlock != NULL)
4294 {
4296
4297 dlist_delete(&(mypredlock->targetLink));
4298 dlist_delete(&(mypredlock->xactLink));
4299
4304 HASH_REMOVE, NULL);
4306
4308 }
4309
4312 if (IsInParallelMode())
4315
4316 if (rmpredlock != NULL)
4317 {
4318 /*
4319 * Remove entry in local lock table if it exists. It's OK if it
4320 * doesn't exist; that means the lock was transferred to a new
4321 * target by a different backend.
4322 */
4325 HASH_REMOVE, NULL);
4326
4328 }
4329 }
4330}
4331
4332/*
4333 * CheckForSerializableConflictIn
4334 * We are writing the given tuple. If that indicates a rw-conflict
4335 * in from another serializable transaction, take appropriate action.
4336 *
4337 * Skip checking for any granularity for which a parameter is missing.
4338 *
4339 * A tuple update or delete is in conflict if we have a predicate lock
4340 * against the relation or page in which the tuple exists, or against the
4341 * tuple itself.
4342 */
4343void
4345{
4347
4348 if (!SerializationNeededForWrite(relation))
4349 return;
4350
4351 /* Check if someone else has already decided that we need to die */
4353 ereport(ERROR,
4355 errmsg("could not serialize access due to read/write dependencies among transactions"),
4356 errdetail_internal("Reason code: Canceled on identification as a pivot, during conflict in checking."),
4357 errhint("The transaction might succeed if retried.")));
4358
4359 /*
4360 * We're doing a write which might cause rw-conflicts now or later.
4361 * Memorize that fact.
4362 */
4363 MyXactDidWrite = true;
4364
4365 /*
4366 * It is important that we check for locks from the finest granularity to
4367 * the coarsest granularity, so that granularity promotion doesn't cause
4368 * us to miss a lock. The new (coarser) lock will be acquired before the
4369 * old (finer) locks are released.
4370 *
4371 * It is not possible to take and hold a lock across the checks for all
4372 * granularities because each target could be in a separate partition.
4373 */
4374 if (tid != NULL)
4375 {
4377 relation->rd_locator.dbOid,
4378 relation->rd_id,
4382 }
4383
4384 if (blkno != InvalidBlockNumber)
4385 {
4387 relation->rd_locator.dbOid,
4388 relation->rd_id,
4389 blkno);
4391 }
4392
4394 relation->rd_locator.dbOid,
4395 relation->rd_id);
4397}
4398
4399/*
4400 * CheckTableForSerializableConflictIn
4401 * The entire table is going through a DDL-style logical mass delete
4402 * like TRUNCATE or DROP TABLE. If that causes a rw-conflict in from
4403 * another serializable transaction, take appropriate action.
4404 *
4405 * While these operations do not operate entirely within the bounds of
4406 * snapshot isolation, they can occur inside a serializable transaction, and
4407 * will logically occur after any reads which saw rows which were destroyed
4408 * by these operations, so we do what we can to serialize properly under
4409 * SSI.
4410 *
4411 * The relation passed in must be a heap relation. Any predicate lock of any
4412 * granularity on the heap will cause a rw-conflict in to this transaction.
4413 * Predicate locks on indexes do not matter because they only exist to guard
4414 * against conflicting inserts into the index, and this is a mass *delete*.
4415 * When a table is truncated or dropped, the index will also be truncated
4416 * or dropped, and we'll deal with locks on the index when that happens.
4417 *
4418 * Dropping or truncating a table also needs to drop any existing predicate
4419 * locks on heap tuples or pages, because they're about to go away. This
4420 * should be done before altering the predicate locks because the transaction
4421 * could be rolled back because of a conflict, in which case the lock changes
4422 * are not needed. (At the moment, we don't actually bother to drop the
4423 * existing locks on a dropped or truncated table. That might
4424 * lead to some false positives, but it doesn't seem worth the trouble.)
4425 */
4426void
4428{
4430 PREDICATELOCKTARGET *target;
4431 Oid dbId;
4432 Oid heapId;
4433 int i;
4434
4435 /*
4436 * Bail out quickly if there are no serializable transactions running.
4437 * It's safe to check this without taking locks because the caller is
4438 * holding an ACCESS EXCLUSIVE lock on the relation. No new locks which
4439 * would matter here can be acquired while that is held.
4440 */
4442 return;
4443
4444 if (!SerializationNeededForWrite(relation))
4445 return;
4446
4447 /*
4448 * We're doing a write which might cause rw-conflicts now or later.
4449 * Memorize that fact.
4450 */
4451 MyXactDidWrite = true;
4452
4453 Assert(relation->rd_index == NULL); /* not an index relation */
4454
4455 dbId = relation->rd_locator.dbOid;
4456 heapId = relation->rd_id;
4457
4459 for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
4462
4463 /* Scan through target list */
4465
4466 while ((target = (PREDICATELOCKTARGET *) hash_seq_search(&seqstat)))
4467 {
4468 dlist_mutable_iter iter;
4469
4470 /*
4471 * Check whether this is a target which needs attention.
4472 */
4474 continue; /* wrong relation id */
4475 if (GET_PREDICATELOCKTARGETTAG_DB(target->tag) != dbId)
4476 continue; /* wrong database id */
4477
4478 /*
4479 * Loop through locks for this target and flag conflicts.
4480 */
4481 dlist_foreach_modify(iter, &target->predicateLocks)
4482 {
4484 dlist_container(PREDICATELOCK, targetLink, iter.cur);
4485
4486 if (predlock->tag.myXact != MySerializableXact
4488 {
4490 }
4491 }
4492 }
4493
4494 /* Release locks in reverse order */
4496 for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
4499}
4500
4501
4502/*
4503 * Flag a rw-dependency between two serializable transactions.
4504 *
4505 * The caller is responsible for ensuring that we have a LW lock on
4506 * the transaction hash table.
4507 */
4508static void
4510{
4511 Assert(reader != writer);
4512
4513 /* First, see if this conflict causes failure. */
4515
4516 /* Actually do the conflict flagging. */
4517 if (reader == OldCommittedSxact)
4519 else if (writer == OldCommittedSxact)
4521 else
4522 SetRWConflict(reader, writer);
4523}
4524
4525/*----------------------------------------------------------------------------
4526 * We are about to add a RW-edge to the dependency graph - check that we don't
4527 * introduce a dangerous structure by doing so, and abort one of the
4528 * transactions if so.
4529 *
4530 * A serialization failure can only occur if there is a dangerous structure
4531 * in the dependency graph:
4532 *
4533 * Tin ------> Tpivot ------> Tout
4534 * rw rw
4535 *
4536 * Furthermore, Tout must commit first.
4537 *
4538 * One more optimization is that if Tin is declared READ ONLY (or commits
4539 * without writing), we can only have a problem if Tout committed before Tin
4540 * acquired its snapshot.
4541 *----------------------------------------------------------------------------
4542 */
4543static void
4546{
4547 bool failure;
4548
4550
4551 failure = false;
4552
4553 /*------------------------------------------------------------------------
4554 * Check for already-committed writer with rw-conflict out flagged
4555 * (conflict-flag on W means that T2 committed before W):
4556 *
4557 * R ------> W ------> T2
4558 * rw rw
4559 *
4560 * That is a dangerous structure, so we must abort. (Since the writer
4561 * has already committed, we must be the reader)
4562 *------------------------------------------------------------------------
4563 */
4566 failure = true;
4567
4568 /*------------------------------------------------------------------------
4569 * Check whether the writer has become a pivot with an out-conflict to a
4570 * committed transaction (T2), and T2 committed first:
4571 *
4572 * R ------> W ------> T2
4573 * rw rw
4574 *
4575 * Because T2 must've committed first, there is no anomaly if:
4576 * - the reader committed before T2
4577 * - the writer committed before T2
4578 * - the reader is a READ ONLY transaction and the reader was concurrent
4579 * with T2 (= reader acquired its snapshot before T2 committed)
4580 *
4581 * We also handle the case that T2 is prepared but not yet committed
4582 * here. In that case T2 has already checked for conflicts, so if it
4583 * commits first, making the above conflict real, it's too late for it
4584 * to abort.
4585 *------------------------------------------------------------------------
4586 */
4588 failure = true;
4589 else if (!failure)
4590 {
4591 dlist_iter iter;
4592
4593 dlist_foreach(iter, &writer->outConflicts)
4594 {
4596 dlist_container(RWConflictData, outLink, iter.cur);
4597 SERIALIZABLEXACT *t2 = conflict->sxactIn;
4598
4599 if (SxactIsPrepared(t2)
4600 && (!SxactIsCommitted(reader)
4601 || t2->prepareSeqNo <= reader->commitSeqNo)
4603 || t2->prepareSeqNo <= writer->commitSeqNo)
4604 && (!SxactIsReadOnly(reader)
4605 || t2->prepareSeqNo <= reader->SeqNo.lastCommitBeforeSnapshot))
4606 {
4607 failure = true;
4608 break;
4609 }
4610 }
4611 }
4612
4613 /*------------------------------------------------------------------------
4614 * Check whether the reader has become a pivot with a writer
4615 * that's committed (or prepared):
4616 *
4617 * T0 ------> R ------> W
4618 * rw rw
4619 *
4620 * Because W must've committed first for an anomaly to occur, there is no
4621 * anomaly if:
4622 * - T0 committed before the writer
4623 * - T0 is READ ONLY, and overlaps the writer
4624 *------------------------------------------------------------------------
4625 */
4626 if (!failure && SxactIsPrepared(writer) && !SxactIsReadOnly(reader))
4627 {
4628 if (SxactHasSummaryConflictIn(reader))
4629 {
4630 failure = true;
4631 }
4632 else
4633 {
4634 dlist_iter iter;
4635
4636 /*
4637 * The unconstify is needed as we have no const version of
4638 * dlist_foreach().
4639 */
4640 dlist_foreach(iter, &unconstify(SERIALIZABLEXACT *, reader)->inConflicts)
4641 {
4642 const RWConflict conflict =
4643 dlist_container(RWConflictData, inLink, iter.cur);
4644 const SERIALIZABLEXACT *t0 = conflict->sxactOut;
4645
4646 if (!SxactIsDoomed(t0)
4647 && (!SxactIsCommitted(t0)
4648 || t0->commitSeqNo >= writer->prepareSeqNo)
4649 && (!SxactIsReadOnly(t0)
4650 || t0->SeqNo.lastCommitBeforeSnapshot >= writer->prepareSeqNo))
4651 {
4652 failure = true;
4653 break;
4654 }
4655 }
4656 }
4657 }
4658
4659 if (failure)
4660 {
4661 /*
4662 * We have to kill a transaction to avoid a possible anomaly from
4663 * occurring. If the writer is us, we can just ereport() to cause a
4664 * transaction abort. Otherwise we flag the writer for termination,
4665 * causing it to abort when it tries to commit. However, if the writer
4666 * has already been prepared, we can't abort it
4667 * anymore, so we have to kill the reader instead.
4668 */
4670 {
4672 ereport(ERROR,
4674 errmsg("could not serialize access due to read/write dependencies among transactions"),
4675 errdetail_internal("Reason code: Canceled on identification as a pivot, during write."),
4676 errhint("The transaction might succeed if retried.")));
4677 }
4678 else if (SxactIsPrepared(writer))
4679 {
4681
4682 /* if we're not the writer, we have to be the reader */
4683 Assert(MySerializableXact == reader);
4684 ereport(ERROR,
4686 errmsg("could not serialize access due to read/write dependencies among transactions"),
4687 errdetail_internal("Reason code: Canceled on conflict out to pivot %u, during read.", writer->topXid),
4688 errhint("The transaction might succeed if retried.")));
4689 }
4690 writer->flags |= SXACT_FLAG_DOOMED;
4691 }
4692}
4693
4694/*
4695 * PreCommit_CheckForSerializationFailure
4696 * Check for dangerous structures in a serializable transaction
4697 * at commit.
4698 *
4699 * We're checking for a dangerous structure as each conflict is recorded.
4700 * The only way we could have a problem at commit is if this is the "out"
4701 * side of a pivot, and neither the "in" side nor the pivot has yet
4702 * committed.
4703 *
4704 * If a dangerous structure is found, the pivot (the near conflict) is
4705 * marked for death, because rolling back another transaction might mean
4706 * that we fail without ever making progress. This transaction is
4707 * committing writes, so letting it commit ensures progress. If we
4708 * canceled the far conflict, it might immediately fail again on retry.
4709 */
4710void
4712{
4714
4716 return;
4717
4719
4721
4722 /*
4723 * Check if someone else has already decided that we need to die. Since
4724 * we set our own DOOMED flag when partially releasing, ignore in that
4725 * case.
4726 */
4729 {
4731 ereport(ERROR,
4733 errmsg("could not serialize access due to read/write dependencies among transactions"),
4734 errdetail_internal("Reason code: Canceled on identification as a pivot, during commit attempt."),
4735 errhint("The transaction might succeed if retried.")));
4736 }
4737
4739 {
4742
4743 if (!SxactIsCommitted(nearConflict->sxactOut)
4744 && !SxactIsDoomed(nearConflict->sxactOut))
4745 {
4747
4748 dlist_foreach(far_iter, &nearConflict->sxactOut->inConflicts)
4749 {
4752
4753 if (farConflict->sxactOut == MySerializableXact
4754 || (!SxactIsCommitted(farConflict->sxactOut)
4755 && !SxactIsReadOnly(farConflict->sxactOut)
4756 && !SxactIsDoomed(farConflict->sxactOut)))
4757 {
4758 /*
4759 * Normally, we kill the pivot transaction to make sure we
4760 * make progress if the failing transaction is retried.
4761 * However, we can't kill it if it's already prepared, so
4762 * in that case we commit suicide instead.
4763 */
4764 if (SxactIsPrepared(nearConflict->sxactOut))
4765 {
4767 ereport(ERROR,
4769 errmsg("could not serialize access due to read/write dependencies among transactions"),
4770 errdetail_internal("Reason code: Canceled on commit attempt with conflict in from prepared pivot."),
4771 errhint("The transaction might succeed if retried.")));
4772 }
4773 nearConflict->sxactOut->flags |= SXACT_FLAG_DOOMED;
4774 break;
4775 }
4776 }
4777 }
4778 }
4779
4782
4784}
4785
4786/*------------------------------------------------------------------------*/
4787
4788/*
4789 * Two-phase commit support
4790 */
4791
4792/*
4793 * AtPrepare_Locks
4794 * Do the preparatory work for a PREPARE: make 2PC state file
4795 * records for all predicate locks currently held.
4796 */
4797void
4799{
4802 TwoPhasePredicateXactRecord *xactRecord;
4803 TwoPhasePredicateLockRecord *lockRecord;
4804 dlist_iter iter;
4805
4807 xactRecord = &(record.data.xactRecord);
4808 lockRecord = &(record.data.lockRecord);
4809
4811 return;
4812
4813 /* Generate an xact record for our SERIALIZABLEXACT */
4815 xactRecord->xmin = MySerializableXact->xmin;
4816 xactRecord->flags = MySerializableXact->flags;
4817
4818 /*
4819 * Note that we don't include the lists of conflicts in or out in the
4820 * statefile, because new conflicts can be added even after the
4821 * transaction prepares. We'll just make a conservative assumption during
4822 * recovery instead.
4823 */
4824
4826 &record, sizeof(record));
4827
4828 /*
4829 * Generate a lock record for each lock.
4830 *
4831 * To do this, we need to walk the predicate lock list in our sxact rather
4832 * than using the local predicate lock table because the latter is not
4833 * guaranteed to be accurate.
4834 */
4836
4837 /*
4838 * No need to take sxact->perXactPredicateListLock in parallel mode
4839 * because there cannot be any parallel workers running while we are
4840 * preparing a transaction.
4841 */
4843
4844 dlist_foreach(iter, &sxact->predicateLocks)
4845 {
4847 dlist_container(PREDICATELOCK, xactLink, iter.cur);
4848
4850 lockRecord->target = predlock->tag.myTarget->tag;
4851
4853 &record, sizeof(record));
4854 }
4855
4857}
4858
4859/*
4860 * PostPrepare_Locks
4861 * Clean up after successful PREPARE. Unlike the non-predicate
4862 * lock manager, we do not need to transfer locks to a dummy
4863 * PGPROC because our SERIALIZABLEXACT will stay around
4864 * anyway. We only need to clean up our local state.
4865 */
4866void
4868{
4870 return;
4871
4873
4876
4879
4881 MyXactDidWrite = false;
4882}
4883
4884/*
4885 * PredicateLockTwoPhaseFinish
4886 * Release a prepared transaction's predicate locks once it
4887 * commits or aborts.
4888 */
4889void
4891{
4894
4896
4898 sxid = (SERIALIZABLEXID *)
4901
4902 /* xid will not be found if it wasn't a serializable transaction */
4903 if (sxid == NULL)
4904 return;
4905
4906 /* Release its locks */
4907 MySerializableXact = sxid->myXact;
4908 MyXactDidWrite = true; /* conservatively assume that we wrote
4909 * something */
4911}
4912
4913/*
4914 * Re-acquire a predicate lock belonging to a transaction that was prepared.
4915 */
4916void
4918 void *recdata, uint32 len)
4919{
4922
4924
4925 record = (TwoPhasePredicateRecord *) recdata;
4926
4928 (record->type == TWOPHASEPREDICATERECORD_LOCK));
4929
4930 if (record->type == TWOPHASEPREDICATERECORD_XACT)
4931 {
4932 /* Per-transaction record. Set up a SERIALIZABLEXACT. */
4933 TwoPhasePredicateXactRecord *xactRecord;
4937 bool found;
4938
4939 xactRecord = (TwoPhasePredicateXactRecord *) &record->data.xactRecord;
4940
4943 if (!sxact)
4944 ereport(ERROR,
4946 errmsg("out of shared memory")));
4947
4948 /* vxid for a prepared xact is INVALID_PROC_NUMBER/xid; no pid */
4949 sxact->vxid.procNumber = INVALID_PROC_NUMBER;
4950 sxact->vxid.localTransactionId = (LocalTransactionId) xid;
4951 sxact->pid = 0;
4952 sxact->pgprocno = INVALID_PROC_NUMBER;
4953
4954 /* a prepared xact hasn't committed yet */
4955 sxact->prepareSeqNo = RecoverySerCommitSeqNo;
4956 sxact->commitSeqNo = InvalidSerCommitSeqNo;
4957 sxact->finishedBefore = InvalidTransactionId;
4958
4959 sxact->SeqNo.lastCommitBeforeSnapshot = RecoverySerCommitSeqNo;
4960
4961 /*
4962 * Don't need to track this; no transactions running at the time the
4963 * recovered xact started are still active, except possibly other
4964 * prepared xacts and we don't care whether those are RO_SAFE or not.
4965 */
4966 dlist_init(&(sxact->possibleUnsafeConflicts));
4967
4968 dlist_init(&(sxact->predicateLocks));
4969 dlist_node_init(&sxact->finishedLink);
4970
4971 sxact->topXid = xid;
4972 sxact->xmin = xactRecord->xmin;
4973 sxact->flags = xactRecord->flags;
4975 if (!SxactIsReadOnly(sxact))
4976 {
4980 }
4981
4982 /*
4983 * We don't know whether the transaction had any conflicts or not, so
4984 * we'll conservatively assume that it had both a conflict in and a
4985 * conflict out, and represent that with the summary conflict flags.
4986 */
4987 dlist_init(&(sxact->outConflicts));
4988 dlist_init(&(sxact->inConflicts));
4991
4992 /* Register the transaction's xid */
4993 sxidtag.xid = xid;
4995 &sxidtag,
4996 HASH_ENTER, &found);
4997 Assert(sxid != NULL);
4998 Assert(!found);
4999 sxid->myXact = sxact;
5000
5001 /*
5002 * Update global xmin. Note that this is a special case compared to
5003 * registering a normal transaction, because the global xmin might go
5004 * backwards. That's OK, because until recovery is over we're not
5005 * going to complete any transactions or create any non-prepared
5006 * transactions, so there's no danger of throwing away anything still needed.
5007 */
5010 {
5014 }
5016 {
5019 }
5020
5022 }
5023 else if (record->type == TWOPHASEPREDICATERECORD_LOCK)
5024 {
5025 /* Lock record. Recreate the PREDICATELOCK */
5026 TwoPhasePredicateLockRecord *lockRecord;
5031
5032 lockRecord = (TwoPhasePredicateLockRecord *) &record->data.lockRecord;
5034
5036 sxidtag.xid = xid;
5037 sxid = (SERIALIZABLEXID *)
5040
5041 Assert(sxid != NULL);
5042 sxact = sxid->myXact;
5044
5046 }
5047}
5048
5049/*
5050 * Prepare to share the current SERIALIZABLEXACT with parallel workers.
5051 * Return a handle object that can be used by AttachSerializableXact() in a
5052 * parallel worker.
5053 */
5056{
5057 return MySerializableXact;
5058}
5059
5060/*
5061 * Allow parallel workers to import the leader's SERIALIZABLEXACT.
5062 */
5063void
5065{
5066
5068
5072}
bool ParallelContextActive(void)
Definition parallel.c:1033
uint32 BlockNumber
Definition block.h:31
#define InvalidBlockNumber
Definition block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition block.h:71
#define unconstify(underlying_type, expr)
Definition c.h:1327
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:243
#define Assert(condition)
Definition c.h:945
int64_t int64
Definition c.h:615
uint16_t uint16
Definition c.h:617
uint32_t uint32
Definition c.h:618
uint32 LocalTransactionId
Definition c.h:740
uint32 TransactionId
Definition c.h:738
size_t Size
Definition c.h:691
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:952
Size hash_estimate_size(int64 num_entries, Size entrysize)
Definition dynahash.c:783
HTAB * hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
Definition dynahash.c:358
void hash_destroy(HTAB *hashp)
Definition dynahash.c:865
void * hash_search_with_hash_value(HTAB *hashp, const void *keyPtr, uint32 hashvalue, HASHACTION action, bool *foundPtr)
Definition dynahash.c:965
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition dynahash.c:1415
int64 hash_get_num_entries(HTAB *hashp)
Definition dynahash.c:1336
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition dynahash.c:1380
int errcode(int sqlerrcode)
Definition elog.c:874
int int errdetail_internal(const char *fmt,...) pg_attribute_printf(1
int errhint(const char *fmt,...) pg_attribute_printf(1
int errdetail(const char *fmt,...) pg_attribute_printf(1
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define DEBUG2
Definition elog.h:29
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
#define palloc_object(type)
Definition fe_memutils.h:74
#define palloc_array(type, count)
Definition fe_memutils.h:76
int MyProcPid
Definition globals.c:47
ProcNumber MyProcNumber
Definition globals.c:90
bool IsUnderPostmaster
Definition globals.c:120
int MaxBackends
Definition globals.c:146
int serializable_buffers
Definition globals.c:165
#define newval
GucSource
Definition guc.h:112
@ HASH_FIND
Definition hsearch.h:113
@ HASH_REMOVE
Definition hsearch.h:115
@ HASH_ENTER
Definition hsearch.h:114
@ HASH_ENTER_NULL
Definition hsearch.h:116
#define HASH_ELEM
Definition hsearch.h:95
#define HASH_FUNCTION
Definition hsearch.h:98
#define HASH_BLOBS
Definition hsearch.h:97
#define HASH_FIXED_SIZE
Definition hsearch.h:105
#define HASH_PARTITION
Definition hsearch.h:92
static dlist_node * dlist_pop_head_node(dlist_head *head)
Definition ilist.h:450
#define dlist_foreach(iter, lhead)
Definition ilist.h:623
static void dlist_init(dlist_head *head)
Definition ilist.h:314
#define dlist_head_element(type, membername, lhead)
Definition ilist.h:603
static void dlist_delete_thoroughly(dlist_node *node)
Definition ilist.h:416
static void dlist_delete(dlist_node *node)
Definition ilist.h:405
#define dlist_foreach_modify(iter, lhead)
Definition ilist.h:640
static bool dlist_is_empty(const dlist_head *head)
Definition ilist.h:336
static void dlist_push_tail(dlist_head *head, dlist_node *node)
Definition ilist.h:364
static void dlist_node_init(dlist_node *node)
Definition ilist.h:325
#define dlist_container(type, membername, ptr)
Definition ilist.h:593
#define IsParallelWorker()
Definition parallel.h:62
FILE * output
long val
Definition informix.c:689
static bool success
Definition initdb.c:188
int i
Definition isn.c:77
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition itemptr.h:103
#define GET_VXID_FROM_PGPROC(vxid_dst, proc)
Definition lock.h:80
#define SetInvalidVirtualTransactionId(vxid)
Definition lock.h:77
bool LWLockHeldByMe(LWLock *lock)
Definition lwlock.c:1912
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1177
bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
Definition lwlock.c:1956
void LWLockRelease(LWLock *lock)
Definition lwlock.c:1794
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition lwlock.c:699
@ LW_SHARED
Definition lwlock.h:113
@ LW_EXCLUSIVE
Definition lwlock.h:112
#define NUM_PREDICATELOCK_PARTITIONS
Definition lwlock.h:99
#define InvalidPid
Definition miscadmin.h:32
static char * errmsg
#define SLRU_PAGES_PER_SEGMENT
const void size_t len
const void * data
static bool pg_lfind32(uint32 key, const uint32 *base, uint32 nelem)
Definition pg_lfind.h:153
static rewind_source * source
Definition pg_rewind.c:89
#define ERRCODE_T_R_SERIALIZATION_FAILURE
Definition pgbench.c:77
#define InvalidOid
unsigned int Oid
PredicateLockData * GetPredicateLockStatusData(void)
Definition predicate.c:1456
void CheckPointPredicate(void)
Definition predicate.c:1052
void PredicateLockPageSplit(Relation relation, BlockNumber oldblkno, BlockNumber newblkno)
Definition predicate.c:3153
static void DecrementParentLocks(const PREDICATELOCKTARGETTAG *targettag)
Definition predicate.c:2400
static HTAB * PredicateLockHash
Definition predicate.c:399
static void SetPossibleUnsafeConflict(SERIALIZABLEXACT *roXact, SERIALIZABLEXACT *activeXact)
Definition predicate.c:668
#define PredicateLockTargetTagHashCode(predicatelocktargettag)
Definition predicate.c:304
static void SetNewSxactGlobalXmin(void)
Definition predicate.c:3260
void CheckForSerializableConflictIn(Relation relation, const ItemPointerData *tid, BlockNumber blkno)
Definition predicate.c:4345
#define SerialPage(xid)
Definition predicate.c:344
static void ReleasePredXact(SERIALIZABLEXACT *sxact)
Definition predicate.c:598
void SetSerializableTransactionSnapshot(Snapshot snapshot, VirtualTransactionId *sourcevxid, int sourcepid)
Definition predicate.c:1731
static bool RWConflictExists(const SERIALIZABLEXACT *reader, const SERIALIZABLEXACT *writer)
Definition predicate.c:612
static bool PredicateLockingNeededForRelation(Relation relation)
Definition predicate.c:500
static bool SerializationNeededForRead(Relation relation, Snapshot snapshot)
Definition predicate.c:518
static Snapshot GetSafeSnapshot(Snapshot origSnapshot)
Definition predicate.c:1567
#define SxactIsCommitted(sxact)
Definition predicate.c:278
static SerialControl serialControl
Definition predicate.c:355
void PredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot)
Definition predicate.c:2608
#define SxactIsROUnsafe(sxact)
Definition predicate.c:293
static Snapshot GetSerializableTransactionSnapshotInt(Snapshot snapshot, VirtualTransactionId *sourcevxid, int sourcepid)
Definition predicate.c:1773
static LWLock * ScratchPartitionLock
Definition predicate.c:409
static void PredicateLockAcquire(const PREDICATELOCKTARGETTAG *targettag)
Definition predicate.c:2526
#define SxactIsDeferrableWaiting(sxact)
Definition predicate.c:291
static void ReleasePredicateLocksLocal(void)
Definition predicate.c:3688
static HTAB * LocalPredicateLockHash
Definition predicate.c:415
int max_predicate_locks_per_page
Definition predicate.c:374
struct SerialControlData * SerialControl
Definition predicate.c:353
static PredXactList PredXact
Definition predicate.c:385
static void SetRWConflict(SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer)
Definition predicate.c:645
int GetSafeSnapshotBlockingPids(int blocked_pid, int *output, int output_size)
Definition predicate.c:1637
static uint32 ScratchTargetTagHash
Definition predicate.c:408
static void RemoveTargetIfNoLongerUsed(PREDICATELOCKTARGET *target, uint32 targettaghash)
Definition predicate.c:2192
static uint32 predicatelock_hash(const void *key, Size keysize)
Definition predicate.c:1430
void CheckForSerializableConflictOut(Relation relation, TransactionId xid, Snapshot snapshot)
Definition predicate.c:4032
#define SxactIsReadOnly(sxact)
Definition predicate.c:282
#define SerialNextPage(page)
Definition predicate.c:338
static void DropAllPredicateLocksFromTable(Relation relation, bool transfer)
Definition predicate.c:2946
bool PageIsPredicateLocked(Relation relation, BlockNumber blkno)
Definition predicate.c:2017
static int serial_errdetail_for_io_error(const void *opaque_data)
Definition predicate.c:748
static void CreatePredicateLock(const PREDICATELOCKTARGETTAG *targettag, uint32 targettaghash, SERIALIZABLEXACT *sxact)
Definition predicate.c:2462
static void SerialAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo)
Definition predicate.c:869
static void ClearOldPredicateLocks(void)
Definition predicate.c:3706
#define SxactHasSummaryConflictIn(sxact)
Definition predicate.c:283
static SERIALIZABLEXACT * CreatePredXact(void)
Definition predicate.c:584
static bool GetParentPredicateLockTag(const PREDICATELOCKTARGETTAG *tag, PREDICATELOCKTARGETTAG *parent)
Definition predicate.c:2081
#define PredicateLockHashCodeFromTargetHashCode(predicatelocktag, targethash)
Definition predicate.c:317
static void RestoreScratchTarget(bool lockheld)
Definition predicate.c:2170
#define SerialValue(slotno, xid)
Definition predicate.c:340
static void DeleteChildTargetLocks(const PREDICATELOCKTARGETTAG *newtargettag)
Definition predicate.c:2223
static void DeleteLockTarget(PREDICATELOCKTARGET *target, uint32 targettaghash)
Definition predicate.c:2678
void PredicateLockTwoPhaseFinish(FullTransactionId fxid, bool isCommit)
Definition predicate.c:4891
void predicatelock_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len)
Definition predicate.c:4918
static SERIALIZABLEXACT * OldCommittedSxact
Definition predicate.c:363
#define SxactHasConflictOut(sxact)
Definition predicate.c:290
static bool MyXactDidWrite
Definition predicate.c:423
static int MaxPredicateChildLocks(const PREDICATELOCKTARGETTAG *tag)
Definition predicate.c:2298
static void FlagSxactUnsafe(SERIALIZABLEXACT *sxact)
Definition predicate.c:701
static void SerialInit(void)
Definition predicate.c:816
void CheckTableForSerializableConflictIn(Relation relation)
Definition predicate.c:4428
#define SxactIsPrepared(sxact)
Definition predicate.c:279
void AttachSerializableXact(SerializableXactHandle handle)
Definition predicate.c:5065
SerializableXactHandle ShareSerializableXact(void)
Definition predicate.c:5056
static bool PredicateLockExists(const PREDICATELOCKTARGETTAG *targettag)
Definition predicate.c:2054
static void RemoveScratchTarget(bool lockheld)
Definition predicate.c:2149
#define SxactIsOnFinishedList(sxact)
Definition predicate.c:268
#define SxactIsPartiallyReleased(sxact)
Definition predicate.c:294
static void SerialSetActiveSerXmin(TransactionId xid)
Definition predicate.c:1001
static dlist_head * FinishedSerializableTransactions
Definition predicate.c:400
static bool SerializationNeededForWrite(Relation relation)
Definition predicate.c:562
static HTAB * SerializableXidHash
Definition predicate.c:397
static bool CheckAndPromotePredicateLockRequest(const PREDICATELOCKTARGETTAG *reqtag)
Definition predicate.c:2335
void PredicateLockPageCombine(Relation relation, BlockNumber oldblkno, BlockNumber newblkno)
Definition predicate.c:3238
static bool SerialPagePrecedesLogically(int64 page1, int64 page2)
Definition predicate.c:733
static void CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag)
Definition predicate.c:4175
int max_predicate_locks_per_relation
Definition predicate.c:373
#define SxactIsROSafe(sxact)
Definition predicate.c:292
void PreCommit_CheckForSerializationFailure(void)
Definition predicate.c:4712
void ReleasePredicateLocks(bool isCommit, bool isReadOnlySafe)
Definition predicate.c:3321
static void FlagRWConflict(SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer)
Definition predicate.c:4510
static const PREDICATELOCKTARGETTAG ScratchTargetTag
Definition predicate.c:407
#define PredicateLockHashPartitionLockByIndex(i)
Definition predicate.c:262
static void OnConflict_CheckForSerializationFailure(const SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer)
Definition predicate.c:4545
static bool CoarserLockCovers(const PREDICATELOCKTARGETTAG *newtargettag)
Definition predicate.c:2120
void PredicateLockRelation(Relation relation, Snapshot snapshot)
Definition predicate.c:2585
static SERIALIZABLEXACT * MySerializableXact
Definition predicate.c:422
void PredicateLockShmemInit(void)
Definition predicate.c:1156
void PredicateLockTID(Relation relation, const ItemPointerData *tid, Snapshot snapshot, TransactionId tuple_xid)
Definition predicate.c:2630
Size PredicateLockShmemSize(void)
Definition predicate.c:1368
#define SxactIsDoomed(sxact)
Definition predicate.c:281
#define NPREDICATELOCKTARGETENTS()
Definition predicate.c:265
static SerCommitSeqNo SerialGetMinConflictCommitSeqNo(TransactionId xid)
Definition predicate.c:960
static void SummarizeOldestCommittedSxact(void)
Definition predicate.c:1512
bool check_serial_buffers(int *newval, void **extra, GucSource source)
Definition predicate.c:858
void PostPrepare_PredicateLocks(FullTransactionId fxid)
Definition predicate.c:4868
#define TargetTagIsCoveredBy(covered_target, covering_target)
Definition predicate.c:234
static RWConflictPoolHeader RWConflictPool
Definition predicate.c:391
static void ReleaseRWConflict(RWConflict conflict)
Definition predicate.c:693
static bool TransferPredicateLocksToNewTarget(PREDICATELOCKTARGETTAG oldtargettag, PREDICATELOCKTARGETTAG newtargettag, bool removeOld)
Definition predicate.c:2739
void AtPrepare_PredicateLocks(void)
Definition predicate.c:4799
void RegisterPredicateLockingXid(TransactionId xid)
Definition predicate.c:1968
#define PredicateLockHashPartitionLock(hashcode)
Definition predicate.c:259
#define SERIAL_ENTRIESPERPAGE
Definition predicate.c:331
static bool XidIsConcurrent(TransactionId xid)
Definition predicate.c:3981
static void ReleaseOneSerializableXact(SERIALIZABLEXACT *sxact, bool partial, bool summarize)
Definition predicate.c:3844
static HTAB * PredicateLockTargetHash
Definition predicate.c:398
bool CheckForSerializableConflictOutNeeded(Relation relation, Snapshot snapshot)
Definition predicate.c:4000
#define SxactIsRolledBack(sxact)
Definition predicate.c:280
static SERIALIZABLEXACT * SavedSerializableXact
Definition predicate.c:432
#define SxactHasSummaryConflictOut(sxact)
Definition predicate.c:284
void TransferPredicateLocksToHeapRelation(Relation relation)
Definition predicate.c:3132
static void CreateLocalPredicateLockHash(void)
Definition predicate.c:1949
#define SerialSlruCtl
Definition predicate.c:327
int max_predicate_locks_per_xact
Definition predicate.c:372
Snapshot GetSerializableTransactionSnapshot(Snapshot snapshot)
Definition predicate.c:1691
void * SerializableXactHandle
Definition predicate.h:34
#define RWConflictDataSize
#define SXACT_FLAG_DEFERRABLE_WAITING
#define SXACT_FLAG_SUMMARY_CONFLICT_IN
@ TWOPHASEPREDICATERECORD_XACT
@ TWOPHASEPREDICATERECORD_LOCK
#define FirstNormalSerCommitSeqNo
#define InvalidSerCommitSeqNo
@ PREDLOCKTAG_RELATION
@ PREDLOCKTAG_PAGE
@ PREDLOCKTAG_TUPLE
#define SXACT_FLAG_CONFLICT_OUT
#define PredXactListDataSize
#define SXACT_FLAG_READ_ONLY
#define SXACT_FLAG_DOOMED
#define GET_PREDICATELOCKTARGETTAG_DB(locktag)
#define GET_PREDICATELOCKTARGETTAG_RELATION(locktag)
#define RWConflictPoolHeaderDataSize
#define InvalidSerializableXact
#define SET_PREDICATELOCKTARGETTAG_PAGE(locktag, dboid, reloid, blocknum)
#define RecoverySerCommitSeqNo
struct RWConflictData * RWConflict
#define GET_PREDICATELOCKTARGETTAG_TYPE(locktag)
#define SET_PREDICATELOCKTARGETTAG_RELATION(locktag, dboid, reloid)
uint64 SerCommitSeqNo
#define SXACT_FLAG_ROLLED_BACK
#define SXACT_FLAG_COMMITTED
#define SXACT_FLAG_RO_UNSAFE
#define SXACT_FLAG_PREPARED
#define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag, dboid, reloid, blocknum, offnum)
#define SXACT_FLAG_PARTIALLY_RELEASED
#define GET_PREDICATELOCKTARGETTAG_PAGE(locktag)
#define SXACT_FLAG_RO_SAFE
#define SXACT_FLAG_SUMMARY_CONFLICT_OUT
#define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag)
static int fb(int x)
Snapshot GetSnapshotData(Snapshot snapshot)
Definition procarray.c:2122
bool ProcArrayInstallImportedXmin(TransactionId xmin, VirtualTransactionId *sourcevxid)
Definition procarray.c:2479
#define INVALID_PROC_NUMBER
Definition procnumber.h:26
#define RelationUsesLocalBuffers(relation)
Definition rel.h:646
bool ShmemAddrIsValid(const void *addr)
Definition shmem.c:268
Size add_size(Size s1, Size s2)
Definition shmem.c:485
Size mul_size(Size s1, Size s2)
Definition shmem.c:500
HTAB * ShmemInitHash(const char *name, int64 init_size, int64 max_size, HASHCTL *infoP, int hash_flags)
Definition shmem.c:326
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition shmem.c:381
void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, const char *subdir, int buffer_tranche_id, int bank_tranche_id, SyncRequestHandler sync_handler, bool long_segment_names)
Definition slru.c:254
void SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied)
Definition slru.c:1355
int SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok, const void *opaque_data)
Definition slru.c:533
int SimpleLruZeroPage(SlruCtl ctl, int64 pageno)
Definition slru.c:380
void SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage)
Definition slru.c:1441
int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno, const void *opaque_data)
Definition slru.c:637
Size SimpleLruShmemSize(int nslots, int nlsns)
Definition slru.c:200
bool check_slru_buffers(const char *name, int *newval)
Definition slru.c:360
static LWLock * SimpleLruGetBankLock(SlruCtl ctl, int64 pageno)
Definition slru.h:171
#define SlruPagePrecedesUnitTests(ctl, per_page)
Definition slru.h:196
Snapshot GetTransactionSnapshot(void)
Definition snapmgr.c:272
#define IsMVCCSnapshot(snapshot)
Definition snapmgr.h:59
void ProcSendSignal(ProcNumber procNumber)
Definition proc.c:2003
PGPROC * MyProc
Definition proc.c:68
void ProcWaitForSignal(uint32 wait_event_info)
Definition proc.c:1991
Size keysize
Definition hsearch.h:75
HashValueFunc hash
Definition hsearch.h:78
Size entrysize
Definition hsearch.h:76
int64 num_partitions
Definition hsearch.h:68
Definition proc.h:176
SERIALIZABLEXACT * myXact
PREDICATELOCKTARGET * myTarget
PREDICATELOCKTARGETTAG tag
SerCommitSeqNo commitSeqNo
SERIALIZABLEXACT * element
SerCommitSeqNo LastSxactCommitSeqNo
SerCommitSeqNo CanPartialClearThrough
SERIALIZABLEXACT * OldCommittedSxact
SerCommitSeqNo HavePartialClearedThrough
TransactionId SxactGlobalXmin
Form_pg_index rd_index
Definition rel.h:192
Oid rd_id
Definition rel.h:113
RelFileLocator rd_locator
Definition rel.h:57
VirtualTransactionId vxid
SerCommitSeqNo lastCommitBeforeSnapshot
dlist_head possibleUnsafeConflicts
SerCommitSeqNo prepareSeqNo
SerCommitSeqNo commitSeqNo
union SERIALIZABLEXACT::@131 SeqNo
TransactionId finishedBefore
SerCommitSeqNo earliestOutConflictCommit
TransactionId headXid
Definition predicate.c:349
TransactionId tailXid
Definition predicate.c:350
TransactionId xmin
Definition snapshot.h:153
FullTransactionId nextXid
Definition transam.h:220
PREDICATELOCKTARGETTAG target
TwoPhasePredicateRecordType type
TwoPhasePredicateLockRecord lockRecord
union TwoPhasePredicateRecord::@132 data
TwoPhasePredicateXactRecord xactRecord
dlist_node * cur
Definition ilist.h:179
dlist_node * cur
Definition ilist.h:200
@ SYNC_HANDLER_NONE
Definition sync.h:42
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition transam.h:297
#define FirstUnpinnedObjectId
Definition transam.h:196
#define InvalidTransactionId
Definition transam.h:31
static bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:282
static bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:312
#define TransactionIdEquals(id1, id2)
Definition transam.h:43
#define XidFromFullTransactionId(x)
Definition transam.h:48
#define FirstNormalTransactionId
Definition transam.h:34
#define TransactionIdIsValid(xid)
Definition transam.h:41
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
void RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info, const void *data, uint32 len)
Definition twophase.c:1274
int max_prepared_xacts
Definition twophase.c:117
#define TWOPHASE_RM_PREDICATELOCK_ID
TransamVariablesData * TransamVariables
Definition varsup.c:34
bool XactDeferrable
Definition xact.c:87
bool XactReadOnly
Definition xact.c:84
TransactionId GetTopTransactionIdIfAny(void)
Definition xact.c:443
bool IsSubTransaction(void)
Definition xact.c:5067
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition xact.c:943
bool IsInParallelMode(void)
Definition xact.c:1091
#define IsolationIsSerializable()
Definition xact.h:53
bool RecoveryInProgress(void)
Definition xlog.c:6444

◆ PredicateLockHashCodeFromTargetHashCode

#define PredicateLockHashCodeFromTargetHashCode (   predicatelocktag,
  targethash 
)
Value:
#define LOG2_NUM_PREDICATELOCK_PARTITIONS
Definition lwlock.h:98
static Datum PointerGetDatum(const void *X)
Definition postgres.h:342

Definition at line 317 of file predicate.c.

◆ PredicateLockHashPartition

#define PredicateLockHashPartition (   hashcode)     ((hashcode) % NUM_PREDICATELOCK_PARTITIONS)

Definition at line 257 of file predicate.c.

◆ PredicateLockHashPartitionLock

#define PredicateLockHashPartitionLock (   hashcode)
Value:
LWLockPadded * MainLWLockArray
Definition lwlock.c:162
#define PREDICATELOCK_MANAGER_LWLOCK_OFFSET
Definition lwlock.h:105
LWLock lock
Definition lwlock.h:70

Definition at line 259 of file predicate.c.

◆ PredicateLockHashPartitionLockByIndex

#define PredicateLockHashPartitionLockByIndex (   i)     (&MainLWLockArray[PREDICATELOCK_MANAGER_LWLOCK_OFFSET + (i)].lock)

Definition at line 262 of file predicate.c.

◆ PredicateLockTargetTagHashCode

Definition at line 304 of file predicate.c.

◆ SERIAL_ENTRIESPERPAGE

#define SERIAL_ENTRIESPERPAGE   (SERIAL_PAGESIZE / SERIAL_ENTRYSIZE)

Definition at line 331 of file predicate.c.

◆ SERIAL_ENTRYSIZE

#define SERIAL_ENTRYSIZE   sizeof(SerCommitSeqNo)

Definition at line 330 of file predicate.c.

◆ SERIAL_MAX_PAGE

#define SERIAL_MAX_PAGE   (MaxTransactionId / SERIAL_ENTRIESPERPAGE)

Definition at line 336 of file predicate.c.

◆ SERIAL_PAGESIZE

#define SERIAL_PAGESIZE   BLCKSZ

Definition at line 329 of file predicate.c.

◆ SerialNextPage

#define SerialNextPage (   page)    (((page) >= SERIAL_MAX_PAGE) ? 0 : (page) + 1)

Definition at line 338 of file predicate.c.

◆ SerialPage

#define SerialPage (   xid)    (((uint32) (xid)) / SERIAL_ENTRIESPERPAGE)

Definition at line 344 of file predicate.c.

◆ SerialSlruCtl

#define SerialSlruCtl   (&SerialSlruCtlData)

Definition at line 327 of file predicate.c.

◆ SerialValue

#define SerialValue (   slotno,
  xid 
)
Value:
(*((SerCommitSeqNo *) \
(SerialSlruCtl->shared->page_buffer[slotno] + \
#define SERIAL_ENTRYSIZE
Definition predicate.c:330

Definition at line 340 of file predicate.c.

◆ SxactHasConflictOut

#define SxactHasConflictOut (   sxact)    (((sxact)->flags & SXACT_FLAG_CONFLICT_OUT) != 0)

Definition at line 290 of file predicate.c.

◆ SxactHasSummaryConflictIn

#define SxactHasSummaryConflictIn (   sxact)    (((sxact)->flags & SXACT_FLAG_SUMMARY_CONFLICT_IN) != 0)

Definition at line 283 of file predicate.c.

◆ SxactHasSummaryConflictOut

#define SxactHasSummaryConflictOut (   sxact)    (((sxact)->flags & SXACT_FLAG_SUMMARY_CONFLICT_OUT) != 0)

Definition at line 284 of file predicate.c.

◆ SxactIsCommitted

#define SxactIsCommitted (   sxact)    (((sxact)->flags & SXACT_FLAG_COMMITTED) != 0)

Definition at line 278 of file predicate.c.

◆ SxactIsDeferrableWaiting

#define SxactIsDeferrableWaiting (   sxact)    (((sxact)->flags & SXACT_FLAG_DEFERRABLE_WAITING) != 0)

Definition at line 291 of file predicate.c.

◆ SxactIsDoomed

#define SxactIsDoomed (   sxact)    (((sxact)->flags & SXACT_FLAG_DOOMED) != 0)

Definition at line 281 of file predicate.c.

◆ SxactIsOnFinishedList

#define SxactIsOnFinishedList (   sxact)    (!dlist_node_is_detached(&(sxact)->finishedLink))

Definition at line 268 of file predicate.c.

◆ SxactIsPartiallyReleased

#define SxactIsPartiallyReleased (   sxact)    (((sxact)->flags & SXACT_FLAG_PARTIALLY_RELEASED) != 0)

Definition at line 294 of file predicate.c.

◆ SxactIsPrepared

#define SxactIsPrepared (   sxact)    (((sxact)->flags & SXACT_FLAG_PREPARED) != 0)

Definition at line 279 of file predicate.c.

◆ SxactIsReadOnly

#define SxactIsReadOnly (   sxact)    (((sxact)->flags & SXACT_FLAG_READ_ONLY) != 0)

Definition at line 282 of file predicate.c.

◆ SxactIsRolledBack

#define SxactIsRolledBack (   sxact)    (((sxact)->flags & SXACT_FLAG_ROLLED_BACK) != 0)

Definition at line 280 of file predicate.c.

◆ SxactIsROSafe

#define SxactIsROSafe (   sxact)    (((sxact)->flags & SXACT_FLAG_RO_SAFE) != 0)

Definition at line 292 of file predicate.c.

◆ SxactIsROUnsafe

#define SxactIsROUnsafe (   sxact)    (((sxact)->flags & SXACT_FLAG_RO_UNSAFE) != 0)

Definition at line 293 of file predicate.c.

◆ TargetTagIsCoveredBy

Typedef Documentation

◆ SerialControl

Definition at line 353 of file predicate.c.

◆ SerialControlData

Function Documentation

◆ AtPrepare_PredicateLocks()

void AtPrepare_PredicateLocks ( void  )

Definition at line 4799 of file predicate.c.

4800{
4803 TwoPhasePredicateXactRecord *xactRecord;
4804 TwoPhasePredicateLockRecord *lockRecord;
4805 dlist_iter iter;
4806
4808 xactRecord = &(record.data.xactRecord);
4809 lockRecord = &(record.data.lockRecord);
4810
4812 return;
4813
4814 /* Generate an xact record for our SERIALIZABLEXACT */
4816 xactRecord->xmin = MySerializableXact->xmin;
4817 xactRecord->flags = MySerializableXact->flags;
4818
4819 /*
4820 * Note that we don't include the list of conflicts in or out in the
4821 * statefile, because new conflicts can be added even after the
4822 * transaction prepares. We'll just make a conservative assumption during
4823 * recovery instead.
4824 */
4825
4827 &record, sizeof(record));
4828
4829 /*
4830 * Generate a lock record for each lock.
4831 *
4832 * To do this, we need to walk the predicate lock list in our sxact rather
4833 * than using the local predicate lock table because the latter is not
4834 * guaranteed to be accurate.
4835 */
4837
4838 /*
4839 * No need to take sxact->perXactPredicateListLock in parallel mode
4840 * because there cannot be any parallel workers running while we are
4841 * preparing a transaction.
4842 */
4844
4845 dlist_foreach(iter, &sxact->predicateLocks)
4846 {
4848 dlist_container(PREDICATELOCK, xactLink, iter.cur);
4849
4851 lockRecord->target = predlock->tag.myTarget->tag;
4852
4854 &record, sizeof(record));
4855 }
4856
4858}

References Assert, dlist_iter::cur, TwoPhasePredicateRecord::data, dlist_container, dlist_foreach, fb(), SERIALIZABLEXACT::flags, TwoPhasePredicateXactRecord::flags, InvalidSerializableXact, IsParallelWorker, TwoPhasePredicateRecord::lockRecord, LW_SHARED, LWLockAcquire(), LWLockRelease(), MySerializableXact, ParallelContextActive(), RegisterTwoPhaseRecord(), TwoPhasePredicateLockRecord::target, TWOPHASE_RM_PREDICATELOCK_ID, TWOPHASEPREDICATERECORD_LOCK, TWOPHASEPREDICATERECORD_XACT, TwoPhasePredicateRecord::type, TwoPhasePredicateRecord::xactRecord, SERIALIZABLEXACT::xmin, and TwoPhasePredicateXactRecord::xmin.

Referenced by PrepareTransaction().

◆ AttachSerializableXact()

◆ check_serial_buffers()

bool check_serial_buffers ( int newval,
void **  extra,
GucSource  source 
)

Definition at line 858 of file predicate.c.

859{
860 return check_slru_buffers("serializable_buffers", newval);
861}

References check_slru_buffers(), and newval.

◆ CheckAndPromotePredicateLockRequest()

static bool CheckAndPromotePredicateLockRequest ( const PREDICATELOCKTARGETTAG reqtag)
static

Definition at line 2335 of file predicate.c.

2336{
2338 nexttag,
2341 bool found,
2342 promote;
2343
2344 promote = false;
2345
2346 targettag = *reqtag;
2347
2348 /* check parents iteratively */
2350 {
2353 &targettag,
2354 HASH_ENTER,
2355 &found);
2356 if (!found)
2357 {
2358 parentlock->held = false;
2359 parentlock->childLocks = 1;
2360 }
2361 else
2362 parentlock->childLocks++;
2363
2364 if (parentlock->childLocks >
2366 {
2367 /*
2368 * We should promote to this parent lock. Continue to check its
2369 * ancestors, however, both to get their child counts right and to
2370 * check whether we should just go ahead and promote to one of
2371 * them.
2372 */
2374 promote = true;
2375 }
2376 }
2377
2378 if (promote)
2379 {
2380 /* acquire coarsest ancestor eligible for promotion */
2382 return true;
2383 }
2384 else
2385 return false;
2386}

References fb(), GetParentPredicateLockTag(), HASH_ENTER, hash_search(), LOCALPREDICATELOCK::held, LocalPredicateLockHash, MaxPredicateChildLocks(), and PredicateLockAcquire().

Referenced by PredicateLockAcquire().

◆ CheckForSerializableConflictIn()

void CheckForSerializableConflictIn ( Relation  relation,
const ItemPointerData tid,
BlockNumber  blkno 
)

Definition at line 4345 of file predicate.c.

4346{
4348
4349 if (!SerializationNeededForWrite(relation))
4350 return;
4351
4352 /* Check if someone else has already decided that we need to die */
4354 ereport(ERROR,
4356 errmsg("could not serialize access due to read/write dependencies among transactions"),
4357 errdetail_internal("Reason code: Canceled on identification as a pivot, during conflict in checking."),
4358 errhint("The transaction might succeed if retried.")));
4359
4360 /*
4361 * We're doing a write which might cause rw-conflicts now or later.
4362 * Memorize that fact.
4363 */
4364 MyXactDidWrite = true;
4365
4366 /*
4367 * It is important that we check for locks from the finest granularity to
4368 * the coarsest granularity, so that granularity promotion doesn't cause
4369 * us to miss a lock. The new (coarser) lock will be acquired before the
4370 * old (finer) locks are released.
4371 *
4372 * It is not possible to take and hold a lock across the checks for all
4373 * granularities because each target could be in a separate partition.
4374 */
4375 if (tid != NULL)
4376 {
4378 relation->rd_locator.dbOid,
4379 relation->rd_id,
4383 }
4384
4385 if (blkno != InvalidBlockNumber)
4386 {
4388 relation->rd_locator.dbOid,
4389 relation->rd_id,
4390 blkno);
4392 }
4393
4395 relation->rd_locator.dbOid,
4396 relation->rd_id);
4398}

References CheckTargetForConflictsIn(), RelFileLocator::dbOid, ereport, errcode(), ERRCODE_T_R_SERIALIZATION_FAILURE, errdetail_internal(), errhint(), errmsg, ERROR, fb(), InvalidBlockNumber, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), MySerializableXact, MyXactDidWrite, RelationData::rd_id, RelationData::rd_locator, SerializationNeededForWrite(), SET_PREDICATELOCKTARGETTAG_PAGE, SET_PREDICATELOCKTARGETTAG_RELATION, SET_PREDICATELOCKTARGETTAG_TUPLE, and SxactIsDoomed.

Referenced by _bt_check_unique(), _bt_doinsert(), _hash_doinsert(), ginEntryInsert(), ginFindLeafPage(), ginHeapTupleFastInsert(), gistinserttuples(), heap_delete(), heap_insert(), heap_multi_insert(), heap_update(), and index_insert().

◆ CheckForSerializableConflictOut()

void CheckForSerializableConflictOut ( Relation  relation,
TransactionId  xid,
Snapshot  snapshot 
)

Definition at line 4032 of file predicate.c.

4033{
4037
4038 if (!SerializationNeededForRead(relation, snapshot))
4039 return;
4040
4041 /* Check if someone else has already decided that we need to die */
4043 {
4044 ereport(ERROR,
4046 errmsg("could not serialize access due to read/write dependencies among transactions"),
4047 errdetail_internal("Reason code: Canceled on identification as a pivot, during conflict out checking."),
4048 errhint("The transaction might succeed if retried.")));
4049 }
4051
4053 return;
4054
4055 /*
4056 * Find sxact or summarized info for the top level xid.
4057 */
4058 sxidtag.xid = xid;
4060 sxid = (SERIALIZABLEXID *)
4062 if (!sxid)
4063 {
4064 /*
4065 * Transaction not found in "normal" SSI structures. Check whether it
4066 * got pushed out to SLRU storage for "old committed" transactions.
4067 */
4069
4071 if (conflictCommitSeqNo != 0)
4072 {
4077 ereport(ERROR,
4079 errmsg("could not serialize access due to read/write dependencies among transactions"),
4080 errdetail_internal("Reason code: Canceled on conflict out to old pivot %u.", xid),
4081 errhint("The transaction might succeed if retried.")));
4082
4085 ereport(ERROR,
4087 errmsg("could not serialize access due to read/write dependencies among transactions"),
4088 errdetail_internal("Reason code: Canceled on identification as a pivot, with conflict out to old committed transaction %u.", xid),
4089 errhint("The transaction might succeed if retried.")));
4090
4092 }
4093
4094 /* It's not serializable or otherwise not important. */
4096 return;
4097 }
4098 sxact = sxid->myXact;
4099 Assert(TransactionIdEquals(sxact->topXid, xid));
4101 {
4102 /* Can't conflict with ourselves or a transaction that will roll back. */
4104 return;
4105 }
4106
4107 /*
4108 * We have a conflict out to a transaction which has a conflict out to a
4109 * summarized transaction. That summarized transaction must have
4110 * committed first, and we can't tell when it committed in relation to our
4111 * snapshot acquisition, so something needs to be canceled.
4112 */
4114 {
4115 if (!SxactIsPrepared(sxact))
4116 {
4117 sxact->flags |= SXACT_FLAG_DOOMED;
4119 return;
4120 }
4121 else
4122 {
4124 ereport(ERROR,
4126 errmsg("could not serialize access due to read/write dependencies among transactions"),
4127 errdetail_internal("Reason code: Canceled on conflict out to old pivot."),
4128 errhint("The transaction might succeed if retried.")));
4129 }
4130 }
4131
4132 /*
4133 * If this is a read-only transaction and the writing transaction has
4134 * committed, and it doesn't have a rw-conflict to a transaction which
4135 * committed before it, no conflict.
4136 */
4141 || MySerializableXact->SeqNo.lastCommitBeforeSnapshot < sxact->SeqNo.earliestOutConflictCommit))
4142 {
4143 /* Read-only transaction will appear to run first. No conflict. */
4145 return;
4146 }
4147
4148 if (!XidIsConcurrent(xid))
4149 {
4150 /* This write was already in our snapshot; no conflict. */
4152 return;
4153 }
4154
4156 {
4157 /* We don't want duplicate conflict records in the list. */
4159 return;
4160 }
4161
4162 /*
4163 * Flag the conflict. But first, if this conflict creates a dangerous
4164 * structure, ereport an error.
4165 */
4168}

References Assert, dlist_is_empty(), ereport, errcode(), ERRCODE_T_R_SERIALIZATION_FAILURE, errdetail_internal(), errhint(), errmsg, ERROR, fb(), FlagRWConflict(), SERIALIZABLEXACT::flags, GetTopTransactionIdIfAny(), HASH_FIND, hash_search(), SERIALIZABLEXACT::inConflicts, InvalidSerCommitSeqNo, SERIALIZABLEXACT::lastCommitBeforeSnapshot, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MySerializableXact, RWConflictExists(), SERIALIZABLEXACT::SeqNo, SerialGetMinConflictCommitSeqNo(), SerializableXidHash, SerializationNeededForRead(), SXACT_FLAG_DOOMED, SXACT_FLAG_SUMMARY_CONFLICT_OUT, SxactHasConflictOut, SxactHasSummaryConflictIn, SxactHasSummaryConflictOut, SxactIsCommitted, SxactIsDoomed, SxactIsPrepared, SxactIsReadOnly, TransactionIdEquals, TransactionIdIsValid, and XidIsConcurrent().

Referenced by HeapCheckForSerializableConflictOut().

◆ CheckForSerializableConflictOutNeeded()

bool CheckForSerializableConflictOutNeeded ( Relation  relation,
Snapshot  snapshot 
)

Definition at line 4000 of file predicate.c.

4001{
4002 if (!SerializationNeededForRead(relation, snapshot))
4003 return false;
4004
4005 /* Check if someone else has already decided that we need to die */
4007 {
4008 ereport(ERROR,
4010 errmsg("could not serialize access due to read/write dependencies among transactions"),
4011 errdetail_internal("Reason code: Canceled on identification as a pivot, during conflict out checking."),
4012 errhint("The transaction might succeed if retried.")));
4013 }
4014
4015 return true;
4016}

References ereport, errcode(), ERRCODE_T_R_SERIALIZATION_FAILURE, errdetail_internal(), errhint(), errmsg, ERROR, MySerializableXact, SerializationNeededForRead(), and SxactIsDoomed.

Referenced by heap_prepare_pagescan(), and HeapCheckForSerializableConflictOut().

◆ CheckPointPredicate()

void CheckPointPredicate ( void  )

Definition at line 1052 of file predicate.c.

1053{
1055
1057
1058 /* Exit quickly if the SLRU is currently not in use. */
1059 if (serialControl->headPage < 0)
1060 {
1062 return;
1063 }
1064
1066 {
1068
1070
1071 /*
1072 * It is possible for the tailXid to be ahead of the headXid. This
1073 * occurs if we checkpoint while there are in-progress serializable
1074 * transaction(s) advancing the tail but we are yet to summarize the
1075 * transactions. In this case, we cut off up to the headPage and the
1076 * next summary will advance the headXid.
1077 */
1079 {
1080 /* We can truncate the SLRU up to the page containing tailXid */
1082 }
1083 else
1085 }
1086 else
1087 {
1088 /*----------
1089 * The SLRU is no longer needed. Truncate to head before we set head
1090 * invalid.
1091 *
1092 * XXX: It's possible that the SLRU is not needed again until XID
1093 * wrap-around has happened, so that the segment containing headPage
1094 * that we leave behind will appear to be new again. In that case it
1095 * won't be removed until XID horizon advances enough to make it
1096 * current again.
1097 *
1098 * XXX: This should happen in vac_truncate_clog(), not in checkpoints.
1099 * Consider this scenario, starting from a system with no in-progress
1100 * transactions and VACUUM FREEZE having maximized oldestXact:
1101 * - Start a SERIALIZABLE transaction.
1102 * - Start, finish, and summarize a SERIALIZABLE transaction, creating
1103 * one SLRU page.
1104 * - Consume XIDs to reach xidStopLimit.
1105 * - Finish all transactions. Due to the long-running SERIALIZABLE
1106 * transaction, earlier checkpoints did not touch headPage. The
1107 * next checkpoint will change it, but that checkpoint happens after
1108 * the end of the scenario.
1109 * - VACUUM to advance XID limits.
1110 * - Consume ~2M XIDs, crossing the former xidWrapLimit.
1111 * - Start, finish, and summarize a SERIALIZABLE transaction.
1112 * SerialAdd() declines to create the targetPage, because headPage
1113 * is not regarded as in the past relative to that targetPage. The
1114 * transaction instigating the summarize fails in
1115 * SimpleLruReadPage().
1116 */
1118 serialControl->headPage = -1;
1119 }
1120
1122
1123 /*
1124 * Truncate away pages that are no longer required. Note that no
1125 * additional locking is required, because this is only called as part of
1126 * a checkpoint, and the validity limits have already been determined.
1127 */
1129
1130 /*
1131 * Write dirty SLRU pages to disk
1132 *
1133 * This is not actually necessary from a correctness point of view. We do
1134 * it merely as a debugging aid.
1135 *
1136 * We're doing this after the truncation to avoid writing pages right
1137 * before deleting the file in which they sit, which would be completely
1138 * pointless.
1139 */
1141}

References fb(), SerialControlData::headPage, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), serialControl, SerialPage, SerialPagePrecedesLogically(), SerialSlruCtl, SimpleLruTruncate(), SimpleLruWriteAll(), SerialControlData::tailXid, and TransactionIdIsValid.

Referenced by CheckPointGuts().

◆ CheckTableForSerializableConflictIn()

void CheckTableForSerializableConflictIn ( Relation  relation)

Definition at line 4428 of file predicate.c.

4429{
4431 PREDICATELOCKTARGET *target;
4432 Oid dbId;
4433 Oid heapId;
4434 int i;
4435
4436 /*
4437 * Bail out quickly if there are no serializable transactions running.
4438 * It's safe to check this without taking locks because the caller is
4439 * holding an ACCESS EXCLUSIVE lock on the relation. No new locks which
4440 * would matter here can be acquired while that is held.
4441 */
4443 return;
4444
4445 if (!SerializationNeededForWrite(relation))
4446 return;
4447
4448 /*
4449 * We're doing a write which might cause rw-conflicts now or later.
4450 * Memorize that fact.
4451 */
4452 MyXactDidWrite = true;
4453
4454 Assert(relation->rd_index == NULL); /* not an index relation */
4455
4456 dbId = relation->rd_locator.dbOid;
4457 heapId = relation->rd_id;
4458
4460 for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
4463
4464 /* Scan through target list */
4466
4467 while ((target = (PREDICATELOCKTARGET *) hash_seq_search(&seqstat)))
4468 {
4469 dlist_mutable_iter iter;
4470
4471 /*
4472 * Check whether this is a target which needs attention.
4473 */
4475 continue; /* wrong relation id */
4476 if (GET_PREDICATELOCKTARGETTAG_DB(target->tag) != dbId)
4477 continue; /* wrong database id */
4478
4479 /*
4480 * Loop through locks for this target and flag conflicts.
4481 */
4482 dlist_foreach_modify(iter, &target->predicateLocks)
4483 {
4485 dlist_container(PREDICATELOCK, targetLink, iter.cur);
4486
4487 if (predlock->tag.myXact != MySerializableXact
4489 {
4491 }
4492 }
4493 }
4494
4495 /* Release locks in reverse order */
4497 for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
4500}

References Assert, dlist_mutable_iter::cur, RelFileLocator::dbOid, dlist_container, dlist_foreach_modify, fb(), FlagRWConflict(), GET_PREDICATELOCKTARGETTAG_DB, GET_PREDICATELOCKTARGETTAG_RELATION, hash_seq_init(), hash_seq_search(), i, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MySerializableXact, MyXactDidWrite, NUM_PREDICATELOCK_PARTITIONS, PredicateLockHashPartitionLockByIndex, PREDICATELOCKTARGET::predicateLocks, PredicateLockTargetHash, PredXact, RelationData::rd_id, RelationData::rd_index, RelationData::rd_locator, RWConflictExists(), SerializationNeededForWrite(), PredXactListData::SxactGlobalXmin, PREDICATELOCKTARGET::tag, and TransactionIdIsValid.

Referenced by ExecuteTruncateGuts(), and heap_drop_with_catalog().

◆ CheckTargetForConflictsIn()

static void CheckTargetForConflictsIn ( PREDICATELOCKTARGETTAG targettag)
static

Definition at line 4175 of file predicate.c.

4176{
4179 PREDICATELOCKTARGET *target;
4182 dlist_mutable_iter iter;
4183
4185
4186 /*
4187 * The same hash and LW lock apply to the lock target and the lock itself.
4188 */
4192 target = (PREDICATELOCKTARGET *)
4195 HASH_FIND, NULL);
4196 if (!target)
4197 {
4198 /* Nothing has this target locked; we're done here. */
4200 return;
4201 }
4202
4203 /*
4204 * Each lock for an overlapping transaction represents a conflict: a
4205 * rw-dependency in to this transaction.
4206 */
4208
4209 dlist_foreach_modify(iter, &target->predicateLocks)
4210 {
4212 dlist_container(PREDICATELOCK, targetLink, iter.cur);
4213 SERIALIZABLEXACT *sxact = predlock->tag.myXact;
4214
4216 {
4217 /*
4218 * If we're getting a write lock on a tuple, we don't need a
4219 * predicate (SIREAD) lock on the same tuple. We can safely remove
4220 * our SIREAD lock, but we'll defer doing so until after the loop
4221 * because that requires upgrading to an exclusive partition lock.
4222 *
4223 * We can't use this optimization within a subtransaction because
4224 * the subtransaction could roll back, and we would be left
4225 * without any lock at the top level.
4226 */
4227 if (!IsSubTransaction()
4229 {
4231 mypredlocktag = predlock->tag;
4232 }
4233 }
4234 else if (!SxactIsDoomed(sxact)
4237 sxact->finishedBefore))
4239 {
4242
4243 /*
4244 * Re-check after getting exclusive lock because the other
4245 * transaction may have flagged a conflict.
4246 */
4247 if (!SxactIsDoomed(sxact)
4250 sxact->finishedBefore))
4252 {
4254 }
4255
4258 }
4259 }
4262
4263 /*
4264 * If we found one of our own SIREAD locks to remove, remove it now.
4265 *
4266 * At this point our transaction already has a RowExclusiveLock on the
4267 * relation, so we are OK to drop the predicate lock on the tuple, if
4268 * found, without fearing that another write against the tuple will occur
4269 * before the MVCC information makes it to the buffer.
4270 */
4271 if (mypredlock != NULL)
4272 {
4275
4277 if (IsInParallelMode())
4281
4282 /*
4283 * Remove the predicate lock from shared memory, if it wasn't removed
4284 * while the locks were released. One way that could happen is from
4285 * autovacuum cleaning up an index.
4286 */
4293 HASH_FIND, NULL);
4294 if (rmpredlock != NULL)
4295 {
4297
4298 dlist_delete(&(mypredlock->targetLink));
4299 dlist_delete(&(mypredlock->xactLink));
4300
4305 HASH_REMOVE, NULL);
4307
4309 }
4310
4313 if (IsInParallelMode())
4316
4317 if (rmpredlock != NULL)
4318 {
4319 /*
4320 * Remove entry in local lock table if it exists. It's OK if it
4321 * doesn't exist; that means the lock was transferred to a new
4322 * target by a different backend.
4323 */
4326 HASH_REMOVE, NULL);
4327
4329 }
4330 }
4331}

References Assert, dlist_mutable_iter::cur, DecrementParentLocks(), dlist_container, dlist_delete(), dlist_foreach_modify, fb(), FlagRWConflict(), GET_PREDICATELOCKTARGETTAG_OFFSET, GetTransactionSnapshot(), HASH_FIND, HASH_REMOVE, hash_search_with_hash_value(), InvalidSerializableXact, IsInParallelMode(), IsSubTransaction(), LocalPredicateLockHash, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MySerializableXact, SERIALIZABLEXACT::perXactPredicateListLock, PredicateLockHash, PredicateLockHashCodeFromTargetHashCode, PredicateLockHashPartitionLock, PREDICATELOCKTARGET::predicateLocks, PredicateLockTargetHash, PredicateLockTargetTagHashCode, RemoveTargetIfNoLongerUsed(), RWConflictExists(), SxactIsCommitted, SxactIsDoomed, and TransactionIdPrecedes().

Referenced by CheckForSerializableConflictIn().

◆ ClearOldPredicateLocks()

static void ClearOldPredicateLocks ( void  )
static

Definition at line 3706 of file predicate.c.

3707{
3708 dlist_mutable_iter iter;
3709
3710 /*
3711 * Loop through finished transactions. They are in commit order, so we can
3712 * stop as soon as we find one that's still interesting.
3713 */
3717 {
3719 dlist_container(SERIALIZABLEXACT, finishedLink, iter.cur);
3720
3724 {
3725 /*
3726 * This transaction committed before any in-progress transaction
3727 * took its snapshot. It's no longer interesting.
3728 */
3730 dlist_delete_thoroughly(&finishedSxact->finishedLink);
3733 }
3734 else if (finishedSxact->commitSeqNo > PredXact->HavePartialClearedThrough
3735 && finishedSxact->commitSeqNo <= PredXact->CanPartialClearThrough)
3736 {
3737 /*
3738 * Any active transactions that took their snapshot before this
3739 * transaction committed are read-only, so we can clear part of
3740 * its state.
3741 */
3743
3745 {
3746 /* A read-only transaction can be removed entirely */
3747 dlist_delete_thoroughly(&(finishedSxact->finishedLink));
3749 }
3750 else
3751 {
3752 /*
3753 * A read-write transaction can only be partially cleared. We
3754 * need to keep the SERIALIZABLEXACT but can release the
3755 * SIREAD locks and conflicts in.
3756 */
3758 }
3759
3762 }
3763 else
3764 {
3765 /* Still interesting. */
3766 break;
3767 }
3768 }
3770
3771 /*
3772 * Loop through predicate locks on dummy transaction for summarized data.
3773 */
3776 {
3778 dlist_container(PREDICATELOCK, xactLink, iter.cur);
3780
3782 Assert(predlock->commitSeqNo != 0);
3783 Assert(predlock->commitSeqNo != InvalidSerCommitSeqNo);
3786
3787 /*
3788 * If this lock originally belonged to an old enough transaction, we
3789 * can release it.
3790 */
3792 {
3793 PREDICATELOCKTAG tag;
3794 PREDICATELOCKTARGET *target;
3798
3799 tag = predlock->tag;
3800 target = tag.myTarget;
3801 targettag = target->tag;
3804
3806
3807 dlist_delete(&(predlock->targetLink));
3808 dlist_delete(&(predlock->xactLink));
3809
3813 HASH_REMOVE, NULL);
3815
3817 }
3818 }
3819
3822}

References Assert, PredXactListData::CanPartialClearThrough, dlist_mutable_iter::cur, dlist_container, dlist_delete(), dlist_delete_thoroughly(), dlist_foreach_modify, fb(), FinishedSerializableTransactions, HASH_REMOVE, hash_search_with_hash_value(), PredXactListData::HavePartialClearedThrough, InvalidSerCommitSeqNo, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), PREDICATELOCKTAG::myTarget, OldCommittedSxact, PredicateLockHash, PredicateLockHashCodeFromTargetHashCode, PredicateLockHashPartitionLock, SERIALIZABLEXACT::predicateLocks, PredicateLockTargetTagHashCode, PredXact, ReleaseOneSerializableXact(), RemoveTargetIfNoLongerUsed(), PredXactListData::SxactGlobalXmin, SxactIsReadOnly, PREDICATELOCKTARGET::tag, TransactionIdIsValid, and TransactionIdPrecedesOrEquals().

Referenced by ReleasePredicateLocks().

◆ CoarserLockCovers()

static bool CoarserLockCovers ( const PREDICATELOCKTARGETTAG newtargettag)
static

Definition at line 2120 of file predicate.c.

2121{
2123 parenttag;
2124
2126
2127 /* check parents iteratively until no more */
2129 {
2132 return true;
2133 }
2134
2135 /* no more parents to check; lock is not covered */
2136 return false;
2137}

References fb(), GetParentPredicateLockTag(), and PredicateLockExists().

Referenced by PredicateLockAcquire().

◆ CreateLocalPredicateLockHash()

static void CreateLocalPredicateLockHash ( void  )
static

Definition at line 1949 of file predicate.c.

1950{
1952
1953 /* Initialize the backend-local hash table of parent locks */
1955 hash_ctl.keysize = sizeof(PREDICATELOCKTARGETTAG);
1956 hash_ctl.entrysize = sizeof(LOCALPREDICATELOCK);
1957 LocalPredicateLockHash = hash_create("Local predicate lock",
1959 &hash_ctl,
1961}

References Assert, fb(), HASH_BLOBS, hash_create(), HASH_ELEM, LocalPredicateLockHash, and max_predicate_locks_per_xact.

Referenced by AttachSerializableXact(), and GetSerializableTransactionSnapshotInt().

◆ CreatePredicateLock()

static void CreatePredicateLock ( const PREDICATELOCKTARGETTAG targettag,
uint32  targettaghash,
SERIALIZABLEXACT sxact 
)
static

Definition at line 2462 of file predicate.c.

2465{
2466 PREDICATELOCKTARGET *target;
2467 PREDICATELOCKTAG locktag;
2468 PREDICATELOCK *lock;
2470 bool found;
2471
2473
2475 if (IsInParallelMode())
2476 LWLockAcquire(&sxact->perXactPredicateListLock, LW_EXCLUSIVE);
2478
2479 /* Make sure that the target is represented. */
2480 target = (PREDICATELOCKTARGET *)
2483 HASH_ENTER_NULL, &found);
2484 if (!target)
2485 ereport(ERROR,
2487 errmsg("out of shared memory"),
2488 errhint("You might need to increase \"%s\".", "max_pred_locks_per_transaction")));
2489 if (!found)
2490 dlist_init(&target->predicateLocks);
2491
2492 /* We've got the sxact and target, make sure they're joined. */
2493 locktag.myTarget = target;
2494 locktag.myXact = sxact;
2495 lock = (PREDICATELOCK *)
2498 HASH_ENTER_NULL, &found);
2499 if (!lock)
2500 ereport(ERROR,
2502 errmsg("out of shared memory"),
2503 errhint("You might need to increase \"%s\".", "max_pred_locks_per_transaction")));
2504
2505 if (!found)
2506 {
2507 dlist_push_tail(&target->predicateLocks, &lock->targetLink);
2508 dlist_push_tail(&sxact->predicateLocks, &lock->xactLink);
2510 }
2511
2513 if (IsInParallelMode())
2514 LWLockRelease(&sxact->perXactPredicateListLock);
2516}

References PREDICATELOCK::commitSeqNo, dlist_init(), dlist_push_tail(), ereport, errcode(), errhint(), errmsg, ERROR, fb(), HASH_ENTER_NULL, hash_search_with_hash_value(), InvalidSerCommitSeqNo, IsInParallelMode(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), PREDICATELOCKTAG::myTarget, PREDICATELOCKTAG::myXact, PredicateLockHash, PredicateLockHashCodeFromTargetHashCode, PredicateLockHashPartitionLock, PREDICATELOCKTARGET::predicateLocks, PredicateLockTargetHash, PREDICATELOCK::targetLink, and PREDICATELOCK::xactLink.

Referenced by predicatelock_twophase_recover(), and PredicateLockAcquire().

◆ CreatePredXact()

◆ DecrementParentLocks()

static void DecrementParentLocks ( const PREDICATELOCKTARGETTAG targettag)
static

Definition at line 2400 of file predicate.c.

2401{
2403 nexttag;
2404
2406
2408 {
2412
2418 HASH_FIND, NULL);
2419
2420 /*
2421 * There's a small chance the parent lock doesn't exist in the lock
2422 * table. This can happen if we prematurely removed it because an
2423 * index split caused the child refcount to be off.
2424 */
2425 if (parentlock == NULL)
2426 continue;
2427
2428 parentlock->childLocks--;
2429
2430 /*
2431 * Under similar circumstances the parent lock's refcount might be
2432 * zero. This only happens if we're holding that lock (otherwise we
2433 * would have removed the entry).
2434 */
2435 if (parentlock->childLocks < 0)
2436 {
2437 Assert(parentlock->held);
2438 parentlock->childLocks = 0;
2439 }
2440
2441 if ((parentlock->childLocks == 0) && (!parentlock->held))
2442 {
2446 HASH_REMOVE, NULL);
2448 }
2449 }
2450}

References Assert, fb(), GetParentPredicateLockTag(), HASH_FIND, HASH_REMOVE, hash_search_with_hash_value(), LocalPredicateLockHash, PG_USED_FOR_ASSERTS_ONLY, and PredicateLockTargetTagHashCode.

Referenced by CheckTargetForConflictsIn(), and DeleteChildTargetLocks().

◆ DeleteChildTargetLocks()

static void DeleteChildTargetLocks ( const PREDICATELOCKTARGETTAG newtargettag)
static

Definition at line 2223 of file predicate.c.

2224{
2227 dlist_mutable_iter iter;
2228
2231 if (IsInParallelMode())
2232 LWLockAcquire(&sxact->perXactPredicateListLock, LW_EXCLUSIVE);
2233
2234 dlist_foreach_modify(iter, &sxact->predicateLocks)
2235 {
2239
2240 predlock = dlist_container(PREDICATELOCK, xactLink, iter.cur);
2241
2242 oldlocktag = predlock->tag;
2243 Assert(oldlocktag.myXact == sxact);
2244 oldtarget = oldlocktag.myTarget;
2245 oldtargettag = oldtarget->tag;
2246
2248 {
2252
2255
2257
2258 dlist_delete(&predlock->xactLink);
2259 dlist_delete(&predlock->targetLink);
2262 &oldlocktag,
2265 HASH_REMOVE, NULL);
2267
2269
2271
2273 }
2274 }
2275 if (IsInParallelMode())
2276 LWLockRelease(&sxact->perXactPredicateListLock);
2278}

References Assert, dlist_mutable_iter::cur, DecrementParentLocks(), dlist_container, dlist_delete(), dlist_foreach_modify, fb(), HASH_REMOVE, hash_search_with_hash_value(), IsInParallelMode(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MySerializableXact, PG_USED_FOR_ASSERTS_ONLY, PredicateLockHash, PredicateLockHashCodeFromTargetHashCode, PredicateLockHashPartitionLock, PredicateLockTargetTagHashCode, RemoveTargetIfNoLongerUsed(), and TargetTagIsCoveredBy.

Referenced by PredicateLockAcquire().

◆ DeleteLockTarget()

static void DeleteLockTarget ( PREDICATELOCKTARGET target,
uint32  targettaghash 
)
static

◆ DropAllPredicateLocksFromTable()

static void DropAllPredicateLocksFromTable ( Relation  relation,
bool  transfer 
)
static

Definition at line 2946 of file predicate.c.

2947{
2951 Oid dbId;
2952 Oid relId;
2953 Oid heapId;
2954 int i;
2955 bool isIndex;
2956 bool found;
2958
2959 /*
2960 * Bail out quickly if there are no serializable transactions running.
2961 * It's safe to check this without taking locks because the caller is
2962 * holding an ACCESS EXCLUSIVE lock on the relation. No new locks which
2963 * would matter here can be acquired while that is held.
2964 */
2966 return;
2967
2968 if (!PredicateLockingNeededForRelation(relation))
2969 return;
2970
2971 dbId = relation->rd_locator.dbOid;
2972 relId = relation->rd_id;
2973 if (relation->rd_index == NULL)
2974 {
2975 isIndex = false;
2976 heapId = relId;
2977 }
2978 else
2979 {
2980 isIndex = true;
2981 heapId = relation->rd_index->indrelid;
2982 }
2984 Assert(transfer || !isIndex); /* index OID only makes sense with
2985 * transfer */
2986
2987 /* Retrieve first time needed, then keep. */
2989 heaptarget = NULL;
2990
2991 /* Acquire locks on all lock partitions */
2993 for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
2996
2997 /*
2998 * Remove the dummy entry to give us scratch space, so we know we'll be
2999 * able to create the new lock target.
3000 */
3001 if (transfer)
3002 RemoveScratchTarget(true);
3003
3004 /* Scan through target map */
3006
3008 {
3009 dlist_mutable_iter iter;
3010
3011 /*
3012 * Check whether this is a target which needs attention.
3013 */
3015 continue; /* wrong relation id */
3016 if (GET_PREDICATELOCKTARGETTAG_DB(oldtarget->tag) != dbId)
3017 continue; /* wrong database id */
3018 if (transfer && !isIndex
3020 continue; /* already the right lock */
3021
3022 /*
3023 * If we made it here, we have work to do. We make sure the heap
3024 * relation lock exists, then we walk the list of predicate locks for
3025 * the old target we found, moving all locks to the heap relation lock
3026 * -- unless they already hold that.
3027 */
3028
3029 /*
3030 * First make sure we have the heap relation target. We only need to
3031 * do this once.
3032 */
3033 if (transfer && heaptarget == NULL)
3034 {
3036
3042 HASH_ENTER, &found);
3043 if (!found)
3044 dlist_init(&heaptarget->predicateLocks);
3045 }
3046
3047 /*
3048 * Loop through all the locks on the old target, replacing them with
3049 * locks on the new target.
3050 */
3051 dlist_foreach_modify(iter, &oldtarget->predicateLocks)
3052 {
3054 dlist_container(PREDICATELOCK, targetLink, iter.cur);
3058
3059 /*
3060 * Remove the old lock first. This avoids the chance of running
3061 * out of lock structure entries for the hash table.
3062 */
3064 oldXact = oldpredlock->tag.myXact;
3065
3066 dlist_delete(&(oldpredlock->xactLink));
3067
3068 /*
3069 * No need for retail delete from oldtarget list, we're removing
3070 * the whole target anyway.
3071 */
3073 &oldpredlock->tag,
3074 HASH_REMOVE, &found);
3075 Assert(found);
3076
3077 if (transfer)
3078 {
3080
3082 newpredlocktag.myXact = oldXact;
3088 HASH_ENTER,
3089 &found);
3090 if (!found)
3091 {
3092 dlist_push_tail(&(heaptarget->predicateLocks),
3093 &(newpredlock->targetLink));
3094 dlist_push_tail(&(newpredlocktag.myXact->predicateLocks),
3095 &(newpredlock->xactLink));
3096 newpredlock->commitSeqNo = oldCommitSeqNo;
3097 }
3098 else
3099 {
3100 if (newpredlock->commitSeqNo < oldCommitSeqNo)
3101 newpredlock->commitSeqNo = oldCommitSeqNo;
3102 }
3103
3104 Assert(newpredlock->commitSeqNo != 0);
3105 Assert((newpredlock->commitSeqNo == InvalidSerCommitSeqNo)
3106 || (newpredlock->tag.myXact == OldCommittedSxact));
3107 }
3108 }
3109
3111 &found);
3112 Assert(found);
3113 }
3114
3115 /* Put the scratch entry back */
3116 if (transfer)
3118
3119 /* Release locks in reverse order */
3121 for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
3124}

References Assert, SERIALIZABLEXACT::commitSeqNo, dlist_mutable_iter::cur, RelFileLocator::dbOid, dlist_container, dlist_delete(), dlist_foreach_modify, dlist_init(), dlist_push_tail(), fb(), GET_PREDICATELOCKTARGETTAG_DB, GET_PREDICATELOCKTARGETTAG_RELATION, GET_PREDICATELOCKTARGETTAG_TYPE, HASH_ENTER, HASH_REMOVE, hash_search(), hash_search_with_hash_value(), hash_seq_init(), hash_seq_search(), i, InvalidOid, InvalidSerCommitSeqNo, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), PREDICATELOCKTAG::myTarget, NUM_PREDICATELOCK_PARTITIONS, OldCommittedSxact, PredicateLockHash, PredicateLockHashCodeFromTargetHashCode, PredicateLockHashPartitionLockByIndex, PredicateLockingNeededForRelation(), PredicateLockTargetHash, PredicateLockTargetTagHashCode, PREDLOCKTAG_RELATION, PredXact, RelationData::rd_id, RelationData::rd_index, RelationData::rd_locator, RemoveScratchTarget(), RestoreScratchTarget(), SET_PREDICATELOCKTARGETTAG_RELATION, PredXactListData::SxactGlobalXmin, and TransactionIdIsValid.

Referenced by TransferPredicateLocksToHeapRelation().

◆ FlagRWConflict()

static void FlagRWConflict ( SERIALIZABLEXACT reader,
SERIALIZABLEXACT writer 
)
static

Definition at line 4510 of file predicate.c.

4511{
4512 Assert(reader != writer);
4513
4514 /* First, see if this conflict causes failure. */
4516
4517 /* Actually do the conflict flagging. */
4518 if (reader == OldCommittedSxact)
4520 else if (writer == OldCommittedSxact)
4522 else
4523 SetRWConflict(reader, writer);
4524}

References Assert, fb(), SERIALIZABLEXACT::flags, OldCommittedSxact, OnConflict_CheckForSerializationFailure(), SetRWConflict(), SXACT_FLAG_SUMMARY_CONFLICT_IN, and SXACT_FLAG_SUMMARY_CONFLICT_OUT.

Referenced by CheckForSerializableConflictOut(), CheckTableForSerializableConflictIn(), and CheckTargetForConflictsIn().

◆ FlagSxactUnsafe()

static void FlagSxactUnsafe ( SERIALIZABLEXACT sxact)
static

Definition at line 701 of file predicate.c.

702{
704
707
708 sxact->flags |= SXACT_FLAG_RO_UNSAFE;
709
710 /*
711 * We know this isn't a safe snapshot, so we can stop looking for other
712 * potential conflicts.
713 */
714 dlist_foreach_modify(iter, &sxact->possibleUnsafeConflicts)
715 {
717 dlist_container(RWConflictData, inLink, iter.cur);
718
719 Assert(!SxactIsReadOnly(conflict->sxactOut));
720 Assert(sxact == conflict->sxactIn);
721
723 }
724}

References Assert, dlist_mutable_iter::cur, dlist_container, dlist_foreach_modify, fb(), ReleaseRWConflict(), SXACT_FLAG_RO_UNSAFE, SxactIsReadOnly, and SxactIsROSafe.

Referenced by ReleasePredicateLocks().

◆ GetParentPredicateLockTag()

static bool GetParentPredicateLockTag ( const PREDICATELOCKTARGETTAG tag,
PREDICATELOCKTARGETTAG parent 
)
static

Definition at line 2081 of file predicate.c.

2083{
2084 switch (GET_PREDICATELOCKTARGETTAG_TYPE(*tag))
2085 {
2087 /* relation locks have no parent lock */
2088 return false;
2089
2090 case PREDLOCKTAG_PAGE:
2091 /* parent lock is relation lock */
2095
2096 return true;
2097
2098 case PREDLOCKTAG_TUPLE:
2099 /* parent lock is page lock */
2104 return true;
2105 }
2106
2107 /* not reachable */
2108 Assert(false);
2109 return false;
2110}

References Assert, GET_PREDICATELOCKTARGETTAG_DB, GET_PREDICATELOCKTARGETTAG_PAGE, GET_PREDICATELOCKTARGETTAG_RELATION, GET_PREDICATELOCKTARGETTAG_TYPE, PREDLOCKTAG_PAGE, PREDLOCKTAG_RELATION, PREDLOCKTAG_TUPLE, SET_PREDICATELOCKTARGETTAG_PAGE, and SET_PREDICATELOCKTARGETTAG_RELATION.

Referenced by CheckAndPromotePredicateLockRequest(), CoarserLockCovers(), DecrementParentLocks(), and PredicateLockPageSplit().

◆ GetPredicateLockStatusData()

PredicateLockData * GetPredicateLockStatusData ( void  )

Definition at line 1456 of file predicate.c.

1457{
1459 int i;
1460 int els,
1461 el;
1464
1466
1467 /*
1468 * To ensure consistency, take simultaneous locks on all partition locks
1469 * in ascending order, then SerializableXactHashLock.
1470 */
1471 for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
1474
1475 /* Get number of locks and allocate appropriately-sized arrays. */
1477 data->nelements = els;
1480
1481
1482 /* Scan through PredicateLockHash and copy contents */
1484
1485 el = 0;
1486
1488 {
1489 data->locktags[el] = predlock->tag.myTarget->tag;
1490 data->xacts[el] = *predlock->tag.myXact;
1491 el++;
1492 }
1493
1494 Assert(el == els);
1495
1496 /* Release locks in reverse order */
1498 for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
1500
1501 return data;
1502}

References Assert, data, fb(), hash_get_num_entries(), hash_seq_init(), hash_seq_search(), i, LW_SHARED, LWLockAcquire(), LWLockRelease(), NUM_PREDICATELOCK_PARTITIONS, palloc_array, palloc_object, PredicateLockHash, and PredicateLockHashPartitionLockByIndex.

Referenced by pg_lock_status().

◆ GetSafeSnapshot()

static Snapshot GetSafeSnapshot ( Snapshot  origSnapshot)
static

Definition at line 1567 of file predicate.c.

1568{
1569 Snapshot snapshot;
1570
1572
1573 while (true)
1574 {
1575 /*
1576 * GetSerializableTransactionSnapshotInt is going to call
1577 * GetSnapshotData, so we need to provide it the static snapshot area
1578 * our caller passed to us. The pointer returned is actually the same
1579 * one passed to it, but we avoid assuming that here.
1580 */
1582 NULL, InvalidPid);
1583
1585 return snapshot; /* no concurrent r/w xacts; it's safe */
1586
1588
1589 /*
1590 * Wait for concurrent transactions to finish. Stop early if one of
1591 * them marked us as conflicted.
1592 */
1596 {
1600 }
1602
1604 {
1606 break; /* success */
1607 }
1608
1610
1611 /* else, need to retry... */
1614 errmsg_internal("deferrable snapshot was unsafe; trying a new one")));
1615 ReleasePredicateLocks(false, false);
1616 }
1617
1618 /*
1619 * Now we have a safe snapshot, so we don't need to do any further checks.
1620 */
1622 ReleasePredicateLocks(false, true);
1623
1624 return snapshot;
1625}

References Assert, DEBUG2, dlist_is_empty(), ereport, errcode(), ERRCODE_T_R_SERIALIZATION_FAILURE, errmsg_internal(), fb(), SERIALIZABLEXACT::flags, GetSerializableTransactionSnapshotInt(), InvalidPid, InvalidSerializableXact, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MySerializableXact, SERIALIZABLEXACT::possibleUnsafeConflicts, ProcWaitForSignal(), ReleasePredicateLocks(), SXACT_FLAG_DEFERRABLE_WAITING, SxactIsROSafe, SxactIsROUnsafe, XactDeferrable, and XactReadOnly.

Referenced by GetSerializableTransactionSnapshot().

◆ GetSafeSnapshotBlockingPids()

int GetSafeSnapshotBlockingPids ( int  blocked_pid,
int output,
int  output_size 
)

Definition at line 1637 of file predicate.c.

1638{
1639 int num_written = 0;
1640 dlist_iter iter;
1642
1644
1645 /* Find blocked_pid's SERIALIZABLEXACT by linear search. */
1647 {
1649 dlist_container(SERIALIZABLEXACT, xactLink, iter.cur);
1650
1651 if (sxact->pid == blocked_pid)
1652 {
1654 break;
1655 }
1656 }
1657
1658 /* Did we find it, and is it currently waiting in GetSafeSnapshot? */
1660 {
1661 /* Traverse the list of possible unsafe conflicts collecting PIDs. */
1662 dlist_foreach(iter, &blocking_sxact->possibleUnsafeConflicts)
1663 {
1665 dlist_container(RWConflictData, inLink, iter.cur);
1666
1667 output[num_written++] = possibleUnsafeConflict->sxactOut->pid;
1668
1669 if (num_written >= output_size)
1670 break;
1671 }
1672 }
1673
1675
1676 return num_written;
1677}

References PredXactListData::activeList, dlist_iter::cur, dlist_container, dlist_foreach, fb(), LW_SHARED, LWLockAcquire(), LWLockRelease(), output, PredXact, and SxactIsDeferrableWaiting.

Referenced by pg_isolation_test_session_is_blocked(), and pg_safe_snapshot_blocking_pids().

◆ GetSerializableTransactionSnapshot()

Snapshot GetSerializableTransactionSnapshot ( Snapshot  snapshot)

Definition at line 1691 of file predicate.c.

1692{
1694
1695 /*
1696 * Can't use serializable mode while recovery is still active, as it is,
1697 * for example, on a hot standby. We could get here despite the check in
1698 * check_transaction_isolation() if default_transaction_isolation is set
1699 * to serializable, so phrase the hint accordingly.
1700 */
1701 if (RecoveryInProgress())
1702 ereport(ERROR,
1704 errmsg("cannot use serializable mode in a hot standby"),
1705 errdetail("\"default_transaction_isolation\" is set to \"serializable\"."),
1706 errhint("You can use \"SET default_transaction_isolation = 'repeatable read'\" to change the default.")));
1707
1708 /*
1709 * A special optimization is available for SERIALIZABLE READ ONLY
1710 * DEFERRABLE transactions -- we can wait for a suitable snapshot and
1711 * thereby avoid all SSI overhead once it's running.
1712 */
1714 return GetSafeSnapshot(snapshot);
1715
1717 NULL, InvalidPid);
1718}

References Assert, ereport, errcode(), errdetail(), errhint(), errmsg, ERROR, fb(), GetSafeSnapshot(), GetSerializableTransactionSnapshotInt(), InvalidPid, IsolationIsSerializable, RecoveryInProgress(), XactDeferrable, and XactReadOnly.

Referenced by GetTransactionSnapshot().

◆ GetSerializableTransactionSnapshotInt()

static Snapshot GetSerializableTransactionSnapshotInt ( Snapshot  snapshot,
VirtualTransactionId sourcevxid,
int  sourcepid 
)
static

Definition at line 1773 of file predicate.c.

1776{
1777 PGPROC *proc;
1780 *othersxact;
1781
1782 /* We only do this for serializable transactions. Once. */
1784
1786
1787 /*
1788 * Since all parts of a serializable transaction must use the same
1789 * snapshot, it is too late to establish one after a parallel operation
1790 * has begun.
1791 */
1792 if (IsInParallelMode())
1793 elog(ERROR, "cannot establish serializable snapshot during a parallel operation");
1794
1795 proc = MyProc;
1796 Assert(proc != NULL);
1797 GET_VXID_FROM_PGPROC(vxid, *proc);
1798
1799 /*
1800 * First we get the sxact structure, which may involve looping and access
1801 * to the "finished" list to free a structure for use.
1802 *
1803 * We must hold SerializableXactHashLock when taking/checking the snapshot
1804 * to avoid race conditions, for much the same reasons that
1805 * GetSnapshotData takes the ProcArrayLock. Since we might have to
1806 * release SerializableXactHashLock to call SummarizeOldestCommittedSxact,
1807 * this means we have to create the sxact first, which is a bit annoying
1808 * (in particular, an elog(ERROR) in procarray.c would cause us to leak
1809 * the sxact). Consider refactoring to avoid this.
1810 */
1811#ifdef TEST_SUMMARIZE_SERIAL
1813#endif
1815 do
1816 {
1818 /* If null, push out committed sxact to SLRU summary & retry. */
1819 if (!sxact)
1820 {
1824 }
1825 } while (!sxact);
1826
1827 /* Get the snapshot, or check that it's safe to use */
1828 if (!sourcevxid)
1829 snapshot = GetSnapshotData(snapshot);
1830 else if (!ProcArrayInstallImportedXmin(snapshot->xmin, sourcevxid))
1831 {
1834 ereport(ERROR,
1836 errmsg("could not import the requested snapshot"),
1837 errdetail("The source process with PID %d is not running anymore.",
1838 sourcepid)));
1839 }
1840
1841 /*
1842 * If there are no serializable transactions which are not read-only, we
1843 * can "opt out" of predicate locking and conflict checking for a
1844 * read-only transaction.
1845 *
1846 * The reason this is safe is that a read-only transaction can only become
1847 * part of a dangerous structure if it overlaps a writable transaction
1848 * which in turn overlaps a writable transaction which committed before
1849 * the read-only transaction started. A new writable transaction can
1850 * overlap this one, but it can't meet the other condition of overlapping
1851 * a transaction which committed before this one started.
1852 */
1854 {
1857 return snapshot;
1858 }
1859
1860 /* Initialize the structure. */
1861 sxact->vxid = vxid;
1862 sxact->SeqNo.lastCommitBeforeSnapshot = PredXact->LastSxactCommitSeqNo;
1863 sxact->prepareSeqNo = InvalidSerCommitSeqNo;
1864 sxact->commitSeqNo = InvalidSerCommitSeqNo;
1865 dlist_init(&(sxact->outConflicts));
1866 dlist_init(&(sxact->inConflicts));
1867 dlist_init(&(sxact->possibleUnsafeConflicts));
1868 sxact->topXid = GetTopTransactionIdIfAny();
1869 sxact->finishedBefore = InvalidTransactionId;
1870 sxact->xmin = snapshot->xmin;
1871 sxact->pid = MyProcPid;
1872 sxact->pgprocno = MyProcNumber;
1873 dlist_init(&sxact->predicateLocks);
1874 dlist_node_init(&sxact->finishedLink);
1875 sxact->flags = 0;
1876 if (XactReadOnly)
1877 {
1878 dlist_iter iter;
1879
1880 sxact->flags |= SXACT_FLAG_READ_ONLY;
1881
1882 /*
1883 * Register all concurrent r/w transactions as possible conflicts; if
1884 * all of them commit without any outgoing conflicts to earlier
1885 * transactions then this snapshot can be deemed safe (and we can run
1886 * without tracking predicate locks).
1887 */
1889 {
1891
1895 {
1897 }
1898 }
1899
1900 /*
1901 * If we didn't find any possibly unsafe conflicts because every
1902 * uncommitted writable transaction turned out to be doomed, then we
1903 * can "opt out" immediately. See comments above the earlier check
1904 * for PredXact->WritableSxactCount == 0.
1905 */
1906 if (dlist_is_empty(&sxact->possibleUnsafeConflicts))
1907 {
1910 return snapshot;
1911 }
1912 }
1913 else
1914 {
1918 }
1919
1920 /* Maintain serializable global xmin info. */
1922 {
1924 PredXact->SxactGlobalXmin = snapshot->xmin;
1926 SerialSetActiveSerXmin(snapshot->xmin);
1927 }
1928 else if (TransactionIdEquals(snapshot->xmin, PredXact->SxactGlobalXmin))
1929 {
1932 }
1933 else
1934 {
1936 }
1937
1939 MyXactDidWrite = false; /* haven't written anything yet */
1940
1942
1944
1945 return snapshot;
1946}

References PredXactListData::activeList, Assert, CreateLocalPredicateLockHash(), CreatePredXact(), dlist_iter::cur, dlist_container, dlist_foreach, dlist_init(), dlist_is_empty(), dlist_node_init(), elog, ereport, errcode(), errdetail(), errmsg, ERROR, fb(), GET_VXID_FROM_PGPROC, GetSnapshotData(), GetTopTransactionIdIfAny(), InvalidSerCommitSeqNo, InvalidSerializableXact, InvalidTransactionId, IsInParallelMode(), PredXactListData::LastSxactCommitSeqNo, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), max_prepared_xacts, MaxBackends, MyProc, MyProcNumber, MyProcPid, MySerializableXact, MyXactDidWrite, PredXact, ProcArrayInstallImportedXmin(), RecoveryInProgress(), ReleasePredXact(), SerialSetActiveSerXmin(), SetPossibleUnsafeConflict(), SummarizeOldestCommittedSxact(), SXACT_FLAG_READ_ONLY, PredXactListData::SxactGlobalXmin, PredXactListData::SxactGlobalXminCount, SxactIsCommitted, SxactIsDoomed, SxactIsReadOnly, TransactionIdEquals, TransactionIdFollows(), TransactionIdIsValid, PredXactListData::WritableSxactCount, XactReadOnly, and SnapshotData::xmin.

Referenced by GetSafeSnapshot(), GetSerializableTransactionSnapshot(), and SetSerializableTransactionSnapshot().

◆ MaxPredicateChildLocks()

static int MaxPredicateChildLocks ( const PREDICATELOCKTARGETTAG * tag)
static

Definition at line 2298 of file predicate.c.

2299{
2300 switch (GET_PREDICATELOCKTARGETTAG_TYPE(*tag))
2301 {
2307
2308 case PREDLOCKTAG_PAGE:
2310
2311 case PREDLOCKTAG_TUPLE:
2312
2313 /*
2314 * not reachable: nothing is finer-granularity than a tuple, so we
2315 * should never try to promote to it.
2316 */
2317 Assert(false);
2318 return 0;
2319 }
2320
2321 /* not reachable */
2322 Assert(false);
2323 return 0;
2324}

References Assert, GET_PREDICATELOCKTARGETTAG_TYPE, max_predicate_locks_per_page, max_predicate_locks_per_relation, max_predicate_locks_per_xact, PREDLOCKTAG_PAGE, PREDLOCKTAG_RELATION, and PREDLOCKTAG_TUPLE.

Referenced by CheckAndPromotePredicateLockRequest().

◆ OnConflict_CheckForSerializationFailure()

static void OnConflict_CheckForSerializationFailure ( const SERIALIZABLEXACT * reader,
SERIALIZABLEXACT * writer 
)
static

Definition at line 4545 of file predicate.c.

4547{
4548 bool failure;
4549
4551
4552 failure = false;
4553
4554 /*------------------------------------------------------------------------
4555 * Check for already-committed writer with rw-conflict out flagged
4556 * (conflict-flag on W means that T2 committed before W):
4557 *
4558 * R ------> W ------> T2
4559 * rw rw
4560 *
4561 * That is a dangerous structure, so we must abort. (Since the writer
4562 * has already committed, we must be the reader.)
4563 *------------------------------------------------------------------------
4564 */
4567 failure = true;
4568
4569 /*------------------------------------------------------------------------
4570 * Check whether the writer has become a pivot with an out-conflict
4571 * committed transaction (T2), and T2 committed first:
4572 *
4573 * R ------> W ------> T2
4574 * rw rw
4575 *
4576 * Because T2 must've committed first, there is no anomaly if:
4577 * - the reader committed before T2
4578 * - the writer committed before T2
4579 * - the reader is a READ ONLY transaction and the reader was concurrent
4580 * with T2 (= reader acquired its snapshot before T2 committed)
4581 *
4582 * We also handle the case that T2 is prepared but not yet committed
4583 * here. In that case T2 has already checked for conflicts, so if it
4584 * commits first, making the above conflict real, it's too late for it
4585 * to abort.
4586 *------------------------------------------------------------------------
4587 */
4589 failure = true;
4590 else if (!failure)
4591 {
4592 dlist_iter iter;
4593
4594 dlist_foreach(iter, &writer->outConflicts)
4595 {
4597 dlist_container(RWConflictData, outLink, iter.cur);
4598 SERIALIZABLEXACT *t2 = conflict->sxactIn;
4599
4600 if (SxactIsPrepared(t2)
4601 && (!SxactIsCommitted(reader)
4602 || t2->prepareSeqNo <= reader->commitSeqNo)
4604 || t2->prepareSeqNo <= writer->commitSeqNo)
4605 && (!SxactIsReadOnly(reader)
4606 || t2->prepareSeqNo <= reader->SeqNo.lastCommitBeforeSnapshot))
4607 {
4608 failure = true;
4609 break;
4610 }
4611 }
4612 }
4613
4614 /*------------------------------------------------------------------------
4615 * Check whether the reader has become a pivot with a writer
4616 * that's committed (or prepared):
4617 *
4618 * T0 ------> R ------> W
4619 * rw rw
4620 *
4621 * Because W must've committed first for an anomaly to occur, there is no
4622 * anomaly if:
4623 * - T0 committed before the writer
4624 * - T0 is READ ONLY, and overlaps the writer
4625 *------------------------------------------------------------------------
4626 */
4627 if (!failure && SxactIsPrepared(writer) && !SxactIsReadOnly(reader))
4628 {
4629 if (SxactHasSummaryConflictIn(reader))
4630 {
4631 failure = true;
4632 }
4633 else
4634 {
4635 dlist_iter iter;
4636
4637 /*
4638 * The unconstify is needed as we have no const version of
4639 * dlist_foreach().
4640 */
4641 dlist_foreach(iter, &unconstify(SERIALIZABLEXACT *, reader)->inConflicts)
4642 {
4643 const RWConflict conflict =
4644 dlist_container(RWConflictData, inLink, iter.cur);
4645 const SERIALIZABLEXACT *t0 = conflict->sxactOut;
4646
4647 if (!SxactIsDoomed(t0)
4648 && (!SxactIsCommitted(t0)
4649 || t0->commitSeqNo >= writer->prepareSeqNo)
4650 && (!SxactIsReadOnly(t0)
4651 || t0->SeqNo.lastCommitBeforeSnapshot >= writer->prepareSeqNo))
4652 {
4653 failure = true;
4654 break;
4655 }
4656 }
4657 }
4658 }
4659
4660 if (failure)
4661 {
4662 /*
4663 * We have to kill a transaction to avoid a possible anomaly from
4664 * occurring. If the writer is us, we can just ereport() to cause a
4665 * transaction abort. Otherwise we flag the writer for termination,
4666 * causing it to abort when it tries to commit. However, if the writer
4667 * has already prepared, we can't abort it anymore, so we have to kill
4668 * the reader instead.
4669 */
4671 {
4673 ereport(ERROR,
4675 errmsg("could not serialize access due to read/write dependencies among transactions"),
4676 errdetail_internal("Reason code: Canceled on identification as a pivot, during write."),
4677 errhint("The transaction might succeed if retried.")));
4678 }
4679 else if (SxactIsPrepared(writer))
4680 {
4682
4683 /* if we're not the writer, we have to be the reader */
4684 Assert(MySerializableXact == reader);
4685 ereport(ERROR,
4687 errmsg("could not serialize access due to read/write dependencies among transactions"),
4688 errdetail_internal("Reason code: Canceled on conflict out to pivot %u, during read.", writer->topXid),
4689 errhint("The transaction might succeed if retried.")));
4690 }
4691 writer->flags |= SXACT_FLAG_DOOMED;
4692 }
4693}

References Assert, SERIALIZABLEXACT::commitSeqNo, dlist_iter::cur, dlist_container, dlist_foreach, ereport, errcode(), ERRCODE_T_R_SERIALIZATION_FAILURE, errdetail_internal(), errhint(), errmsg, ERROR, fb(), SERIALIZABLEXACT::lastCommitBeforeSnapshot, LWLockHeldByMe(), LWLockRelease(), MySerializableXact, SERIALIZABLEXACT::SeqNo, SXACT_FLAG_DOOMED, SxactHasConflictOut, SxactHasSummaryConflictIn, SxactHasSummaryConflictOut, SxactIsCommitted, SxactIsDoomed, SxactIsPrepared, SxactIsReadOnly, and unconstify.

Referenced by FlagRWConflict().

◆ PageIsPredicateLocked()

◆ PostPrepare_PredicateLocks()

◆ PreCommit_CheckForSerializationFailure()

void PreCommit_CheckForSerializationFailure ( void  )

Definition at line 4712 of file predicate.c.

4713{
4715
4717 return;
4718
4720
4722
4723 /*
4724 * Check if someone else has already decided that we need to die. Since
4725 * we set our own DOOMED flag when partially releasing, ignore in that
4726 * case.
4727 */
4730 {
4732 ereport(ERROR,
4734 errmsg("could not serialize access due to read/write dependencies among transactions"),
4735 errdetail_internal("Reason code: Canceled on identification as a pivot, during commit attempt."),
4736 errhint("The transaction might succeed if retried.")));
4737 }
4738
4740 {
4743
4744 if (!SxactIsCommitted(nearConflict->sxactOut)
4745 && !SxactIsDoomed(nearConflict->sxactOut))
4746 {
4748
4749 dlist_foreach(far_iter, &nearConflict->sxactOut->inConflicts)
4750 {
4753
4754 if (farConflict->sxactOut == MySerializableXact
4755 || (!SxactIsCommitted(farConflict->sxactOut)
4756 && !SxactIsReadOnly(farConflict->sxactOut)
4757 && !SxactIsDoomed(farConflict->sxactOut)))
4758 {
4759 /*
4760 * Normally, we kill the pivot transaction to make sure we
4761 * make progress if the failing transaction is retried.
4762 * However, we can't kill it if it's already prepared, so
4763 * in that case we commit suicide instead.
4764 */
4765 if (SxactIsPrepared(nearConflict->sxactOut))
4766 {
4768 ereport(ERROR,
4770 errmsg("could not serialize access due to read/write dependencies among transactions"),
4771 errdetail_internal("Reason code: Canceled on commit attempt with conflict in from prepared pivot."),
4772 errhint("The transaction might succeed if retried.")));
4773 }
4774 nearConflict->sxactOut->flags |= SXACT_FLAG_DOOMED;
4775 break;
4776 }
4777 }
4778 }
4779 }
4780
4783
4785}

References Assert, dlist_container, dlist_foreach, ereport, errcode(), ERRCODE_T_R_SERIALIZATION_FAILURE, errdetail_internal(), errhint(), errmsg, ERROR, fb(), SERIALIZABLEXACT::flags, SERIALIZABLEXACT::inConflicts, InvalidSerializableXact, IsolationIsSerializable, PredXactListData::LastSxactCommitSeqNo, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MySerializableXact, PredXact, SERIALIZABLEXACT::prepareSeqNo, SXACT_FLAG_DOOMED, SXACT_FLAG_PREPARED, SxactIsCommitted, SxactIsDoomed, SxactIsPartiallyReleased, SxactIsPrepared, and SxactIsReadOnly.

Referenced by CommitTransaction(), and PrepareTransaction().

◆ predicatelock_hash()

static uint32 predicatelock_hash ( const void * key,
Size  keysize 
)
static

Definition at line 1430 of file predicate.c.

1431{
1432 const PREDICATELOCKTAG *predicatelocktag = (const PREDICATELOCKTAG *) key;
1434
1435 Assert(keysize == sizeof(PREDICATELOCKTAG));
1436
1437 /* Look into the associated target object, and compute its hash code */
1439
1441}

References Assert, fb(), PredicateLockHashCodeFromTargetHashCode, and PredicateLockTargetTagHashCode.

Referenced by PredicateLockShmemInit().

◆ predicatelock_twophase_recover()

void predicatelock_twophase_recover ( FullTransactionId  fxid,
uint16  info,
void * recdata,
uint32  len 
)

Definition at line 4918 of file predicate.c.

4920{
4923
4925
4926 record = (TwoPhasePredicateRecord *) recdata;
4927
4929 (record->type == TWOPHASEPREDICATERECORD_LOCK));
4930
4931 if (record->type == TWOPHASEPREDICATERECORD_XACT)
4932 {
4933 /* Per-transaction record. Set up a SERIALIZABLEXACT. */
4934 TwoPhasePredicateXactRecord *xactRecord;
4938 bool found;
4939
4940 xactRecord = (TwoPhasePredicateXactRecord *) &record->data.xactRecord;
4941
4944 if (!sxact)
4945 ereport(ERROR,
4947 errmsg("out of shared memory")));
4948
4949 /* vxid for a prepared xact is INVALID_PROC_NUMBER/xid; no pid */
4950 sxact->vxid.procNumber = INVALID_PROC_NUMBER;
4951 sxact->vxid.localTransactionId = (LocalTransactionId) xid;
4952 sxact->pid = 0;
4953 sxact->pgprocno = INVALID_PROC_NUMBER;
4954
4955 /* a prepared xact hasn't committed yet */
4956 sxact->prepareSeqNo = RecoverySerCommitSeqNo;
4957 sxact->commitSeqNo = InvalidSerCommitSeqNo;
4958 sxact->finishedBefore = InvalidTransactionId;
4959
4960 sxact->SeqNo.lastCommitBeforeSnapshot = RecoverySerCommitSeqNo;
4961
4962 /*
4963 * Don't need to track this; no transactions running at the time the
4964 * recovered xact started are still active, except possibly other
4965 * prepared xacts and we don't care whether those are RO_SAFE or not.
4966 */
4967 dlist_init(&(sxact->possibleUnsafeConflicts));
4968
4969 dlist_init(&(sxact->predicateLocks));
4970 dlist_node_init(&sxact->finishedLink);
4971
4972 sxact->topXid = xid;
4973 sxact->xmin = xactRecord->xmin;
4974 sxact->flags = xactRecord->flags;
4976 if (!SxactIsReadOnly(sxact))
4977 {
4981 }
4982
4983 /*
4984 * We don't know whether the transaction had any conflicts or not, so
4985 * we'll conservatively assume that it had both a conflict in and a
4986 * conflict out, and represent that with the summary conflict flags.
4987 */
4988 dlist_init(&(sxact->outConflicts));
4989 dlist_init(&(sxact->inConflicts));
4992
4993 /* Register the transaction's xid */
4994 sxidtag.xid = xid;
4996 &sxidtag,
4997 HASH_ENTER, &found);
4998 Assert(sxid != NULL);
4999 Assert(!found);
5000 sxid->myXact = sxact;
5001
5002 /*
5003 * Update global xmin. Note that this is a special case compared to
5004 * registering a normal transaction, because the global xmin might go
5005 * backwards. That's OK, because until recovery is over we're not
5006 * going to complete any transactions or create any non-prepared
5007 * transactions, so there's no danger of throwing away anything that is
5007 * still needed.
5008 */
5011 {
5015 }
5017 {
5020 }
5021
5023 }
5024 else if (record->type == TWOPHASEPREDICATERECORD_LOCK)
5025 {
5026 /* Lock record. Recreate the PREDICATELOCK */
5027 TwoPhasePredicateLockRecord *lockRecord;
5032
5033 lockRecord = (TwoPhasePredicateLockRecord *) &record->data.lockRecord;
5034 targettaghash = PredicateLockTargetTagHashCode(&lockRecord->target);
5035
5037 sxidtag.xid = xid;
5038 sxid = (SERIALIZABLEXID *)
5041
5042 Assert(sxid != NULL);
5043 sxact = sxid->myXact;
5045
5046 CreatePredicateLock(&lockRecord->target, targettaghash, sxact);
5047 }
5048}

References Assert, CreatePredicateLock(), CreatePredXact(), TwoPhasePredicateRecord::data, dlist_init(), dlist_node_init(), ereport, errcode(), errmsg, ERROR, fb(), TwoPhasePredicateXactRecord::flags, HASH_ENTER, HASH_FIND, hash_search(), INVALID_PROC_NUMBER, InvalidSerCommitSeqNo, InvalidSerializableXact, InvalidTransactionId, len, TwoPhasePredicateRecord::lockRecord, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), max_prepared_xacts, MaxBackends, PredicateLockTargetTagHashCode, PredXact, RecoverySerCommitSeqNo, SerializableXidHash, SerialSetActiveSerXmin(), SXACT_FLAG_SUMMARY_CONFLICT_IN, SXACT_FLAG_SUMMARY_CONFLICT_OUT, PredXactListData::SxactGlobalXmin, PredXactListData::SxactGlobalXminCount, SxactIsPrepared, SxactIsReadOnly, TwoPhasePredicateLockRecord::target, TransactionIdEquals, TransactionIdFollows(), TransactionIdIsValid, TWOPHASEPREDICATERECORD_LOCK, TWOPHASEPREDICATERECORD_XACT, TwoPhasePredicateRecord::type, PredXactListData::WritableSxactCount, TwoPhasePredicateRecord::xactRecord, XidFromFullTransactionId, and TwoPhasePredicateXactRecord::xmin.

◆ PredicateLockAcquire()

static void PredicateLockAcquire ( const PREDICATELOCKTARGETTAG targettag)
static

Definition at line 2526 of file predicate.c.

2527{
2529 bool found;
2531
2532 /* Do we have the lock already, or a covering lock? */
2534 return;
2535
2537 return;
2538
2539 /* the same hash and LW lock apply to the lock target and the local lock. */
2541
2542 /* Acquire lock in local table */
2546 HASH_ENTER, &found);
2547 locallock->held = true;
2548 if (!found)
2549 locallock->childLocks = 0;
2550
2551 /* Actually create the lock */
2553
2554 /*
2555 * Lock has been acquired. Check whether it should be promoted to a
2556 * coarser granularity, or whether there are finer-granularity locks to
2557 * clean up.
2558 */
2560 {
2561 /*
2562 * Lock request was promoted to a coarser-granularity lock, and that
2563 * lock was acquired. It will delete this lock and any of its
2564 * children, so we're done.
2565 */
2566 }
2567 else
2568 {
2569 /* Clean up any finer-granularity locks */
2572 }
2573}

References CheckAndPromotePredicateLockRequest(), CoarserLockCovers(), CreatePredicateLock(), DeleteChildTargetLocks(), fb(), GET_PREDICATELOCKTARGETTAG_TYPE, HASH_ENTER, hash_search_with_hash_value(), LocalPredicateLockHash, MySerializableXact, PredicateLockExists(), PredicateLockTargetTagHashCode, and PREDLOCKTAG_TUPLE.

Referenced by CheckAndPromotePredicateLockRequest(), PredicateLockPage(), PredicateLockRelation(), and PredicateLockTID().

◆ PredicateLockExists()

static bool PredicateLockExists ( const PREDICATELOCKTARGETTAG * targettag)
static

Definition at line 2054 of file predicate.c.

2055{
2056 LOCALPREDICATELOCK *lock;
2057
2058 /* check local hash table */
2060 targettag,
2061 HASH_FIND, NULL);
2062
2063 if (!lock)
2064 return false;
2065
2066 /*
2067 * Found entry in the table, but still need to check whether it's actually
2068 * held -- it could just be a parent of some held lock.
2069 */
2070 return lock->held;
2071}

References fb(), HASH_FIND, hash_search(), LOCALPREDICATELOCK::held, and LocalPredicateLockHash.

Referenced by CoarserLockCovers(), PredicateLockAcquire(), and PredicateLockTID().

◆ PredicateLockingNeededForRelation()

static bool PredicateLockingNeededForRelation ( Relation  relation)
inlinestatic

◆ PredicateLockPage()

◆ PredicateLockPageCombine()

void PredicateLockPageCombine ( Relation  relation,
BlockNumber  oldblkno,
BlockNumber  newblkno 
)

Definition at line 3238 of file predicate.c.

3240{
3241 /*
3242 * Page combines differ from page splits in that we ought to be able to
3243 * remove the locks on the old page after transferring them to the new
3244 * page, instead of duplicating them. However, because we can't edit other
3245 * backends' local lock tables, removing the old lock would leave them
3246 * with an entry in their LocalPredicateLockHash for a lock they're not
3247 * holding, which isn't acceptable. So we wind up having to do the same
3248 * work as a page split, acquiring a lock on the new page and keeping the
3249 * old page locked too. That can lead to some false positives, but should
3250 * be rare in practice.
3251 */
3253}

References fb(), and PredicateLockPageSplit().

Referenced by _bt_mark_page_halfdead(), and ginDeletePostingPage().

◆ PredicateLockPageSplit()

void PredicateLockPageSplit ( Relation  relation,
BlockNumber  oldblkno,
BlockNumber  newblkno 
)

Definition at line 3153 of file predicate.c.

3155{
3158 bool success;
3159
3160 /*
3161 * Bail out quickly if there are no serializable transactions running.
3162 *
3163 * It's safe to do this check without taking any additional locks. Even if
3164 * a serializable transaction starts concurrently, we know it can't take
3165 * any SIREAD locks on the page being split because the caller is holding
3166 * the associated buffer page lock. Memory reordering isn't an issue; the
3167 * memory barrier in the LWLock acquisition guarantees that this read
3168 * occurs while the buffer page lock is held.
3169 */
3171 return;
3172
3173 if (!PredicateLockingNeededForRelation(relation))
3174 return;
3175
3179
3181 relation->rd_locator.dbOid,
3182 relation->rd_id,
3183 oldblkno);
3185 relation->rd_locator.dbOid,
3186 relation->rd_id,
3187 newblkno);
3188
3190
3191 /*
3192 * Try copying the locks over to the new page's tag, creating it if
3193 * necessary.
3194 */
3197 false);
3198
3199 if (!success)
3200 {
3201 /*
3202 * No more predicate lock entries are available. Failure isn't an
3203 * option here, so promote the page lock to a relation lock.
3204 */
3205
3206 /* Get the parent relation lock's lock tag */
3208 &newtargettag);
3209 Assert(success);
3210
3211 /*
3212 * Move the locks to the parent. This shouldn't fail.
3213 *
3214 * Note that here we are removing locks held by other backends,
3215 * leading to a possible inconsistency in their local lock hash table.
3216 * This is OK because we're replacing it with a lock that covers the
3217 * old one.
3218 */
3221 true);
3222 Assert(success);
3223 }
3224
3226}

References Assert, BlockNumberIsValid(), RelFileLocator::dbOid, fb(), GetParentPredicateLockTag(), LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), PredicateLockingNeededForRelation(), PredXact, RelationData::rd_id, RelationData::rd_locator, SET_PREDICATELOCKTARGETTAG_PAGE, success, PredXactListData::SxactGlobalXmin, TransactionIdIsValid, and TransferPredicateLocksToNewTarget().

Referenced by _bt_insertonpg(), _hash_splitbucket(), createPostingTree(), ginPlaceToPage(), gistplacetopage(), and PredicateLockPageCombine().

◆ PredicateLockRelation()

void PredicateLockRelation ( Relation  relation,
Snapshot  snapshot 
)

◆ PredicateLockShmemInit()

void PredicateLockShmemInit ( void  )

Definition at line 1156 of file predicate.c.

1157{
1158 HASHCTL info;
1161 bool found;
1162
1163#ifndef EXEC_BACKEND
1165#endif
1166
1167 /*
1168 * Compute size of predicate lock target hashtable. Note these
1169 * calculations must agree with PredicateLockShmemSize!
1170 */
1172
1173 /*
1174 * Allocate hash table for PREDICATELOCKTARGET structs. This stores
1175 * per-predicate-lock-target information.
1176 */
1177 info.keysize = sizeof(PREDICATELOCKTARGETTAG);
1178 info.entrysize = sizeof(PREDICATELOCKTARGET);
1180
1181 PredicateLockTargetHash = ShmemInitHash("PREDICATELOCKTARGET hash",
1184 &info,
1187
1188 /*
1189 * Reserve a dummy entry in the hash table; we use it to make sure there's
1190 * always one entry available when we need to split or combine a page,
1191 * because running out of space there could mean aborting a
1192 * non-serializable transaction.
1193 */
1194 if (!IsUnderPostmaster)
1195 {
1197 HASH_ENTER, &found);
1198 Assert(!found);
1199 }
1200
1201 /* Pre-calculate the hash and partition lock of the scratch entry */
1204
1205 /*
1206 * Allocate hash table for PREDICATELOCK structs. This stores per
1207 * xact-lock-of-a-target information.
1208 */
1209 info.keysize = sizeof(PREDICATELOCKTAG);
1210 info.entrysize = sizeof(PREDICATELOCK);
1211 info.hash = predicatelock_hash;
1213
1214 /* Assume an average of 2 xacts per target */
1215 max_table_size *= 2;
1216
1217 PredicateLockHash = ShmemInitHash("PREDICATELOCK hash",
1220 &info,
1223
1224 /*
1225 * Compute size for serializable transaction hashtable. Note these
1226 * calculations must agree with PredicateLockShmemSize!
1227 */
1229
1230 /*
1231 * Allocate a list to hold information on transactions participating in
1232 * predicate locking.
1233 *
1234 * Assume an average of 10 predicate locking transactions per backend.
1235 * This allows aggressive cleanup while detail is present before data must
1236 * be summarized for storage in SLRU and the "dummy" transaction.
1237 */
1238 max_table_size *= 10;
1239
1242 sizeof(SERIALIZABLEXACT))));
1243
1244 PredXact = ShmemInitStruct("PredXactList",
1246 &found);
1247 Assert(found == IsUnderPostmaster);
1248 if (!found)
1249 {
1250 int i;
1251
1252 /* clean everything, both the header and the element */
1254
1265 /* Add all elements to available list, clean. */
1266 for (i = 0; i < max_table_size; i++)
1267 {
1271 }
1288 }
1289 /* This never changes, so let's keep a local copy. */
1291
1292 /*
1293 * Allocate hash table for SERIALIZABLEXID structs. This stores per-xid
1294 * information for serializable transactions which have accessed data.
1295 */
1296 info.keysize = sizeof(SERIALIZABLEXIDTAG);
1297 info.entrysize = sizeof(SERIALIZABLEXID);
1298
1299 SerializableXidHash = ShmemInitHash("SERIALIZABLEXID hash",
1302 &info,
1305
1306 /*
1307 * Allocate space for tracking rw-conflicts in lists attached to the
1308 * transactions.
1309 *
1310 * Assume an average of 5 conflicts per transaction. Calculations suggest
1311 * that this will prevent resource exhaustion in even the most pessimal
1312 * loads up to max_connections = 200 with all 200 connections pounding the
1313 * database with serializable transactions. Beyond that, there may be
1314 * occasional transactions canceled when trying to flag conflicts. That's
1315 * probably OK.
1316 */
1317 max_table_size *= 5;
1318
1322
1323 RWConflictPool = ShmemInitStruct("RWConflictPool",
1325 &found);
1326 Assert(found == IsUnderPostmaster);
1327 if (!found)
1328 {
1329 int i;
1330
1331 /* clean everything, including the elements */
1333
1337 /* Add all elements to available list, clean. */
1338 for (i = 0; i < max_table_size; i++)
1339 {
1342 }
1343 }
1344
1345 /*
1346 * Create or attach to the header for the list of finished serializable
1347 * transactions.
1348 */
1350 ShmemInitStruct("FinishedSerializableTransactions",
1351 sizeof(dlist_head),
1352 &found);
1353 Assert(found == IsUnderPostmaster);
1354 if (!found)
1356
1357 /*
1358 * Initialize the SLRU storage for old committed serializable
1359 * transactions.
1360 */
1361 SerialInit();
1362}

References PredXactListData::activeList, add_size(), Assert, PredXactListData::availableList, RWConflictPoolHeaderData::availableList, PredXactListData::CanPartialClearThrough, SERIALIZABLEXACT::commitSeqNo, CreatePredXact(), dlist_init(), dlist_node_init(), dlist_push_tail(), PredXactListData::element, RWConflictPoolHeaderData::element, HASHCTL::entrysize, fb(), SERIALIZABLEXACT::finishedBefore, SERIALIZABLEXACT::finishedLink, FinishedSerializableTransactions, FirstNormalSerCommitSeqNo, SERIALIZABLEXACT::flags, HASHCTL::hash, HASH_BLOBS, HASH_ELEM, HASH_ENTER, HASH_FIXED_SIZE, HASH_FUNCTION, HASH_PARTITION, hash_search(), PredXactListData::HavePartialClearedThrough, i, SERIALIZABLEXACT::inConflicts, INVALID_PROC_NUMBER, InvalidTransactionId, IsUnderPostmaster, HASHCTL::keysize, SERIALIZABLEXACT::lastCommitBeforeSnapshot, PredXactListData::LastSxactCommitSeqNo, LWLockInitialize(), max_prepared_xacts, MaxBackends, mul_size(), NPREDICATELOCKTARGETENTS, HASHCTL::num_partitions, NUM_PREDICATELOCK_PARTITIONS, OldCommittedSxact, PredXactListData::OldCommittedSxact, SERIALIZABLEXACT::outConflicts, RWConflictData::outLink, SERIALIZABLEXACT::perXactPredicateListLock, SERIALIZABLEXACT::pgprocno, SERIALIZABLEXACT::pid, SERIALIZABLEXACT::possibleUnsafeConflicts, predicatelock_hash(), PredicateLockHash, PredicateLockHashPartitionLock, SERIALIZABLEXACT::predicateLocks, PredicateLockTargetHash, PredicateLockTargetTagHashCode, PredXact, PredXactListDataSize, SERIALIZABLEXACT::prepareSeqNo, RWConflictDataSize, RWConflictPool, RWConflictPoolHeaderDataSize, ScratchPartitionLock, ScratchTargetTag, ScratchTargetTagHash, SERIALIZABLEXACT::SeqNo, SerialInit(), SerializableXidHash, SetInvalidVirtualTransactionId, ShmemInitHash(), ShmemInitStruct(), SXACT_FLAG_COMMITTED, PredXactListData::SxactGlobalXmin, PredXactListData::SxactGlobalXminCount, SERIALIZABLEXACT::topXid, SERIALIZABLEXACT::vxid, PredXactListData::WritableSxactCount, SERIALIZABLEXACT::xactLink, and SERIALIZABLEXACT::xmin.

Referenced by CreateOrAttachShmemStructs().

◆ PredicateLockShmemSize()

Size PredicateLockShmemSize ( void  )

Definition at line 1368 of file predicate.c.

1369{
1370 Size size = 0;
1371 long max_table_size;
1372
1373 /* predicate lock target hash table */
1376 sizeof(PREDICATELOCKTARGET)));
1377
1378 /* predicate lock hash table */
1379 max_table_size *= 2;
1381 sizeof(PREDICATELOCK)));
1382
1383 /*
1384 * Since NPREDICATELOCKTARGETENTS is only an estimate, add 10% safety
1385 * margin.
1386 */
1387 size = add_size(size, size / 10);
1388
1389 /* transaction list */
1391 max_table_size *= 10;
1392 size = add_size(size, PredXactListDataSize);
1393 size = add_size(size, mul_size((Size) max_table_size,
1394 sizeof(SERIALIZABLEXACT)));
1395
1396 /* transaction xid table */
1398 sizeof(SERIALIZABLEXID)));
1399
1400 /* rw-conflict pool */
1401 max_table_size *= 5;
1403 size = add_size(size, mul_size((Size) max_table_size,
1405
1406 /* Head for list of finished serializable transactions. */
1407 size = add_size(size, sizeof(dlist_head));
1408
1409 /* Shared memory structures for SLRU tracking of old committed xids. */
1410 size = add_size(size, sizeof(SerialControlData));
1412
1413 return size;
1414}

References add_size(), fb(), hash_estimate_size(), max_prepared_xacts, MaxBackends, mul_size(), NPREDICATELOCKTARGETENTS, PredXactListDataSize, RWConflictDataSize, RWConflictPoolHeaderDataSize, serializable_buffers, and SimpleLruShmemSize().

Referenced by CalculateShmemSize().

◆ PredicateLockTID()

void PredicateLockTID ( Relation  relation,
const ItemPointerData tid,
Snapshot  snapshot,
TransactionId  tuple_xid 
)

Definition at line 2630 of file predicate.c.

2632{
2634
2635 if (!SerializationNeededForRead(relation, snapshot))
2636 return;
2637
2638 /*
2639 * Return if this xact wrote it.
2640 */
2641 if (relation->rd_index == NULL)
2642 {
2643 /* If we wrote it, we already have a write lock. */
2645 return;
2646 }
2647
2648 /*
2649 * Do quick-but-not-definitive test for a relation lock first. This will
2650 * never cause a return when the relation is *not* locked, but will
2651 * occasionally let the check continue when there really *is* a relation
2652 * level lock.
2653 */
2655 relation->rd_locator.dbOid,
2656 relation->rd_id);
2657 if (PredicateLockExists(&tag))
2658 return;
2659
2661 relation->rd_locator.dbOid,
2662 relation->rd_id,
2666}

References RelFileLocator::dbOid, fb(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), PredicateLockAcquire(), PredicateLockExists(), RelationData::rd_id, RelationData::rd_index, RelationData::rd_locator, SerializationNeededForRead(), SET_PREDICATELOCKTARGETTAG_RELATION, SET_PREDICATELOCKTARGETTAG_TUPLE, and TransactionIdIsCurrentTransactionId().

Referenced by BitmapHeapScanNextBlock(), heap_fetch(), and heap_hot_search_buffer().

◆ PredicateLockTwoPhaseFinish()

void PredicateLockTwoPhaseFinish ( FullTransactionId  fxid,
bool  isCommit 
)

Definition at line 4891 of file predicate.c.

4892{
4895
4897
4899 sxid = (SERIALIZABLEXID *)
4902
4903 /* xid will not be found if it wasn't a serializable transaction */
4904 if (sxid == NULL)
4905 return;
4906
4907 /* Release its locks */
4908 MySerializableXact = sxid->myXact;
4909 MyXactDidWrite = true; /* conservatively assume that we wrote
4910 * something */
4912}

References fb(), HASH_FIND, hash_search(), LW_SHARED, LWLockAcquire(), LWLockRelease(), MySerializableXact, MyXactDidWrite, ReleasePredicateLocks(), SerializableXidHash, SERIALIZABLEXIDTAG::xid, and XidFromFullTransactionId.

Referenced by FinishPreparedTransaction().

◆ RegisterPredicateLockingXid()

void RegisterPredicateLockingXid ( TransactionId  xid)

Definition at line 1968 of file predicate.c.

1969{
1972 bool found;
1973
1974 /*
1975 * If we're not tracking predicate lock data for this transaction, we
1976 * should ignore the request and return quickly.
1977 */
1979 return;
1980
1981 /* We should have a valid XID and be at the top level. */
1983
1985
1986 /* This should only be done once per transaction. */
1988
1990
1991 sxidtag.xid = xid;
1993 &sxidtag,
1994 HASH_ENTER, &found);
1995 Assert(!found);
1996
1997 /* Initialize the structure. */
1998 sxid->myXact = MySerializableXact;
2000}

References Assert, fb(), HASH_ENTER, hash_search(), InvalidSerializableXact, InvalidTransactionId, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MySerializableXact, SerializableXidHash, SERIALIZABLEXACT::topXid, and TransactionIdIsValid.

Referenced by AssignTransactionId().

◆ ReleaseOneSerializableXact()

static void ReleaseOneSerializableXact ( SERIALIZABLEXACT * sxact,
bool  partial,
bool  summarize 
)
static

Definition at line 3844 of file predicate.c.

3846{
3848 dlist_mutable_iter iter;
3849
3850 Assert(sxact != NULL);
3852 Assert(partial || !SxactIsOnFinishedList(sxact));
3854
3855 /*
3856 * First release all the predicate locks held by this xact (or transfer
3857 * them to OldCommittedSxact if summarize is true)
3858 */
3860 if (IsInParallelMode())
3861 LWLockAcquire(&sxact->perXactPredicateListLock, LW_EXCLUSIVE);
3862 dlist_foreach_modify(iter, &sxact->predicateLocks)
3863 {
3865 dlist_container(PREDICATELOCK, xactLink, iter.cur);
3866 PREDICATELOCKTAG tag;
3867 PREDICATELOCKTARGET *target;
3871
3872 tag = predlock->tag;
3873 target = tag.myTarget;
3874 targettag = target->tag;
3877
3879
3880 dlist_delete(&predlock->targetLink);
3881
3885 HASH_REMOVE, NULL);
3886 if (summarize)
3887 {
3888 bool found;
3889
3890 /* Fold into dummy transaction list. */
3895 HASH_ENTER_NULL, &found);
3896 if (!predlock)
3897 ereport(ERROR,
3899 errmsg("out of shared memory"),
3900 errhint("You might need to increase \"%s\".", "max_pred_locks_per_transaction")));
3901 if (found)
3902 {
3903 Assert(predlock->commitSeqNo != 0);
3904 Assert(predlock->commitSeqNo != InvalidSerCommitSeqNo);
3905 if (predlock->commitSeqNo < sxact->commitSeqNo)
3906 predlock->commitSeqNo = sxact->commitSeqNo;
3907 }
3908 else
3909 {
3911 &predlock->targetLink);
3913 &predlock->xactLink);
3914 predlock->commitSeqNo = sxact->commitSeqNo;
3915 }
3916 }
3917 else
3919
3921 }
3922
3923 /*
3924 * Rather than retail removal, just re-init the head after we've run
3925 * through the list.
3926 */
3927 dlist_init(&sxact->predicateLocks);
3928
3929 if (IsInParallelMode())
3930 LWLockRelease(&sxact->perXactPredicateListLock);
3932
3933 sxidtag.xid = sxact->topXid;
3935
3936 /* Release all outConflicts (unless 'partial' is true) */
3937 if (!partial)
3938 {
3939 dlist_foreach_modify(iter, &sxact->outConflicts)
3940 {
3942 dlist_container(RWConflictData, outLink, iter.cur);
3943
3944 if (summarize)
3945 conflict->sxactIn->flags |= SXACT_FLAG_SUMMARY_CONFLICT_IN;
3947 }
3948 }
3949
3950 /* Release all inConflicts. */
3951 dlist_foreach_modify(iter, &sxact->inConflicts)
3952 {
3954 dlist_container(RWConflictData, inLink, iter.cur);
3955
3956 if (summarize)
3957 conflict->sxactOut->flags |= SXACT_FLAG_SUMMARY_CONFLICT_OUT;
3959 }
3960
3961 /* Finally, get rid of the xid and the record of the transaction itself. */
3962 if (!partial)
3963 {
3964 if (sxidtag.xid != InvalidTransactionId)
3967 }
3968
3970}

References Assert, dlist_mutable_iter::cur, dlist_container, dlist_delete(), dlist_foreach_modify, dlist_init(), dlist_push_tail(), ereport, errcode(), errhint(), errmsg, ERROR, fb(), HASH_ENTER_NULL, HASH_REMOVE, hash_search(), hash_search_with_hash_value(), InvalidSerCommitSeqNo, InvalidTransactionId, IsInParallelMode(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockHeldByMe(), LWLockRelease(), PREDICATELOCKTAG::myTarget, PREDICATELOCKTAG::myXact, OldCommittedSxact, PredicateLockHash, PredicateLockHashCodeFromTargetHashCode, PredicateLockHashPartitionLock, SERIALIZABLEXACT::predicateLocks, PREDICATELOCKTARGET::predicateLocks, PredicateLockTargetTagHashCode, ReleasePredXact(), ReleaseRWConflict(), RemoveTargetIfNoLongerUsed(), SerializableXidHash, SXACT_FLAG_SUMMARY_CONFLICT_IN, SXACT_FLAG_SUMMARY_CONFLICT_OUT, SxactIsCommitted, SxactIsOnFinishedList, SxactIsRolledBack, and PREDICATELOCKTARGET::tag.

Referenced by ClearOldPredicateLocks(), ReleasePredicateLocks(), and SummarizeOldestCommittedSxact().

◆ ReleasePredicateLocks()

void ReleasePredicateLocks ( bool  isCommit,
bool  isReadOnlySafe 
)

Definition at line 3321 of file predicate.c.

3322{
3323 bool partiallyReleasing = false;
3324 bool needToClear;
3326 dlist_mutable_iter iter;
3327
3328 /*
3329 * We can't trust XactReadOnly here, because a transaction which started
3330 * as READ WRITE can show as READ ONLY later, e.g., within
3331 * subtransactions. We want to flag a transaction as READ ONLY if it
3332 * commits without writing so that de facto READ ONLY transactions get the
3333 * benefit of some RO optimizations, so we will use this local variable to
3334 * get some cleanup logic right which is based on whether the transaction
3335 * was declared READ ONLY at the top level.
3336 */
3338
3339 /* We can't be both committing and releasing early due to RO_SAFE. */
3341
3342 /* Are we at the end of a transaction, that is, a commit or abort? */
3343 if (!isReadOnlySafe)
3344 {
3345 /*
3346 * Parallel workers mustn't release predicate locks at the end of
3347 * their transaction. The leader will do that at the end of its
3348 * transaction.
3349 */
3350 if (IsParallelWorker())
3351 {
3353 return;
3354 }
3355
3356 /*
3357 * By the time the leader in a parallel query reaches end of
3358 * transaction, it has waited for all workers to exit.
3359 */
3361
3362 /*
3363 * If the leader in a parallel query earlier stashed a partially
3364 * released SERIALIZABLEXACT for final clean-up at end of transaction
3365 * (because workers might still have been accessing it), then it's
3366 * time to restore it.
3367 */
3369 {
3374 }
3375 }
3376
3378 {
3380 return;
3381 }
3382
3384
3385 /*
3386 * If the transaction is committing, but it has been partially released
3387 * already, then treat this as a roll back. It was marked as rolled back.
3388 */
3390 isCommit = false;
3391
3392 /*
3393 * If we're called in the middle of a transaction because we discovered
3394 * that the SXACT_FLAG_RO_SAFE flag was set, then we'll partially release
3395 * it (that is, release the predicate locks and conflicts, but not the
3396 * SERIALIZABLEXACT itself) if we're the first backend to have noticed.
3397 */
3399 {
3400 /*
3401 * The leader needs to stash a pointer to it, so that it can
3402 * completely release it at end-of-transaction.
3403 */
3404 if (!IsParallelWorker())
3406
3407 /*
3408 * The first backend to reach this condition will partially release
3409 * the SERIALIZABLEXACT. All others will just clear their
3410 * backend-local state so that they stop doing SSI checks for the rest
3411 * of the transaction.
3412 */
3414 {
3417 return;
3418 }
3419 else
3420 {
3422 partiallyReleasing = true;
3423 /* ... and proceed to perform the partial release below. */
3424 }
3425 }
3431
3432 /* may not be serializable during COMMIT/ROLLBACK PREPARED */
3434
3435 /* We'd better not already be on the cleanup list. */
3437
3439
3440 /*
3441 * We don't hold XidGenLock here, assuming that TransactionId is
3442 * atomic!
3443 *
3444 * If this value is changing, we don't care that much whether we get the
3445 * old or new value -- it is just used to determine how far
3446 * SxactGlobalXmin must advance before this transaction can be fully
3447 * cleaned up. The worst that could happen is we wait for one more
3448 * transaction to complete before freeing some RAM; correctness of visible
3449 * behavior is not affected.
3450 */
3452
3453 /*
3454 * If it's not a commit it's either a rollback or a read-only transaction
3455 * flagged SXACT_FLAG_RO_SAFE, and we can clear our locks immediately.
3456 */
3457 if (isCommit)
3458 {
3461 /* Recognize implicit read-only transaction (commit without write). */
3462 if (!MyXactDidWrite)
3464 }
3465 else
3466 {
3467 /*
3468 * The DOOMED flag indicates that we intend to roll back this
3469 * transaction and so it should not cause serialization failures for
3470 * other transactions that conflict with it. Note that this flag might
3471 * already be set, if another backend marked this transaction for
3472 * abort.
3473 *
3474 * The ROLLED_BACK flag further indicates that ReleasePredicateLocks
3475 * has been called, and so the SerializableXact is eligible for
3476 * cleanup. This means it should not be considered when calculating
3477 * SxactGlobalXmin.
3478 */
3481
3482 /*
3483 * If the transaction was previously prepared, but is now failing due
3484 * to a ROLLBACK PREPARED or (hopefully very rare) error after the
3485 * prepare, clear the prepared flag. This simplifies conflict
3486 * checking.
3487 */
3489 }
3490
3492 {
3494 if (--(PredXact->WritableSxactCount) == 0)
3495 {
3496 /*
3497 * Release predicate locks and rw-conflicts in for all committed
3498 * transactions. There are no longer any transactions which might
3499 * conflict with the locks and no chance for new transactions to
3500 * overlap. Similarly, existing conflicts in can't cause pivots,
3501 * and any conflicts in which could have completed a dangerous
3502 * structure would already have caused a rollback, so any
3503 * remaining ones must be benign.
3504 */
3506 }
3507 }
3508 else
3509 {
3510 /*
3511 * Read-only transactions: clear the list of transactions that might
3512 * make us unsafe. Note that we use 'inLink' for the iteration as
3513 * opposed to 'outLink' for the r/w xacts.
3514 */
3516 {
3518 dlist_container(RWConflictData, inLink, iter.cur);
3519
3522
3524 }
3525 }
3526
3527 /* Check for conflict out to old committed transactions. */
3528 if (isCommit
3531 {
3532 /*
3533 * we don't know which old committed transaction we conflicted with,
3534 * so be conservative and use FirstNormalSerCommitSeqNo here
3535 */
3539 }
3540
3541 /*
3542 * Release all outConflicts to committed transactions. If we're rolling
3543 * back clear them all. Set SXACT_FLAG_CONFLICT_OUT if any point to
3544 * previously committed transactions.
3545 */
3547 {
3549 dlist_container(RWConflictData, outLink, iter.cur);
3550
3551 if (isCommit
3553 && SxactIsCommitted(conflict->sxactIn))
3554 {
3556 || conflict->sxactIn->prepareSeqNo < MySerializableXact->SeqNo.earliestOutConflictCommit)
3559 }
3560
3561 if (!isCommit
3562 || SxactIsCommitted(conflict->sxactIn)
3563 || (conflict->sxactIn->SeqNo.lastCommitBeforeSnapshot >= PredXact->LastSxactCommitSeqNo))
3565 }
3566
3567 /*
3568 * Release all inConflicts from committed and read-only transactions. If
3569 * we're rolling back, clear them all.
3570 */
3572 {
3574 dlist_container(RWConflictData, inLink, iter.cur);
3575
3576 if (!isCommit
3577 || SxactIsCommitted(conflict->sxactOut)
3578 || SxactIsReadOnly(conflict->sxactOut))
3580 }
3581
3583 {
3584 /*
3585 * Remove ourselves from the list of possible conflicts for concurrent
3586 * READ ONLY transactions, flagging them as unsafe if we have a
3587 * conflict out. If any are waiting DEFERRABLE transactions, wake them
3588 * up if they are known safe or known unsafe.
3589 */
3591 {
3593 dlist_container(RWConflictData, outLink, iter.cur);
3594
3595 roXact = possibleUnsafeConflict->sxactIn;
3598
3599 /* Mark conflicted if necessary. */
3600 if (isCommit
3604 <= roXact->SeqNo.lastCommitBeforeSnapshot))
3605 {
3606 /*
3607 * This releases possibleUnsafeConflict (as well as all other
3608 * possible conflicts for roXact)
3609 */
3611 }
3612 else
3613 {
3615
3616 /*
3617 * If we were the last possible conflict, flag it safe. The
3618 * transaction can now safely release its predicate locks (but
3619 * that transaction's backend has to do that itself).
3620 */
3621 if (dlist_is_empty(&roXact->possibleUnsafeConflicts))
3622 roXact->flags |= SXACT_FLAG_RO_SAFE;
3623 }
3624
3625 /*
3626 * Wake up the process for a waiting DEFERRABLE transaction if we
3627 * now know it's either safe or conflicted.
3628 */
3631 ProcSendSignal(roXact->pgprocno);
3632 }
3633 }
3634
3635 /*
3636 * Check whether it's time to clean up old transactions. This can only be
3637 * done when the last serializable transaction with the oldest xmin among
3638 * serializable transactions completes. We then find the "new oldest"
3639 * xmin and purge any transactions which finished before this transaction
3640 * was launched.
3641 *
3642 * For parallel queries in read-only transactions, it might run twice. We
3643 * only release the reference on the first call.
3644 */
3645 needToClear = false;
3646 if ((partiallyReleasing ||
3650 {
3652 if (--(PredXact->SxactGlobalXminCount) == 0)
3653 {
3655 needToClear = true;
3656 }
3657 }
3658
3660
3662
3663 /* Add this to the list of transactions to check for later cleanup. */
3664 if (isCommit)
3667
3668 /*
3669 * If we're releasing a RO_SAFE transaction in parallel mode, we'll only
3670 * partially release it. That's necessary because other backends may have
3671 * a reference to it. The leader will release the SERIALIZABLEXACT itself
3672 * at the end of the transaction after workers have stopped running.
3673 */
3674 if (!isCommit)
3677 false);
3678
3680
3681 if (needToClear)
3683
3685}

References Assert, PredXactListData::CanPartialClearThrough, ClearOldPredicateLocks(), SERIALIZABLEXACT::commitSeqNo, dlist_mutable_iter::cur, dlist_container, dlist_foreach_modify, dlist_is_empty(), dlist_push_tail(), SERIALIZABLEXACT::earliestOutConflictCommit, fb(), SERIALIZABLEXACT::finishedBefore, SERIALIZABLEXACT::finishedLink, FinishedSerializableTransactions, FirstNormalSerCommitSeqNo, SERIALIZABLEXACT::flags, FlagSxactUnsafe(), SERIALIZABLEXACT::inConflicts, InvalidSerializableXact, IsInParallelMode(), IsolationIsSerializable, IsParallelWorker, PredXactListData::LastSxactCommitSeqNo, LocalPredicateLockHash, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MySerializableXact, MyXactDidWrite, TransamVariablesData::nextXid, SERIALIZABLEXACT::outConflicts, ParallelContextActive(), SERIALIZABLEXACT::pid, SERIALIZABLEXACT::possibleUnsafeConflicts, PredXact, ProcSendSignal(), ReleaseOneSerializableXact(), ReleasePredicateLocksLocal(), ReleaseRWConflict(), SavedSerializableXact, SERIALIZABLEXACT::SeqNo, SetNewSxactGlobalXmin(), SXACT_FLAG_COMMITTED, SXACT_FLAG_CONFLICT_OUT, SXACT_FLAG_DOOMED, SXACT_FLAG_PARTIALLY_RELEASED, SXACT_FLAG_READ_ONLY, SXACT_FLAG_RO_SAFE, SXACT_FLAG_ROLLED_BACK, PredXactListData::SxactGlobalXmin, PredXactListData::SxactGlobalXminCount, SxactHasConflictOut, SxactHasSummaryConflictOut, SxactIsCommitted, SxactIsDeferrableWaiting, SxactIsDoomed, SxactIsOnFinishedList, SxactIsPartiallyReleased, SxactIsPrepared, SxactIsReadOnly, SxactIsRolledBack, SxactIsROSafe, SxactIsROUnsafe, TransactionIdEquals, TransamVariables, PredXactListData::WritableSxactCount, XidFromFullTransactionId, and SERIALIZABLEXACT::xmin.

Referenced by GetSafeSnapshot(), PredicateLockTwoPhaseFinish(), ResourceOwnerReleaseInternal(), and SerializationNeededForRead().

◆ ReleasePredicateLocksLocal()

static void ReleasePredicateLocksLocal ( void  )
static

Definition at line 3688 of file predicate.c.

3689{
3691 MyXactDidWrite = false;
3692
3693 /* Delete per-transaction lock table */
3695 {
3698 }
3699}

References fb(), hash_destroy(), InvalidSerializableXact, LocalPredicateLockHash, MySerializableXact, and MyXactDidWrite.

Referenced by ReleasePredicateLocks().

◆ ReleasePredXact()

◆ ReleaseRWConflict()

static void ReleaseRWConflict ( RWConflict  conflict)
static

◆ RemoveScratchTarget()

◆ RemoveTargetIfNoLongerUsed()

◆ RestoreScratchTarget()

◆ RWConflictExists()

static bool RWConflictExists ( const SERIALIZABLEXACT * reader,
const SERIALIZABLEXACT * writer 
)
static

Definition at line 612 of file predicate.c.

613{
614 dlist_iter iter;
615
616 Assert(reader != writer);
617
618 /* Check the ends of the purported conflict first. */
619 if (SxactIsDoomed(reader)
621 || dlist_is_empty(&reader->outConflicts)
622 || dlist_is_empty(&writer->inConflicts))
623 return false;
624
625 /*
626 * A conflict is possible; walk the list to find out.
627 *
628 * The unconstify is needed as we have no const version of
629 * dlist_foreach().
630 */
631 dlist_foreach(iter, &unconstify(SERIALIZABLEXACT *, reader)->outConflicts)
632 {
634 dlist_container(RWConflictData, outLink, iter.cur);
635
636 if (conflict->sxactIn == writer)
637 return true;
638 }
639
640 /* No conflict found. */
641 return false;
642}

References Assert, dlist_iter::cur, dlist_container, dlist_foreach, dlist_is_empty(), fb(), SERIALIZABLEXACT::outConflicts, SxactIsDoomed, and unconstify.

Referenced by CheckForSerializableConflictOut(), CheckTableForSerializableConflictIn(), CheckTargetForConflictsIn(), and SetRWConflict().

◆ serial_errdetail_for_io_error()

static int serial_errdetail_for_io_error ( const void * opaque_data)
static

Definition at line 748 of file predicate.c.

749{
750 TransactionId xid = *(const TransactionId *) opaque_data;
751
752 return errdetail("Could not access serializable CSN of transaction %u.", xid);
753}

References errdetail(), and fb().

Referenced by SerialInit().

◆ SerialAdd()

static void SerialAdd ( TransactionId  xid,
SerCommitSeqNo  minConflictCommitSeqNo 
)
static

Definition at line 869 of file predicate.c.

870{
871 TransactionId tailXid;
873 int slotno;
875 bool isNewPage;
876 LWLock *lock;
877
879
880 targetPage = SerialPage(xid);
882
883 /*
884 * In this routine, we must hold both SerialControlLock and the SLRU bank
885 * lock simultaneously while making the SLRU data catch up with the new
886 * state that we determine.
887 */
889
890 /*
891 * If 'xid' is older than the global xmin (== tailXid), there's no need to
892 * store it, after all. This can happen if the oldest transaction holding
893 * back the global xmin just finished, making 'xid' uninteresting, but
894 * ClearOldPredicateLocks() has not yet run.
895 */
896 tailXid = serialControl->tailXid;
897 if (!TransactionIdIsValid(tailXid) || TransactionIdPrecedes(xid, tailXid))
898 {
900 return;
901 }
902
903 /*
904 * If the SLRU is currently unused, zero out the whole active region from
905 * tailXid to headXid before taking it into use. Otherwise zero out only
906 * any new pages that enter the tailXid-headXid range as we advance
907 * headXid.
908 */
909 if (serialControl->headPage < 0)
910 {
911 firstZeroPage = SerialPage(tailXid);
912 isNewPage = true;
913 }
914 else
915 {
918 targetPage);
919 }
920
923 serialControl->headXid = xid;
924 if (isNewPage)
926
927 if (isNewPage)
928 {
929 /* Initialize intervening pages; might involve trading locks */
930 for (;;)
931 {
936 break;
938 LWLockRelease(lock);
939 }
940 }
941 else
942 {
945 }
946
948 SerialSlruCtl->shared->page_dirty[slotno] = true;
949
950 LWLockRelease(lock);
952}

References Assert, fb(), SerialControlData::headPage, SerialControlData::headXid, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), serialControl, SerialNextPage, SerialPage, SerialPagePrecedesLogically(), SerialSlruCtl, SerialValue, SimpleLruGetBankLock(), SimpleLruReadPage(), SimpleLruZeroPage(), SerialControlData::tailXid, TransactionIdFollows(), TransactionIdIsValid, and TransactionIdPrecedes().

Referenced by SummarizeOldestCommittedSxact().

◆ SerialGetMinConflictCommitSeqNo()

static SerCommitSeqNo SerialGetMinConflictCommitSeqNo ( TransactionId  xid)
static

Definition at line 960 of file predicate.c.

961{
962 TransactionId headXid;
963 TransactionId tailXid;
965 int slotno;
966
968
970 headXid = serialControl->headXid;
971 tailXid = serialControl->tailXid;
973
974 if (!TransactionIdIsValid(headXid))
975 return 0;
976
978
979 if (TransactionIdPrecedes(xid, tailXid)
980 || TransactionIdFollows(xid, headXid))
981 return 0;
982
983 /*
984 * The following function must be called without holding SLRU bank lock,
985 * but will return with that lock held, which must then be released.
986 */
988 SerialPage(xid), &xid);
989 val = SerialValue(slotno, xid);
991 return val;
992}

References Assert, fb(), SerialControlData::headXid, LW_SHARED, LWLockAcquire(), LWLockRelease(), serialControl, SerialPage, SerialSlruCtl, SerialValue, SimpleLruGetBankLock(), SimpleLruReadPage_ReadOnly(), SerialControlData::tailXid, TransactionIdFollows(), TransactionIdIsValid, TransactionIdPrecedes(), and val.

Referenced by CheckForSerializableConflictOut().

◆ SerialInit()

static void SerialInit ( void  )
static

Definition at line 816 of file predicate.c.

817{
818 bool found;
819
820 /*
821 * Set up SLRU management of the pg_serial data.
822 */
824 SerialSlruCtl->errdetail_for_io_error = serial_errdetail_for_io_error;
825 SimpleLruInit(SerialSlruCtl, "serializable",
826 serializable_buffers, 0, "pg_serial",
828 SYNC_HANDLER_NONE, false);
829#ifdef USE_ASSERT_CHECKING
831#endif
833
834 /*
835 * Create or attach to the SerialControl structure.
836 */
838 ShmemInitStruct("SerialControlData", sizeof(SerialControlData), &found);
839
840 Assert(found == IsUnderPostmaster);
841 if (!found)
842 {
843 /*
844 * Set control information to reflect empty SLRU.
845 */
851 }
852}

References Assert, fb(), SerialControlData::headPage, SerialControlData::headXid, InvalidTransactionId, IsUnderPostmaster, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), SERIAL_ENTRIESPERPAGE, serial_errdetail_for_io_error(), serialControl, serializable_buffers, SerialPagePrecedesLogically(), SerialSlruCtl, ShmemInitStruct(), SimpleLruInit(), SlruPagePrecedesUnitTests, SYNC_HANDLER_NONE, and SerialControlData::tailXid.

Referenced by PredicateLockShmemInit().

◆ SerializationNeededForRead()

static bool SerializationNeededForRead ( Relation  relation,
Snapshot  snapshot 
)
inline static

Definition at line 518 of file predicate.c.

519{
520 /* Nothing to do if this is not a serializable transaction */
522 return false;
523
524 /*
525 * Don't acquire locks or conflict when scanning with a special snapshot.
526 * This excludes things like CLUSTER and REINDEX. They use the wholesale
527 * functions TransferPredicateLocksToHeapRelation() and
528 * CheckTableForSerializableConflictIn() to participate in serialization,
529 * but the scans involved don't need serialization.
530 */
531 if (!IsMVCCSnapshot(snapshot))
532 return false;
533
534 /*
535 * Check if we have just become "RO-safe". If we have, immediately release
536 * all locks as they're not needed anymore. This also resets
537 * MySerializableXact, so that subsequent calls to this function can exit
538 * quickly.
539 *
540 * A transaction is flagged as RO_SAFE if all concurrent R/W transactions
541 * commit without having conflicts out to an earlier snapshot, thus
542 * ensuring that no conflicts are possible for this transaction.
543 */
545 {
546 ReleasePredicateLocks(false, true);
547 return false;
548 }
549
550 /* Check if the relation doesn't participate in predicate locking */
552 return false;
553
554 return true; /* no excuse to skip predicate locking */
555}

References InvalidSerializableXact, IsMVCCSnapshot, MySerializableXact, PredicateLockingNeededForRelation(), ReleasePredicateLocks(), and SxactIsROSafe.

Referenced by CheckForSerializableConflictOut(), CheckForSerializableConflictOutNeeded(), PredicateLockPage(), PredicateLockRelation(), and PredicateLockTID().

◆ SerializationNeededForWrite()

static bool SerializationNeededForWrite ( Relation  relation)
inline static

Definition at line 562 of file predicate.c.

563{
564 /* Nothing to do if this is not a serializable transaction */
566 return false;
567
568 /* Check if the relation doesn't participate in predicate locking */
570 return false;
571
572 return true; /* no excuse to skip predicate locking */
573}

References InvalidSerializableXact, MySerializableXact, and PredicateLockingNeededForRelation().

Referenced by CheckForSerializableConflictIn(), and CheckTableForSerializableConflictIn().

◆ SerialPagePrecedesLogically()

static bool SerialPagePrecedesLogically ( int64  page1,
int64  page2 
)
static

◆ SerialSetActiveSerXmin()

static void SerialSetActiveSerXmin ( TransactionId  xid)
static

Definition at line 1001 of file predicate.c.

1002{
1004
1005 /*
1006 * When no sxacts are active, nothing overlaps, set the xid values to
1007 * invalid to show that there are no valid entries. Don't clear headPage,
1008 * though. A new xmin might still land on that page, and we don't want to
1009 * repeatedly zero out the same page.
1010 */
1011 if (!TransactionIdIsValid(xid))
1012 {
1016 return;
1017 }
1018
1019 /*
1020 * When we're recovering prepared transactions, the global xmin might move
1021 * backwards depending on the order they're recovered. Normally that's not
1022 * OK, but during recovery no serializable transactions will commit, so
1023 * the SLRU is empty and we can get away with it.
1024 */
1025 if (RecoveryInProgress())
1026 {
1030 {
1031 serialControl->tailXid = xid;
1032 }
1034 return;
1035 }
1036
1039
1040 serialControl->tailXid = xid;
1041
1043}

References Assert, fb(), SerialControlData::headPage, SerialControlData::headXid, InvalidTransactionId, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), RecoveryInProgress(), serialControl, SerialControlData::tailXid, TransactionIdFollows(), TransactionIdIsValid, and TransactionIdPrecedes().

Referenced by GetSerializableTransactionSnapshotInt(), predicatelock_twophase_recover(), and SetNewSxactGlobalXmin().

◆ SetNewSxactGlobalXmin()

◆ SetPossibleUnsafeConflict()

static void SetPossibleUnsafeConflict ( SERIALIZABLEXACT * roXact,
SERIALIZABLEXACT * activeXact 
)
static

Definition at line 668 of file predicate.c.

670{
672
676
680 errmsg("not enough elements in RWConflictPool to record a potential read/write conflict"),
681 errhint("You might need to run fewer transactions at a time or increase \"max_connections\".")));
682
684 dlist_delete(&conflict->outLink);
685
686 conflict->sxactOut = activeXact;
687 conflict->sxactIn = roXact;
688 dlist_push_tail(&activeXact->possibleUnsafeConflicts, &conflict->outLink);
689 dlist_push_tail(&roXact->possibleUnsafeConflicts, &conflict->inLink);
690}

References Assert, RWConflictPoolHeaderData::availableList, dlist_delete(), dlist_head_element, dlist_is_empty(), dlist_push_tail(), ereport, errcode(), errhint(), errmsg, ERROR, fb(), RWConflictPool, and SxactIsReadOnly.

Referenced by GetSerializableTransactionSnapshotInt().

◆ SetRWConflict()

static void SetRWConflict ( SERIALIZABLEXACT * reader,
SERIALIZABLEXACT * writer 
)
static

Definition at line 645 of file predicate.c.

646{
648
649 Assert(reader != writer);
650 Assert(!RWConflictExists(reader, writer));
651
655 errmsg("not enough elements in RWConflictPool to record a read/write conflict"),
656 errhint("You might need to run fewer transactions at a time or increase \"max_connections\".")));
657
659 dlist_delete(&conflict->outLink);
660
661 conflict->sxactOut = reader;
662 conflict->sxactIn = writer;
663 dlist_push_tail(&reader->outConflicts, &conflict->outLink);
664 dlist_push_tail(&writer->inConflicts, &conflict->inLink);
665}

References Assert, RWConflictPoolHeaderData::availableList, dlist_delete(), dlist_head_element, dlist_is_empty(), dlist_push_tail(), ereport, errcode(), errhint(), errmsg, ERROR, fb(), SERIALIZABLEXACT::outConflicts, RWConflictExists(), and RWConflictPool.

Referenced by FlagRWConflict().

◆ SetSerializableTransactionSnapshot()

void SetSerializableTransactionSnapshot ( Snapshot  snapshot,
VirtualTransactionId sourcevxid,
int  sourcepid 
)

Definition at line 1731 of file predicate.c.

1734{
1736
1737 /*
1738 * If this is called by parallel.c in a parallel worker, we don't want to
1739 * create a SERIALIZABLEXACT just yet because the leader's
1740 * SERIALIZABLEXACT will be installed with AttachSerializableXact(). We
1741 * also don't want to reject SERIALIZABLE READ ONLY DEFERRABLE in this
1742 * case, because the leader has already determined that the snapshot it
1743 * has passed us is safe. So there is nothing for us to do.
1744 */
1745 if (IsParallelWorker())
1746 return;
1747
1748 /*
1749 * We do not allow SERIALIZABLE READ ONLY DEFERRABLE transactions to
1750 * import snapshots, since there's no way to wait for a safe snapshot when
1751 * we're using the snap we're told to. (XXX instead of throwing an error,
1752 * we could just ignore the XactDeferrable flag?)
1753 */
1755 ereport(ERROR,
1757 errmsg("a snapshot-importing transaction must not be READ ONLY DEFERRABLE")));
1758
1760 sourcepid);
1761}

References Assert, ereport, errcode(), errmsg, ERROR, fb(), GetSerializableTransactionSnapshotInt(), IsolationIsSerializable, IsParallelWorker, XactDeferrable, and XactReadOnly.

Referenced by SetTransactionSnapshot().

◆ ShareSerializableXact()

SerializableXactHandle ShareSerializableXact ( void  )

Definition at line 5056 of file predicate.c.

5057{
5058 return MySerializableXact;
5059}

References MySerializableXact.

Referenced by InitializeParallelDSM().

◆ SummarizeOldestCommittedSxact()

static void SummarizeOldestCommittedSxact ( void  )
static

Definition at line 1512 of file predicate.c.

1513{
1515
1517
1518 /*
1519 * This function is only called if there are no sxact slots available.
1520 * Some of them must belong to old, already-finished transactions, so
1521 * there should be something in FinishedSerializableTransactions list that
1522 * we can summarize. However, there's a race condition: while we were not
1523 * holding any locks, a transaction might have ended and cleaned up all
1524 * the finished sxact entries already, freeing up their sxact slots. In
1525 * that case, we have nothing to do here. The caller will find one of the
1526 * slots released by the other backend when it retries.
1527 */
1529 {
1531 return;
1532 }
1533
1534 /*
1535 * Grab the first sxact off the finished list -- this will be the earliest
1536 * commit. Remove it from the list.
1537 */
1540 dlist_delete_thoroughly(&sxact->finishedLink);
1541
1542 /* Add to SLRU summary information. */
1545 ? sxact->SeqNo.earliestOutConflictCommit : InvalidSerCommitSeqNo);
1546
1547 /* Summarize and release the detail. */
1548 ReleaseOneSerializableXact(sxact, false, true);
1549
1551}

References dlist_delete_thoroughly(), dlist_head_element, dlist_is_empty(), fb(), FinishedSerializableTransactions, InvalidSerCommitSeqNo, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ReleaseOneSerializableXact(), SerialAdd(), SxactHasConflictOut, SxactIsReadOnly, and TransactionIdIsValid.

Referenced by GetSerializableTransactionSnapshotInt().

◆ TransferPredicateLocksToHeapRelation()

void TransferPredicateLocksToHeapRelation ( Relation  relation)

◆ TransferPredicateLocksToNewTarget()

static bool TransferPredicateLocksToNewTarget ( PREDICATELOCKTARGETTAG  oldtargettag,
PREDICATELOCKTARGETTAG  newtargettag,
bool  removeOld 
)
static

Definition at line 2739 of file predicate.c.

2742{
2748 bool found;
2749 bool outOfShmem = false;
2750
2752 LW_EXCLUSIVE));
2753
2758
2759 if (removeOld)
2760 {
2761 /*
2762 * Remove the dummy entry to give us scratch space, so we know we'll
2763 * be able to create the new lock target.
2764 */
2765 RemoveScratchTarget(false);
2766 }
2767
2768 /*
2769 * We must get the partition locks in ascending sequence to avoid
2770 * deadlocks. If old and new partitions are the same, we must request the
2771 * lock only once.
2772 */
2774 {
2778 }
2780 {
2784 }
2785 else
2787
2788 /*
2789 * Look for the old target. If not found, that's OK; no predicate locks
2790 * are affected, so we can just clean up and return. If it does exist,
2791 * walk its list of predicate locks and move or copy them to the new
2792 * target.
2793 */
2795 &oldtargettag,
2797 HASH_FIND, NULL);
2798
2799 if (oldtarget)
2800 {
2803 dlist_mutable_iter iter;
2804
2806 &newtargettag,
2808 HASH_ENTER_NULL, &found);
2809
2810 if (!newtarget)
2811 {
2812 /* Failed to allocate due to insufficient shmem */
2813 outOfShmem = true;
2814 goto exit;
2815 }
2816
2817 /* If we created a new entry, initialize it */
2818 if (!found)
2819 dlist_init(&newtarget->predicateLocks);
2820
2821 newpredlocktag.myTarget = newtarget;
2822
2823 /*
2824 * Loop through all the locks on the old target, replacing them with
2825 * locks on the new target.
2826 */
2828
2829 dlist_foreach_modify(iter, &oldtarget->predicateLocks)
2830 {
2832 dlist_container(PREDICATELOCK, targetLink, iter.cur);
2835
2836 newpredlocktag.myXact = oldpredlock->tag.myXact;
2837
2838 if (removeOld)
2839 {
2840 dlist_delete(&(oldpredlock->xactLink));
2841 dlist_delete(&(oldpredlock->targetLink));
2842
2845 &oldpredlock->tag,
2848 HASH_REMOVE, &found);
2849 Assert(found);
2850 }
2851
2858 &found);
2859 if (!newpredlock)
2860 {
2861 /* Out of shared memory. Undo what we've done so far. */
2864 outOfShmem = true;
2865 goto exit;
2866 }
2867 if (!found)
2868 {
2869 dlist_push_tail(&(newtarget->predicateLocks),
2870 &(newpredlock->targetLink));
2871 dlist_push_tail(&(newpredlocktag.myXact->predicateLocks),
2872 &(newpredlock->xactLink));
2873 newpredlock->commitSeqNo = oldCommitSeqNo;
2874 }
2875 else
2876 {
2877 if (newpredlock->commitSeqNo < oldCommitSeqNo)
2878 newpredlock->commitSeqNo = oldCommitSeqNo;
2879 }
2880
2881 Assert(newpredlock->commitSeqNo != 0);
2882 Assert((newpredlock->commitSeqNo == InvalidSerCommitSeqNo)
2883 || (newpredlock->tag.myXact == OldCommittedSxact));
2884 }
2886
2887 if (removeOld)
2888 {
2889 Assert(dlist_is_empty(&oldtarget->predicateLocks));
2891 }
2892 }
2893
2894
2895exit:
2896 /* Release partition locks in reverse order of acquisition. */
2898 {
2901 }
2903 {
2906 }
2907 else
2909
2910 if (removeOld)
2911 {
2912 /* We shouldn't run out of memory if we're moving locks */
2914
2915 /* Put the scratch entry back */
2916 RestoreScratchTarget(false);
2917 }
2918
2919 return !outOfShmem;
2920}

References Assert, PREDICATELOCK::commitSeqNo, dlist_mutable_iter::cur, DeleteLockTarget(), dlist_container, dlist_delete(), dlist_foreach_modify, dlist_init(), dlist_is_empty(), dlist_push_tail(), fb(), HASH_ENTER_NULL, HASH_FIND, HASH_REMOVE, hash_search_with_hash_value(), InvalidSerCommitSeqNo, LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockHeldByMeInMode(), LWLockRelease(), OldCommittedSxact, PredicateLockHash, PredicateLockHashCodeFromTargetHashCode, PredicateLockHashPartitionLock, PredicateLockTargetHash, PredicateLockTargetTagHashCode, RemoveScratchTarget(), RemoveTargetIfNoLongerUsed(), and RestoreScratchTarget().

Referenced by PredicateLockPageSplit().

◆ XidIsConcurrent()

static bool XidIsConcurrent ( TransactionId  xid)
static

Definition at line 3981 of file predicate.c.

3982{
3983 Snapshot snap;
3984
3987
3989
3990 if (TransactionIdPrecedes(xid, snap->xmin))
3991 return false;
3992
3993 if (TransactionIdFollowsOrEquals(xid, snap->xmax))
3994 return true;
3995
3996 return pg_lfind32(xid, snap->xip, snap->xcnt);
3997}

References Assert, fb(), GetTopTransactionIdIfAny(), GetTransactionSnapshot(), pg_lfind32(), TransactionIdEquals, TransactionIdFollowsOrEquals(), TransactionIdIsValid, and TransactionIdPrecedes().

Referenced by CheckForSerializableConflictOut().

Variable Documentation

◆ FinishedSerializableTransactions

dlist_head* FinishedSerializableTransactions
static

◆ LocalPredicateLockHash

◆ max_predicate_locks_per_page

int max_predicate_locks_per_page

Definition at line 374 of file predicate.c.

Referenced by MaxPredicateChildLocks().

◆ max_predicate_locks_per_relation

int max_predicate_locks_per_relation

Definition at line 373 of file predicate.c.

Referenced by MaxPredicateChildLocks().

◆ max_predicate_locks_per_xact

int max_predicate_locks_per_xact

Definition at line 372 of file predicate.c.

Referenced by CreateLocalPredicateLockHash(), and MaxPredicateChildLocks().

◆ MySerializableXact

◆ MyXactDidWrite

◆ OldCommittedSxact

◆ PredicateLockHash

◆ PredicateLockTargetHash

◆ PredXact

◆ RWConflictPool

RWConflictPoolHeader RWConflictPool
static

◆ SavedSerializableXact

SERIALIZABLEXACT* SavedSerializableXact = InvalidSerializableXact
static

Definition at line 432 of file predicate.c.

Referenced by ReleasePredicateLocks().

◆ ScratchPartitionLock

LWLock* ScratchPartitionLock
static

Definition at line 409 of file predicate.c.

Referenced by PredicateLockShmemInit(), RemoveScratchTarget(), and RestoreScratchTarget().

◆ ScratchTargetTag

const PREDICATELOCKTARGETTAG ScratchTargetTag = {0, 0, 0, 0}
static

Definition at line 407 of file predicate.c.

407{0, 0, 0, 0};

Referenced by PredicateLockShmemInit(), RemoveScratchTarget(), and RestoreScratchTarget().

◆ ScratchTargetTagHash

uint32 ScratchTargetTagHash
static

Definition at line 408 of file predicate.c.

Referenced by PredicateLockShmemInit(), RemoveScratchTarget(), and RestoreScratchTarget().

◆ serialControl

◆ SerializableXidHash

◆ SerialSlruCtlData

SlruCtlData SerialSlruCtlData
static

Definition at line 325 of file predicate.c.