lwlock.c
1 /*-------------------------------------------------------------------------
2  *
3  * lwlock.c
4  * Lightweight lock manager
5  *
6  * Lightweight locks are intended primarily to provide mutual exclusion of
7  * access to shared-memory data structures. Therefore, they offer both
8  * exclusive and shared lock modes (to support read/write and read-only
9  * access to a shared object). There are few other frammishes. User-level
10  * locking should be done with the full lock manager --- which depends on
11  * LWLocks to protect its shared state.
12  *
13  * In addition to exclusive and shared modes, lightweight locks can be used to
14  * wait until a variable changes value. Acquiring the lock with LWLockAcquire
15  * does not reset the variable; it keeps the value it was set to when the
16  * lock was last released, and it can be updated without releasing the
17  * lock by calling LWLockUpdateVar. LWLockWaitForVar
18  * waits for the variable to be updated, or until the lock is free. When
19  * releasing the lock with LWLockReleaseClearVar() the value can be set to an
20  * appropriate value for a free lock. The meaning of the variable is up to
21  * the caller, the lightweight lock code just assigns and compares it.
22  *
23  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
24  * Portions Copyright (c) 1994, Regents of the University of California
25  *
26  * IDENTIFICATION
27  * src/backend/storage/lmgr/lwlock.c
28  *
29  * NOTES:
30  *
31  * This used to be a pretty straightforward reader-writer lock
32  * implementation, in which the internal state was protected by a
33  * spinlock. Unfortunately the overhead of taking the spinlock proved to be
34  * too high for workloads/locks that were taken in shared mode very
35  * frequently. Often we were spinning in the (obviously exclusive) spinlock,
36  * while trying to acquire a shared lock that was actually free.
37  *
38  * Thus a new implementation was devised that provides wait-free shared lock
39  * acquisition for locks that aren't exclusively locked.
40  *
41  * The basic idea is to have a single atomic variable 'lockcount' instead of
42  * the formerly separate shared and exclusive counters and to use atomic
43  * operations to acquire the lock. That's fairly easy to do for plain
44  * rw-spinlocks, but a lot harder for something like LWLocks that want to wait
45  * in the OS.
46  *
47  * For lock acquisition we use an atomic compare-and-exchange on the lockcount
48  * variable. For exclusive lock we swap in a sentinel value
49  * (LW_VAL_EXCLUSIVE), for shared locks we count the number of holders.
50  *
51  * To release the lock we use an atomic decrement. If the
52  * new value is zero (we get that atomically), we know we can/have to release
53  * waiters.
54  *
55  * Obviously it is important that the sentinel value for exclusive locks
56  * doesn't conflict with the maximum number of possible share lockers -
57  * luckily MAX_BACKENDS makes that easily possible.
58  *
59  *
60  * The attentive reader might have noticed that naively doing the above has a
61  * glaring race condition: We try to lock using the atomic operations and
62  * notice that we have to wait. Unfortunately by the time we have finished
63  * queuing, the former locker very well might have already finished its
64  * work. That's problematic because we're now stuck waiting inside the OS.
65  *
66  * To mitigate those races we use a two-phase attempt at locking:
67  * Phase 1: Try to do it atomically, if we succeed, nice
68  * Phase 2: Add ourselves to the waitqueue of the lock
69  * Phase 3: Try to grab the lock again, if we succeed, remove ourselves from
70  * the queue
71  * Phase 4: Sleep till wake-up, goto Phase 1
72  *
73  * This protects us against the problem above: nobody can release too
74  * quickly before we're queued, since after Phase 2 we're already queued.
75  * -------------------------------------------------------------------------
76  */
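/*
 * A minimal caller-side sketch of the exclusive/shared API described above.
 * The lock pointer and the shared structure are hypothetical; the functions
 * and modes are the ones defined in this file and in lwlock.h:
 *
 *		// writer: serialize against readers and other writers
 *		LWLockAcquire(my_lock, LW_EXCLUSIVE);
 *		my_shared_struct->counter++;
 *		LWLockRelease(my_lock);
 *
 *		// reader: many backends may hold the lock in shared mode at once
 *		LWLockAcquire(my_lock, LW_SHARED);
 *		snapshot = my_shared_struct->counter;
 *		LWLockRelease(my_lock);
 *
 * Cancel/die interrupts are held off between acquire and release, so the
 * protected section should be short; LWLockReleaseAll cleans up any locks
 * still held during error recovery.
 */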
77 #include "postgres.h"
78 
79 #include "miscadmin.h"
80 #include "pgstat.h"
81 #include "pg_trace.h"
82 #include "postmaster/postmaster.h"
83 #include "replication/slot.h"
84 #include "storage/ipc.h"
85 #include "storage/predicate.h"
86 #include "storage/proc.h"
87 #include "storage/proclist.h"
88 #include "storage/spin.h"
89 #include "utils/memutils.h"
90 
91 #ifdef LWLOCK_STATS
92 #include "utils/hsearch.h"
93 #endif
94 
95 
96 /* We use the ShmemLock spinlock to protect LWLockCounter */
97 extern slock_t *ShmemLock;
98 
99 #define LW_FLAG_HAS_WAITERS ((uint32) 1 << 30)
100 #define LW_FLAG_RELEASE_OK ((uint32) 1 << 29)
101 #define LW_FLAG_LOCKED ((uint32) 1 << 28)
102 
103 #define LW_VAL_EXCLUSIVE ((uint32) 1 << 24)
104 #define LW_VAL_SHARED 1
105 
106 #define LW_LOCK_MASK ((uint32) ((1 << 25)-1))
107 /* Must be greater than MAX_BACKENDS - which is 2^23-1, so we're fine. */
108 #define LW_SHARED_MASK ((uint32) ((1 << 24)-1))
109 
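/*
 * Putting the flag bits and the holder count together, the 32-bit 'state'
 * word of an LWLock is laid out roughly as follows (derived from the
 * definitions above):
 *
 *	 bit 30       LW_FLAG_HAS_WAITERS  - somebody is (being) queued
 *	 bit 29       LW_FLAG_RELEASE_OK   - wake waiters on release
 *	 bit 28       LW_FLAG_LOCKED       - wait list is locked
 *	 bit 24       LW_VAL_EXCLUSIVE     - sentinel for an exclusive holder
 *	 bits 0..23   shared-holder count  - each holder adds LW_VAL_SHARED (1)
 *
 * LW_LOCK_MASK covers bits 0..24, i.e. both the exclusive sentinel and the
 * shared count, so "(state & LW_LOCK_MASK) == 0" means no holders at all.
 * For example, a single exclusive holder yields (state & LW_LOCK_MASK) ==
 * 0x1000000, while three shared holders yield 0x0000003. Since MAX_BACKENDS
 * is 2^23-1, the shared count can never collide with the exclusive sentinel.
 */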
110 /*
111  * This is indexed by tranche ID and stores the names of all tranches known
112  * to the current backend.
113  */
114 static const char **LWLockTrancheArray = NULL;
115 static int LWLockTranchesAllocated = 0;
116 
117 #define T_NAME(lock) \
118  (LWLockTrancheArray[(lock)->tranche])
119 
120 /*
121  * This points to the main array of LWLocks in shared memory. Backends inherit
122  * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
123  * where we have special measures to pass it down).
124  */
125 LWLockPadded *MainLWLockArray = NULL;
126 
127 /*
128  * We use this structure to keep track of locked LWLocks for release
129  * during error recovery. Normally, only a few will be held at once, but
130  * occasionally the number can be much higher; for example, the pg_buffercache
131  * extension locks all buffer partitions simultaneously.
132  */
133 #define MAX_SIMUL_LWLOCKS 200
134 
135 /* struct representing the LWLocks we're holding */
136 typedef struct LWLockHandle
137 {
138  LWLock *lock;
139  LWLockMode mode;
140 } LWLockHandle;
141 
142 static int num_held_lwlocks = 0;
143 static LWLockHandle held_lwlocks[MAX_SIMUL_LWLOCKS];
144 
145 /* struct representing the LWLock tranche request for named tranche */
146 typedef struct NamedLWLockTrancheRequest
147 {
148  char tranche_name[NAMEDATALEN];
149  int num_lwlocks;
150 } NamedLWLockTrancheRequest;
151 
152 NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL;
153 static int NamedLWLockTrancheRequestsAllocated = 0;
154 int NamedLWLockTrancheRequests = 0;
155 
156 NamedLWLockTranche *NamedLWLockTrancheArray = NULL;
157 
158 static bool lock_named_request_allowed = true;
159 
160 static void InitializeLWLocks(void);
161 static void RegisterLWLockTranches(void);
162 
163 static inline void LWLockReportWaitStart(LWLock *lock);
164 static inline void LWLockReportWaitEnd(void);
165 
166 #ifdef LWLOCK_STATS
167 typedef struct lwlock_stats_key
168 {
169  int tranche;
170  void *instance;
171 } lwlock_stats_key;
172 
173 typedef struct lwlock_stats
174 {
175  lwlock_stats_key key;
176  int sh_acquire_count;
177  int ex_acquire_count;
178  int block_count;
179  int dequeue_self_count;
180  int spin_delay_count;
181 } lwlock_stats;
182 
183 static HTAB *lwlock_stats_htab;
184 static lwlock_stats lwlock_stats_dummy;
185 #endif
186 
187 #ifdef LOCK_DEBUG
188 bool Trace_lwlocks = false;
189 
190 inline static void
191 PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
192 {
193  /* hide statement & context here, otherwise the log is just too verbose */
194  if (Trace_lwlocks)
195  {
196  uint32 state = pg_atomic_read_u32(&lock->state);
197 
198  ereport(LOG,
199  (errhidestmt(true),
200  errhidecontext(true),
201  errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
202  MyProcPid,
203  where, T_NAME(lock), lock,
204  (state & LW_VAL_EXCLUSIVE) != 0,
205  state & LW_SHARED_MASK,
206  (state & LW_FLAG_HAS_WAITERS) != 0,
207  pg_atomic_read_u32(&lock->nwaiters),
208  (state & LW_FLAG_RELEASE_OK) != 0)));
209  }
210 }
211 
212 inline static void
213 LOG_LWDEBUG(const char *where, LWLock *lock, const char *msg)
214 {
215  /* hide statement & context here, otherwise the log is just too verbose */
216  if (Trace_lwlocks)
217  {
218  ereport(LOG,
219  (errhidestmt(true),
220  errhidecontext(true),
221  errmsg_internal("%s(%s %p): %s", where,
222  T_NAME(lock), lock, msg)));
223  }
224 }
225 
226 #else /* not LOCK_DEBUG */
227 #define PRINT_LWDEBUG(a,b,c) ((void)0)
228 #define LOG_LWDEBUG(a,b,c) ((void)0)
229 #endif /* LOCK_DEBUG */
230 
231 #ifdef LWLOCK_STATS
232 
233 static void init_lwlock_stats(void);
234 static void print_lwlock_stats(int code, Datum arg);
235 static lwlock_stats * get_lwlock_stats_entry(LWLock *lockid);
236 
237 static void
238 init_lwlock_stats(void)
239 {
240  HASHCTL ctl;
241  static MemoryContext lwlock_stats_cxt = NULL;
242  static bool exit_registered = false;
243 
244  if (lwlock_stats_cxt != NULL)
245  MemoryContextDelete(lwlock_stats_cxt);
246 
247  /*
248  * The LWLock stats will be updated within a critical section, which
249  * requires allocating new hash entries. Allocations within a critical
250  * section are normally not allowed because running out of memory would
251  * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
252  * turned on in production, so that's an acceptable risk. The hash entries
253  * are small, so the risk of running out of memory is minimal in practice.
254  */
255  lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
256  "LWLock stats",
257  ALLOCSET_DEFAULT_SIZES);
258  MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
259 
260  MemSet(&ctl, 0, sizeof(ctl));
261  ctl.keysize = sizeof(lwlock_stats_key);
262  ctl.entrysize = sizeof(lwlock_stats);
263  ctl.hcxt = lwlock_stats_cxt;
264  lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
265  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
266  if (!exit_registered)
267  {
268  on_shmem_exit(print_lwlock_stats, 0);
269  exit_registered = true;
270  }
271 }
272 
273 static void
274 print_lwlock_stats(int code, Datum arg)
275 {
276  HASH_SEQ_STATUS scan;
277  lwlock_stats *lwstats;
278 
279  hash_seq_init(&scan, lwlock_stats_htab);
280 
281  /* Grab an LWLock to keep different backends from mixing reports */
282  LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
283 
284  while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
285  {
286  fprintf(stderr,
287  "PID %d lwlock %s %p: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
288  MyProcPid, LWLockTrancheArray[lwstats->key.tranche],
289  lwstats->key.instance, lwstats->sh_acquire_count,
290  lwstats->ex_acquire_count, lwstats->block_count,
291  lwstats->spin_delay_count, lwstats->dequeue_self_count);
292  }
293 
294  LWLockRelease(&MainLWLockArray[0].lock);
295 }
296 
297 static lwlock_stats *
298 get_lwlock_stats_entry(LWLock *lock)
299 {
300  lwlock_stats_key key;
301  lwlock_stats *lwstats;
302  bool found;
303 
304  /*
305  * During shared memory initialization, the hash table doesn't exist yet.
306  * Stats of that phase aren't very interesting, so just collect operations
307  * on all locks in a single dummy entry.
308  */
309  if (lwlock_stats_htab == NULL)
310  return &lwlock_stats_dummy;
311 
312  /* Fetch or create the entry. */
313  key.tranche = lock->tranche;
314  key.instance = lock;
315  lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
316  if (!found)
317  {
318  lwstats->sh_acquire_count = 0;
319  lwstats->ex_acquire_count = 0;
320  lwstats->block_count = 0;
321  lwstats->dequeue_self_count = 0;
322  lwstats->spin_delay_count = 0;
323  }
324  return lwstats;
325 }
326 #endif /* LWLOCK_STATS */
327 
328 
329 /*
330  * Compute number of LWLocks required by named tranches. These will be
331  * allocated in the main array.
332  */
333 static int
334 NumLWLocksByNamedTranches(void)
335 {
336  int numLocks = 0;
337  int i;
338 
339  for (i = 0; i < NamedLWLockTrancheRequests; i++)
340  numLocks += NamedLWLockTrancheRequestArray[i].num_lwlocks;
341 
342  return numLocks;
343 }
344 
345 /*
346  * Compute shmem space needed for LWLocks and named tranches.
347  */
348 Size
349 LWLockShmemSize(void)
350 {
351  Size size;
352  int i;
353  int numLocks = NUM_FIXED_LWLOCKS;
354 
355  numLocks += NumLWLocksByNamedTranches();
356 
357  /* Space for the LWLock array. */
358  size = mul_size(numLocks, sizeof(LWLockPadded));
359 
360  /* Space for dynamic allocation counter, plus room for alignment. */
361  size = add_size(size, sizeof(int) + LWLOCK_PADDED_SIZE);
362 
363  /* space for named tranches. */
364  size = add_size(size, mul_size(NamedLWLockTrancheRequests, sizeof(NamedLWLockTranche)));
365 
366  /* space for name of each tranche. */
367  for (i = 0; i < NamedLWLockTrancheRequests; i++)
368  size = add_size(size, strlen(NamedLWLockTrancheRequestArray[i].tranche_name) + 1);
369 
370  /* Disallow named LWLocks' requests after startup */
371  lock_named_request_allowed = false;
372 
373  return size;
374 }
375 
376 /*
377  * Allocate shmem space for the main LWLock array and all tranches and
378  * initialize it. We also register all the LWLock tranches here.
379  */
380 void
381 CreateLWLocks(void)
382 {
383  StaticAssertExpr(LW_VAL_EXCLUSIVE > (uint32) MAX_BACKENDS,
384  "MAX_BACKENDS too big for lwlock.c");
385 
386  StaticAssertExpr(sizeof(LWLock) <= LWLOCK_MINIMAL_SIZE &&
387  sizeof(LWLock) <= LWLOCK_PADDED_SIZE,
388  "Miscalculated LWLock padding");
389 
390  if (!IsUnderPostmaster)
391  {
392  Size spaceLocks = LWLockShmemSize();
393  int *LWLockCounter;
394  char *ptr;
395 
396  /* Allocate space */
397  ptr = (char *) ShmemAlloc(spaceLocks);
398 
399  /* Leave room for dynamic allocation of tranches */
400  ptr += sizeof(int);
401 
402  /* Ensure desired alignment of LWLock array */
403  ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
404 
405  MainLWLockArray = (LWLockPadded *) ptr;
406 
407  /*
408  * Initialize the dynamic-allocation counter for tranches, which is
409  * stored just before the first LWLock.
410  */
411  LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
412  *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
413 
414  /* Initialize all LWLocks */
415  InitializeLWLocks();
416  }
417 
418  /* Register all LWLock tranches */
419  RegisterLWLockTranches();
420 }
421 
422 /*
423  * Initialize LWLocks that are fixed and those belonging to named tranches.
424  */
425 static void
426 InitializeLWLocks(void)
427 {
428  int numNamedLocks = NumLWLocksByNamedTranches();
429  int id;
430  int i;
431  int j;
432  LWLockPadded *lock;
433 
434  /* Initialize all individual LWLocks in main array */
435  for (id = 0, lock = MainLWLockArray; id < NUM_INDIVIDUAL_LWLOCKS; id++, lock++)
436  LWLockInitialize(&lock->lock, id);
437 
438  /* Initialize buffer mapping LWLocks in main array */
439  lock = MainLWLockArray + NUM_INDIVIDUAL_LWLOCKS;
440  for (id = 0; id < NUM_BUFFER_PARTITIONS; id++, lock++)
441  LWLockInitialize(&lock->lock, LWTRANCHE_BUFFER_MAPPING);
442 
443  /* Initialize lmgrs' LWLocks in main array */
444  lock = MainLWLockArray + NUM_INDIVIDUAL_LWLOCKS + NUM_BUFFER_PARTITIONS;
445  for (id = 0; id < NUM_LOCK_PARTITIONS; id++, lock++)
446  LWLockInitialize(&lock->lock, LWTRANCHE_LOCK_MANAGER);
447 
448  /* Initialize predicate lmgrs' LWLocks in main array */
449  lock = MainLWLockArray + NUM_INDIVIDUAL_LWLOCKS +
450  NUM_BUFFER_PARTITIONS + NUM_LOCK_PARTITIONS;
451  for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++)
452  LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER);
453 
454  /* Initialize named tranches. */
455  if (NamedLWLockTrancheRequests > 0)
456  {
457  char *trancheNames;
458 
459  NamedLWLockTrancheArray = (NamedLWLockTranche *)
460  &MainLWLockArray[NUM_FIXED_LWLOCKS + numNamedLocks];
461 
462  trancheNames = (char *) NamedLWLockTrancheArray +
463  (NamedLWLockTrancheRequests * sizeof(NamedLWLockTranche));
464  lock = &MainLWLockArray[NUM_FIXED_LWLOCKS];
465 
466  for (i = 0; i < NamedLWLockTrancheRequests; i++)
467  {
468  NamedLWLockTrancheRequest *request;
469  NamedLWLockTranche *tranche;
470  char *name;
471 
472  request = &NamedLWLockTrancheRequestArray[i];
473  tranche = &NamedLWLockTrancheArray[i];
474 
475  name = trancheNames;
476  trancheNames += strlen(request->tranche_name) + 1;
477  strcpy(name, request->tranche_name);
478  tranche->trancheId = LWLockNewTrancheId();
479  tranche->trancheName = name;
480 
481  for (j = 0; j < request->num_lwlocks; j++, lock++)
482  LWLockInitialize(&lock->lock, tranche->trancheId);
483  }
484  }
485 }
486 
487 /*
488  * Register named tranches and tranches for fixed LWLocks.
489  */
490 static void
491 RegisterLWLockTranches(void)
492 {
493  int i;
494 
495  if (LWLockTrancheArray == NULL)
496  {
497  LWLockTranchesAllocated = 128;
498  LWLockTrancheArray = (const char **)
499  MemoryContextAllocZero(TopMemoryContext,
500  LWLockTranchesAllocated * sizeof(char *));
501  Assert(LWLockTranchesAllocated >= LWTRANCHE_FIRST_USER_DEFINED);
502  }
503 
504  for (i = 0; i < NUM_INDIVIDUAL_LWLOCKS; ++i)
505  LWLockRegisterTranche(i, MainLWLockNames[i]);
506 
507  LWLockRegisterTranche(LWTRANCHE_BUFFER_MAPPING, "buffer_mapping");
508  LWLockRegisterTranche(LWTRANCHE_LOCK_MANAGER, "lock_manager");
509  LWLockRegisterTranche(LWTRANCHE_PREDICATE_LOCK_MANAGER,
510  "predicate_lock_manager");
511  LWLockRegisterTranche(LWTRANCHE_PARALLEL_QUERY_DSA,
512  "parallel_query_dsa");
513  LWLockRegisterTranche(LWTRANCHE_SESSION_DSA,
514  "session_dsa");
515  LWLockRegisterTranche(LWTRANCHE_SESSION_RECORD_TABLE,
516  "session_record_table");
517  LWLockRegisterTranche(LWTRANCHE_SESSION_TYPMOD_TABLE,
518  "session_typmod_table");
521 
522  /* Register named tranches. */
523  for (i = 0; i < NamedLWLockTrancheRequests; i++)
524  LWLockRegisterTranche(NamedLWLockTrancheArray[i].trancheId,
525  NamedLWLockTrancheArray[i].trancheName);
526 }
527 
528 /*
529  * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
530  */
531 void
532 InitLWLockAccess(void)
533 {
534 #ifdef LWLOCK_STATS
535  init_lwlock_stats();
536 #endif
537 }
538 
539 /*
540  * GetNamedLWLockTranche - returns the base address of LWLock from the
541  * specified tranche.
542  *
543  * Caller needs to retrieve the requested number of LWLocks starting from
544  * the base lock address returned by this API. This can be used for
545  * tranches that are requested by using RequestNamedLWLockTranche() API.
546  */
547 LWLockPadded *
548 GetNamedLWLockTranche(const char *tranche_name)
549 {
550  int lock_pos;
551  int i;
552 
553  /*
554  * Obtain the position of base address of LWLock belonging to requested
555  * tranche_name in MainLWLockArray. LWLocks for named tranches are placed
556  * in MainLWLockArray after fixed locks.
557  */
558  lock_pos = NUM_FIXED_LWLOCKS;
559  for (i = 0; i < NamedLWLockTrancheRequests; i++)
560  {
561  if (strcmp(NamedLWLockTrancheRequestArray[i].tranche_name,
562  tranche_name) == 0)
563  return &MainLWLockArray[lock_pos];
564 
565  lock_pos += NamedLWLockTrancheRequestArray[i].num_lwlocks;
566  }
567 
568  if (i >= NamedLWLockTrancheRequests)
569  elog(ERROR, "requested tranche is not registered");
570 
571  /* just to keep compiler quiet */
572  return NULL;
573 }
574 
575 /*
576  * Allocate a new tranche ID.
577  */
578 int
579 LWLockNewTrancheId(void)
580 {
581  int result;
582  int *LWLockCounter;
583 
584  LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
585  SpinLockAcquire(ShmemLock);
586  result = (*LWLockCounter)++;
587  SpinLockRelease(ShmemLock);
588 
589  return result;
590 }
591 
592 /*
593  * Register a tranche ID in the lookup table for the current process. This
594  * routine will save a pointer to the tranche name passed as an argument,
595  * so the name should be allocated in a backend-lifetime context
596  * (TopMemoryContext, static variable, or similar).
597  */
598 void
599 LWLockRegisterTranche(int tranche_id, const char *tranche_name)
600 {
601  Assert(LWLockTrancheArray != NULL);
602 
603  if (tranche_id >= LWLockTranchesAllocated)
604  {
605  int i = LWLockTranchesAllocated;
606  int j = LWLockTranchesAllocated;
607 
608  while (i <= tranche_id)
609  i *= 2;
610 
611  LWLockTrancheArray = (const char **)
612  repalloc(LWLockTrancheArray, i * sizeof(char *));
613  LWLockTranchesAllocated = i;
614  while (j < LWLockTranchesAllocated)
615  LWLockTrancheArray[j++] = NULL;
616  }
617 
618  LWLockTrancheArray[tranche_id] = tranche_name;
619 }
620 
621 /*
622  * RequestNamedLWLockTranche
623  * Request that extra LWLocks be allocated during postmaster
624  * startup.
625  *
626  * This is only useful for extensions if called from the _PG_init hook
627  * of a library that is loaded into the postmaster via
628  * shared_preload_libraries. Once shared memory has been allocated, calls
629  * will be ignored. (We could raise an error, but it seems better to make
630  * it a no-op, so that libraries containing such calls can be reloaded if
631  * needed.)
632  */
633 void
634 RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
635 {
636  NamedLWLockTrancheRequest *request;
637 
638  if (!lock_named_request_allowed)
639  return; /* too late */
640 
641  if (NamedLWLockTrancheRequestArray == NULL)
642  {
643  NamedLWLockTrancheRequestsAllocated = 16;
644  NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
645  MemoryContextAlloc(TopMemoryContext,
646  NamedLWLockTrancheRequestsAllocated
647  * sizeof(NamedLWLockTrancheRequest));
648  }
649 
650  if (NamedLWLockTrancheRequests >= NamedLWLockTrancheRequestsAllocated)
651  {
652  int i = NamedLWLockTrancheRequestsAllocated;
653 
654  while (i <= NamedLWLockTrancheRequests)
655  i *= 2;
656 
657  NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
658  repalloc(NamedLWLockTrancheRequestArray,
659  i * sizeof(NamedLWLockTrancheRequest));
660  NamedLWLockTrancheRequestsAllocated = i;
661  }
662 
663  request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests];
664  Assert(strlen(tranche_name) + 1 < NAMEDATALEN);
665  StrNCpy(request->tranche_name, tranche_name, NAMEDATALEN);
666  request->num_lwlocks = num_lwlocks;
667  NamedLWLockTrancheRequests++;
668 }
669 
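/*
 * An illustrative sketch of how an extension would use the named-tranche
 * API above; the tranche name "my_extension", the lock count, and the
 * shmem-startup function are hypothetical:
 *
 *	void
 *	_PG_init(void)
 *	{
 *		// only honored when loaded via shared_preload_libraries
 *		RequestNamedLWLockTranche("my_extension", 4);
 *	}
 *
 *	static void
 *	my_extension_shmem_startup(void)
 *	{
 *		LWLockPadded *locks = GetNamedLWLockTranche("my_extension");
 *
 *		LWLockAcquire(&locks[0].lock, LW_EXCLUSIVE);
 *		// ... initialize or touch the extension's shared state ...
 *		LWLockRelease(&locks[0].lock);
 *	}
 */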
670 /*
671  * LWLockInitialize - initialize a new lwlock; it's initially unlocked
672  */
673 void
674 LWLockInitialize(LWLock *lock, int tranche_id)
675 {
676  pg_atomic_init_u32(&lock->state, LW_FLAG_RELEASE_OK);
677 #ifdef LOCK_DEBUG
678  pg_atomic_init_u32(&lock->nwaiters, 0);
679 #endif
680  lock->tranche = tranche_id;
681  proclist_init(&lock->waiters);
682 }
683 
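/*
 * A minimal sketch of initializing LWLocks outside the main array (for
 * instance in a DSM segment), using the tranche APIs defined above; the
 * tranche name and the structure holding the lock are hypothetical:
 *
 *	int		tranche_id = LWLockNewTrancheId();
 *
 *	LWLockRegisterTranche(tranche_id, "my_dsm_area");
 *	LWLockInitialize(&my_shared_area->lock, tranche_id);
 *
 * Any other backend that touches the lock should also call
 * LWLockRegisterTranche() with the same id; otherwise the lock's wait
 * events are reported as "extension" (see GetLWLockIdentifier below).
 */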
684 /*
685  * Report start of wait event for light-weight locks.
686  *
687  * This function will be used by all the light-weight lock calls which
688  * need to wait to acquire the lock. This function distinguishes the wait
689  * event based on tranche and lock id.
690  */
691 static inline void
692 LWLockReportWaitStart(LWLock *lock)
693 {
694  pgstat_report_wait_start(PG_WAIT_LWLOCK | lock->tranche);
695 }
696 
697 /*
698  * Report end of wait event for light-weight locks.
699  */
700 static inline void
701 LWLockReportWaitEnd(void)
702 {
703  pgstat_report_wait_end();
704 }
705 
706 /*
707  * Return an identifier for an LWLock based on the wait class and event.
708  */
709 const char *
710 GetLWLockIdentifier(uint32 classId, uint16 eventId)
711 {
712  Assert(classId == PG_WAIT_LWLOCK);
713 
714  /*
715  * It is quite possible that the user has registered the tranche in one of the
716  * backends (e.g. by allocating lwlocks in dynamic shared memory) but not
717  * all of them, so we can't assume the tranche is registered here.
718  */
719  if (eventId >= LWLockTranchesAllocated ||
720  LWLockTrancheArray[eventId] == NULL)
721  return "extension";
722 
723  return LWLockTrancheArray[eventId];
724 }
725 
726 /*
727  * Internal function that tries to atomically acquire the lwlock in the passed
728  * in mode.
729  *
730  * This function will not block waiting for a lock to become free - that's the
731  * caller's job.
732  *
733  * Returns true if the lock isn't free and we need to wait.
734  */
735 static bool
736 LWLockAttemptLock(LWLock *lock, LWLockMode mode)
737 {
738  uint32 old_state;
739 
740  AssertArg(mode == LW_EXCLUSIVE || mode == LW_SHARED);
741 
742  /*
743  * Read once outside the loop, later iterations will get the newer value
744  * via compare & exchange.
745  */
746  old_state = pg_atomic_read_u32(&lock->state);
747 
748  /* loop until we've determined whether we could acquire the lock or not */
749  while (true)
750  {
751  uint32 desired_state;
752  bool lock_free;
753 
754  desired_state = old_state;
755 
756  if (mode == LW_EXCLUSIVE)
757  {
758  lock_free = (old_state & LW_LOCK_MASK) == 0;
759  if (lock_free)
760  desired_state += LW_VAL_EXCLUSIVE;
761  }
762  else
763  {
764  lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
765  if (lock_free)
766  desired_state += LW_VAL_SHARED;
767  }
768 
769  /*
770  * Attempt to swap in the state we are expecting. If we didn't see
771  * lock to be free, that's just the old value. If we saw it as free,
772  * we'll attempt to mark it acquired. The reason that we always swap
773  * in the value is that this doubles as a memory barrier. We could try
774  * to be smarter and only swap in values if we saw the lock as free,
775  * but benchmarks haven't shown it to be beneficial so far.
776  *
777  * Retry if the value changed since we last looked at it.
778  */
779  if (pg_atomic_compare_exchange_u32(&lock->state,
780  &old_state, desired_state))
781  {
782  if (lock_free)
783  {
784  /* Great! Got the lock. */
785 #ifdef LOCK_DEBUG
786  if (mode == LW_EXCLUSIVE)
787  lock->owner = MyProc;
788 #endif
789  return false;
790  }
791  else
792  return true; /* somebody else has the lock */
793  }
794  }
795  pg_unreachable();
796 }
797 
798 /*
799  * Lock the LWLock's wait list against concurrent activity.
800  *
801  * NB: even though the wait list is locked, non-conflicting lock operations
802  * may still happen concurrently.
803  *
804  * Time spent holding mutex should be short!
805  */
806 static void
807 LWLockWaitListLock(LWLock *lock)
808 {
809  uint32 old_state;
810 #ifdef LWLOCK_STATS
811  lwlock_stats *lwstats;
812  uint32 delays = 0;
813 
814  lwstats = get_lwlock_stats_entry(lock);
815 #endif
816 
817  while (true)
818  {
819  /* always try once to acquire lock directly */
820  old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
821  if (!(old_state & LW_FLAG_LOCKED))
822  break; /* got lock */
823 
824  /* and then spin without atomic operations until lock is released */
825  {
826  SpinDelayStatus delayStatus;
827 
828  init_local_spin_delay(&delayStatus);
829 
830  while (old_state & LW_FLAG_LOCKED)
831  {
832  perform_spin_delay(&delayStatus);
833  old_state = pg_atomic_read_u32(&lock->state);
834  }
835 #ifdef LWLOCK_STATS
836  delays += delayStatus.delays;
837 #endif
838  finish_spin_delay(&delayStatus);
839  }
840 
841  /*
842  * Retry. The lock might obviously already be re-acquired by the time
843  * we're attempting to get it again.
844  */
845  }
846 
847 #ifdef LWLOCK_STATS
848  lwstats->spin_delay_count += delays;
849 #endif
850 }
851 
852 /*
853  * Unlock the LWLock's wait list.
854  *
855  * Note that it can be more efficient to manipulate flags and release the
856  * locks in a single atomic operation.
857  */
858 static void
859 LWLockWaitListUnlock(LWLock *lock)
860 {
861  uint32 old_state PG_USED_FOR_ASSERTS_ONLY;
862 
863  old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
864 
865  Assert(old_state & LW_FLAG_LOCKED);
866 }
867 
868 /*
869  * Wakeup all the lockers that currently have a chance to acquire the lock.
870  */
871 static void
872 LWLockWakeup(LWLock *lock)
873 {
874  bool new_release_ok;
875  bool wokeup_somebody = false;
876  proclist_head wakeup;
877  proclist_mutable_iter iter;
878 
879  proclist_init(&wakeup);
880 
881  new_release_ok = true;
882 
883  /* lock wait list while collecting backends to wake up */
884  LWLockWaitListLock(lock);
885 
886  proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
887  {
888  PGPROC *waiter = GetPGProcByNumber(iter.cur);
889 
890  if (wokeup_somebody && waiter->lwWaitMode == LW_EXCLUSIVE)
891  continue;
892 
893  proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
894  proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
895 
896  if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
897  {
898  /*
899  * Prevent additional wakeups until retryer gets to run. Backends
900  * that are just waiting for the lock to become free don't retry
901  * automatically.
902  */
903  new_release_ok = false;
904 
905  /*
906  * Don't wakeup (further) exclusive locks.
907  */
908  wokeup_somebody = true;
909  }
910 
911  /*
912  * Once we've woken up an exclusive lock, there's no point in waking
913  * up anybody else.
914  */
915  if (waiter->lwWaitMode == LW_EXCLUSIVE)
916  break;
917  }
918 
919  Assert(proclist_is_empty(&wakeup) || pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
920 
921  /* unset required flags, and release lock, in one fell swoop */
922  {
923  uint32 old_state;
924  uint32 desired_state;
925 
926  old_state = pg_atomic_read_u32(&lock->state);
927  while (true)
928  {
929  desired_state = old_state;
930 
931  /* compute desired flags */
932 
933  if (new_release_ok)
934  desired_state |= LW_FLAG_RELEASE_OK;
935  else
936  desired_state &= ~LW_FLAG_RELEASE_OK;
937 
938  if (proclist_is_empty(&wakeup))
939  desired_state &= ~LW_FLAG_HAS_WAITERS;
940 
941  desired_state &= ~LW_FLAG_LOCKED; /* release lock */
942 
943  if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
944  desired_state))
945  break;
946  }
947  }
948 
949  /* Awaken any waiters I removed from the queue. */
950  proclist_foreach_modify(iter, &wakeup, lwWaitLink)
951  {
952  PGPROC *waiter = GetPGProcByNumber(iter.cur);
953 
954  LOG_LWDEBUG("LWLockRelease", lock, "release waiter");
955  proclist_delete(&wakeup, iter.cur, lwWaitLink);
956 
957  /*
958  * Guarantee that lwWaiting being unset only becomes visible once the
959  * unlink from the wait list has completed. Otherwise the target backend
960  * could be woken up for other reason and enqueue for a new lock - if
961  * that happens before the list unlink happens, the list would end up
962  * being corrupted.
963  *
964  * The barrier pairs with the LWLockWaitListLock() when enqueuing for
965  * another lock.
966  */
967  pg_write_barrier();
968  waiter->lwWaiting = false;
969  PGSemaphoreUnlock(waiter->sem);
970  }
971 }
972 
973 /*
974  * Add ourselves to the end of the queue.
975  *
976  * NB: Mode can be LW_WAIT_UNTIL_FREE here!
977  */
978 static void
979 LWLockQueueSelf(LWLock *lock, LWLockMode mode)
980 {
981  /*
982  * If we don't have a PGPROC structure, there's no way to wait. This
983  * should never occur, since MyProc should only be null during shared
984  * memory initialization.
985  */
986  if (MyProc == NULL)
987  elog(PANIC, "cannot wait without a PGPROC structure");
988 
989  if (MyProc->lwWaiting)
990  elog(PANIC, "queueing for lock while waiting on another one");
991 
992  LWLockWaitListLock(lock);
993 
994  /* setting the flag is protected by the spinlock */
995  pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
996 
997  MyProc->lwWaiting = true;
998  MyProc->lwWaitMode = mode;
999 
1000  /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
1001  if (mode == LW_WAIT_UNTIL_FREE)
1002  proclist_push_head(&lock->waiters, MyProc->pgprocno, lwWaitLink);
1003  else
1004  proclist_push_tail(&lock->waiters, MyProc->pgprocno, lwWaitLink);
1005 
1006  /* Can release the mutex now */
1007  LWLockWaitListUnlock(lock);
1008 
1009 #ifdef LOCK_DEBUG
1010  pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
1011 #endif
1012 
1013 }
1014 
1015 /*
1016  * Remove ourselves from the waitlist.
1017  *
1018  * This is used if we queued ourselves because we thought we needed to sleep
1019  * but, after further checking, we discovered that we don't actually need to
1020  * do so.
1021  */
1022 static void
1023 LWLockDequeueSelf(LWLock *lock)
1024 {
1025  bool found = false;
1026  proclist_mutable_iter iter;
1027 
1028 #ifdef LWLOCK_STATS
1029  lwlock_stats *lwstats;
1030 
1031  lwstats = get_lwlock_stats_entry(lock);
1032 
1033  lwstats->dequeue_self_count++;
1034 #endif
1035 
1036  LWLockWaitListLock(lock);
1037 
1038  /*
1039  * We can't just remove ourselves from the list; we need to iterate over
1040  * all entries, as somebody else could have dequeued us already.
1041  */
1042  proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
1043  {
1044  if (iter.cur == MyProc->pgprocno)
1045  {
1046  found = true;
1047  proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
1048  break;
1049  }
1050  }
1051 
1052  if (proclist_is_empty(&lock->waiters) &&
1053  (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
1054  {
1055  pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
1056  }
1057 
1058  /* XXX: combine with fetch_and above? */
1059  LWLockWaitListUnlock(lock);
1060 
1061  /* clear waiting state again, nice for debugging */
1062  if (found)
1063  MyProc->lwWaiting = false;
1064  else
1065  {
1066  int extraWaits = 0;
1067 
1068  /*
1069  * Somebody else dequeued us and has or will wake us up. Deal with the
1070  * superfluous absorption of a wakeup.
1071  */
1072 
1073  /*
1074  * Reset releaseOk if somebody woke us before we removed ourselves -
1075  * they'll have set it to false.
1076  */
1077  pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1078 
1079  /*
1080  * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
1081  * get reset at some inconvenient point later. Most of the time this
1082  * will immediately return.
1083  */
1084  for (;;)
1085  {
1086  PGSemaphoreLock(MyProc->sem);
1087  if (!MyProc->lwWaiting)
1088  break;
1089  extraWaits++;
1090  }
1091 
1092  /*
1093  * Fix the process wait semaphore's count for any absorbed wakeups.
1094  */
1095  while (extraWaits-- > 0)
1096  PGSemaphoreUnlock(MyProc->sem);
1097  }
1098 
1099 #ifdef LOCK_DEBUG
1100  {
1101  /* not waiting anymore */
1102  uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1103 
1104  Assert(nwaiters < MAX_BACKENDS);
1105  }
1106 #endif
1107 }
1108 
1109 /*
1110  * LWLockAcquire - acquire a lightweight lock in the specified mode
1111  *
1112  * If the lock is not available, sleep until it is. Returns true if the lock
1113  * was available immediately, false if we had to sleep.
1114  *
1115  * Side effect: cancel/die interrupts are held off until lock release.
1116  */
1117 bool
1118 LWLockAcquire(LWLock *lock, LWLockMode mode)
1119 {
1120  PGPROC *proc = MyProc;
1121  bool result = true;
1122  int extraWaits = 0;
1123 #ifdef LWLOCK_STATS
1124  lwlock_stats *lwstats;
1125 
1126  lwstats = get_lwlock_stats_entry(lock);
1127 #endif
1128 
1129  AssertArg(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1130 
1131  PRINT_LWDEBUG("LWLockAcquire", lock, mode);
1132 
1133 #ifdef LWLOCK_STATS
1134  /* Count lock acquisition attempts */
1135  if (mode == LW_EXCLUSIVE)
1136  lwstats->ex_acquire_count++;
1137  else
1138  lwstats->sh_acquire_count++;
1139 #endif /* LWLOCK_STATS */
1140 
1141  /*
1142  * We can't wait if we haven't got a PGPROC. This should only occur
1143  * during bootstrap or shared memory initialization. Put an Assert here
1144  * to catch unsafe coding practices.
1145  */
1146  Assert(!(proc == NULL && IsUnderPostmaster));
1147 
1148  /* Ensure we will have room to remember the lock */
1149  if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1150  elog(ERROR, "too many LWLocks taken");
1151 
1152  /*
1153  * Lock out cancel/die interrupts until we exit the code section protected
1154  * by the LWLock. This ensures that interrupts will not interfere with
1155  * manipulations of data structures in shared memory.
1156  */
1157  HOLD_INTERRUPTS();
1158 
1159  /*
1160  * Loop here to try to acquire lock after each time we are signaled by
1161  * LWLockRelease.
1162  *
1163  * NOTE: it might seem better to have LWLockRelease actually grant us the
1164  * lock, rather than retrying and possibly having to go back to sleep. But
1165  * in practice that is no good because it means a process swap for every
1166  * lock acquisition when two or more processes are contending for the same
1167  * lock. Since LWLocks are normally used to protect not-very-long
1168  * sections of computation, a process needs to be able to acquire and
1169  * release the same lock many times during a single CPU time slice, even
1170  * in the presence of contention. The efficiency of being able to do that
1171  * outweighs the inefficiency of sometimes wasting a process dispatch
1172  * cycle because the lock is not free when a released waiter finally gets
1173  * to run. See pgsql-hackers archives for 29-Dec-01.
1174  */
1175  for (;;)
1176  {
1177  bool mustwait;
1178 
1179  /*
1180  * Try to grab the lock the first time, we're not in the waitqueue
1181  * yet/anymore.
1182  */
1183  mustwait = LWLockAttemptLock(lock, mode);
1184 
1185  if (!mustwait)
1186  {
1187  LOG_LWDEBUG("LWLockAcquire", lock, "immediately acquired lock");
1188  break; /* got the lock */
1189  }
1190 
1191  /*
1192  * Ok, at this point we couldn't grab the lock on the first try. We
1193  * cannot simply queue ourselves to the end of the list and wait to be
1194  * woken up because by now the lock could long have been released.
1195  * Instead add us to the queue and try to grab the lock again. If we
1196  * succeed we need to revert the queuing and be happy, otherwise we
1197  * recheck the lock. If we still couldn't grab it, we know that the
1198  * other locker will see our queue entries when releasing since they
1199  * existed before we checked for the lock.
1200  */
1201 
1202  /* add to the queue */
1203  LWLockQueueSelf(lock, mode);
1204 
1205  /* we're now guaranteed to be woken up if necessary */
1206  mustwait = LWLockAttemptLock(lock, mode);
1207 
1208  /* ok, grabbed the lock the second time round, need to undo queueing */
1209  if (!mustwait)
1210  {
1211  LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
1212 
1213  LWLockDequeueSelf(lock);
1214  break;
1215  }
1216 
1217  /*
1218  * Wait until awakened.
1219  *
1220  * Since we share the process wait semaphore with the regular lock
1221  * manager and ProcWaitForSignal, and we may need to acquire an LWLock
1222  * while one of those is pending, it is possible that we get awakened
1223  * for a reason other than being signaled by LWLockRelease. If so,
1224  * loop back and wait again. Once we've gotten the LWLock,
1225  * re-increment the sema by the number of additional signals received,
1226  * so that the lock manager or signal manager will see the received
1227  * signal when it next waits.
1228  */
1229  LOG_LWDEBUG("LWLockAcquire", lock, "waiting");
1230 
1231 #ifdef LWLOCK_STATS
1232  lwstats->block_count++;
1233 #endif
1234 
1235  LWLockReportWaitStart(lock);
1236  TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1237 
1238  for (;;)
1239  {
1240  PGSemaphoreLock(proc->sem);
1241  if (!proc->lwWaiting)
1242  break;
1243  extraWaits++;
1244  }
1245 
1246  /* Retrying, allow LWLockRelease to release waiters again. */
1247  pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1248 
1249 #ifdef LOCK_DEBUG
1250  {
1251  /* not waiting anymore */
1252  uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1253 
1254  Assert(nwaiters < MAX_BACKENDS);
1255  }
1256 #endif
1257 
1258  TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
1259  LWLockReportWaitEnd();
1260 
1261  LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
1262 
1263  /* Now loop back and try to acquire lock again. */
1264  result = false;
1265  }
1266 
1267  TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);
1268 
1269  /* Add lock to list of locks held by this backend */
1270  held_lwlocks[num_held_lwlocks].lock = lock;
1271  held_lwlocks[num_held_lwlocks++].mode = mode;
1272 
1273  /*
1274  * Fix the process wait semaphore's count for any absorbed wakeups.
1275  */
1276  while (extraWaits-- > 0)
1277  PGSemaphoreUnlock(proc->sem);
1278 
1279  return result;
1280 }
1281 
1282 /*
1283  * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
1284  *
1285  * If the lock is not available, return false with no side-effects.
1286  *
1287  * If successful, cancel/die interrupts are held off until lock release.
1288  */
1289 bool
1290 LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
1291 {
1292  bool mustwait;
1293 
1294  AssertArg(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1295 
1296  PRINT_LWDEBUG("LWLockConditionalAcquire", lock, mode);
1297 
1298  /* Ensure we will have room to remember the lock */
1299  if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1300  elog(ERROR, "too many LWLocks taken");
1301 
1302  /*
1303  * Lock out cancel/die interrupts until we exit the code section protected
1304  * by the LWLock. This ensures that interrupts will not interfere with
1305  * manipulations of data structures in shared memory.
1306  */
1307  HOLD_INTERRUPTS();
1308 
1309  /* Check for the lock */
1310  mustwait = LWLockAttemptLock(lock, mode);
1311 
1312  if (mustwait)
1313  {
1314  /* Failed to get lock, so release interrupt holdoff */
1315  RESUME_INTERRUPTS();
1316 
1317  LOG_LWDEBUG("LWLockConditionalAcquire", lock, "failed");
1318  TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), mode);
1319  }
1320  else
1321  {
1322  /* Add lock to list of locks held by this backend */
1323  held_lwlocks[num_held_lwlocks].lock = lock;
1324  held_lwlocks[num_held_lwlocks++].mode = mode;
1325  TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
1326  }
1327  return !mustwait;
1328 }
1329 
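/*
 * Illustrative caller pattern for LWLockConditionalAcquire; the lock and
 * the work being skipped are hypothetical. This suits opportunistic work
 * that can simply be skipped or retried later if somebody else holds the
 * lock:
 *
 *	if (LWLockConditionalAcquire(my_lock, LW_EXCLUSIVE))
 *	{
 *		// got it without sleeping; do the optional maintenance work
 *		do_optional_cleanup();
 *		LWLockRelease(my_lock);
 *	}
 *	else
 *	{
 *		// lock busy: no interrupt holdoff was taken, just move on
 *	}
 */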
1330 /*
1331  * LWLockAcquireOrWait - Acquire lock, or wait until it's free
1332  *
1333  * The semantics of this function are a bit funky. If the lock is currently
1334  * free, it is acquired in the given mode, and the function returns true. If
1335  * the lock isn't immediately free, the function waits until it is released
1336  * and returns false, but does not acquire the lock.
1337  *
1338  * This is currently used for WALWriteLock: when a backend flushes the WAL,
1339  * holding WALWriteLock, it can flush the commit records of many other
1340  * backends as a side-effect. Those other backends need to wait until the
1341  * flush finishes, but don't need to acquire the lock anymore. They can just
1342  * wake up, observe that their records have already been flushed, and return.
1343  */
1344 bool
1345 LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
1346 {
1347  PGPROC *proc = MyProc;
1348  bool mustwait;
1349  int extraWaits = 0;
1350 #ifdef LWLOCK_STATS
1351  lwlock_stats *lwstats;
1352 
1353  lwstats = get_lwlock_stats_entry(lock);
1354 #endif
1355 
1356  Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1357 
1358  PRINT_LWDEBUG("LWLockAcquireOrWait", lock, mode);
1359 
1360  /* Ensure we will have room to remember the lock */
1361  if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1362  elog(ERROR, "too many LWLocks taken");
1363 
1364  /*
1365  * Lock out cancel/die interrupts until we exit the code section protected
1366  * by the LWLock. This ensures that interrupts will not interfere with
1367  * manipulations of data structures in shared memory.
1368  */
1369  HOLD_INTERRUPTS();
1370 
1371  /*
1372  * NB: We're using nearly the same twice-in-a-row lock acquisition
1373  * protocol as LWLockAcquire(). Check its comments for details.
1374  */
1375  mustwait = LWLockAttemptLock(lock, mode);
1376 
1377  if (mustwait)
1378  {
1379  LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1380 
1381  mustwait = LWLockAttemptLock(lock, mode);
1382 
1383  if (mustwait)
1384  {
1385  /*
1386  * Wait until awakened. Like in LWLockAcquire, be prepared for
1387  * bogus wakeups, because we share the semaphore with
1388  * ProcWaitForSignal.
1389  */
1390  LOG_LWDEBUG("LWLockAcquireOrWait", lock, "waiting");
1391 
1392 #ifdef LWLOCK_STATS
1393  lwstats->block_count++;
1394 #endif
1395 
1396  LWLockReportWaitStart(lock);
1397  TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1398 
1399  for (;;)
1400  {
1401  PGSemaphoreLock(proc->sem);
1402  if (!proc->lwWaiting)
1403  break;
1404  extraWaits++;
1405  }
1406 
1407 #ifdef LOCK_DEBUG
1408  {
1409  /* not waiting anymore */
1410  uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1411 
1412  Assert(nwaiters < MAX_BACKENDS);
1413  }
1414 #endif
1415  TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
1416  LWLockReportWaitEnd();
1417 
1418  LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
1419  }
1420  else
1421  {
1422  LOG_LWDEBUG("LWLockAcquireOrWait", lock, "acquired, undoing queue");
1423 
1424  /*
1425  * Got lock in the second attempt, undo queueing. We need to treat
1426  * this as having successfully acquired the lock, otherwise we'd
1427  * not necessarily wake up people we've prevented from acquiring
1428  * the lock.
1429  */
1430  LWLockDequeueSelf(lock);
1431  }
1432  }
1433 
1434  /*
1435  * Fix the process wait semaphore's count for any absorbed wakeups.
1436  */
1437  while (extraWaits-- > 0)
1438  PGSemaphoreUnlock(proc->sem);
1439 
1440  if (mustwait)
1441  {
1442  /* Failed to get lock, so release interrupt holdoff */
1443  RESUME_INTERRUPTS();
1444  LOG_LWDEBUG("LWLockAcquireOrWait", lock, "failed");
1445  TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), mode);
1446  }
1447  else
1448  {
1449  LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded");
1450  /* Add lock to list of locks held by this backend */
1451  held_lwlocks[num_held_lwlocks].lock = lock;
1452  held_lwlocks[num_held_lwlocks++].mode = mode;
1453  TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
1454  }
1455 
1456  return !mustwait;
1457 }
1458 
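/*
 * Illustrative sketch of the intended LWLockAcquireOrWait pattern, modeled
 * on the WALWriteLock usage described above; the flush-related names are
 * hypothetical:
 *
 *	if (my_target_already_done(target))
 *		return;					// fast path, nothing to wait for
 *
 *	if (LWLockAcquireOrWait(my_write_lock, LW_EXCLUSIVE))
 *	{
 *		// we got the lock: do the work on behalf of everyone waiting
 *		do_the_flush(target);
 *		LWLockRelease(my_write_lock);
 *	}
 *	else
 *	{
 *		// lock was held; we merely waited until the holder released it.
 *		// Recheck whether the holder already covered our request before
 *		// deciding to acquire the lock for real and do the work ourselves.
 *	}
 */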
1459 /*
1460  * Does the lwlock in its current state need to wait for the variable value to
1461  * change?
1462  *
1463  * If we don't need to wait, and it's because the value of the variable has
1464  * changed, store the current value in newval.
1465  *
1466  * *result is set to true if the lock was free, and false otherwise.
1467  */
1468 static bool
1469 LWLockConflictsWithVar(LWLock *lock,
1470  uint64 *valptr, uint64 oldval, uint64 *newval,
1471  bool *result)
1472 {
1473  bool mustwait;
1474  uint64 value;
1475 
1476  /*
1477  * Test first to see if the slot is free right now.
1478  *
1479  * XXX: the caller uses a spinlock before this, so we don't need a memory
1480  * barrier here as far as the current usage is concerned. But that might
1481  * not be safe in general.
1482  */
1483  mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
1484 
1485  if (!mustwait)
1486  {
1487  *result = true;
1488  return false;
1489  }
1490 
1491  *result = false;
1492 
1493  /*
1494  * Read value using the lwlock's wait list lock, as we can't generally
1495  * rely on atomic 64 bit reads/stores. TODO: On platforms with a way to
1496  * do atomic 64 bit reads/writes the spinlock should be optimized away.
1497  */
1498  LWLockWaitListLock(lock);
1499  value = *valptr;
1500  LWLockWaitListUnlock(lock);
1501 
1502  if (value != oldval)
1503  {
1504  mustwait = false;
1505  *newval = value;
1506  }
1507  else
1508  {
1509  mustwait = true;
1510  }
1511 
1512  return mustwait;
1513 }
1514 
1515 /*
1516  * LWLockWaitForVar - Wait until lock is free, or a variable is updated.
1517  *
1518  * If the lock is held and *valptr equals oldval, waits until the lock is
1519  * either freed, or the lock holder updates *valptr by calling
1520  * LWLockUpdateVar. If the lock is free on exit (immediately or after
1521  * waiting), returns true. If the lock is still held, but *valptr no longer
1522  * matches oldval, returns false and sets *newval to the current value in
1523  * *valptr.
1524  *
1525  * Note: this function ignores shared lock holders; if the lock is held
1526  * in shared mode, returns 'true'.
1527  */
1528 bool
1529 LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
1530 {
1531  PGPROC *proc = MyProc;
1532  int extraWaits = 0;
1533  bool result = false;
1534 #ifdef LWLOCK_STATS
1535  lwlock_stats *lwstats;
1536 
1537  lwstats = get_lwlock_stats_entry(lock);
1538 #endif
1539 
1540  PRINT_LWDEBUG("LWLockWaitForVar", lock, LW_WAIT_UNTIL_FREE);
1541 
1542  /*
1543  * Lock out cancel/die interrupts while we sleep on the lock. There is no
1544  * cleanup mechanism to remove us from the wait queue if we got
1545  * interrupted.
1546  */
1547  HOLD_INTERRUPTS();
1548 
1549  /*
1550  * Loop here to check the lock's status after each time we are signaled.
1551  */
1552  for (;;)
1553  {
1554  bool mustwait;
1555 
1556  mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1557  &result);
1558 
1559  if (!mustwait)
1560  break; /* the lock was free or value didn't match */
1561 
1562  /*
1563  * Add myself to wait queue. Note that this is racy, somebody else
1564  * could wakeup before we're finished queuing. NB: We're using nearly
1565  * the same twice-in-a-row lock acquisition protocol as
1566  * LWLockAcquire(). Check its comments for details. The only
1567  * difference is that we also have to check the variable's values when
1568  * checking the state of the lock.
1569  */
1570  LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1571 
1572  /*
1573  * Set RELEASE_OK flag, to make sure we get woken up as soon as the
1574  * lock is released.
1575  */
1576  pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1577 
1578  /*
1579  * We're now guaranteed to be woken up if necessary. Recheck the lock
1580  * and variables state.
1581  */
1582  mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1583  &result);
1584 
1585  /* Ok, no conflict after we queued ourselves. Undo queueing. */
1586  if (!mustwait)
1587  {
1588  LOG_LWDEBUG("LWLockWaitForVar", lock, "free, undoing queue");
1589 
1590  LWLockDequeueSelf(lock);
1591  break;
1592  }
1593 
1594  /*
1595  * Wait until awakened.
1596  *
1597  * Since we share the process wait semaphore with the regular lock
1598  * manager and ProcWaitForSignal, and we may need to acquire an LWLock
1599  * while one of those is pending, it is possible that we get awakened
1600  * for a reason other than being signaled by LWLockRelease. If so,
1601  * loop back and wait again. Once we've gotten the LWLock,
1602  * re-increment the sema by the number of additional signals received,
1603  * so that the lock manager or signal manager will see the received
1604  * signal when it next waits.
1605  */
1606  LOG_LWDEBUG("LWLockWaitForVar", lock, "waiting");
1607 
1608 #ifdef LWLOCK_STATS
1609  lwstats->block_count++;
1610 #endif
1611 
1612  LWLockReportWaitStart(lock);
1613  TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
1614 
1615  for (;;)
1616  {
1617  PGSemaphoreLock(proc->sem);
1618  if (!proc->lwWaiting)
1619  break;
1620  extraWaits++;
1621  }
1622 
1623 #ifdef LOCK_DEBUG
1624  {
1625  /* not waiting anymore */
1626  uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1627 
1628  Assert(nwaiters < MAX_BACKENDS);
1629  }
1630 #endif
1631 
1632  TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
1633  LWLockReportWaitEnd();
1634 
1635  LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
1636 
1637  /* Now loop back and check the status of the lock again. */
1638  }
1639 
1640  TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), LW_EXCLUSIVE);
1641 
1642  /*
1643  * Fix the process wait semaphore's count for any absorbed wakeups.
1644  */
1645  while (extraWaits-- > 0)
1646  PGSemaphoreUnlock(proc->sem);
1647 
1648  /*
1649  * Now okay to allow cancel/die interrupts.
1650  */
1651  RESUME_INTERRUPTS();
1652 
1653  return result;
1654 }
1655 
1656 
1657 /*
1658  * LWLockUpdateVar - Update a variable and wake up waiters atomically
1659  *
1660  * Sets *valptr to 'val', and wakes up all processes waiting for us with
1661  * LWLockWaitForVar(). Setting the value and waking up the processes happen
1662  * atomically so that any process calling LWLockWaitForVar() on the same lock
1663  * is guaranteed to see the new value, and act accordingly.
1664  *
1665  * The caller must be holding the lock in exclusive mode.
1666  */
1667 void
1668 LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val)
1669 {
1670  proclist_head wakeup;
1671  proclist_mutable_iter iter;
1672 
1673  PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
1674 
1675  proclist_init(&wakeup);
1676 
1677  LWLockWaitListLock(lock);
1678 
1679  Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
1680 
1681  /* Update the lock's value */
1682  *valptr = val;
1683 
1684  /*
1685  * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken
1686  * up. They are always in the front of the queue.
1687  */
1688  proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
1689  {
1690  PGPROC *waiter = GetPGProcByNumber(iter.cur);
1691 
1692  if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
1693  break;
1694 
1695  proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
1696  proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
1697  }
1698 
1699  /* We are done updating shared state of the lock itself. */
1700  LWLockWaitListUnlock(lock);
1701 
1702  /*
1703  * Awaken any waiters I removed from the queue.
1704  */
1705  proclist_foreach_modify(iter, &wakeup, lwWaitLink)
1706  {
1707  PGPROC *waiter = GetPGProcByNumber(iter.cur);
1708 
1709  proclist_delete(&wakeup, iter.cur, lwWaitLink);
1710  /* check comment in LWLockWakeup() about this barrier */
1711  pg_write_barrier();
1712  waiter->lwWaiting = false;
1713  PGSemaphoreUnlock(waiter->sem);
1714  }
1715 }
1716 
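/*
 * Illustrative sketch of the variable-wait protocol implemented by
 * LWLockWaitForVar/LWLockUpdateVar/LWLockReleaseClearVar; the progress
 * variable and its values are hypothetical:
 *
 *	// holder side: advertise progress without releasing the lock
 *	LWLockAcquire(my_lock, LW_EXCLUSIVE);
 *	...
 *	LWLockUpdateVar(my_lock, &shared->progress, new_position);
 *	...
 *	LWLockReleaseClearVar(my_lock, &shared->progress, 0);	// 0 = "free"
 *
 *	// waiter side: sleep until the lock is free or progress has advanced
 *	uint64	seen = last_seen_position;
 *
 *	while (!LWLockWaitForVar(my_lock, &shared->progress, seen, &seen))
 *	{
 *		// lock still held, but 'seen' now holds the updated value;
 *		// stop if it has advanced far enough, else wait again
 *		if (seen >= target_position)
 *			break;
 *	}
 */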
1717 
1718 /*
1719  * LWLockRelease - release a previously acquired lock
1720  */
1721 void
1722 LWLockRelease(LWLock *lock)
1723 {
1724  LWLockMode mode;
1725  uint32 oldstate;
1726  bool check_waiters;
1727  int i;
1728 
1729  /*
1730  * Remove lock from list of locks held. Usually, but not always, it will
1731  * be the latest-acquired lock; so search array backwards.
1732  */
1733  for (i = num_held_lwlocks; --i >= 0;)
1734  if (lock == held_lwlocks[i].lock)
1735  break;
1736 
1737  if (i < 0)
1738  elog(ERROR, "lock %s is not held", T_NAME(lock));
1739 
1740  mode = held_lwlocks[i].mode;
1741 
1742  num_held_lwlocks--;
1743  for (; i < num_held_lwlocks; i++)
1744  held_lwlocks[i] = held_lwlocks[i + 1];
1745 
1746  PRINT_LWDEBUG("LWLockRelease", lock, mode);
1747 
1748  /*
1749  * Release my hold on lock, after that it can immediately be acquired by
1750  * others, even if we still have to wakeup other waiters.
1751  */
1752  if (mode == LW_EXCLUSIVE)
1753  oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
1754  else
1755  oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
1756 
1757  /* nobody else can have that kind of lock */
1758  Assert(!(oldstate & LW_VAL_EXCLUSIVE));
1759 
1760 
1761  /*
1762  * We're still waiting for backends to get scheduled, don't wake them up
1763  * again.
1764  */
1765  if ((oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
1766  (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK) &&
1767  (oldstate & LW_LOCK_MASK) == 0)
1768  check_waiters = true;
1769  else
1770  check_waiters = false;
1771 
1772  /*
1773  * As waking up waiters requires the spinlock to be acquired, only do so
1774  * if necessary.
1775  */
1776  if (check_waiters)
1777  {
1778  /* XXX: remove before commit? */
1779  LOG_LWDEBUG("LWLockRelease", lock, "releasing waiters");
1780  LWLockWakeup(lock);
1781  }
1782 
1783  TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));
1784 
1785  /*
1786  * Now okay to allow cancel/die interrupts.
1787  */
1788  RESUME_INTERRUPTS();
1789 }
1790 
1791 /*
1792  * LWLockReleaseClearVar - release a previously acquired lock, reset variable
1793  */
1794 void
1795 LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val)
1796 {
1797  LWLockWaitListLock(lock);
1798 
1799  /*
1800  * Set the variable's value before releasing the lock; that prevents a
1801  * race condition wherein a new locker acquires the lock but hasn't yet
1802  * set the variable's value.
1803  */
1804  *valptr = val;
1805  LWLockWaitListUnlock(lock);
1806 
1807  LWLockRelease(lock);
1808 }
1809 
1810 
1811 /*
1812  * LWLockReleaseAll - release all currently-held locks
1813  *
1814  * Used to clean up after ereport(ERROR). An important difference between this
1815  * function and retail LWLockRelease calls is that InterruptHoldoffCount is
1816  * unchanged by this operation. This is necessary since InterruptHoldoffCount
1817  * has been set to an appropriate level earlier in error recovery. We could
1818  * decrement it below zero if we allow it to drop for each released lock!
1819  */
1820 void
1821 LWLockReleaseAll(void)
1822 {
1823  while (num_held_lwlocks > 0)
1824  {
1825  HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */
1826 
1827  LWLockRelease(held_lwlocks[num_held_lwlocks - 1].lock);
1828  }
1829 }
1830 
1831 
1832 /*
1833  * LWLockHeldByMe - test whether my process holds a lock in any mode
1834  *
1835  * This is meant as debug support only.
1836  */
1837 bool
1838 LWLockHeldByMe(LWLock *l)
1839 {
1840  int i;
1841 
1842  for (i = 0; i < num_held_lwlocks; i++)
1843  {
1844  if (held_lwlocks[i].lock == l)
1845  return true;
1846  }
1847  return false;
1848 }
1849 
1850 /*
1851  * LWLockHeldByMeInMode - test whether my process holds a lock in given mode
1852  *
1853  * This is meant as debug support only.
1854  */
1855 bool
1856 LWLockHeldByMeInMode(LWLock *l, LWLockMode mode)
1857 {
1858  int i;
1859 
1860  for (i = 0; i < num_held_lwlocks; i++)
1861  {
1862  if (held_lwlocks[i].lock == l && held_lwlocks[i].mode == mode)
1863  return true;
1864  }
1865  return false;
1866 }