PostgreSQL Source Code  git master
lwlock.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * lwlock.c
4  * Lightweight lock manager
5  *
6  * Lightweight locks are intended primarily to provide mutual exclusion of
7  * access to shared-memory data structures. Therefore, they offer both
8  * exclusive and shared lock modes (to support read/write and read-only
9  * access to a shared object). There are few other frammishes. User-level
10  * locking should be done with the full lock manager --- which depends on
11  * LWLocks to protect its shared state.
12  *
13  * In addition to exclusive and shared modes, lightweight locks can be used to
14  * wait until a variable changes value. The variable is initially not set
15  * when the lock is acquired with LWLockAcquire, i.e. it remains set to the
16  * value it was set to when the lock was released last, and can be updated
17  * without releasing the lock by calling LWLockUpdateVar. LWLockWaitForVar
18  * waits for the variable to be updated, or until the lock is free. When
19  * releasing the lock with LWLockReleaseClearVar() the value can be set to an
20  * appropriate value for a free lock. The meaning of the variable is up to
21  * the caller, the lightweight lock code just assigns and compares it.
22  *
23  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
24  * Portions Copyright (c) 1994, Regents of the University of California
25  *
26  * IDENTIFICATION
27  * src/backend/storage/lmgr/lwlock.c
28  *
29  * NOTES:
30  *
31  * This used to be a pretty straightforward reader-writer lock
32  * implementation, in which the internal state was protected by a
33  * spinlock. Unfortunately the overhead of taking the spinlock proved to be
34  * too high for workloads/locks that were taken in shared mode very
35  * frequently. Often we were spinning in the (obviously exclusive) spinlock,
36  * while trying to acquire a shared lock that was actually free.
37  *
38  * Thus a new implementation was devised that provides wait-free shared lock
39  * acquisition for locks that aren't exclusively locked.
40  *
41  * The basic idea is to have a single atomic variable 'lockcount' instead of
42  * the formerly separate shared and exclusive counters and to use atomic
43  * operations to acquire the lock. That's fairly easy to do for plain
44  * rw-spinlocks, but a lot harder for something like LWLocks that want to wait
45  * in the OS.
46  *
47  * For lock acquisition we use an atomic compare-and-exchange on the lockcount
48  * variable. For exclusive lock we swap in a sentinel value
49  * (LW_VAL_EXCLUSIVE), for shared locks we count the number of holders.
50  *
51  * To release the lock we use an atomic decrement. If the
52  * new value is zero (we get that atomically), we know we can/have to release
53  * waiters.
54  *
55  * Obviously it is important that the sentinel value for exclusive locks
56  * doesn't conflict with the maximum number of possible share lockers -
57  * luckily MAX_BACKENDS makes that easily possible.
58  *
59  *
60  * The attentive reader might have noticed that naively doing the above has a
61  * glaring race condition: We try to lock using the atomic operations and
62  * notice that we have to wait. Unfortunately by the time we have finished
63  * queuing, the former locker very well might have already finished its
64  * work. That's problematic because we're now stuck waiting inside the OS.
65 
66  * To mitigate those races we use a multi-phase attempt at locking:
67  * Phase 1: Try to do it atomically, if we succeed, nice
68  * Phase 2: Add ourselves to the waitqueue of the lock
69  * Phase 3: Try to grab the lock again, if we succeed, remove ourselves from
70  * the queue
71  * Phase 4: Sleep till wake-up, goto Phase 1
72  *
73  * This protects us against the problem from above as nobody can release too
74  * quick, before we're queued, since after Phase 2 we're already queued.
75  * -------------------------------------------------------------------------
76  */
77 #include "postgres.h"
78 
79 #include "miscadmin.h"
80 #include "pg_trace.h"
81 #include "pgstat.h"
82 #include "postmaster/postmaster.h"
83 #include "replication/slot.h"
84 #include "storage/ipc.h"
85 #include "storage/predicate.h"
86 #include "storage/proc.h"
87 #include "storage/proclist.h"
88 #include "storage/spin.h"
89 #include "utils/memutils.h"
90 
91 #ifdef LWLOCK_STATS
92 #include "utils/hsearch.h"
93 #endif
94 
95 
96 /* We use the ShmemLock spinlock to protect LWLockCounter */
97 extern slock_t *ShmemLock;
98 
/*
 * Bit layout of the 32-bit lock state word.
 *
 * Flag bits (high end of the word):
 *	 LW_FLAG_HAS_WAITERS - cleared once the lock's wait list is seen empty
 *	 LW_FLAG_RELEASE_OK  - set or cleared by the wakeup code depending on
 *						   whether a waiter that will retry was woken
 *	 LW_FLAG_LOCKED      - taken while the wait list is being manipulated
 */
#define LW_FLAG_HAS_WAITERS		((uint32) 1 << 30)
#define LW_FLAG_RELEASE_OK		((uint32) 1 << 29)
#define LW_FLAG_LOCKED			((uint32) 1 << 28)

/*
 * Amounts added to the state word when the lock is acquired: a sentinel bit
 * for an exclusive holder, and one count per shared holder.
 */
#define LW_VAL_EXCLUSIVE		((uint32) 1 << 24)
#define LW_VAL_SHARED			1

/* All bits that indicate the lock is held, exclusively or shared. */
#define LW_LOCK_MASK			(((uint32) 1 << 25) - 1)

/*
 * Bits holding the shared-holder count. Its capacity must be greater than
 * MAX_BACKENDS - which is 2^23-1, so we're fine.
 */
#define LW_SHARED_MASK			(((uint32) 1 << 24) - 1)
109 
110 /*
111  * This is indexed by tranche ID and stores the names of all tranches known
112  * to the current backend.
113  */
114 static const char **LWLockTrancheArray = NULL;
115 static int LWLockTranchesAllocated = 0;
116 
117 #define T_NAME(lock) \
118  (LWLockTrancheArray[(lock)->tranche])
119 
120 /*
121  * This points to the main array of LWLocks in shared memory. Backends inherit
122  * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
123  * where we have special measures to pass it down).
124  */
126 
127 /*
128  * We use this structure to keep track of locked LWLocks for release
129  * during error recovery. Normally, only a few will be held at once, but
130  * occasionally the number can be much higher; for example, the pg_buffercache
131  * extension locks all buffer partitions simultaneously.
132  */
133 #define MAX_SIMUL_LWLOCKS 200
134 
135 /* struct representing the LWLocks we're holding */
136 typedef struct LWLockHandle
137 {
140 } LWLockHandle;
141 
142 static int num_held_lwlocks = 0;
144 
145 /* struct representing the LWLock tranche request for named tranche */
147 {
148  char tranche_name[NAMEDATALEN];
151 
155 
157 
158 static bool lock_named_request_allowed = true;
159 
160 static void InitializeLWLocks(void);
161 static void RegisterLWLockTranches(void);
162 
163 static inline void LWLockReportWaitStart(LWLock *lock);
164 static inline void LWLockReportWaitEnd(void);
165 
166 #ifdef LWLOCK_STATS
167 typedef struct lwlock_stats_key
168 {
169  int tranche;
170  void *instance;
171 } lwlock_stats_key;
172 
173 typedef struct lwlock_stats
174 {
175  lwlock_stats_key key;
176  int sh_acquire_count;
177  int ex_acquire_count;
178  int block_count;
179  int dequeue_self_count;
180  int spin_delay_count;
181 } lwlock_stats;
182 
183 static HTAB *lwlock_stats_htab;
184 static lwlock_stats lwlock_stats_dummy;
185 #endif
186 
187 #ifdef LOCK_DEBUG
188 bool Trace_lwlocks = false;
189 
190 inline static void
191 PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
192 {
193  /* hide statement & context here, otherwise the log is just too verbose */
194  if (Trace_lwlocks)
195  {
197 
198  ereport(LOG,
199  (errhidestmt(true),
200  errhidecontext(true),
201  errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
202  MyProcPid,
203  where, T_NAME(lock), lock,
204  (state & LW_VAL_EXCLUSIVE) != 0,
205  state & LW_SHARED_MASK,
206  (state & LW_FLAG_HAS_WAITERS) != 0,
207  pg_atomic_read_u32(&lock->nwaiters),
208  (state & LW_FLAG_RELEASE_OK) != 0)));
209  }
210 }
211 
212 inline static void
213 LOG_LWDEBUG(const char *where, LWLock *lock, const char *msg)
214 {
215  /* hide statement & context here, otherwise the log is just too verbose */
216  if (Trace_lwlocks)
217  {
218  ereport(LOG,
219  (errhidestmt(true),
220  errhidecontext(true),
221  errmsg_internal("%s(%s %p): %s", where,
222  T_NAME(lock), lock, msg)));
223  }
224 }
225 
226 #else /* not LOCK_DEBUG */
227 #define PRINT_LWDEBUG(a,b,c) ((void)0)
228 #define LOG_LWDEBUG(a,b,c) ((void)0)
229 #endif /* LOCK_DEBUG */
230 
231 #ifdef LWLOCK_STATS
232 
233 static void init_lwlock_stats(void);
234 static void print_lwlock_stats(int code, Datum arg);
235 static lwlock_stats * get_lwlock_stats_entry(LWLock *lock);
236 
237 static void
238 init_lwlock_stats(void)
239 {
240  HASHCTL ctl;
241  static MemoryContext lwlock_stats_cxt = NULL;
242  static bool exit_registered = false;
243 
244  if (lwlock_stats_cxt != NULL)
245  MemoryContextDelete(lwlock_stats_cxt);
246 
247  /*
248  * The LWLock stats will be updated within a critical section, which
249  * requires allocating new hash entries. Allocations within a critical
250  * section are normally not allowed because running out of memory would
251  * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
252  * turned on in production, so that's an acceptable risk. The hash entries
253  * are small, so the risk of running out of memory is minimal in practice.
254  */
255  lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
256  "LWLock stats",
258  MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
259 
260  MemSet(&ctl, 0, sizeof(ctl));
261  ctl.keysize = sizeof(lwlock_stats_key);
262  ctl.entrysize = sizeof(lwlock_stats);
263  ctl.hcxt = lwlock_stats_cxt;
264  lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
266  if (!exit_registered)
267  {
268  on_shmem_exit(print_lwlock_stats, 0);
269  exit_registered = true;
270  }
271 }
272 
273 static void
274 print_lwlock_stats(int code, Datum arg)
275 {
276  HASH_SEQ_STATUS scan;
277  lwlock_stats *lwstats;
278 
279  hash_seq_init(&scan, lwlock_stats_htab);
280 
281  /* Grab an LWLock to keep different backends from mixing reports */
282  LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
283 
284  while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
285  {
286  fprintf(stderr,
287  "PID %d lwlock %s %p: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
288  MyProcPid, LWLockTrancheArray[lwstats->key.tranche],
289  lwstats->key.instance, lwstats->sh_acquire_count,
290  lwstats->ex_acquire_count, lwstats->block_count,
291  lwstats->spin_delay_count, lwstats->dequeue_self_count);
292  }
293 
294  LWLockRelease(&MainLWLockArray[0].lock);
295 }
296 
297 static lwlock_stats *
298 get_lwlock_stats_entry(LWLock *lock)
299 {
300  lwlock_stats_key key;
301  lwlock_stats *lwstats;
302  bool found;
303 
304  /*
305  * During shared memory initialization, the hash table doesn't exist yet.
306  * Stats of that phase aren't very interesting, so just collect operations
307  * on all locks in a single dummy entry.
308  */
309  if (lwlock_stats_htab == NULL)
310  return &lwlock_stats_dummy;
311 
312  /* Fetch or create the entry. */
313  MemSet(&key, 0, sizeof(key));
314  key.tranche = lock->tranche;
315  key.instance = lock;
316  lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
317  if (!found)
318  {
319  lwstats->sh_acquire_count = 0;
320  lwstats->ex_acquire_count = 0;
321  lwstats->block_count = 0;
322  lwstats->dequeue_self_count = 0;
323  lwstats->spin_delay_count = 0;
324  }
325  return lwstats;
326 }
327 #endif /* LWLOCK_STATS */
328 
329 
330 /*
331  * Compute number of LWLocks required by named tranches. These will be
332  * allocated in the main array.
333  */
334 static int
336 {
337  int numLocks = 0;
338  int i;
339 
340  for (i = 0; i < NamedLWLockTrancheRequests; i++)
341  numLocks += NamedLWLockTrancheRequestArray[i].num_lwlocks;
342 
343  return numLocks;
344 }
345 
346 /*
347  * Compute shmem space needed for LWLocks and named tranches.
348  */
349 Size
351 {
352  Size size;
353  int i;
354  int numLocks = NUM_FIXED_LWLOCKS;
355 
356  numLocks += NumLWLocksByNamedTranches();
357 
358  /* Space for the LWLock array. */
359  size = mul_size(numLocks, sizeof(LWLockPadded));
360 
361  /* Space for dynamic allocation counter, plus room for alignment. */
362  size = add_size(size, sizeof(int) + LWLOCK_PADDED_SIZE);
363 
364  /* space for named tranches. */
366 
367  /* space for name of each tranche. */
368  for (i = 0; i < NamedLWLockTrancheRequests; i++)
369  size = add_size(size, strlen(NamedLWLockTrancheRequestArray[i].tranche_name) + 1);
370 
371  /* Disallow named LWLocks' requests after startup */
373 
374  return size;
375 }
376 
377 /*
378  * Allocate shmem space for the main LWLock array and all tranches and
379  * initialize it. We also register all the LWLock tranches here.
380  */
381 void
383 {
385  "MAX_BACKENDS too big for lwlock.c");
386 
388  sizeof(LWLock) <= LWLOCK_PADDED_SIZE,
389  "Miscalculated LWLock padding");
390 
391  if (!IsUnderPostmaster)
392  {
393  Size spaceLocks = LWLockShmemSize();
394  int *LWLockCounter;
395  char *ptr;
396 
397  /* Allocate space */
398  ptr = (char *) ShmemAlloc(spaceLocks);
399 
400  /* Leave room for dynamic allocation of tranches */
401  ptr += sizeof(int);
402 
403  /* Ensure desired alignment of LWLock array */
404  ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
405 
406  MainLWLockArray = (LWLockPadded *) ptr;
407 
408  /*
409  * Initialize the dynamic-allocation counter for tranches, which is
410  * stored just before the first LWLock.
411  */
412  LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
413  *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
414 
415  /* Initialize all LWLocks */
417  }
418 
419  /* Register all LWLock tranches */
421 }
422 
423 /*
424  * Initialize LWLocks that are fixed and those belonging to named tranches.
425  */
426 static void
428 {
429  int numNamedLocks = NumLWLocksByNamedTranches();
430  int id;
431  int i;
432  int j;
434 
435  /* Initialize all individual LWLocks in main array */
436  for (id = 0, lock = MainLWLockArray; id < NUM_INDIVIDUAL_LWLOCKS; id++, lock++)
437  LWLockInitialize(&lock->lock, id);
438 
439  /* Initialize buffer mapping LWLocks in main array */
440  lock = MainLWLockArray + NUM_INDIVIDUAL_LWLOCKS;
441  for (id = 0; id < NUM_BUFFER_PARTITIONS; id++, lock++)
443 
444  /* Initialize lmgrs' LWLocks in main array */
445  lock = MainLWLockArray + NUM_INDIVIDUAL_LWLOCKS + NUM_BUFFER_PARTITIONS;
446  for (id = 0; id < NUM_LOCK_PARTITIONS; id++, lock++)
448 
449  /* Initialize predicate lmgrs' LWLocks in main array */
450  lock = MainLWLockArray + NUM_INDIVIDUAL_LWLOCKS +
451  NUM_BUFFER_PARTITIONS + NUM_LOCK_PARTITIONS;
452  for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++)
454 
455  /* Initialize named tranches. */
457  {
458  char *trancheNames;
459 
460  NamedLWLockTrancheArray = (NamedLWLockTranche *)
461  &MainLWLockArray[NUM_FIXED_LWLOCKS + numNamedLocks];
462 
463  trancheNames = (char *) NamedLWLockTrancheArray +
465  lock = &MainLWLockArray[NUM_FIXED_LWLOCKS];
466 
467  for (i = 0; i < NamedLWLockTrancheRequests; i++)
468  {
469  NamedLWLockTrancheRequest *request;
470  NamedLWLockTranche *tranche;
471  char *name;
472 
473  request = &NamedLWLockTrancheRequestArray[i];
474  tranche = &NamedLWLockTrancheArray[i];
475 
476  name = trancheNames;
477  trancheNames += strlen(request->tranche_name) + 1;
478  strcpy(name, request->tranche_name);
479  tranche->trancheId = LWLockNewTrancheId();
480  tranche->trancheName = name;
481 
482  for (j = 0; j < request->num_lwlocks; j++, lock++)
483  LWLockInitialize(&lock->lock, tranche->trancheId);
484  }
485  }
486 }
487 
488 /*
489  * Register named tranches and tranches for fixed LWLocks.
490  */
491 static void
493 {
494  int i;
495 
496  if (LWLockTrancheArray == NULL)
497  {
499  LWLockTrancheArray = (const char **)
501  LWLockTranchesAllocated * sizeof(char *));
503  }
504 
505  for (i = 0; i < NUM_INDIVIDUAL_LWLOCKS; ++i)
507 
511  "predicate_lock_manager");
513  "parallel_query_dsa");
515  "session_dsa");
517  "session_record_table");
519  "session_typmod_table");
521  "shared_tuplestore");
525  LWLockRegisterTranche(LWTRANCHE_SXACT, "serializable_xact");
526 
527  /* Register named tranches. */
528  for (i = 0; i < NamedLWLockTrancheRequests; i++)
529  LWLockRegisterTranche(NamedLWLockTrancheArray[i].trancheId,
530  NamedLWLockTrancheArray[i].trancheName);
531 }
532 
533 /*
534  * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
535  */
536 void
538 {
539 #ifdef LWLOCK_STATS
540  init_lwlock_stats();
541 #endif
542 }
543 
544 /*
545  * GetNamedLWLockTranche - returns the base address of LWLock from the
546  * specified tranche.
547  *
548  * Caller needs to retrieve the requested number of LWLocks starting from
549  * the base lock address returned by this API. This can be used for
550  * tranches that are requested by using RequestNamedLWLockTranche() API.
551  */
552 LWLockPadded *
553 GetNamedLWLockTranche(const char *tranche_name)
554 {
555  int lock_pos;
556  int i;
557 
558  /*
559  * Obtain the position of base address of LWLock belonging to requested
560  * tranche_name in MainLWLockArray. LWLocks for named tranches are placed
561  * in MainLWLockArray after fixed locks.
562  */
563  lock_pos = NUM_FIXED_LWLOCKS;
564  for (i = 0; i < NamedLWLockTrancheRequests; i++)
565  {
566  if (strcmp(NamedLWLockTrancheRequestArray[i].tranche_name,
567  tranche_name) == 0)
568  return &MainLWLockArray[lock_pos];
569 
570  lock_pos += NamedLWLockTrancheRequestArray[i].num_lwlocks;
571  }
572 
573  if (i >= NamedLWLockTrancheRequests)
574  elog(ERROR, "requested tranche is not registered");
575 
576  /* just to keep compiler quiet */
577  return NULL;
578 }
579 
580 /*
581  * Allocate a new tranche ID.
582  */
583 int
585 {
586  int result;
587  int *LWLockCounter;
588 
589  LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
591  result = (*LWLockCounter)++;
593 
594  return result;
595 }
596 
597 /*
598  * Register a tranche ID in the lookup table for the current process. This
599  * routine will save a pointer to the tranche name passed as an argument,
600  * so the name should be allocated in a backend-lifetime context
601  * (TopMemoryContext, static variable, or similar).
602  */
603 void
604 LWLockRegisterTranche(int tranche_id, const char *tranche_name)
605 {
606  Assert(LWLockTrancheArray != NULL);
607 
608  if (tranche_id >= LWLockTranchesAllocated)
609  {
611  int j = LWLockTranchesAllocated;
612 
613  while (i <= tranche_id)
614  i *= 2;
615 
616  LWLockTrancheArray = (const char **)
617  repalloc(LWLockTrancheArray, i * sizeof(char *));
619  while (j < LWLockTranchesAllocated)
620  LWLockTrancheArray[j++] = NULL;
621  }
622 
623  LWLockTrancheArray[tranche_id] = tranche_name;
624 }
625 
626 /*
627  * RequestNamedLWLockTranche
628  * Request that extra LWLocks be allocated during postmaster
629  * startup.
630  *
631  * This is only useful for extensions if called from the _PG_init hook
632  * of a library that is loaded into the postmaster via
633  * shared_preload_libraries. Once shared memory has been allocated, calls
634  * will be ignored. (We could raise an error, but it seems better to make
635  * it a no-op, so that libraries containing such calls can be reloaded if
636  * needed.)
637  */
638 void
639 RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
640 {
641  NamedLWLockTrancheRequest *request;
642 
644  return; /* too late */
645 
646  if (NamedLWLockTrancheRequestArray == NULL)
647  {
649  NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
652  * sizeof(NamedLWLockTrancheRequest));
653  }
654 
656  {
658 
659  while (i <= NamedLWLockTrancheRequests)
660  i *= 2;
661 
662  NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
663  repalloc(NamedLWLockTrancheRequestArray,
664  i * sizeof(NamedLWLockTrancheRequest));
666  }
667 
668  request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests];
669  Assert(strlen(tranche_name) + 1 < NAMEDATALEN);
670  StrNCpy(request->tranche_name, tranche_name, NAMEDATALEN);
671  request->num_lwlocks = num_lwlocks;
673 }
674 
675 /*
676  * LWLockInitialize - initialize a new lwlock; it's initially unlocked
677  */
678 void
679 LWLockInitialize(LWLock *lock, int tranche_id)
680 {
682 #ifdef LOCK_DEBUG
683  pg_atomic_init_u32(&lock->nwaiters, 0);
684 #endif
685  lock->tranche = tranche_id;
686  proclist_init(&lock->waiters);
687 }
688 
689 /*
690  * Report start of wait event for light-weight locks.
691  *
692  * This function will be used by all the light-weight lock calls that
693  * need to wait to acquire the lock. This function distinguishes wait
694  * event based on tranche and lock id.
695  */
696 static inline void
698 {
700 }
701 
702 /*
703  * Report end of wait event for light-weight locks.
704  */
705 static inline void
707 {
709 }
710 
711 /*
712  * Return an identifier for an LWLock based on the wait class and event.
713  */
714 const char *
716 {
717  Assert(classId == PG_WAIT_LWLOCK);
718 
719  /*
720  * It is quite possible that user has registered tranche in one of the
721  * backends (e.g. by allocating lwlocks in dynamic shared memory) but not
722  * all of them, so we can't assume the tranche is registered here.
723  */
724  if (eventId >= LWLockTranchesAllocated ||
725  LWLockTrancheArray[eventId] == NULL)
726  return "extension";
727 
728  return LWLockTrancheArray[eventId];
729 }
730 
731 /*
732  * Internal function that tries to atomically acquire the lwlock in the passed
733  * in mode.
734  *
735  * This function will not block waiting for a lock to become free - that's the
736  * caller's job.
737  *
738  * Returns true if the lock isn't free and we need to wait.
739  */
740 static bool
742 {
743  uint32 old_state;
744 
745  AssertArg(mode == LW_EXCLUSIVE || mode == LW_SHARED);
746 
747  /*
748  * Read once outside the loop, later iterations will get the newer value
749  * via compare & exchange.
750  */
751  old_state = pg_atomic_read_u32(&lock->state);
752 
753  /* loop until we've determined whether we could acquire the lock or not */
754  while (true)
755  {
756  uint32 desired_state;
757  bool lock_free;
758 
759  desired_state = old_state;
760 
761  if (mode == LW_EXCLUSIVE)
762  {
763  lock_free = (old_state & LW_LOCK_MASK) == 0;
764  if (lock_free)
765  desired_state += LW_VAL_EXCLUSIVE;
766  }
767  else
768  {
769  lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
770  if (lock_free)
771  desired_state += LW_VAL_SHARED;
772  }
773 
774  /*
775  * Attempt to swap in the state we are expecting. If we didn't see
776  * lock to be free, that's just the old value. If we saw it as free,
777  * we'll attempt to mark it acquired. The reason that we always swap
778  * in the value is that this doubles as a memory barrier. We could try
779  * to be smarter and only swap in values if we saw the lock as free,
780  * but benchmarks haven't shown it as beneficial so far.
781  *
782  * Retry if the value changed since we last looked at it.
783  */
785  &old_state, desired_state))
786  {
787  if (lock_free)
788  {
789  /* Great! Got the lock. */
790 #ifdef LOCK_DEBUG
791  if (mode == LW_EXCLUSIVE)
792  lock->owner = MyProc;
793 #endif
794  return false;
795  }
796  else
797  return true; /* somebody else has the lock */
798  }
799  }
800  pg_unreachable();
801 }
802 
803 /*
804  * Lock the LWLock's wait list against concurrent activity.
805  *
806  * NB: even though the wait list is locked, non-conflicting lock operations
807  * may still happen concurrently.
808  *
809  * Time spent holding mutex should be short!
810  */
811 static void
813 {
814  uint32 old_state;
815 #ifdef LWLOCK_STATS
816  lwlock_stats *lwstats;
817  uint32 delays = 0;
818 
819  lwstats = get_lwlock_stats_entry(lock);
820 #endif
821 
822  while (true)
823  {
824  /* always try once to acquire lock directly */
825  old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
826  if (!(old_state & LW_FLAG_LOCKED))
827  break; /* got lock */
828 
829  /* and then spin without atomic operations until lock is released */
830  {
831  SpinDelayStatus delayStatus;
832 
833  init_local_spin_delay(&delayStatus);
834 
835  while (old_state & LW_FLAG_LOCKED)
836  {
837  perform_spin_delay(&delayStatus);
838  old_state = pg_atomic_read_u32(&lock->state);
839  }
840 #ifdef LWLOCK_STATS
841  delays += delayStatus.delays;
842 #endif
843  finish_spin_delay(&delayStatus);
844  }
845 
846  /*
847  * Retry. The lock might obviously already be re-acquired by the time
848  * we're attempting to get it again.
849  */
850  }
851 
852 #ifdef LWLOCK_STATS
853  lwstats->spin_delay_count += delays;
854 #endif
855 }
856 
857 /*
858  * Unlock the LWLock's wait list.
859  *
860  * Note that it can be more efficient to manipulate flags and release the
861  * locks in a single atomic operation.
862  */
863 static void
865 {
867 
868  old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
869 
870  Assert(old_state & LW_FLAG_LOCKED);
871 }
872 
873 /*
874  * Wakeup all the lockers that currently have a chance to acquire the lock.
875  */
876 static void
878 {
879  bool new_release_ok;
880  bool wokeup_somebody = false;
881  proclist_head wakeup;
883 
884  proclist_init(&wakeup);
885 
886  new_release_ok = true;
887 
888  /* lock wait list while collecting backends to wake up */
889  LWLockWaitListLock(lock);
890 
891  proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
892  {
893  PGPROC *waiter = GetPGProcByNumber(iter.cur);
894 
895  if (wokeup_somebody && waiter->lwWaitMode == LW_EXCLUSIVE)
896  continue;
897 
898  proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
899  proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
900 
901  if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
902  {
903  /*
904  * Prevent additional wakeups until retryer gets to run. Backends
905  * that are just waiting for the lock to become free don't retry
906  * automatically.
907  */
908  new_release_ok = false;
909 
910  /*
911  * Don't wakeup (further) exclusive locks.
912  */
913  wokeup_somebody = true;
914  }
915 
916  /*
917  * Once we've woken up an exclusive lock, there's no point in waking
918  * up anybody else.
919  */
920  if (waiter->lwWaitMode == LW_EXCLUSIVE)
921  break;
922  }
923 
925 
926  /* unset required flags, and release lock, in one fell swoop */
927  {
928  uint32 old_state;
929  uint32 desired_state;
930 
931  old_state = pg_atomic_read_u32(&lock->state);
932  while (true)
933  {
934  desired_state = old_state;
935 
936  /* compute desired flags */
937 
938  if (new_release_ok)
939  desired_state |= LW_FLAG_RELEASE_OK;
940  else
941  desired_state &= ~LW_FLAG_RELEASE_OK;
942 
943  if (proclist_is_empty(&wakeup))
944  desired_state &= ~LW_FLAG_HAS_WAITERS;
945 
946  desired_state &= ~LW_FLAG_LOCKED; /* release lock */
947 
948  if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
949  desired_state))
950  break;
951  }
952  }
953 
954  /* Awaken any waiters I removed from the queue. */
955  proclist_foreach_modify(iter, &wakeup, lwWaitLink)
956  {
957  PGPROC *waiter = GetPGProcByNumber(iter.cur);
958 
959  LOG_LWDEBUG("LWLockRelease", lock, "release waiter");
960  proclist_delete(&wakeup, iter.cur, lwWaitLink);
961 
962  /*
963  * Guarantee that lwWaiting being unset only becomes visible once the
964  * unlink from the link has completed. Otherwise the target backend
965  * could be woken up for other reason and enqueue for a new lock - if
966  * that happens before the list unlink happens, the list would end up
967  * being corrupted.
968  *
969  * The barrier pairs with the LWLockWaitListLock() when enqueuing for
970  * another lock.
971  */
973  waiter->lwWaiting = false;
974  PGSemaphoreUnlock(waiter->sem);
975  }
976 }
977 
978 /*
979  * Add ourselves to the end of the queue.
980  *
981  * NB: Mode can be LW_WAIT_UNTIL_FREE here!
982  */
983 static void
985 {
986  /*
987  * If we don't have a PGPROC structure, there's no way to wait. This
988  * should never occur, since MyProc should only be null during shared
989  * memory initialization.
990  */
991  if (MyProc == NULL)
992  elog(PANIC, "cannot wait without a PGPROC structure");
993 
994  if (MyProc->lwWaiting)
995  elog(PANIC, "queueing for lock while waiting on another one");
996 
997  LWLockWaitListLock(lock);
998 
999  /* setting the flag is protected by the spinlock */
1001 
1002  MyProc->lwWaiting = true;
1003  MyProc->lwWaitMode = mode;
1004 
1005  /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
1006  if (mode == LW_WAIT_UNTIL_FREE)
1007  proclist_push_head(&lock->waiters, MyProc->pgprocno, lwWaitLink);
1008  else
1009  proclist_push_tail(&lock->waiters, MyProc->pgprocno, lwWaitLink);
1010 
1011  /* Can release the mutex now */
1012  LWLockWaitListUnlock(lock);
1013 
1014 #ifdef LOCK_DEBUG
1015  pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
1016 #endif
1017 
1018 }
1019 
1020 /*
1021  * Remove ourselves from the waitlist.
1022  *
1023  * This is used if we queued ourselves because we thought we needed to sleep
1024  * but, after further checking, we discovered that we don't actually need to
1025  * do so.
1026  */
1027 static void
1029 {
1030  bool found = false;
1031  proclist_mutable_iter iter;
1032 
1033 #ifdef LWLOCK_STATS
1034  lwlock_stats *lwstats;
1035 
1036  lwstats = get_lwlock_stats_entry(lock);
1037 
1038  lwstats->dequeue_self_count++;
1039 #endif
1040 
1041  LWLockWaitListLock(lock);
1042 
1043  /*
1044  * Can't just remove ourselves from the list, but we need to iterate over
1045  * all entries as somebody else could have dequeued us.
1046  */
1047  proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
1048  {
1049  if (iter.cur == MyProc->pgprocno)
1050  {
1051  found = true;
1052  proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
1053  break;
1054  }
1055  }
1056 
1057  if (proclist_is_empty(&lock->waiters) &&
1058  (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
1059  {
1061  }
1062 
1063  /* XXX: combine with fetch_and above? */
1064  LWLockWaitListUnlock(lock);
1065 
1066  /* clear waiting state again, nice for debugging */
1067  if (found)
1068  MyProc->lwWaiting = false;
1069  else
1070  {
1071  int extraWaits = 0;
1072 
1073  /*
1074  * Somebody else dequeued us and has or will wake us up. Deal with the
1075  * superfluous absorption of a wakeup.
1076  */
1077 
1078  /*
1079  * Reset RELEASE_OK flag if somebody woke us before we removed
1080  * ourselves - they'll have set it to false.
1081  */
1083 
1084  /*
1085  * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
1086  * get reset at some inconvenient point later. Most of the time this
1087  * will immediately return.
1088  */
1089  for (;;)
1090  {
1092  if (!MyProc->lwWaiting)
1093  break;
1094  extraWaits++;
1095  }
1096 
1097  /*
1098  * Fix the process wait semaphore's count for any absorbed wakeups.
1099  */
1100  while (extraWaits-- > 0)
1102  }
1103 
1104 #ifdef LOCK_DEBUG
1105  {
1106  /* not waiting anymore */
1107  uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1108 
1109  Assert(nwaiters < MAX_BACKENDS);
1110  }
1111 #endif
1112 }
1113 
1114 /*
1115  * LWLockAcquire - acquire a lightweight lock in the specified mode
1116  *
1117  * If the lock is not available, sleep until it is. Returns true if the lock
1118  * was available immediately, false if we had to sleep.
1119  *
1120  * Side effect: cancel/die interrupts are held off until lock release.
1121  */
1122 bool
1124 {
1125  PGPROC *proc = MyProc;
1126  bool result = true;
1127  int extraWaits = 0;
1128 #ifdef LWLOCK_STATS
1129  lwlock_stats *lwstats;
1130 
1131  lwstats = get_lwlock_stats_entry(lock);
1132 #endif
1133 
1134  AssertArg(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1135 
1136  PRINT_LWDEBUG("LWLockAcquire", lock, mode);
1137 
1138 #ifdef LWLOCK_STATS
1139  /* Count lock acquisition attempts */
1140  if (mode == LW_EXCLUSIVE)
1141  lwstats->ex_acquire_count++;
1142  else
1143  lwstats->sh_acquire_count++;
1144 #endif /* LWLOCK_STATS */
1145 
1146  /*
1147  * We can't wait if we haven't got a PGPROC. This should only occur
1148  * during bootstrap or shared memory initialization. Put an Assert here
1149  * to catch unsafe coding practices.
1150  */
1151  Assert(!(proc == NULL && IsUnderPostmaster));
1152 
1153  /* Ensure we will have room to remember the lock */
1155  elog(ERROR, "too many LWLocks taken");
1156 
1157  /*
1158  * Lock out cancel/die interrupts until we exit the code section protected
1159  * by the LWLock. This ensures that interrupts will not interfere with
1160  * manipulations of data structures in shared memory.
1161  */
1162  HOLD_INTERRUPTS();
1163 
1164  /*
1165  * Loop here to try to acquire lock after each time we are signaled by
1166  * LWLockRelease.
1167  *
1168  * NOTE: it might seem better to have LWLockRelease actually grant us the
1169  * lock, rather than retrying and possibly having to go back to sleep. But
1170  * in practice that is no good because it means a process swap for every
1171  * lock acquisition when two or more processes are contending for the same
1172  * lock. Since LWLocks are normally used to protect not-very-long
1173  * sections of computation, a process needs to be able to acquire and
1174  * release the same lock many times during a single CPU time slice, even
1175  * in the presence of contention. The efficiency of being able to do that
1176  * outweighs the inefficiency of sometimes wasting a process dispatch
1177  * cycle because the lock is not free when a released waiter finally gets
1178  * to run. See pgsql-hackers archives for 29-Dec-01.
1179  */
1180  for (;;)
1181  {
1182  bool mustwait;
1183 
1184  /*
1185  * Try to grab the lock the first time, we're not in the waitqueue
1186  * yet/anymore.
1187  */
1188  mustwait = LWLockAttemptLock(lock, mode);
1189 
1190  if (!mustwait)
1191  {
1192  LOG_LWDEBUG("LWLockAcquire", lock, "immediately acquired lock");
1193  break; /* got the lock */
1194  }
1195 
1196  /*
1197  * Ok, at this point we couldn't grab the lock on the first try. We
1198  * cannot simply queue ourselves to the end of the list and wait to be
1199  * woken up because by now the lock could long have been released.
1200  * Instead add us to the queue and try to grab the lock again. If we
1201  * succeed we need to revert the queuing and be happy, otherwise we
1202  * recheck the lock. If we still couldn't grab it, we know that the
1203  * other locker will see our queue entries when releasing since they
1204  * existed before we checked for the lock.
1205  */
1206 
1207  /* add to the queue */
1208  LWLockQueueSelf(lock, mode);
1209 
1210  /* we're now guaranteed to be woken up if necessary */
1211  mustwait = LWLockAttemptLock(lock, mode);
1212 
1213  /* ok, grabbed the lock the second time round, need to undo queueing */
1214  if (!mustwait)
1215  {
1216  LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
1217 
1218  LWLockDequeueSelf(lock);
1219  break;
1220  }
1221 
1222  /*
1223  * Wait until awakened.
1224  *
1225  * Since we share the process wait semaphore with the regular lock
1226  * manager and ProcWaitForSignal, and we may need to acquire an LWLock
1227  * while one of those is pending, it is possible that we get awakened
1228  * for a reason other than being signaled by LWLockRelease. If so,
1229  * loop back and wait again. Once we've gotten the LWLock,
1230  * re-increment the sema by the number of additional signals received,
1231  * so that the lock manager or signal manager will see the received
1232  * signal when it next waits.
1233  */
1234  LOG_LWDEBUG("LWLockAcquire", lock, "waiting");
1235 
1236 #ifdef LWLOCK_STATS
1237  lwstats->block_count++;
1238 #endif
1239 
1240  LWLockReportWaitStart(lock);
1241  TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1242 
1243  for (;;)
1244  {
1245  PGSemaphoreLock(proc->sem);
1246  if (!proc->lwWaiting)
1247  break;
1248  extraWaits++;
1249  }
1250 
1251  /* Retrying, allow LWLockRelease to release waiters again. */
1253 
1254 #ifdef LOCK_DEBUG
1255  {
1256  /* not waiting anymore */
1257  uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1258 
1259  Assert(nwaiters < MAX_BACKENDS);
1260  }
1261 #endif
1262 
1263  TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
1265 
1266  LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
1267 
1268  /* Now loop back and try to acquire lock again. */
1269  result = false;
1270  }
1271 
1272  TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);
1273 
1274  /* Add lock to list of locks held by this backend */
1275  held_lwlocks[num_held_lwlocks].lock = lock;
1276  held_lwlocks[num_held_lwlocks++].mode = mode;
1277 
1278  /*
1279  * Fix the process wait semaphore's count for any absorbed wakeups.
1280  */
1281  while (extraWaits-- > 0)
1282  PGSemaphoreUnlock(proc->sem);
1283 
1284  return result;
1285 }
1286 
1287 /*
1288  * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
1289  *
1290  * If the lock is not available, return false with no side-effects.
1291  *
1292  * If successful, cancel/die interrupts are held off until lock release.
1293  */
1294 bool
1296 {
1297  bool mustwait;
1298 
1299  AssertArg(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1300 
1301  PRINT_LWDEBUG("LWLockConditionalAcquire", lock, mode);
1302 
1303  /* Ensure we will have room to remember the lock */
1305  elog(ERROR, "too many LWLocks taken");
1306 
1307  /*
1308  * Lock out cancel/die interrupts until we exit the code section protected
1309  * by the LWLock. This ensures that interrupts will not interfere with
1310  * manipulations of data structures in shared memory.
1311  */
1312  HOLD_INTERRUPTS();
1313 
1314  /* Check for the lock */
1315  mustwait = LWLockAttemptLock(lock, mode);
1316 
1317  if (mustwait)
1318  {
1319  /* Failed to get lock, so release interrupt holdoff */
1321 
1322  LOG_LWDEBUG("LWLockConditionalAcquire", lock, "failed");
1323  TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), mode);
1324  }
1325  else
1326  {
1327  /* Add lock to list of locks held by this backend */
1328  held_lwlocks[num_held_lwlocks].lock = lock;
1329  held_lwlocks[num_held_lwlocks++].mode = mode;
1330  TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
1331  }
1332  return !mustwait;
1333 }
1334 
1335 /*
1336  * LWLockAcquireOrWait - Acquire lock, or wait until it's free
1337  *
1338  * The semantics of this function are a bit funky. If the lock is currently
1339  * free, it is acquired in the given mode, and the function returns true. If
1340  * the lock isn't immediately free, the function waits until it is released
1341  * and returns false, but does not acquire the lock.
1342  *
1343  * This is currently used for WALWriteLock: when a backend flushes the WAL,
1344  * holding WALWriteLock, it can flush the commit records of many other
1345  * backends as a side-effect. Those other backends need to wait until the
1346  * flush finishes, but don't need to acquire the lock anymore. They can just
1347  * wake up, observe that their records have already been flushed, and return.
1348  */
1349 bool
1351 {
1352  PGPROC *proc = MyProc;
1353  bool mustwait;
1354  int extraWaits = 0;
1355 #ifdef LWLOCK_STATS
1356  lwlock_stats *lwstats;
1357 
1358  lwstats = get_lwlock_stats_entry(lock);
1359 #endif
1360 
1361  Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1362 
1363  PRINT_LWDEBUG("LWLockAcquireOrWait", lock, mode);
1364 
1365  /* Ensure we will have room to remember the lock */
1367  elog(ERROR, "too many LWLocks taken");
1368 
1369  /*
1370  * Lock out cancel/die interrupts until we exit the code section protected
1371  * by the LWLock. This ensures that interrupts will not interfere with
1372  * manipulations of data structures in shared memory.
1373  */
1374  HOLD_INTERRUPTS();
1375 
1376  /*
1377  * NB: We're using nearly the same twice-in-a-row lock acquisition
1378  * protocol as LWLockAcquire(). Check its comments for details.
1379  */
1380  mustwait = LWLockAttemptLock(lock, mode);
1381 
1382  if (mustwait)
1383  {
1385 
1386  mustwait = LWLockAttemptLock(lock, mode);
1387 
1388  if (mustwait)
1389  {
1390  /*
1391  * Wait until awakened. Like in LWLockAcquire, be prepared for
1392  * bogus wakeups, because we share the semaphore with
1393  * ProcWaitForSignal.
1394  */
1395  LOG_LWDEBUG("LWLockAcquireOrWait", lock, "waiting");
1396 
1397 #ifdef LWLOCK_STATS
1398  lwstats->block_count++;
1399 #endif
1400 
1401  LWLockReportWaitStart(lock);
1402  TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1403 
1404  for (;;)
1405  {
1406  PGSemaphoreLock(proc->sem);
1407  if (!proc->lwWaiting)
1408  break;
1409  extraWaits++;
1410  }
1411 
1412 #ifdef LOCK_DEBUG
1413  {
1414  /* not waiting anymore */
1415  uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1416 
1417  Assert(nwaiters < MAX_BACKENDS);
1418  }
1419 #endif
1420  TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
1422 
1423  LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
1424  }
1425  else
1426  {
1427  LOG_LWDEBUG("LWLockAcquireOrWait", lock, "acquired, undoing queue");
1428 
1429  /*
1430  * Got lock in the second attempt, undo queueing. We need to treat
1431  * this as having successfully acquired the lock, otherwise we'd
1432  * not necessarily wake up people we've prevented from acquiring
1433  * the lock.
1434  */
1435  LWLockDequeueSelf(lock);
1436  }
1437  }
1438 
1439  /*
1440  * Fix the process wait semaphore's count for any absorbed wakeups.
1441  */
1442  while (extraWaits-- > 0)
1443  PGSemaphoreUnlock(proc->sem);
1444 
1445  if (mustwait)
1446  {
1447  /* Failed to get lock, so release interrupt holdoff */
1449  LOG_LWDEBUG("LWLockAcquireOrWait", lock, "failed");
1450  TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), mode);
1451  }
1452  else
1453  {
1454  LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded");
1455  /* Add lock to list of locks held by this backend */
1456  held_lwlocks[num_held_lwlocks].lock = lock;
1457  held_lwlocks[num_held_lwlocks++].mode = mode;
1458  TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
1459  }
1460 
1461  return !mustwait;
1462 }
1463 
1464 /*
1465  * Does the lwlock in its current state need to wait for the variable value to
1466  * change?
1467  *
1468  * If we don't need to wait, and it's because the value of the variable has
1469  * changed, store the current value in newval.
1470  *
1471  * *result is set to true if the lock was free, and false otherwise.
1472  */
1473 static bool
1475  uint64 *valptr, uint64 oldval, uint64 *newval,
1476  bool *result)
1477 {
1478  bool mustwait;
1479  uint64 value;
1480 
1481  /*
1482  * Test first to see if it the slot is free right now.
1483  *
1484  * XXX: the caller uses a spinlock before this, so we don't need a memory
1485  * barrier here as far as the current usage is concerned. But that might
1486  * not be safe in general.
1487  */
1488  mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
1489 
1490  if (!mustwait)
1491  {
1492  *result = true;
1493  return false;
1494  }
1495 
1496  *result = false;
1497 
1498  /*
1499  * Read value using the lwlock's wait list lock, as we can't generally
1500  * rely on atomic 64 bit reads/stores. TODO: On platforms with a way to
1501  * do atomic 64 bit reads/writes the spinlock should be optimized away.
1502  */
1503  LWLockWaitListLock(lock);
1504  value = *valptr;
1505  LWLockWaitListUnlock(lock);
1506 
1507  if (value != oldval)
1508  {
1509  mustwait = false;
1510  *newval = value;
1511  }
1512  else
1513  {
1514  mustwait = true;
1515  }
1516 
1517  return mustwait;
1518 }
1519 
1520 /*
1521  * LWLockWaitForVar - Wait until lock is free, or a variable is updated.
1522  *
1523  * If the lock is held and *valptr equals oldval, waits until the lock is
1524  * either freed, or the lock holder updates *valptr by calling
1525  * LWLockUpdateVar. If the lock is free on exit (immediately or after
1526  * waiting), returns true. If the lock is still held, but *valptr no longer
1527  * matches oldval, returns false and sets *newval to the current value in
1528  * *valptr.
1529  *
1530  * Note: this function ignores shared lock holders; if the lock is held
1531  * in shared mode, returns 'true'.
1532  */
1533 bool
1534 LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
1535 {
1536  PGPROC *proc = MyProc;
1537  int extraWaits = 0;
1538  bool result = false;
1539 #ifdef LWLOCK_STATS
1540  lwlock_stats *lwstats;
1541 
1542  lwstats = get_lwlock_stats_entry(lock);
1543 #endif
1544 
1545  PRINT_LWDEBUG("LWLockWaitForVar", lock, LW_WAIT_UNTIL_FREE);
1546 
1547  /*
1548  * Lock out cancel/die interrupts while we sleep on the lock. There is no
1549  * cleanup mechanism to remove us from the wait queue if we got
1550  * interrupted.
1551  */
1552  HOLD_INTERRUPTS();
1553 
1554  /*
1555  * Loop here to check the lock's status after each time we are signaled.
1556  */
1557  for (;;)
1558  {
1559  bool mustwait;
1560 
1561  mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1562  &result);
1563 
1564  if (!mustwait)
1565  break; /* the lock was free or value didn't match */
1566 
1567  /*
1568  * Add myself to wait queue. Note that this is racy, somebody else
1569  * could wakeup before we're finished queuing. NB: We're using nearly
1570  * the same twice-in-a-row lock acquisition protocol as
1571  * LWLockAcquire(). Check its comments for details. The only
1572  * difference is that we also have to check the variable's values when
1573  * checking the state of the lock.
1574  */
1576 
1577  /*
1578  * Set RELEASE_OK flag, to make sure we get woken up as soon as the
1579  * lock is released.
1580  */
1582 
1583  /*
1584  * We're now guaranteed to be woken up if necessary. Recheck the lock
1585  * and variables state.
1586  */
1587  mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1588  &result);
1589 
1590  /* Ok, no conflict after we queued ourselves. Undo queueing. */
1591  if (!mustwait)
1592  {
1593  LOG_LWDEBUG("LWLockWaitForVar", lock, "free, undoing queue");
1594 
1595  LWLockDequeueSelf(lock);
1596  break;
1597  }
1598 
1599  /*
1600  * Wait until awakened.
1601  *
1602  * Since we share the process wait semaphore with the regular lock
1603  * manager and ProcWaitForSignal, and we may need to acquire an LWLock
1604  * while one of those is pending, it is possible that we get awakened
1605  * for a reason other than being signaled by LWLockRelease. If so,
1606  * loop back and wait again. Once we've gotten the LWLock,
1607  * re-increment the sema by the number of additional signals received,
1608  * so that the lock manager or signal manager will see the received
1609  * signal when it next waits.
1610  */
1611  LOG_LWDEBUG("LWLockWaitForVar", lock, "waiting");
1612 
1613 #ifdef LWLOCK_STATS
1614  lwstats->block_count++;
1615 #endif
1616 
1617  LWLockReportWaitStart(lock);
1618  TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
1619 
1620  for (;;)
1621  {
1622  PGSemaphoreLock(proc->sem);
1623  if (!proc->lwWaiting)
1624  break;
1625  extraWaits++;
1626  }
1627 
1628 #ifdef LOCK_DEBUG
1629  {
1630  /* not waiting anymore */
1631  uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1632 
1633  Assert(nwaiters < MAX_BACKENDS);
1634  }
1635 #endif
1636 
1637  TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
1639 
1640  LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
1641 
1642  /* Now loop back and check the status of the lock again. */
1643  }
1644 
1645  TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), LW_EXCLUSIVE);
1646 
1647  /*
1648  * Fix the process wait semaphore's count for any absorbed wakeups.
1649  */
1650  while (extraWaits-- > 0)
1651  PGSemaphoreUnlock(proc->sem);
1652 
1653  /*
1654  * Now okay to allow cancel/die interrupts.
1655  */
1657 
1658  return result;
1659 }
1660 
1661 
1662 /*
1663  * LWLockUpdateVar - Update a variable and wake up waiters atomically
1664  *
1665  * Sets *valptr to 'val', and wakes up all processes waiting for us with
1666  * LWLockWaitForVar(). Setting the value and waking up the processes happen
1667  * atomically so that any process calling LWLockWaitForVar() on the same lock
1668  * is guaranteed to see the new value, and act accordingly.
1669  *
1670  * The caller must be holding the lock in exclusive mode.
1671  */
1672 void
1673 LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val)
1674 {
1675  proclist_head wakeup;
1676  proclist_mutable_iter iter;
1677 
1678  PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
1679 
1680  proclist_init(&wakeup);
1681 
1682  LWLockWaitListLock(lock);
1683 
1685 
1686  /* Update the lock's value */
1687  *valptr = val;
1688 
1689  /*
1690  * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken
1691  * up. They are always in the front of the queue.
1692  */
1693  proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
1694  {
1695  PGPROC *waiter = GetPGProcByNumber(iter.cur);
1696 
1697  if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
1698  break;
1699 
1700  proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
1701  proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
1702  }
1703 
1704  /* We are done updating shared state of the lock itself. */
1705  LWLockWaitListUnlock(lock);
1706 
1707  /*
1708  * Awaken any waiters I removed from the queue.
1709  */
1710  proclist_foreach_modify(iter, &wakeup, lwWaitLink)
1711  {
1712  PGPROC *waiter = GetPGProcByNumber(iter.cur);
1713 
1714  proclist_delete(&wakeup, iter.cur, lwWaitLink);
1715  /* check comment in LWLockWakeup() about this barrier */
1716  pg_write_barrier();
1717  waiter->lwWaiting = false;
1718  PGSemaphoreUnlock(waiter->sem);
1719  }
1720 }
1721 
1722 
1723 /*
1724  * LWLockRelease - release a previously acquired lock
1725  */
1726 void
1728 {
1729  LWLockMode mode;
1730  uint32 oldstate;
1731  bool check_waiters;
1732  int i;
1733 
1734  /*
1735  * Remove lock from list of locks held. Usually, but not always, it will
1736  * be the latest-acquired lock; so search array backwards.
1737  */
1738  for (i = num_held_lwlocks; --i >= 0;)
1739  if (lock == held_lwlocks[i].lock)
1740  break;
1741 
1742  if (i < 0)
1743  elog(ERROR, "lock %s is not held", T_NAME(lock));
1744 
1745  mode = held_lwlocks[i].mode;
1746 
1747  num_held_lwlocks--;
1748  for (; i < num_held_lwlocks; i++)
1749  held_lwlocks[i] = held_lwlocks[i + 1];
1750 
1751  PRINT_LWDEBUG("LWLockRelease", lock, mode);
1752 
1753  /*
1754  * Release my hold on lock, after that it can immediately be acquired by
1755  * others, even if we still have to wakeup other waiters.
1756  */
1757  if (mode == LW_EXCLUSIVE)
1758  oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
1759  else
1760  oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
1761 
1762  /* nobody else can have that kind of lock */
1763  Assert(!(oldstate & LW_VAL_EXCLUSIVE));
1764 
1765 
1766  /*
1767  * We're still waiting for backends to get scheduled, don't wake them up
1768  * again.
1769  */
1770  if ((oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
1772  (oldstate & LW_LOCK_MASK) == 0)
1773  check_waiters = true;
1774  else
1775  check_waiters = false;
1776 
1777  /*
1778  * As waking up waiters requires the spinlock to be acquired, only do so
1779  * if necessary.
1780  */
1781  if (check_waiters)
1782  {
1783  /* XXX: remove before commit? */
1784  LOG_LWDEBUG("LWLockRelease", lock, "releasing waiters");
1785  LWLockWakeup(lock);
1786  }
1787 
1788  TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));
1789 
1790  /*
1791  * Now okay to allow cancel/die interrupts.
1792  */
1794 }
1795 
1796 /*
1797  * LWLockReleaseClearVar - release a previously acquired lock, reset variable
1798  */
1799 void
1800 LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val)
1801 {
1802  LWLockWaitListLock(lock);
1803 
1804  /*
1805  * Set the variable's value before releasing the lock, that prevents race
1806  * a race condition wherein a new locker acquires the lock, but hasn't yet
1807  * set the variables value.
1808  */
1809  *valptr = val;
1810  LWLockWaitListUnlock(lock);
1811 
1812  LWLockRelease(lock);
1813 }
1814 
1815 
1816 /*
1817  * LWLockReleaseAll - release all currently-held locks
1818  *
1819  * Used to clean up after ereport(ERROR). An important difference between this
1820  * function and retail LWLockRelease calls is that InterruptHoldoffCount is
1821  * unchanged by this operation. This is necessary since InterruptHoldoffCount
1822  * has been set to an appropriate level earlier in error recovery. We could
1823  * decrement it below zero if we allow it to drop for each released lock!
1824  */
1825 void
1827 {
1828  while (num_held_lwlocks > 0)
1829  {
1830  HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */
1831 
1832  LWLockRelease(held_lwlocks[num_held_lwlocks - 1].lock);
1833  }
1834 }
1835 
1836 
1837 /*
1838  * LWLockHeldByMe - test whether my process holds a lock in any mode
1839  *
1840  * This is meant as debug support only.
1841  */
1842 bool
1844 {
1845  int i;
1846 
1847  for (i = 0; i < num_held_lwlocks; i++)
1848  {
1849  if (held_lwlocks[i].lock == l)
1850  return true;
1851  }
1852  return false;
1853 }
1854 
1855 /*
1856  * LWLockHeldByMeInMode - test whether my process holds a lock in given mode
1857  *
1858  * This is meant as debug support only.
1859  */
1860 bool
1862 {
1863  int i;
1864 
1865  for (i = 0; i < num_held_lwlocks; i++)
1866  {
1867  if (held_lwlocks[i].lock == l && held_lwlocks[i].mode == mode)
1868  return true;
1869  }
1870  return false;
1871 }
#define T_NAME(lock)
Definition: lwlock.c:117
int slock_t
Definition: s_lock.h:934
#define init_local_spin_delay(status)
Definition: s_lock.h:1043
static uint32 pg_atomic_fetch_sub_u32(volatile pg_atomic_uint32 *ptr, int32 sub_)
Definition: atomics.h:343
Definition: lwlock.h:32
#define pg_unreachable()
Definition: c.h:191
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:211
bool LWLockHeldByMeInMode(LWLock *l, LWLockMode mode)
Definition: lwlock.c:1861
#define AllocSetContextCreate
Definition: memutils.h:170
#define GetPGProcByNumber(n)
Definition: proc.h:282
static bool LWLockAttemptLock(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:741
int MyProcPid
Definition: globals.c:40
#define LW_FLAG_LOCKED
Definition: lwlock.c:101
int LWLockNewTrancheId(void)
Definition: lwlock.c:584
LWLockMode
Definition: lwlock.h:132
#define HASH_CONTEXT
Definition: hsearch.h:93
void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
Definition: mcxt.c:411
#define HASH_ELEM
Definition: hsearch.h:87
void PGSemaphoreUnlock(PGSemaphore sema)
Definition: posix_sema.c:340
MemoryContext hcxt
Definition: hsearch.h:78
void RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
Definition: lwlock.c:639
static bool lock_named_request_allowed
Definition: lwlock.c:158
bool LWLockHeldByMe(LWLock *l)
Definition: lwlock.c:1843
#define LW_FLAG_HAS_WAITERS
Definition: lwlock.c:99
static bool pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 *expected, uint32 newval)
Definition: atomics.h:311
PGPROC * MyProc
Definition: proc.c:67
proclist_head waiters
Definition: lwlock.h:36
static uint32 pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 sub_)
Definition: atomics.h:401
void LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val)
Definition: lwlock.c:1673
char tranche_name[NAMEDATALEN]
Definition: lwlock.c:148
struct LWLockHandle LWLockHandle
bool lwWaiting
Definition: proc.h:130
#define MAX_SIMUL_LWLOCKS
Definition: lwlock.c:133
Size entrysize
Definition: hsearch.h:73
int errhidestmt(bool hide_stmt)
Definition: elog.c:1142
#define MemSet(start, val, len)
Definition: c.h:971
static void LWLockWaitListUnlock(LWLock *lock)
Definition: lwlock.c:864
#define proclist_foreach_modify(iter, lhead, link_member)
Definition: proclist.h:206
#define LW_SHARED_MASK
Definition: lwlock.c:108
uint8 lwWaitMode
Definition: proc.h:131
void * ShmemAlloc(Size size)
Definition: shmem.c:161
pg_atomic_uint32 state
Definition: lwlock.h:35
#define LW_LOCK_MASK
Definition: lwlock.c:106
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:907
#define LOG
Definition: elog.h:26
#define fprintf
Definition: port.h:197
NamedLWLockTranche * NamedLWLockTrancheArray
Definition: lwlock.c:156
#define PANIC
Definition: elog.h:53
#define proclist_delete(list, procno, link_member)
Definition: proclist.h:187
Size LWLockShmemSize(void)
Definition: lwlock.c:350
#define LW_VAL_SHARED
Definition: lwlock.c:104
void LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val)
Definition: lwlock.c:1800
void LWLockRegisterTranche(int tranche_id, const char *tranche_name)
Definition: lwlock.c:604
static void RegisterLWLockTranches(void)
Definition: lwlock.c:492
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1727
#define RESUME_INTERRUPTS()
Definition: miscadmin.h:118
static int LWLockTranchesAllocated
Definition: lwlock.c:115
#define NAMEDATALEN
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:852
#define SpinLockAcquire(lock)
Definition: spin.h:62
Definition: dynahash.c:209
#define NUM_FIXED_LWLOCKS
Definition: lwlock.h:129
unsigned short uint16
Definition: c.h:366
static LWLockHandle held_lwlocks[MAX_SIMUL_LWLOCKS]
Definition: lwlock.c:143
#define LWLOCK_MINIMAL_SIZE
Definition: lwlock.h:74
#define ERROR
Definition: elog.h:43
#define MAX_BACKENDS
Definition: postmaster.h:75
LWLockMode mode
Definition: lwlock.c:139
#define proclist_push_head(list, procno, link_member)
Definition: proclist.h:189
int NamedLWLockTrancheRequests
Definition: lwlock.c:154
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:192
void finish_spin_delay(SpinDelayStatus *status)
Definition: s_lock.c:174
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:361
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1295
bool IsUnderPostmaster
Definition: globals.c:109
bool LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
Definition: lwlock.c:1534
static const char ** LWLockTrancheArray
Definition: lwlock.c:114
static void LWLockReportWaitEnd(void)
Definition: lwlock.c:706
static void LWLockWakeup(LWLock *lock)
Definition: lwlock.c:877
static uint32 pg_atomic_fetch_and_u32(volatile pg_atomic_uint32 *ptr, uint32 and_)
Definition: atomics.h:358
#define NUM_BUFFER_PARTITIONS
Definition: lwlock.h:113
unsigned int uint32
Definition: c.h:367
static void InitializeLWLocks(void)
Definition: lwlock.c:427
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1386
uint16 tranche
Definition: lwlock.h:34
static void LWLockDequeueSelf(LWLock *lock)
Definition: lwlock.c:1028
#define AssertArg(condition)
Definition: c.h:740
char * trancheName
Definition: lwlock.h:97
MemoryContext TopMemoryContext
Definition: mcxt.c:44
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:679
static void LWLockReportWaitStart(LWLock *lock)
Definition: lwlock.c:697
#define SpinLockRelease(lock)
Definition: spin.h:64
#define HASH_BLOBS
Definition: hsearch.h:88
slock_t * ShmemLock
Definition: shmem.c:88
Size mul_size(Size s1, Size s2)
Definition: shmem.c:515
HTAB * hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
Definition: dynahash.c:317
uintptr_t Datum
Definition: postgres.h:367
Size add_size(Size s1, Size s2)
Definition: shmem.c:498
static int NamedLWLockTrancheRequestsAllocated
Definition: lwlock.c:153
Size keysize
Definition: hsearch.h:72
bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1350
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:839
static struct @143 value
#define ereport(elevel,...)
Definition: elog.h:144
static void LWLockQueueSelf(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:984
static int num_held_lwlocks
Definition: lwlock.c:142
LWLock lock
Definition: lwlock.h:79
static uint32 pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
Definition: atomics.h:328
int errmsg_internal(const char *fmt,...)
Definition: elog.c:911
void CreateLWLocks(void)
Definition: lwlock.c:382
#define Assert(condition)
Definition: c.h:738
#define StrNCpy(dst, src, len)
Definition: c.h:944
LWLockPadded * GetNamedLWLockTranche(const char *tranche_name)
Definition: lwlock.c:553
Definition: regguts.h:298
size_t Size
Definition: c.h:466
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1362
LWLock * lock
Definition: lwlock.c:138
#define newval
#define PRINT_LWDEBUG(a, b, c)
Definition: lwlock.c:227
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1123
static bool proclist_is_empty(proclist_head *list)
Definition: proclist.h:38
#define LW_FLAG_RELEASE_OK
Definition: lwlock.c:100
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1390
static int NumLWLocksByNamedTranches(void)
Definition: lwlock.c:335
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1069
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1380
#define proclist_push_tail(list, procno, link_member)
Definition: proclist.h:191
void PGSemaphoreLock(PGSemaphore sema)
Definition: posix_sema.c:320
const char * name
Definition: encode.c:521
NamedLWLockTrancheRequest * NamedLWLockTrancheRequestArray
Definition: lwlock.c:152
static bool LWLockConflictsWithVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval, bool *result)
Definition: lwlock.c:1474
static void LWLockWaitListLock(LWLock *lock)
Definition: lwlock.c:812
struct NamedLWLockTrancheRequest NamedLWLockTrancheRequest
int pgprocno
Definition: proc.h:110
#define LW_VAL_EXCLUSIVE
Definition: lwlock.c:103
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:796
const char * GetLWLockIdentifier(uint32 classId, uint16 eventId)
Definition: lwlock.c:715
#define HOLD_INTERRUPTS()
Definition: miscadmin.h:116
#define elog(elevel,...)
Definition: elog.h:214
int i
static void proclist_init(proclist_head *list)
Definition: proclist.h:29
#define pg_write_barrier()
Definition: atomics.h:159
void * arg
static uint32 pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_)
Definition: atomics.h:372
static void pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:223
void LWLockReleaseAll(void)
Definition: lwlock.c:1826
#define LWLOCK_PADDED_SIZE
Definition: lwlock.h:73
PGSemaphore sem
Definition: proc.h:101
LWLockPadded * MainLWLockArray
Definition: lwlock.c:125
#define PG_WAIT_LWLOCK
Definition: pgstat.h:785
Definition: proc.h:95
long val
Definition: informix.c:664
const char *const MainLWLockNames[]
int errhidecontext(bool hide_ctx)
Definition: elog.c:1161
void InitLWLockAccess(void)
Definition: lwlock.c:537
#define NUM_LOCK_PARTITIONS
Definition: lwlock.h:117
void perform_spin_delay(SpinDelayStatus *status)
Definition: s_lock.c:124
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:121
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:241
#define NUM_PREDICATELOCK_PARTITIONS
Definition: lwlock.h:121
#define LOG_LWDEBUG(a, b, c)
Definition: lwlock.c:228