PostgreSQL Source Code (git master)
deadlock.c
1/*-------------------------------------------------------------------------
2 *
3 * deadlock.c
4 * POSTGRES deadlock detection code
5 *
6 * See src/backend/storage/lmgr/README for a description of the deadlock
7 * detection and resolution algorithms.
8 *
9 *
10 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
12 *
13 *
14 * IDENTIFICATION
15 * src/backend/storage/lmgr/deadlock.c
16 *
17 * Interface:
18 *
19 * DeadLockCheck()
20 * DeadLockReport()
21 * RememberSimpleDeadLock()
22 * InitDeadLockChecking()
23 *
24 *-------------------------------------------------------------------------
25 */
26#include "postgres.h"
27
28#include "miscadmin.h"
29#include "pg_trace.h"
30#include "pgstat.h"
31#include "storage/lmgr.h"
32#include "storage/proc.h"
33#include "storage/procnumber.h"
34#include "utils/memutils.h"
35
36
37/*
38 * One edge in the waits-for graph.
39 *
40 * waiter and blocker may or may not be members of a lock group, but if either
41 * is, it will be the leader rather than any other member of the lock group.
42 * The group leaders act as representatives of the whole group even though
43 * those particular processes need not be waiting at all. There will be at
44 * least one member of the waiter's lock group on the wait queue for the given
45 * lock, maybe more.
46 */
47typedef struct
48{
49 PGPROC *waiter; /* the leader of the waiting lock group */
50 PGPROC *blocker; /* the leader of the group it is waiting for */
51 LOCK *lock; /* the lock being waited for */
52 int pred; /* workspace for TopoSort */
53 int link; /* workspace for TopoSort */
54} EDGE;
55
56/* One potential reordering of a lock's wait queue */
57typedef struct
58{
59 LOCK *lock; /* the lock whose wait queue is described */
60 PGPROC **procs; /* array of PGPROC *'s in new wait order */
61 int nProcs;
62} WAIT_ORDER;
63
64/*
65 * Information saved about each edge in a detected deadlock cycle. This
66 * is used to print a diagnostic message upon failure.
67 *
68 * Note: because we want to examine this info after releasing the lock
69 * manager's partition locks, we can't just store LOCK and PGPROC pointers;
70 * we must extract out all the info we want to be able to print.
71 */
72typedef struct
73{
74 LOCKTAG locktag; /* ID of awaited lock object */
75 LOCKMODE lockmode; /* type of lock we're waiting for */
76 int pid; /* PID of blocked backend */
77} DEADLOCK_INFO;
78
79
80static bool DeadLockCheckRecurse(PGPROC *proc);
81static int TestConfiguration(PGPROC *startProc);
82static bool FindLockCycle(PGPROC *checkProc,
83 EDGE *softEdges, int *nSoftEdges);
84static bool FindLockCycleRecurse(PGPROC *checkProc, int depth,
85 EDGE *softEdges, int *nSoftEdges);
86static bool FindLockCycleRecurseMember(PGPROC *checkProc,
87 PGPROC *checkProcLeader,
88 int depth, EDGE *softEdges, int *nSoftEdges);
89static bool ExpandConstraints(EDGE *constraints, int nConstraints);
90static bool TopoSort(LOCK *lock, EDGE *constraints, int nConstraints,
91 PGPROC **ordering);
92
93#ifdef DEBUG_DEADLOCK
94static void PrintLockQueue(LOCK *lock, const char *info);
95#endif
96
97
98/*
99 * Working space for the deadlock detector
100 */
101
102/* Workspace for FindLockCycle */
103static PGPROC **visitedProcs; /* Array of visited procs */
104static int nVisitedProcs;
105
106/* Workspace for TopoSort */
107static PGPROC **topoProcs; /* Array of not-yet-output procs */
108static int *beforeConstraints; /* Counts of remaining before-constraints */
109static int *afterConstraints; /* List head for after-constraints */
110
111/* Output area for ExpandConstraints */
112static WAIT_ORDER *waitOrders; /* Array of proposed queue rearrangements */
113static int nWaitOrders;
114static PGPROC **waitOrderProcs; /* Space for waitOrders queue contents */
115
116/* Current list of constraints being considered */
117static EDGE *curConstraints;
118static int nCurConstraints;
119static int maxCurConstraints;
120
121/* Storage space for results from FindLockCycle */
122static EDGE *possibleConstraints;
123static int nPossibleConstraints;
124static int maxPossibleConstraints;
125static DEADLOCK_INFO *deadlockDetails;
126static int nDeadlockDetails;
127
128/* PGPROC pointer of any blocking autovacuum worker found */
129static PGPROC *blocking_autovacuum_proc = NULL;
130
131
132/*
133 * InitDeadLockChecking -- initialize deadlock checker during backend startup
134 *
135 * This does per-backend initialization of the deadlock checker; primarily,
136 * allocation of working memory for DeadLockCheck. We do this per-backend
137 * since there's no percentage in making the kernel do copy-on-write
138 * inheritance of workspace from the postmaster. We want to allocate the
139 * space at startup because (a) the deadlock checker might be invoked when
140 * there's no free memory left, and (b) the checker is normally run inside a
141 * signal handler, which is a very dangerous place to invoke palloc from.
142 */
143void
144InitDeadLockChecking(void)
145{
146 MemoryContext oldcxt;
147
148 /* Make sure allocations are permanent */
149 oldcxt = MemoryContextSwitchTo(TopMemoryContext);
150
151 /*
152 * FindLockCycle needs at most MaxBackends entries in visitedProcs[] and
153 * deadlockDetails[].
154 */
155 visitedProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *));
156 deadlockDetails = (DEADLOCK_INFO *) palloc(MaxBackends * sizeof(DEADLOCK_INFO));
157
158 /*
159 * TopoSort needs to consider at most MaxBackends wait-queue entries, and
160 * it needn't run concurrently with FindLockCycle.
161 */
162 topoProcs = visitedProcs; /* re-use this space */
163 beforeConstraints = (int *) palloc(MaxBackends * sizeof(int));
164 afterConstraints = (int *) palloc(MaxBackends * sizeof(int));
165
166 /*
167 * We need to consider rearranging at most MaxBackends/2 wait queues
168 * (since it takes at least two waiters in a queue to create a soft edge),
169 * and the expanded form of the wait queues can't involve more than
170 * MaxBackends total waiters.
171 */
172 waitOrders = (WAIT_ORDER *)
173 palloc((MaxBackends / 2) * sizeof(WAIT_ORDER));
174 waitOrderProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *));
175
176 /*
177 * Allow at most MaxBackends distinct constraints in a configuration. (Is
178 * this enough? In practice it seems it should be, but I don't quite see
179 * how to prove it. If we run out, we might fail to find a workable wait
180 * queue rearrangement even though one exists.) NOTE that this number
181 * limits the maximum recursion depth of DeadLockCheckRecurse. Making it
182 * really big might potentially allow a stack-overflow problem.
183 */
184 maxCurConstraints = MaxBackends;
185 curConstraints = (EDGE *) palloc(maxCurConstraints * sizeof(EDGE));
186
187 /*
188 * Allow up to 3*MaxBackends constraints to be saved without having to
189 * re-run TestConfiguration. (This is probably more than enough, but we
190 * can survive if we run low on space by doing excess runs of
191 * TestConfiguration to re-compute constraint lists each time needed.) The
192 * last MaxBackends entries in possibleConstraints[] are reserved as
193 * output workspace for FindLockCycle.
194 */
196 "MAX_BACKENDS_BITS too big for * 4");
197 maxPossibleConstraints = MaxBackends * 4;
198 possibleConstraints =
199 (EDGE *) palloc(maxPossibleConstraints * sizeof(EDGE));
200
201 MemoryContextSwitchTo(oldcxt);
202}
203
204/*
205 * DeadLockCheck -- Checks for deadlocks for a given process
206 *
207 * This code looks for deadlocks involving the given process. If any
208 * are found, it tries to rearrange lock wait queues to resolve the
209 * deadlock. If resolution is impossible, return DS_HARD_DEADLOCK ---
210 * the caller is then expected to abort the given proc's transaction.
211 *
212 * Caller must already have locked all partitions of the lock tables.
213 *
214 * On failure, deadlock details are recorded in deadlockDetails[] for
215 * subsequent printing by DeadLockReport(). That activity is separate
216 * because (a) we don't want to do it while holding all those LWLocks,
217 * and (b) we are typically invoked inside a signal handler.
218 */
219DeadLockState
220DeadLockCheck(PGPROC *proc)
221{
222 /* Initialize to "no constraints" */
223 nCurConstraints = 0;
224 nPossibleConstraints = 0;
225 nWaitOrders = 0;
226
227 /* Initialize to not blocked by an autovacuum worker */
228 blocking_autovacuum_proc = NULL;
229
230 /* Search for deadlocks and possible fixes */
231 if (DeadLockCheckRecurse(proc))
232 {
233 /*
234 * Call FindLockCycle one more time, to record the correct
235 * deadlockDetails[] for the basic state with no rearrangements.
236 */
237 int nSoftEdges;
238
239 TRACE_POSTGRESQL_DEADLOCK_FOUND();
240
241 nWaitOrders = 0;
242 if (!FindLockCycle(proc, possibleConstraints, &nSoftEdges))
243 elog(FATAL, "deadlock seems to have disappeared");
244
245 return DS_HARD_DEADLOCK; /* cannot find a non-deadlocked state */
246 }
247
248 /* Apply any needed rearrangements of wait queues */
249 for (int i = 0; i < nWaitOrders; i++)
250 {
251 LOCK *lock = waitOrders[i].lock;
252 PGPROC **procs = waitOrders[i].procs;
253 int nProcs = waitOrders[i].nProcs;
254 dclist_head *waitQueue = &lock->waitProcs;
255
256 Assert(nProcs == dclist_count(waitQueue));
257
258#ifdef DEBUG_DEADLOCK
259 PrintLockQueue(lock, "DeadLockCheck:");
260#endif
261
262 /* Reset the queue and re-add procs in the desired order */
263 dclist_init(waitQueue);
264 for (int j = 0; j < nProcs; j++)
265 dclist_push_tail(waitQueue, &procs[j]->links);
266
267#ifdef DEBUG_DEADLOCK
268 PrintLockQueue(lock, "rearranged to:");
269#endif
270
271 /* See if any waiters for the lock can be woken up now */
272 ProcLockWakeup(GetLocksMethodTable(lock), lock);
273 }
274
275 /* Return code tells caller if we had to escape a deadlock or not */
276 if (nWaitOrders > 0)
277 return DS_SOFT_DEADLOCK;
278 else if (blocking_autovacuum_proc != NULL)
279 return DS_BLOCKED_BY_AUTOVACUUM;
280 else
281 return DS_NO_DEADLOCK;
282}
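
A caller of DeadLockCheck() is expected to act on the DeadLockState it returns. The fragment below is a hypothetical dispatch written purely for illustration; the real caller, CheckDeadLock() in proc.c, does more than this, and the helper name here is invented:

#include "postgres.h"

#include "storage/lock.h"
#include "storage/proc.h"

/* Hypothetical helper, for illustration only */
static void
handle_deadlock_state(PGPROC *proc)
{
	switch (DeadLockCheck(proc))
	{
		case DS_NO_DEADLOCK:
		case DS_SOFT_DEADLOCK:
			/* nothing found, or queues were quietly rearranged; keep waiting */
			break;

		case DS_BLOCKED_BY_AUTOVACUUM:
			/* the caller may cancel the worker returned by
			 * GetBlockingAutoVacuumPgproc() */
			break;

		case DS_HARD_DEADLOCK:
			/* give up the wait; DeadLockReport() runs later, once the
			 * lock-table LWLocks have been released */
			break;

		default:
			/* DS_NOT_YET_CHECKED is never returned by DeadLockCheck() */
			break;
	}
}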
283
284/*
285 * Return the PGPROC of the autovacuum that's blocking a process.
286 *
287 * We reset the saved pointer as soon as we pass it back.
288 */
289PGPROC *
290GetBlockingAutoVacuumPgproc(void)
291{
292 PGPROC *ptr;
293
294 ptr = blocking_autovacuum_proc;
295 blocking_autovacuum_proc = NULL;
296
297 return ptr;
298}
299
300/*
301 * DeadLockCheckRecurse -- recursively search for valid orderings
302 *
303 * curConstraints[] holds the current set of constraints being considered
304 * by an outer level of recursion. Add to this each possible solution
305 * constraint for any cycle detected at this level.
306 *
307 * Returns true if no solution exists. Returns false if a deadlock-free
308 * state is attainable, in which case waitOrders[] shows the required
309 * rearrangements of lock wait queues (if any).
310 */
311static bool
312DeadLockCheckRecurse(PGPROC *proc)
313{
314 int nEdges;
315 int oldPossibleConstraints;
316 bool savedList;
317 int i;
318
319 nEdges = TestConfiguration(proc);
320 if (nEdges < 0)
321 return true; /* hard deadlock --- no solution */
322 if (nEdges == 0)
323 return false; /* good configuration found */
324 if (nCurConstraints >= maxCurConstraints)
325 return true; /* out of room for active constraints? */
326 oldPossibleConstraints = nPossibleConstraints;
327 if (nPossibleConstraints + nEdges + MaxBackends <= maxPossibleConstraints)
328 {
329 /* We can save the edge list in possibleConstraints[] */
330 nPossibleConstraints += nEdges;
331 savedList = true;
332 }
333 else
334 {
335 /* Not room; will need to regenerate the edges on-the-fly */
336 savedList = false;
337 }
338
339 /*
340 * Try each available soft edge as an addition to the configuration.
341 */
342 for (i = 0; i < nEdges; i++)
343 {
344 if (!savedList && i > 0)
345 {
346 /* Regenerate the list of possible added constraints */
347 if (nEdges != TestConfiguration(proc))
348 elog(FATAL, "inconsistent results during deadlock check");
349 }
350 curConstraints[nCurConstraints] =
351 possibleConstraints[oldPossibleConstraints + i];
352 nCurConstraints++;
353 if (!DeadLockCheckRecurse(proc))
354 return false; /* found a valid solution! */
355 /* give up on that added constraint, try again */
356 nCurConstraints--;
357 }
358 nPossibleConstraints = oldPossibleConstraints;
359 return true; /* no solution found */
360}
361
362
363/*--------------------
364 * Test a configuration (current set of constraints) for validity.
365 *
366 * Returns:
367 * 0: the configuration is good (no deadlocks)
368 * -1: the configuration has a hard deadlock or is not self-consistent
369 * >0: the configuration has one or more soft deadlocks
370 *
371 * In the soft-deadlock case, one of the soft cycles is chosen arbitrarily
372 * and a list of its soft edges is returned beginning at
373 * possibleConstraints+nPossibleConstraints. The return value is the
374 * number of soft edges.
375 *--------------------
376 */
377static int
378TestConfiguration(PGPROC *startProc)
379{
380 int softFound = 0;
381 EDGE *softEdges = possibleConstraints + nPossibleConstraints;
382 int nSoftEdges;
383 int i;
384
385 /*
386 * Make sure we have room for FindLockCycle's output.
387 */
388 if (nPossibleConstraints + MaxBackends > maxPossibleConstraints)
389 return -1;
390
391 /*
392 * Expand current constraint set into wait orderings. Fail if the
393 * constraint set is not self-consistent.
394 */
395 if (!ExpandConstraints(curConstraints, nCurConstraints))
396 return -1;
397
398 /*
399 * Check for cycles involving startProc or any of the procs mentioned in
400 * constraints. We check startProc last because if it has a soft cycle
401 * still to be dealt with, we want to deal with that first.
402 */
403 for (i = 0; i < nCurConstraints; i++)
404 {
405 if (FindLockCycle(curConstraints[i].waiter, softEdges, &nSoftEdges))
406 {
407 if (nSoftEdges == 0)
408 return -1; /* hard deadlock detected */
409 softFound = nSoftEdges;
410 }
411 if (FindLockCycle(curConstraints[i].blocker, softEdges, &nSoftEdges))
412 {
413 if (nSoftEdges == 0)
414 return -1; /* hard deadlock detected */
415 softFound = nSoftEdges;
416 }
417 }
418 if (FindLockCycle(startProc, softEdges, &nSoftEdges))
419 {
420 if (nSoftEdges == 0)
421 return -1; /* hard deadlock detected */
422 softFound = nSoftEdges;
423 }
424 return softFound;
425}
426
427
428/*
429 * FindLockCycle -- basic check for deadlock cycles
430 *
431 * Scan outward from the given proc to see if there is a cycle in the
432 * waits-for graph that includes this proc. Return true if a cycle
433 * is found, else false. If a cycle is found, we return a list of
434 * the "soft edges", if any, included in the cycle. These edges could
435 * potentially be eliminated by rearranging wait queues. We also fill
436 * deadlockDetails[] with information about the detected cycle; this info
437 * is not used by the deadlock algorithm itself, only to print a useful
438 * message after failing.
439 *
440 * Since we need to be able to check hypothetical configurations that would
441 * exist after wait queue rearrangement, the routine pays attention to the
442 * table of hypothetical queue orders in waitOrders[]. These orders will
443 * be believed in preference to the actual ordering seen in the locktable.
444 */
445static bool
446FindLockCycle(PGPROC *checkProc,
447 EDGE *softEdges, /* output argument */
448 int *nSoftEdges) /* output argument */
449{
450 nVisitedProcs = 0;
451 nDeadlockDetails = 0;
452 *nSoftEdges = 0;
453 return FindLockCycleRecurse(checkProc, 0, softEdges, nSoftEdges);
454}
455
456static bool
457FindLockCycleRecurse(PGPROC *checkProc,
458 int depth,
459 EDGE *softEdges, /* output argument */
460 int *nSoftEdges) /* output argument */
461{
462 int i;
463 dlist_iter iter;
464
465 /*
466 * If this process is a lock group member, check the leader instead. (Note
467 * that we might be the leader, in which case this is a no-op.)
468 */
469 if (checkProc->lockGroupLeader != NULL)
470 checkProc = checkProc->lockGroupLeader;
471
472 /*
473 * Have we already seen this proc?
474 */
475 for (i = 0; i < nVisitedProcs; i++)
476 {
477 if (visitedProcs[i] == checkProc)
478 {
479 /* If we return to starting point, we have a deadlock cycle */
480 if (i == 0)
481 {
482 /*
483 * record total length of cycle --- outer levels will now fill
484 * deadlockDetails[]
485 */
486 Assert(depth <= MaxBackends);
487 nDeadlockDetails = depth;
488
489 return true;
490 }
491
492 /*
493 * Otherwise, we have a cycle but it does not include the start
494 * point, so say "no deadlock".
495 */
496 return false;
497 }
498 }
499 /* Mark proc as seen */
500 Assert(nVisitedProcs < MaxBackends);
501 visitedProcs[nVisitedProcs++] = checkProc;
502
503 /*
504 * If the process is waiting, there is an outgoing waits-for edge to each
505 * process that blocks it.
506 */
507 if (checkProc->links.next != NULL && checkProc->waitLock != NULL &&
508 FindLockCycleRecurseMember(checkProc, checkProc, depth, softEdges,
509 nSoftEdges))
510 return true;
511
512 /*
513 * If the process is not waiting, there could still be outgoing waits-for
514 * edges if it is part of a lock group, because other members of the lock
515 * group might be waiting even though this process is not. (Given lock
516 * groups {A1, A2} and {B1, B2}, if A1 waits for B1 and B2 waits for A2,
517 * that is a deadlock even if neither B1 nor A2 is waiting for anything.)
518 */
519 dlist_foreach(iter, &checkProc->lockGroupMembers)
520 {
521 PGPROC *memberProc;
522
523 memberProc = dlist_container(PGPROC, lockGroupLink, iter.cur);
524
525 if (memberProc->links.next != NULL && memberProc->waitLock != NULL &&
526 memberProc != checkProc &&
527 FindLockCycleRecurseMember(memberProc, checkProc, depth, softEdges,
528 nSoftEdges))
529 return true;
530 }
531
532 return false;
533}
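
The visitedProcs[] bookkeeping above amounts to a depth-first search that reports a cycle only when the walk closes back on the starting process; returning to any other already-visited node is ignored. A toy, self-contained rendering of just that rule, over a plain boolean waits-for matrix (the graph, names, and sizes below are invented for illustration):

#include <stdio.h>

#define NPROCS 3

/* waits_for[a][b] != 0 means "a waits for b"; 0 -> 1 -> 2 -> 0 is a cycle */
static int waits_for[NPROCS][NPROCS] = {
	{0, 1, 0},
	{0, 0, 1},
	{1, 0, 0},
};
static int visited[NPROCS];
static int nvisited;

static int
find_cycle(int node)
{
	/* already seen: only a return to the start node counts as a deadlock */
	for (int i = 0; i < nvisited; i++)
		if (visited[i] == node)
			return (i == 0);
	visited[nvisited++] = node;

	/* follow every outgoing waits-for edge */
	for (int next = 0; next < NPROCS; next++)
		if (waits_for[node][next] && find_cycle(next))
			return 1;
	return 0;
}

int
main(void)
{
	nvisited = 0;				/* like nVisitedProcs, reset per check */
	printf("deadlock: %d\n", find_cycle(0));	/* prints "deadlock: 1" */
	return 0;
}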
534
535static bool
536FindLockCycleRecurseMember(PGPROC *checkProc,
537 PGPROC *checkProcLeader,
538 int depth,
539 EDGE *softEdges, /* output argument */
540 int *nSoftEdges) /* output argument */
541{
542 PGPROC *proc;
543 LOCK *lock = checkProc->waitLock;
544 dlist_iter proclock_iter;
545 LockMethod lockMethodTable;
546 int conflictMask;
547 int i;
548 int numLockModes,
549 lm;
550
551 /*
552 * The relation extension lock can never participate in an actual deadlock
553 * cycle. See Assert in LockAcquireExtended. So, there is no advantage
554 * in checking wait edges from it.
555 */
556 if (LOCK_LOCKTAG(*lock) == LOCKTAG_RELATION_EXTEND)
557 return false;
558
559 lockMethodTable = GetLocksMethodTable(lock);
560 numLockModes = lockMethodTable->numLockModes;
561 conflictMask = lockMethodTable->conflictTab[checkProc->waitLockMode];
562
563 /*
564 * Scan for procs that already hold conflicting locks. These are "hard"
565 * edges in the waits-for graph.
566 */
567 dlist_foreach(proclock_iter, &lock->procLocks)
568 {
569 PROCLOCK *proclock = dlist_container(PROCLOCK, lockLink, proclock_iter.cur);
570 PGPROC *leader;
571
572 proc = proclock->tag.myProc;
573 leader = proc->lockGroupLeader == NULL ? proc : proc->lockGroupLeader;
574
575 /* A proc never blocks itself or any other lock group member */
576 if (leader != checkProcLeader)
577 {
578 for (lm = 1; lm <= numLockModes; lm++)
579 {
580 if ((proclock->holdMask & LOCKBIT_ON(lm)) &&
581 (conflictMask & LOCKBIT_ON(lm)))
582 {
583 /* This proc hard-blocks checkProc */
584 if (FindLockCycleRecurse(proc, depth + 1,
585 softEdges, nSoftEdges))
586 {
587 /* fill deadlockDetails[] */
588 DEADLOCK_INFO *info = &deadlockDetails[depth];
589
590 info->locktag = lock->tag;
591 info->lockmode = checkProc->waitLockMode;
592 info->pid = checkProc->pid;
593
594 return true;
595 }
596
597 /*
598 * No deadlock here, but see if this proc is an autovacuum
599 * that is directly hard-blocking our own proc. If so,
600 * report it so that the caller can send a cancel signal
601 * to it, if appropriate. If there's more than one such
602 * proc, it's indeterminate which one will be reported.
603 *
604 * We don't touch autovacuums that are indirectly blocking
605 * us; it's up to the direct blockee to take action. This
606 * rule simplifies understanding the behavior and ensures
607 * that an autovacuum won't be canceled with less than
608 * deadlock_timeout grace period.
609 *
610 * Note we read statusFlags without any locking. This is
611 * OK only for checking the PROC_IS_AUTOVACUUM flag,
612 * because that flag is set at process start and never
613 * reset. There is logic elsewhere to avoid canceling an
614 * autovacuum that is working to prevent XID wraparound
615 * problems (which needs to read a different statusFlags
616 * bit), but we don't do that here to avoid grabbing
617 * ProcArrayLock.
618 */
619 if (checkProc == MyProc &&
620 proc->statusFlags & PROC_IS_AUTOVACUUM)
621 blocking_autovacuum_proc = proc;
622
623 /* We're done looking at this proclock */
624 break;
625 }
626 }
627 }
628 }
629
630 /*
631 * Scan for procs that are ahead of this one in the lock's wait queue.
632 * Those that have conflicting requests soft-block this one. This must be
633 * done after the hard-block search, since if another proc both hard- and
634 * soft-blocks this one, we want to call it a hard edge.
635 *
636 * If there is a proposed re-ordering of the lock's wait order, use that
637 * rather than the current wait order.
638 */
639 for (i = 0; i < nWaitOrders; i++)
640 {
641 if (waitOrders[i].lock == lock)
642 break;
643 }
644
645 if (i < nWaitOrders)
646 {
647 /* Use the given hypothetical wait queue order */
648 PGPROC **procs = waitOrders[i].procs;
649 int queue_size = waitOrders[i].nProcs;
650
651 for (i = 0; i < queue_size; i++)
652 {
653 PGPROC *leader;
654
655 proc = procs[i];
656 leader = proc->lockGroupLeader == NULL ? proc :
657 proc->lockGroupLeader;
658
659 /*
660 * TopoSort will always return an ordering with group members
661 * adjacent to each other in the wait queue (see comments
662 * therein). So, as soon as we reach a process in the same lock
663 * group as checkProc, we know we've found all the conflicts that
664 * precede any member of the lock group led by checkProcLeader.
665 */
666 if (leader == checkProcLeader)
667 break;
668
669 /* Is there a conflict with this guy's request? */
670 if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0)
671 {
672 /* This proc soft-blocks checkProc */
673 if (FindLockCycleRecurse(proc, depth + 1,
674 softEdges, nSoftEdges))
675 {
676 /* fill deadlockDetails[] */
677 DEADLOCK_INFO *info = &deadlockDetails[depth];
678
679 info->locktag = lock->tag;
680 info->lockmode = checkProc->waitLockMode;
681 info->pid = checkProc->pid;
682
683 /*
684 * Add this edge to the list of soft edges in the cycle
685 */
686 Assert(*nSoftEdges < MaxBackends);
687 softEdges[*nSoftEdges].waiter = checkProcLeader;
688 softEdges[*nSoftEdges].blocker = leader;
689 softEdges[*nSoftEdges].lock = lock;
690 (*nSoftEdges)++;
691 return true;
692 }
693 }
694 }
695 }
696 else
697 {
698 PGPROC *lastGroupMember = NULL;
699 dlist_iter proc_iter;
700 dclist_head *waitQueue;
701
702 /* Use the true lock wait queue order */
703 waitQueue = &lock->waitProcs;
704
705 /*
706 * Find the last member of the lock group that is present in the wait
707 * queue. Anything after this is not a soft lock conflict. If group
708 * locking is not in use, then we know immediately which process we're
709 * looking for, but otherwise we've got to search the wait queue to
710 * find the last process actually present.
711 */
712 if (checkProc->lockGroupLeader == NULL)
713 lastGroupMember = checkProc;
714 else
715 {
716 dclist_foreach(proc_iter, waitQueue)
717 {
718 proc = dlist_container(PGPROC, links, proc_iter.cur);
719
720 if (proc->lockGroupLeader == checkProcLeader)
721 lastGroupMember = proc;
722 }
723 Assert(lastGroupMember != NULL);
724 }
725
726 /*
727 * OK, now rescan (or scan) the queue to identify the soft conflicts.
728 */
729 dclist_foreach(proc_iter, waitQueue)
730 {
731 PGPROC *leader;
732
733 proc = dlist_container(PGPROC, links, proc_iter.cur);
734
735 leader = proc->lockGroupLeader == NULL ? proc :
736 proc->lockGroupLeader;
737
738 /* Done when we reach the target proc */
739 if (proc == lastGroupMember)
740 break;
741
742 /* Is there a conflict with this guy's request? */
743 if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0 &&
744 leader != checkProcLeader)
745 {
746 /* This proc soft-blocks checkProc */
747 if (FindLockCycleRecurse(proc, depth + 1,
748 softEdges, nSoftEdges))
749 {
750 /* fill deadlockDetails[] */
751 DEADLOCK_INFO *info = &deadlockDetails[depth];
752
753 info->locktag = lock->tag;
754 info->lockmode = checkProc->waitLockMode;
755 info->pid = checkProc->pid;
756
757 /*
758 * Add this edge to the list of soft edges in the cycle
759 */
760 Assert(*nSoftEdges < MaxBackends);
761 softEdges[*nSoftEdges].waiter = checkProcLeader;
762 softEdges[*nSoftEdges].blocker = leader;
763 softEdges[*nSoftEdges].lock = lock;
764 (*nSoftEdges)++;
765 return true;
766 }
767 }
768 }
769 }
770
771 /*
772 * No conflict detected here.
773 */
774 return false;
775}
776
777
778/*
779 * ExpandConstraints -- expand a list of constraints into a set of
780 * specific new orderings for affected wait queues
781 *
782 * Input is a list of soft edges to be reversed. The output is a list
783 * of nWaitOrders WAIT_ORDER structs in waitOrders[], with PGPROC array
784 * workspace in waitOrderProcs[].
785 *
786 * Returns true if able to build an ordering that satisfies all the
787 * constraints, false if not (there are contradictory constraints).
788 */
789static bool
790ExpandConstraints(EDGE *constraints,
791 int nConstraints)
792{
793 int nWaitOrderProcs = 0;
794 int i,
795 j;
796
797 nWaitOrders = 0;
798
799 /*
800 * Scan constraint list backwards. This is because the last-added
801 * constraint is the only one that could fail, and so we want to test it
802 * for inconsistency first.
803 */
804 for (i = nConstraints; --i >= 0;)
805 {
806 LOCK *lock = constraints[i].lock;
807
808 /* Did we already make a list for this lock? */
809 for (j = nWaitOrders; --j >= 0;)
810 {
811 if (waitOrders[j].lock == lock)
812 break;
813 }
814 if (j >= 0)
815 continue;
816 /* No, so allocate a new list */
817 waitOrders[nWaitOrders].lock = lock;
818 waitOrders[nWaitOrders].procs = waitOrderProcs + nWaitOrderProcs;
819 waitOrders[nWaitOrders].nProcs = dclist_count(&lock->waitProcs);
820 nWaitOrderProcs += dclist_count(&lock->waitProcs);
821 Assert(nWaitOrderProcs <= MaxBackends);
822
823 /*
824 * Do the topo sort. TopoSort need not examine constraints after this
825 * one, since they must be for different locks.
826 */
827 if (!TopoSort(lock, constraints, i + 1,
828 waitOrders[nWaitOrders].procs))
829 return false;
830 nWaitOrders++;
831 }
832 return true;
833}
834
835
836/*
837 * TopoSort -- topological sort of a wait queue
838 *
839 * Generate a re-ordering of a lock's wait queue that satisfies given
840 * constraints about certain procs preceding others. (Each such constraint
841 * is a fact of a partial ordering.) Minimize rearrangement of the queue
842 * not needed to achieve the partial ordering.
843 *
844 * This is a lot simpler and slower than, for example, the topological sort
845 * algorithm shown in Knuth's Volume 1. However, Knuth's method doesn't
846 * try to minimize the damage to the existing order. In practice we are
847 * not likely to be working with more than a few constraints, so the apparent
848 * slowness of the algorithm won't really matter.
849 *
850 * The initial queue ordering is taken directly from the lock's wait queue.
851 * The output is an array of PGPROC pointers, of length equal to the lock's
852 * wait queue length (the caller is responsible for providing this space).
853 * The partial order is specified by an array of EDGE structs. Each EDGE
854 * is one that we need to reverse, therefore the "waiter" must appear before
855 * the "blocker" in the output array. The EDGE array may well contain
856 * edges associated with other locks; these should be ignored.
857 *
858 * Returns true if able to build an ordering that satisfies all the
859 * constraints, false if not (there are contradictory constraints).
860 */
861static bool
862TopoSort(LOCK *lock,
863 EDGE *constraints,
864 int nConstraints,
865 PGPROC **ordering) /* output argument */
866{
867 dclist_head *waitQueue = &lock->waitProcs;
868 int queue_size = dclist_count(waitQueue);
869 PGPROC *proc;
870 int i,
871 j,
872 jj,
873 k,
874 kk,
875 last;
876 dlist_iter proc_iter;
877
878 /* First, fill topoProcs[] array with the procs in their current order */
879 i = 0;
880 dclist_foreach(proc_iter, waitQueue)
881 {
882 proc = dlist_container(PGPROC, links, proc_iter.cur);
883 topoProcs[i++] = proc;
884 }
885 Assert(i == queue_size);
886
887 /*
888 * Scan the constraints, and for each proc in the array, generate a count
889 * of the number of constraints that say it must be before something else,
890 * plus a list of the constraints that say it must be after something
891 * else. The count for the j'th proc is stored in beforeConstraints[j],
892 * and the head of its list in afterConstraints[j]. Each constraint
893 * stores its list link in constraints[i].link (note any constraint will
894 * be in just one list). The array index for the before-proc of the i'th
895 * constraint is remembered in constraints[i].pred.
896 *
897 * Note that it's not necessarily the case that every constraint affects
898 * this particular wait queue. Prior to group locking, a process could be
899 * waiting for at most one lock. But a lock group can be waiting for
900 * zero, one, or multiple locks. Since topoProcs[] is an array of the
901 * processes actually waiting, while constraints[] is an array of group
902 * leaders, we've got to scan through topoProcs[] for each constraint,
903 * checking whether both a waiter and a blocker for that group are
904 * present. If so, the constraint is relevant to this wait queue; if not,
905 * it isn't.
906 */
907 MemSet(beforeConstraints, 0, queue_size * sizeof(int));
908 MemSet(afterConstraints, 0, queue_size * sizeof(int));
909 for (i = 0; i < nConstraints; i++)
910 {
911 /*
912 * Find a representative process that is on the lock queue and part of
913 * the waiting lock group. This may or may not be the leader, which
914 * may or may not be waiting at all. If there are any other processes
915 * in the same lock group on the queue, set their number of
916 * beforeConstraints to -1 to indicate that they should be emitted
917 * with their groupmates rather than considered separately.
918 *
919 * In this loop and the similar one just below, it's critical that we
920 * consistently select the same representative member of any one lock
921 * group, so that all the constraints are associated with the same
922 * proc, and the -1's are only associated with not-representative
923 * members. We select the last one in the topoProcs array.
924 */
925 proc = constraints[i].waiter;
926 Assert(proc != NULL);
927 jj = -1;
928 for (j = queue_size; --j >= 0;)
929 {
930 PGPROC *waiter = topoProcs[j];
931
932 if (waiter == proc || waiter->lockGroupLeader == proc)
933 {
934 Assert(waiter->waitLock == lock);
935 if (jj == -1)
936 jj = j;
937 else
938 {
939 Assert(beforeConstraints[j] <= 0);
940 beforeConstraints[j] = -1;
941 }
942 }
943 }
944
945 /* If no matching waiter, constraint is not relevant to this lock. */
946 if (jj < 0)
947 continue;
948
949 /*
950 * Similarly, find a representative process that is on the lock queue
951 * and waiting for the blocking lock group. Again, this could be the
952 * leader but does not need to be.
953 */
954 proc = constraints[i].blocker;
955 Assert(proc != NULL);
956 kk = -1;
957 for (k = queue_size; --k >= 0;)
958 {
959 PGPROC *blocker = topoProcs[k];
960
961 if (blocker == proc || blocker->lockGroupLeader == proc)
962 {
963 Assert(blocker->waitLock == lock);
964 if (kk == -1)
965 kk = k;
966 else
967 {
968 Assert(beforeConstraints[k] <= 0);
969 beforeConstraints[k] = -1;
970 }
971 }
972 }
973
974 /* If no matching blocker, constraint is not relevant to this lock. */
975 if (kk < 0)
976 continue;
977
978 Assert(beforeConstraints[jj] >= 0);
979 beforeConstraints[jj]++; /* waiter must come before */
980 /* add this constraint to list of after-constraints for blocker */
981 constraints[i].pred = jj;
982 constraints[i].link = afterConstraints[kk];
983 afterConstraints[kk] = i + 1;
984 }
985
986 /*--------------------
987 * Now scan the topoProcs array backwards. At each step, output the
988 * last proc that has no remaining before-constraints plus any other
989 * members of the same lock group; then decrease the beforeConstraints
990 * count of each of the procs it was constrained against.
991 * i = index of ordering[] entry we want to output this time
992 * j = search index for topoProcs[]
993 * k = temp for scanning constraint list for proc j
994 * last = last non-null index in topoProcs (avoid redundant searches)
995 *--------------------
996 */
997 last = queue_size - 1;
998 for (i = queue_size - 1; i >= 0;)
999 {
1000 int c;
1001 int nmatches = 0;
1002
1003 /* Find next candidate to output */
1004 while (topoProcs[last] == NULL)
1005 last--;
1006 for (j = last; j >= 0; j--)
1007 {
1008 if (topoProcs[j] != NULL && beforeConstraints[j] == 0)
1009 break;
1010 }
1011
1012 /* If no available candidate, topological sort fails */
1013 if (j < 0)
1014 return false;
1015
1016 /*
1017 * Output everything in the lock group. There's no point in
1018 * outputting an ordering where members of the same lock group are not
1019 * consecutive on the wait queue: if some other waiter is between two
1020 * requests that belong to the same group, then either it conflicts
1021 * with both of them and is certainly not a solution; or it conflicts
1022 * with at most one of them and is thus isomorphic to an ordering
1023 * where the group members are consecutive.
1024 */
1025 proc = topoProcs[j];
1026 if (proc->lockGroupLeader != NULL)
1027 proc = proc->lockGroupLeader;
1028 Assert(proc != NULL);
1029 for (c = 0; c <= last; ++c)
1030 {
1031 if (topoProcs[c] == proc || (topoProcs[c] != NULL &&
1032 topoProcs[c]->lockGroupLeader == proc))
1033 {
1034 ordering[i - nmatches] = topoProcs[c];
1035 topoProcs[c] = NULL;
1036 ++nmatches;
1037 }
1038 }
1039 Assert(nmatches > 0);
1040 i -= nmatches;
1041
1042 /* Update beforeConstraints counts of its predecessors */
1043 for (k = afterConstraints[j]; k > 0; k = constraints[k - 1].link)
1044 beforeConstraints[constraints[k - 1].pred]--;
1045 }
1046
1047 /* Done */
1048 return true;
1049}
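
The before-count / after-list bookkeeping used by TopoSort can be seen in isolation. The program below is a simplified, self-contained rendition for illustration only: plain strings stand in for PGPROCs, there are no lock groups, and a single constraint asks that the fourth queue entry precede the second (one reversed soft edge); all names are invented:

#include <stdio.h>

#define QSIZE   4
#define NCONSTR 1

int
main(void)
{
	const char *queue[QSIZE] = {"P1", "P2", "P3", "P4"};	/* current wait order */
	int			c_waiter[NCONSTR] = {3};	/* constraint 0: P4 must precede ... */
	int			c_blocker[NCONSTR] = {1};	/* ... P2 */
	int			before[QSIZE] = {0};	/* # of "must precede" constraints */
	int			after[QSIZE] = {0};		/* 1-based constraint-list heads */
	int			link[NCONSTR];			/* next constraint in the same list */
	const char *out[QSIZE];
	int			used[QSIZE] = {0};

	/* build before-counts and per-blocker after-lists, as TopoSort does */
	for (int i = 0; i < NCONSTR; i++)
	{
		before[c_waiter[i]]++;
		link[i] = after[c_blocker[i]];
		after[c_blocker[i]] = i + 1;
	}

	/* fill out[] back to front: emit the last entry with no before-constraints */
	for (int pos = QSIZE - 1; pos >= 0; pos--)
	{
		int			j;

		for (j = QSIZE - 1; j >= 0; j--)
			if (!used[j] && before[j] == 0)
				break;
		if (j < 0)
			return 1;			/* contradictory constraints */
		out[pos] = queue[j];
		used[j] = 1;
		for (int k = after[j]; k > 0; k = link[k - 1])
			before[c_waiter[k - 1]]--;
	}

	for (int pos = 0; pos < QSIZE; pos++)
		printf("%s ", out[pos]);	/* prints: P1 P4 P2 P3 */
	printf("\n");
	return 0;
}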
1050
1051#ifdef DEBUG_DEADLOCK
1052static void
1053PrintLockQueue(LOCK *lock, const char *info)
1054{
1055 dclist_head *waitQueue = &lock->waitProcs;
1056 dlist_iter proc_iter;
1057
1058 printf("%s lock %p queue ", info, lock);
1059
1060 dclist_foreach(proc_iter, waitQueue)
1061 {
1062 PGPROC *proc = dlist_container(PGPROC, links, proc_iter.cur);
1063
1064 printf(" %d", proc->pid);
1065 }
1066 printf("\n");
1067 fflush(stdout);
1068}
1069#endif
1070
1071/*
1072 * Report a detected deadlock, with available details.
1073 */
1074void
1075DeadLockReport(void)
1076{
1077 StringInfoData clientbuf; /* errdetail for client */
1078 StringInfoData logbuf; /* errdetail for server log */
1079 StringInfoData locktagbuf;
1080 int i;
1081
1082 initStringInfo(&clientbuf);
1083 initStringInfo(&logbuf);
1084 initStringInfo(&locktagbuf);
1085
1086 /* Generate the "waits for" lines sent to the client */
1087 for (i = 0; i < nDeadlockDetails; i++)
1088 {
1089 DEADLOCK_INFO *info = &deadlockDetails[i];
1090 int nextpid;
1091
1092 /* The last proc waits for the first one... */
1093 if (i < nDeadlockDetails - 1)
1094 nextpid = info[1].pid;
1095 else
1096 nextpid = deadlockDetails[0].pid;
1097
1098 /* reset locktagbuf to hold next object description */
1099 resetStringInfo(&locktagbuf);
1100
1101 DescribeLockTag(&locktagbuf, &info->locktag);
1102
1103 if (i > 0)
1104 appendStringInfoChar(&clientbuf, '\n');
1105
1106 appendStringInfo(&clientbuf,
1107 _("Process %d waits for %s on %s; blocked by process %d."),
1108 info->pid,
1109 GetLockmodeName(info->locktag.locktag_lockmethodid,
1110 info->lockmode),
1111 locktagbuf.data,
1112 nextpid);
1113 }
1114
1115 /* Duplicate all the above for the server ... */
1116 appendBinaryStringInfo(&logbuf, clientbuf.data, clientbuf.len);
1117
1118 /* ... and add info about query strings */
1119 for (i = 0; i < nDeadlockDetails; i++)
1120 {
1121 DEADLOCK_INFO *info = &deadlockDetails[i];
1122
1123 appendStringInfoChar(&logbuf, '\n');
1124
1125 appendStringInfo(&logbuf,
1126 _("Process %d: %s"),
1127 info->pid,
1128 pgstat_get_backend_current_activity(info->pid, false));
1129 }
1130
1131 pgstat_report_deadlock();
1132
1133 ereport(ERROR,
1134 (errcode(ERRCODE_T_R_DEADLOCK_DETECTED),
1135 errmsg("deadlock detected"),
1136 errdetail_internal("%s", clientbuf.data),
1137 errdetail_log("%s", logbuf.data),
1138 errhint("See server log for query details.")));
1139}
1140
1141/*
1142 * RememberSimpleDeadLock: set up info for DeadLockReport when ProcSleep
1143 * detects a trivial (two-way) deadlock. proc1 wants to block for lockmode
1144 * on lock, but proc2 is already waiting and would be blocked by proc1.
1145 */
1146void
1147RememberSimpleDeadLock(PGPROC *proc1,
1148 LOCKMODE lockmode,
1149 LOCK *lock,
1150 PGPROC *proc2)
1151{
1152 DEADLOCK_INFO *info = &deadlockDetails[0];
1153
1154 info->locktag = lock->tag;
1155 info->lockmode = lockmode;
1156 info->pid = proc1->pid;
1157 info++;
1158 info->locktag = proc2->waitLock->tag;
1159 info->lockmode = proc2->waitLockMode;
1160 info->pid = proc2->pid;
1161 nDeadlockDetails = 2;
1162}
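
For the two-way case recorded here, the two deadlockDetails[] entries drive DeadLockReport() to produce a client errdetail of roughly the following shape. The PIDs and lock descriptions below are invented for illustration; the sentence pattern is the format string in DeadLockReport(), and the object text comes from DescribeLockTag():

	Process 12345 waits for ShareLock on transaction 1001; blocked by process 54321.
	Process 54321 waits for ShareLock on transaction 1000; blocked by process 12345.

The server-log variant appends one "Process <pid>: <query>" line per process, with the query text obtained from pgstat_get_backend_current_activity().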
const char * pgstat_get_backend_current_activity(int pid, bool checkUser)
#define MemSet(start, val, len)
Definition: c.h:991
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:909
static bool TopoSort(LOCK *lock, EDGE *constraints, int nConstraints, PGPROC **ordering)
Definition: deadlock.c:862
static int TestConfiguration(PGPROC *startProc)
Definition: deadlock.c:378
static int nVisitedProcs
Definition: deadlock.c:104
static WAIT_ORDER * waitOrders
Definition: deadlock.c:112
static bool FindLockCycleRecurseMember(PGPROC *checkProc, PGPROC *checkProcLeader, int depth, EDGE *softEdges, int *nSoftEdges)
Definition: deadlock.c:536
static bool FindLockCycle(PGPROC *checkProc, EDGE *softEdges, int *nSoftEdges)
Definition: deadlock.c:446
static int maxPossibleConstraints
Definition: deadlock.c:124
static bool DeadLockCheckRecurse(PGPROC *proc)
Definition: deadlock.c:312
PGPROC * GetBlockingAutoVacuumPgproc(void)
Definition: deadlock.c:290
static EDGE * possibleConstraints
Definition: deadlock.c:122
static PGPROC ** waitOrderProcs
Definition: deadlock.c:114
void RememberSimpleDeadLock(PGPROC *proc1, LOCKMODE lockmode, LOCK *lock, PGPROC *proc2)
Definition: deadlock.c:1147
static PGPROC ** visitedProcs
Definition: deadlock.c:103
static bool ExpandConstraints(EDGE *constraints, int nConstraints)
Definition: deadlock.c:790
static int * beforeConstraints
Definition: deadlock.c:108
static int nWaitOrders
Definition: deadlock.c:113
static int nDeadlockDetails
Definition: deadlock.c:126
void DeadLockReport(void)
Definition: deadlock.c:1075
static int * afterConstraints
Definition: deadlock.c:109
static DEADLOCK_INFO * deadlockDetails
Definition: deadlock.c:125
static int maxCurConstraints
Definition: deadlock.c:119
void InitDeadLockChecking(void)
Definition: deadlock.c:144
static int nCurConstraints
Definition: deadlock.c:118
DeadLockState DeadLockCheck(PGPROC *proc)
Definition: deadlock.c:220
static PGPROC * blocking_autovacuum_proc
Definition: deadlock.c:129
static EDGE * curConstraints
Definition: deadlock.c:117
static int nPossibleConstraints
Definition: deadlock.c:123
static bool FindLockCycleRecurse(PGPROC *checkProc, int depth, EDGE *softEdges, int *nSoftEdges)
Definition: deadlock.c:457
static PGPROC ** topoProcs
Definition: deadlock.c:107
int errdetail_internal(const char *fmt,...)
Definition: elog.c:1231
int errhint(const char *fmt,...)
Definition: elog.c:1318
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
int errdetail_log(const char *fmt,...)
Definition: elog.c:1252
#define _(x)
Definition: elog.c:91
#define FATAL
Definition: elog.h:41
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:149
int MaxBackends
Definition: globals.c:147
Assert(PointerIsAligned(start, uint64))
#define dlist_foreach(iter, lhead)
Definition: ilist.h:623
static void dclist_push_tail(dclist_head *head, dlist_node *node)
Definition: ilist.h:709
static uint32 dclist_count(const dclist_head *head)
Definition: ilist.h:932
static void dclist_init(dclist_head *head)
Definition: ilist.h:671
#define dlist_container(type, membername, ptr)
Definition: ilist.h:593
#define dclist_foreach(iter, lhead)
Definition: ilist.h:970
int j
Definition: isn.c:78
int i
Definition: isn.c:77
void DescribeLockTag(StringInfo buf, const LOCKTAG *tag)
Definition: lmgr.c:1243
const char * GetLockmodeName(LOCKMETHODID lockmethodid, LOCKMODE mode)
Definition: lock.c:4220
LockMethod GetLocksMethodTable(const LOCK *lock)
Definition: lock.c:523
#define LOCK_LOCKTAG(lock)
Definition: lock.h:326
@ LOCKTAG_RELATION_EXTEND
Definition: lock.h:139
DeadLockState
Definition: lock.h:510
@ DS_HARD_DEADLOCK
Definition: lock.h:514
@ DS_BLOCKED_BY_AUTOVACUUM
Definition: lock.h:515
@ DS_NO_DEADLOCK
Definition: lock.h:512
@ DS_SOFT_DEADLOCK
Definition: lock.h:513
#define LOCKBIT_ON(lockmode)
Definition: lock.h:85
int LOCKMODE
Definition: lockdefs.h:26
MemoryContext TopMemoryContext
Definition: mcxt.c:165
void * palloc(Size size)
Definition: mcxt.c:1943
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
#define ERRCODE_T_R_DEADLOCK_DETECTED
Definition: pgbench.c:78
void pgstat_report_deadlock(void)
#define printf(...)
Definition: port.h:245
char * c
#define PROC_IS_AUTOVACUUM
Definition: proc.h:57
#define MAX_BACKENDS_BITS
Definition: procnumber.h:38
PGPROC * MyProc
Definition: proc.c:67
void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock)
Definition: proc.c:1740
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:126
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:281
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:242
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
LOCKTAG locktag
Definition: deadlock.c:74
LOCKMODE lockmode
Definition: deadlock.c:75
Definition: deadlock.c:48
int pred
Definition: deadlock.c:52
int link
Definition: deadlock.c:53
PGPROC * blocker
Definition: deadlock.c:50
LOCK * lock
Definition: deadlock.c:51
PGPROC * waiter
Definition: deadlock.c:49
Definition: lock.h:166
uint8 locktag_lockmethodid
Definition: lock.h:172
Definition: lock.h:310
LOCKTAG tag
Definition: lock.h:312
dclist_head waitProcs
Definition: lock.h:318
dlist_head procLocks
Definition: lock.h:317
const LOCKMASK * conflictTab
Definition: lock.h:112
int numLockModes
Definition: lock.h:111
Definition: proc.h:163
dlist_head lockGroupMembers
Definition: proc.h:306
uint8 statusFlags
Definition: proc.h:243
int pid
Definition: proc.h:183
LOCK * waitLock
Definition: proc.h:233
LOCKMODE waitLockMode
Definition: proc.h:235
PGPROC * lockGroupLeader
Definition: proc.h:305
dlist_node links
Definition: proc.h:164
PGPROC * myProc
Definition: lock.h:367
Definition: lock.h:371
LOCKMASK holdMask
Definition: lock.h:377
PROCLOCKTAG tag
Definition: lock.h:373
PGPROC ** procs
Definition: deadlock.c:60
LOCK * lock
Definition: deadlock.c:59
int nProcs
Definition: deadlock.c:61
dlist_node * cur
Definition: ilist.h:179
dlist_node * next
Definition: ilist.h:140
static struct link * links
Definition: zic.c:299