1212 *
1313 *
1414 * IDENTIFICATION
15- * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/deadlock.c,v 1.15 2002/11/01 00:40:23 tgl Exp $
15+ * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/deadlock.c,v 1.16 2003/01/16 21:01:44 tgl Exp $
1616 *
1717 *Interface:
1818 *
1919 *DeadLockCheck()
20+ *DeadLockReport()
21+ *RememberSimpleDeadLock()
2022 *InitDeadLockChecking()
2123 *
2224 *-------------------------------------------------------------------------
@@ -45,12 +47,27 @@ typedef struct
4547int nProcs ;
4648}WAIT_ORDER ;
4749
50+ /*
51+ * Information saved about each edge in a detected deadlock cycle. This
52+ * is used to print a diagnostic message upon failure.
53+ *
54+ * Note: because we want to examine this info after releasing the LockMgrLock,
55+ * we can't just store LOCK and PGPROC pointers; we must extract out all the
56+ * info we want to be able to print.
57+ */
58+ typedef struct
59+ {
60+ LOCKTAG locktag ;/* ID of awaited lock object */
61+ LOCKMODE lockmode ;/* type of lock we're waiting for */
62+ int pid ;/* PID of blocked backend */
63+ }DEADLOCK_INFO ;
64+
4865
4966static bool DeadLockCheckRecurse (PGPROC * proc );
5067static bool TestConfiguration (PGPROC * startProc );
5168static bool FindLockCycle (PGPROC * checkProc ,
5269EDGE * softEdges ,int * nSoftEdges );
53- static bool FindLockCycleRecurse (PGPROC * checkProc ,
70+ static bool FindLockCycleRecurse (PGPROC * checkProc ,int depth ,
5471EDGE * softEdges ,int * nSoftEdges );
5572static bool ExpandConstraints (EDGE * constraints ,int nConstraints );
5673static bool TopoSort (LOCK * lock ,EDGE * constraints ,int nConstraints ,
@@ -88,6 +105,8 @@ static intmaxCurConstraints;
88105static EDGE * possibleConstraints ;
89106static int nPossibleConstraints ;
90107static int maxPossibleConstraints ;
108+ static DEADLOCK_INFO * deadlockDetails ;
109+ static int nDeadlockDetails ;
91110
92111
93112/*
@@ -110,8 +129,10 @@ InitDeadLockChecking(void)
110129
111130/*
112131 * FindLockCycle needs at most MaxBackends entries in visitedProcs[]
132+ * and deadlockDetails[].
113133 */
114134visitedProcs = (PGPROC * * )palloc (MaxBackends * sizeof (PGPROC * ));
135+ deadlockDetails = (DEADLOCK_INFO * )palloc (MaxBackends * sizeof (DEADLOCK_INFO ));
115136
116137/*
117138 * TopoSort needs to consider at most MaxBackends wait-queue entries,
@@ -176,6 +197,10 @@ InitDeadLockChecking(void)
176197 * table to have a different masterLock, all locks that can block had
177198 * better use the same LWLock, else this code will not be adequately
178199 * interlocked!
200+ *
201+ * On failure, deadlock details are recorded in deadlockDetails[] for
202+ * subsequent printing by DeadLockReport(). That activity is separate
203+ * because we don't want to do it while holding the master lock.
179204 */
180205bool
181206DeadLockCheck (PGPROC * proc )
@@ -190,7 +215,19 @@ DeadLockCheck(PGPROC *proc)
190215
191216/* Search for deadlocks and possible fixes */
192217if (DeadLockCheckRecurse (proc ))
218+ {
219+ /*
220+ * Call FindLockCycle one more time, to record the correct
221+ * deadlockDetails[] for the basic state with no rearrangements.
222+ */
223+ int nSoftEdges ;
224+
225+ nWaitOrders = 0 ;
226+ if (!FindLockCycle (proc ,possibleConstraints ,& nSoftEdges ))
227+ elog (FATAL ,"DeadLockCheck: deadlock seems to have disappeared" );
228+
193229return true;/* cannot find a non-deadlocked state */
230+ }
194231
195232/* Apply any needed rearrangements of wait queues */
196233for (i = 0 ;i < nWaitOrders ;i ++ )
@@ -357,9 +394,12 @@ TestConfiguration(PGPROC *startProc)
357394 *
358395 * Scan outward from the given proc to see if there is a cycle in the
359396 * waits-for graph that includes this proc. Return TRUE if a cycle
360- * is found, else FALSE. If a cycle is found, wealso return a list of
397+ * is found, else FALSE. If a cycle is found, we return a list of
361398 * the "soft edges", if any, included in the cycle. These edges could
362- * potentially be eliminated by rearranging wait queues.
399+ * potentially be eliminated by rearranging wait queues. We also fill
400+ * deadlockDetails[] with information about the detected cycle; this info
401+ * is not used by the deadlock algorithm itself, only to print a useful
402+ * message after failing.
363403 *
364404 * Since we need to be able to check hypothetical configurations that would
365405 * exist after wait queue rearrangement, the routine pays attention to the
@@ -372,12 +412,14 @@ FindLockCycle(PGPROC *checkProc,
372412int * nSoftEdges )/* output argument */
373413{
374414nVisitedProcs = 0 ;
415+ nDeadlockDetails = 0 ;
375416* nSoftEdges = 0 ;
376- return FindLockCycleRecurse (checkProc ,softEdges ,nSoftEdges );
417+ return FindLockCycleRecurse (checkProc ,0 , softEdges ,nSoftEdges );
377418}
378419
379420static bool
380421FindLockCycleRecurse (PGPROC * checkProc ,
422+ int depth ,
381423EDGE * softEdges ,/* output argument */
382424int * nSoftEdges )/* output argument */
383425{
@@ -402,7 +444,16 @@ FindLockCycleRecurse(PGPROC *checkProc,
402444{
403445/* If we return to starting point, we have a deadlock cycle */
404446if (i == 0 )
447+ {
448+ /*
449+ * record total length of cycle --- outer levels will now
450+ * fill deadlockDetails[]
451+ */
452+ Assert (depth <=MaxBackends );
453+ nDeadlockDetails = depth ;
454+
405455return true;
456+ }
406457
407458/*
408459 * Otherwise, we have a cycle but it does not include the
@@ -449,8 +500,18 @@ FindLockCycleRecurse(PGPROC *checkProc,
449500((1 <<lm )& conflictMask )!= 0 )
450501{
451502/* This proc hard-blocks checkProc */
452- if (FindLockCycleRecurse (proc ,softEdges ,nSoftEdges ))
503+ if (FindLockCycleRecurse (proc ,depth + 1 ,
504+ softEdges ,nSoftEdges ))
505+ {
506+ /* fill deadlockDetails[] */
507+ DEADLOCK_INFO * info = & deadlockDetails [depth ];
508+
509+ info -> locktag = lock -> tag ;
510+ info -> lockmode = checkProc -> waitLockMode ;
511+ info -> pid = checkProc -> pid ;
512+
453513return true;
514+ }
454515/* If no deadlock, we're done looking at this holder */
455516break ;
456517}
@@ -496,8 +557,16 @@ FindLockCycleRecurse(PGPROC *checkProc,
496557if (((1 <<proc -> waitLockMode )& conflictMask )!= 0 )
497558{
498559/* This proc soft-blocks checkProc */
499- if (FindLockCycleRecurse (proc ,softEdges ,nSoftEdges ))
560+ if (FindLockCycleRecurse (proc ,depth + 1 ,
561+ softEdges ,nSoftEdges ))
500562{
563+ /* fill deadlockDetails[] */
564+ DEADLOCK_INFO * info = & deadlockDetails [depth ];
565+
566+ info -> locktag = lock -> tag ;
567+ info -> lockmode = checkProc -> waitLockMode ;
568+ info -> pid = checkProc -> pid ;
569+
501570/*
502571 * Add this edge to the list of soft edges in the
503572 * cycle
@@ -529,8 +598,16 @@ FindLockCycleRecurse(PGPROC *checkProc,
529598if (((1 <<proc -> waitLockMode )& conflictMask )!= 0 )
530599{
531600/* This proc soft-blocks checkProc */
532- if (FindLockCycleRecurse (proc ,softEdges ,nSoftEdges ))
601+ if (FindLockCycleRecurse (proc ,depth + 1 ,
602+ softEdges ,nSoftEdges ))
533603{
604+ /* fill deadlockDetails[] */
605+ DEADLOCK_INFO * info = & deadlockDetails [depth ];
606+
607+ info -> locktag = lock -> tag ;
608+ info -> lockmode = checkProc -> waitLockMode ;
609+ info -> pid = checkProc -> pid ;
610+
534611/*
535612 * Add this edge to the list of soft edges in the
536613 * cycle
@@ -758,3 +835,67 @@ PrintLockQueue(LOCK *lock, const char *info)
758835}
759836
760837#endif
838+
839+ /*
840+ * Report details about a detected deadlock.
841+ */
842+ void
843+ DeadLockReport (void )
844+ {
845+ int i ;
846+
847+ for (i = 0 ;i < nDeadlockDetails ;i ++ )
848+ {
849+ DEADLOCK_INFO * info = & deadlockDetails [i ];
850+ int nextpid ;
851+
852+ /* The last proc waits for the first one... */
853+ if (i < nDeadlockDetails - 1 )
854+ nextpid = info [1 ].pid ;
855+ else
856+ nextpid = deadlockDetails [0 ].pid ;
857+
858+ if (info -> locktag .relId == XactLockTableId && info -> locktag .dbId == 0 )
859+ {
860+ /* Lock is for transaction ID */
861+ elog (NOTICE ,"Proc %d waits for %s on transaction %u; blocked by %d" ,
862+ info -> pid ,
863+ GetLockmodeName (info -> lockmode ),
864+ info -> locktag .objId .xid ,
865+ nextpid );
866+ }
867+ else
868+ {
869+ /* Lock is for a relation */
870+ elog (NOTICE ,"Proc %d waits for %s on relation %u database %u; blocked by %d" ,
871+ info -> pid ,
872+ GetLockmodeName (info -> lockmode ),
873+ info -> locktag .relId ,
874+ info -> locktag .dbId ,
875+ nextpid );
876+ }
877+ }
878+ }
879+
880+ /*
881+ * RememberSimpleDeadLock: set up info for DeadLockReport when ProcSleep
882+ * detects a trivial (two-way) deadlock. proc1 wants to block for lockmode
883+ * on lock, but proc2 is already waiting and would be blocked by proc1.
884+ */
885+ void
886+ RememberSimpleDeadLock (PGPROC * proc1 ,
887+ LOCKMODE lockmode ,
888+ LOCK * lock ,
889+ PGPROC * proc2 )
890+ {
891+ DEADLOCK_INFO * info = & deadlockDetails [0 ];
892+
893+ info -> locktag = lock -> tag ;
894+ info -> lockmode = lockmode ;
895+ info -> pid = proc1 -> pid ;
896+ info ++ ;
897+ info -> locktag = proc2 -> waitLock -> tag ;
898+ info -> lockmode = proc2 -> waitLockMode ;
899+ info -> pid = proc2 -> pid ;
900+ nDeadlockDetails = 2 ;
901+ }