88 *
99 *
1010 * IDENTIFICATION
11- * $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.173 2006/03/05 15:58:39 momjian Exp $
11+ * $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.174 2006/04/14 03:38:55 tgl Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
@@ -267,7 +267,8 @@ InitProcess(void)
267267
268268/*
269269 * We might be reusing a semaphore that belonged to a failed process. So
270- * be careful and reinitialize its value here.
270+ * be careful and reinitialize its value here. (This is not strictly
271+ * necessary anymore, but seems like a good idea for cleanliness.)
271272 */
272273PGSemaphoreReset (& MyProc -> sem );
273274
@@ -397,7 +398,8 @@ InitDummyProcess(void)
397398
398399/*
399400 * We might be reusing a semaphore that belonged to a failed process. So
400- * be careful and reinitialize its value here.
401+ * be careful and reinitialize its value here. (This is not strictly
402+ * necessary anymore, but seems like a good idea for cleanliness.)
401403 */
402404PGSemaphoreReset (& MyProc -> sem );
403405
@@ -465,7 +467,6 @@ LockWaitCancel(void)
465467if (MyProc -> links .next != INVALID_OFFSET )
466468{
467469/* We could not have been granted the lock yet */
468- Assert (MyProc -> waitStatus == STATUS_ERROR );
469470RemoveFromWaitQueue (MyProc ,lockAwaited -> partition );
470471}
471472else
@@ -485,15 +486,14 @@ LockWaitCancel(void)
485486LWLockRelease (partitionLock );
486487
487488/*
488- *Reset the proc wait semaphore tozero. This is necessary in the
489- *scenario where someone else granted us the lock we wanted before we
490- *were able to remove ourselves from the wait-list. The semaphore will
491- *have been bumped to 1 by the would-be grantor, and since we are no
492- *longer going to wait on the sema, we have to force it back to zero.
493- *Otherwise, our next attempt to wait for a lock will fall through
494- *prematurely .
489+ *We used to do PGSemaphoreReset() here toensure that our proc's wait
490+ *semaphore is reset to zero. This prevented a leftover wakeup signal
491+ *from remaining in the semaphore if someone else had granted us the
492+ *lock we wanted before we were able to remove ourselves from the
493+ * wait-list. However, now that ProcSleep loops until waitStatus changes,
494+ *a leftover wakeup signal isn't harmful, and it seems not worth
495+ *expending cycles to get rid of a signal that most likely isn't there .
495496 */
496- PGSemaphoreReset (& MyProc -> sem );
497497
498498/*
499499 * Return true even if we were kicked off the lock before we were able to
@@ -767,7 +767,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
767767MyProc -> waitProcLock = proclock ;
768768MyProc -> waitLockMode = lockmode ;
769769
770- MyProc -> waitStatus = STATUS_ERROR ; /* initialize result for error */
770+ MyProc -> waitStatus = STATUS_WAITING ;
771771
772772/*
773773 * If we detected deadlock, give up without waiting. This must agree with
@@ -808,9 +808,11 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
808808/*
809809 * If someone wakes us between LWLockRelease and PGSemaphoreLock,
810810 * PGSemaphoreLock will not block.The wakeup is "saved" by the semaphore
811- * implementation.Note also that if CheckDeadLock is invoked but does
812- * not detect a deadlock, PGSemaphoreLock() will continue to wait.There
813- * used to be a loop here, but it was useless code...
811+ * implementation. While this is normally good, there are cases where
812+ * a saved wakeup might be leftover from a previous operation (for
813+ * example, we aborted ProcWaitForSignal just before someone did
814+ * ProcSendSignal). So, loop to wait again if the waitStatus shows
815+ * we haven't been granted nor denied the lock yet.
814816 *
815817 * We pass interruptOK = true, which eliminates a window in which
816818 * cancel/die interrupts would be held off undesirably. This is a promise
@@ -820,7 +822,9 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
820822 * updating the locallock table, but if we lose control to an error,
821823 * LockWaitCancel will fix that up.
822824 */
823- PGSemaphoreLock (& MyProc -> sem , true);
825+ do {
826+ PGSemaphoreLock (& MyProc -> sem , true);
827+ }while (MyProc -> waitStatus == STATUS_WAITING );
824828
825829/*
826830 * Disable the timer, if it's still running
@@ -865,6 +869,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
865869 * XXX: presently, this code is only used for the "success" case, and only
866870 * works correctly for that case. To clean up in failure case, would need
867871 * to twiddle the lock's request counts too --- see RemoveFromWaitQueue.
872+ * Hence, in practice the waitStatus parameter must be STATUS_OK.
868873 */
869874PGPROC *
870875ProcWakeup (PGPROC * proc ,int waitStatus )
@@ -875,6 +880,7 @@ ProcWakeup(PGPROC *proc, int waitStatus)
875880if (proc -> links .prev == INVALID_OFFSET ||
876881proc -> links .next == INVALID_OFFSET )
877882return NULL ;
883+ Assert (proc -> waitStatus == STATUS_WAITING );
878884
879885/* Save next process before we zap the list link */
880886retProc = (PGPROC * )MAKE_PTR (proc -> links .next );
@@ -1014,16 +1020,13 @@ CheckDeadLock(void)
10141020 * efficiently by relying on lockAwaited, but use this coding to preserve
10151021 * the flexibility to kill some other transaction than the one detecting
10161022 * the deadlock.)
1023+ *
1024+ * RemoveFromWaitQueue sets MyProc->waitStatus to STATUS_ERROR, so
1025+ * ProcSleep will report an error after we return from the signal handler.
10171026 */
10181027Assert (MyProc -> waitLock != NULL );
10191028RemoveFromWaitQueue (MyProc ,LockTagToPartition (& (MyProc -> waitLock -> tag )));
10201029
1021- /*
1022- * Set MyProc->waitStatus to STATUS_ERROR so that ProcSleep will report an
1023- * error after we return from the signal handler.
1024- */
1025- MyProc -> waitStatus = STATUS_ERROR ;
1026-
10271030/*
10281031 * Unlock my semaphore so that the interrupted ProcSleep() call can
10291032 * finish.
@@ -1058,27 +1061,17 @@ CheckDeadLock(void)
10581061 * This can share the semaphore normally used for waiting for locks,
10591062 * since a backend could never be waiting for a lock and a signal at
10601063 * the same time. As with locks, it's OK if the signal arrives just
1061- * before we actually reach the waiting state.
1064+ * before we actually reach the waiting state. Also as with locks,
1065+ * it's necessary that the caller be robust against bogus wakeups:
1066+ * always check that the desired state has occurred, and wait again
1067+ * if not. This copes with possible "leftover" wakeups.
10621068 */
10631069void
10641070ProcWaitForSignal (void )
10651071{
10661072PGSemaphoreLock (& MyProc -> sem , true);
10671073}
10681074
1069- /*
1070- * ProcCancelWaitForSignal - clean up an aborted wait for signal
1071- *
1072- * We need this in case the signal arrived after we aborted waiting,
1073- * or if it arrived but we never reached ProcWaitForSignal() at all.
1074- * Caller should call this after resetting the signal request status.
1075- */
1076- void
1077- ProcCancelWaitForSignal (void )
1078- {
1079- PGSemaphoreReset (& MyProc -> sem );
1080- }
1081-
10821075/*
10831076 * ProcSendSignal - send a signal to a backend identified by PID
10841077 */