@@ -318,10 +318,17 @@ static void MtmCheckResponse(MtmArbiterMessage* resp)
318
318
&& Mtm -> status != MTM_RECOVERY
319
319
&& Mtm -> nodes [MtmNodeId - 1 ].lastStatusChangeTime + MSEC_TO_USEC (MtmNodeDisableDelay )< MtmGetSystemTime ())
320
320
{
321
- elog (WARNING ,"Node %d thinks that Iwas dead, while I am %s (message %s)" ,resp -> node ,MtmNodeStatusMnem [Mtm -> status ],messageKindText [resp -> code ]);
321
+ elog (WARNING ,"Node %d thinks that Iam dead, while I am %s (message %s)" ,resp -> node ,MtmNodeStatusMnem [Mtm -> status ],messageKindText [resp -> code ]);
322
322
BIT_SET (Mtm -> disabledNodeMask ,MtmNodeId - 1 );
323
323
MtmSwitchClusterMode (MTM_RECOVERY );
324
- }
324
+ }else if (BIT_CHECK (Mtm -> disabledNodeMask ,resp -> node - 1 )&& sockets [resp -> node - 1 ]< 0 ) {
325
+ /* We received a heartbeat from a disabled node with no open socket.
326
+ * Looks like it is restarted.
327
+ * Try to reconnect to it.
328
+ */
329
+ elog (WARNING ,"Receive heartbeat from disabled node %d" ,resp -> node );
330
+ BIT_SET (Mtm -> reconnectMask ,resp -> node - 1 );
331
+ }
325
332
}
326
333
327
334
static void MtmScheduleHeartbeat ()
@@ -355,7 +362,8 @@ static void MtmSendHeartbeat()
355
362
if (i + 1 != MtmNodeId ) {
356
363
if (!BIT_CHECK (busy_mask ,i )
357
364
&& (Mtm -> status != MTM_ONLINE
358
- || (sockets [i ] >=0 && !BIT_CHECK (Mtm -> disabledNodeMask ,i ))
365
+ || sockets [i ] >=0
366
+ || !BIT_CHECK (Mtm -> disabledNodeMask ,i )
359
367
|| BIT_CHECK (Mtm -> reconnectMask ,i )))
360
368
{
361
369
if (!MtmSendToNode (i ,& msg ,sizeof (msg ))) {
@@ -885,6 +893,8 @@ static void MtmReceiver(Datum arg)
885
893
Mtm -> nodes [node - 1 ].connectivityMask = msg -> connectivityMask ;
886
894
Mtm -> nodes [node - 1 ].lastHeartbeat = MtmGetSystemTime ();
887
895
896
+ MtmCheckResponse (msg );
897
+
888
898
switch (msg -> code ) {
889
899
case MSG_HEARTBEAT :
890
900
MTM_LOG2 ("Receive HEARTBEAT from node %d with timestamp %ld delay %ld" ,
@@ -964,7 +974,6 @@ static void MtmReceiver(Datum arg)
964
974
messageKindText [msg -> code ],ts -> xid ,ts -> gid ,node );
965
975
continue ;
966
976
}
967
- MtmCheckResponse (msg );
968
977
BIT_SET (ts -> votedMask ,node - 1 );
969
978
970
979
if (MtmIsCoordinator (ts )) {