Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitdf81d91

Browse files
committed
Disconnect node only on heartbeat timeout; One disabled state instead of several
1 parentd2018b3 commitdf81d91

File tree

5 files changed

+22
-51
lines changed

5 files changed

+22
-51
lines changed

‎arbiter.c

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,6 @@ static void MtmDisconnect(int node)
190190
MtmUnregisterSocket(sockets[node]);
191191
pg_closesocket(sockets[node],MtmUseRDMA);
192192
sockets[node]=-1;
193-
MtmOnNodeDisconnect(node+1);
194193
}
195194

196195
staticintMtmWaitSocket(intsd,boolforWrite,timestamp_ttimeoutMsec)
@@ -541,9 +540,6 @@ static void MtmOpenConnections()
541540
for (i=0;i<nNodes;i++) {
542541
if (i+1!=MtmNodeId&&i<Mtm->nAllNodes) {
543542
sockets[i]=MtmConnectSocket(i,Mtm->nodes[i].con.arbiterPort);
544-
if (sockets[i]<0) {
545-
MtmOnNodeDisconnect(i+1);
546-
}
547543
}
548544
}
549545
MtmStateProcessEvent(MTM_ARBITER_RECEIVER_START);
@@ -579,7 +575,6 @@ static bool MtmSendToNode(int node, void const* buf, int size)
579575
}
580576
sockets[node]=MtmConnectSocket(node,Mtm->nodes[node].con.arbiterPort);
581577
if (sockets[node]<0) {
582-
MtmOnNodeDisconnect(node+1);
583578
result= false;
584579
break;
585580
}

‎multimaster.c

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -218,15 +218,10 @@ static TransactionManager MtmTM =
218218

219219
charconst*constMtmNodeStatusMnem[]=
220220
{
221-
"Initialization",
222-
"Offline",
223-
"Connecting",
224-
"Online",
221+
"Disabled",
225222
"Recovery",
226223
"Recovered",
227-
"InMinor",
228-
"OutOfClique",
229-
"OutOfService"
224+
"Online"
230225
};
231226

232227
charconst*constMtmTxnStatusMnem[]=
@@ -1961,7 +1956,7 @@ void MtmPollStatusOfPreparedTransactionsForDisabledNode(int disabledNodeId)
19611956
MtmBroadcastPollMessage(ts);
19621957
}
19631958
}else {
1964-
MTM_LOG1("Skip transaction %s (%llu) with status %s gtid.node=%d gtid.xid=%llu votedMask=%llx",
1959+
MTM_LOG2("Skip transaction %s (%llu) with status %s gtid.node=%d gtid.xid=%llu votedMask=%llx",
19651960
ts->gid, (long64)ts->xid,MtmTxnStatusMnem[ts->status],ts->gtid.node, (long64)ts->gtid.xid,ts->votedMask);
19661961
}
19671962
}
@@ -2367,7 +2362,7 @@ static void MtmInitialize()
23672362
if (!found)
23682363
{
23692364
MemSet(Mtm,0,sizeof(MtmState)+sizeof(MtmNodeInfo)*(MtmMaxNodes-1));
2370-
Mtm->status=MTM_IN_MINORITY;//MTM_INITIALIZATION;
2365+
Mtm->status=MTM_DISABLED;//MTM_INITIALIZATION;
23712366
Mtm->recoverySlot=0;
23722367
Mtm->locks=GetNamedLWLockTranche(MULTIMASTER_NAME);
23732368
Mtm->csn=MtmGetCurrentTime();
@@ -2774,7 +2769,7 @@ _PG_init(void)
27742769
"Timeout in milliseconds of sending heartbeat messages",
27752770
"Period of broadcasting heartbeat messages by arbiter to all nodes",
27762771
&MtmHeartbeatSendTimeout,
2777-
1000,
2772+
200,
27782773
1,
27792774
INT_MAX,
27802775
PGC_BACKEND,
@@ -2789,7 +2784,7 @@ _PG_init(void)
27892784
"Timeout in milliseconds of receiving heartbeat messages",
27902785
"If no heartbeat message is received from node within this period, it assumed to be dead",
27912786
&MtmHeartbeatRecvTimeout,
2792-
10000,
2787+
1000,
27932788
1,
27942789
INT_MAX,
27952790
PGC_BACKEND,

‎multimaster.h

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -130,15 +130,12 @@ typedef enum
130130

131131
typedefenum
132132
{
133-
MTM_INITIALIZATION,/* Initial status */
134-
MTM_OFFLINE,/* Node is excluded from cluster */
135-
MTM_CONNECTED,/* Arbiter is established connections with other nodes */
136-
MTM_ONLINE,/* Ready to receive client's queries */
133+
MTM_DISABLED,/* Node disabled */
137134
MTM_RECOVERY,/* Node is in recovery process */
138-
MTM_RECOVERED,/* Node is recovered by is not yet switched to ONLINE because not all sender/receivers are restarted */
139-
MTM_IN_MINORITY,/* Node is out of quorum */
140-
MTM_OUT_OF_CLIQUE,/* Node is out of cluster by clique detector */
141-
MTM_OUT_OF_SERVICE/* Node is not available totocritical, non-recoverable error */
135+
MTM_RECOVERED,/* Node is recovered by is not yet switched to ONLINE because
136+
* not all sender/receivers are restarted
137+
*/
138+
MTM_ONLINE/* Readytoreceive client's queries */
142139
}MtmNodeStatus;
143140

144141
typedefenum

‎pglogical_receiver.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ pglogical_receiver_main(Datum main_arg)
404404
if (rc&WL_POSTMASTER_DEATH)
405405
proc_exit(1);
406406

407-
if (Mtm->status==MTM_OFFLINE|| (Mtm->status==MTM_RECOVERY&&Mtm->recoverySlot!=nodeId))
407+
if (Mtm->status==MTM_DISABLED|| (Mtm->status==MTM_RECOVERY&&Mtm->recoverySlot!=nodeId))
408408
{
409409
ereport(LOG, (MTM_ERRMSG("%s: restart WAL receiver because node was switched to %s mode",worker_proc,MtmNodeStatusMnem[Mtm->status])));
410410
break;

‎state.c

Lines changed: 10 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -60,14 +60,6 @@ MtmSetClusterStatus(MtmNodeStatus status)
6060
MTM_LOG1("[STATE] Switching status from %s to %s status",
6161
MtmNodeStatusMnem[Mtm->status],MtmNodeStatusMnem[status]);
6262

63-
/* Do some actions on specific status transitions */
64-
if (status==MTM_IN_MINORITY)
65-
{
66-
Mtm->recoverySlot=0;
67-
Mtm->pglogicalReceiverMask=0;
68-
Mtm->pglogicalSenderMask=0;
69-
}
70-
7163
Mtm->status=status;
7264
}
7365

@@ -88,27 +80,21 @@ MtmCheckState(void)
8880
maskToString(Mtm->pglogicalSenderMask,Mtm->nAllNodes),
8981
Mtm->nAllNodes);
9082

91-
/* ANY -> MTM_IN_MINORITY */
92-
if (nConnected<Mtm->nAllNodes/2+1)
83+
/* ANY -> MTM_DISABLED */
84+
if (nConnected<Mtm->nAllNodes/2+1||
85+
!BIT_CHECK(Mtm->clique,MtmNodeId-1) )
9386
{
9487
BIT_SET(Mtm->disabledNodeMask,MtmNodeId-1);
95-
MtmSetClusterStatus(MTM_IN_MINORITY);
96-
return;
97-
}
98-
99-
/* ANY -> CLIQUE_DISABLE */
100-
if (!BIT_CHECK(Mtm->clique,MtmNodeId-1))
101-
{
102-
/* Should be already disabled by clique detector */
103-
// Assert(BIT_CHECK(Mtm->disabledNodeMask, MtmNodeId-1));
104-
MtmSetClusterStatus(MTM_OUT_OF_CLIQUE);
88+
Mtm->recoverySlot=0;
89+
Mtm->pglogicalReceiverMask=0;
90+
Mtm->pglogicalSenderMask=0;
91+
MtmSetClusterStatus(MTM_DISABLED);
10592
return;
10693
}
10794

10895
switch (Mtm->status)
10996
{
110-
caseMTM_IN_MINORITY:
111-
caseMTM_OUT_OF_CLIQUE:
97+
caseMTM_DISABLED:
11298
if ( (nConnected >=Mtm->nAllNodes/2+1)&&/* majority */
11399
BIT_CHECK(Mtm->clique,MtmNodeId-1) )/* in clique */
114100
{
@@ -173,7 +159,7 @@ MtmStateProcessNeighborEvent(int node_id, MtmNeighborEvent ev)
173159

174160
caseMTM_NEIGHBOR_WAL_SENDER_START_RECOVERED:
175161
BIT_SET(Mtm->pglogicalSenderMask,node_id-1);
176-
MtmEnableNode(node_id);
162+
MtmEnableNode(node_id);/// XXXX ?
177163
break;
178164

179165
caseMTM_NEIGHBOR_RECOVERY_CAUGHTUP:
@@ -293,14 +279,12 @@ void MtmOnNodeDisconnect(int nodeId)
293279
if (BIT_CHECK(SELF_CONNECTIVITY_MASK,nodeId-1))
294280
return;
295281

296-
MtmDisableNode(nodeId);
297-
298282
MTM_LOG1("[STATE] Node %i: disconnected",nodeId);
299283

300284
MtmLock(LW_EXCLUSIVE);
301285
BIT_SET(SELF_CONNECTIVITY_MASK,nodeId-1);
302286
BIT_SET(Mtm->reconnectMask,nodeId-1);
303-
287+
// MtmDisableNode(nodeId);
304288
MtmCheckState();
305289
MtmUnlock();
306290

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp