Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit53c05a9

Browse files
committed
Disconnect node only on heartbeat timeout; One disabled state instead of several
1 parent6455118 commit53c05a9

File tree

5 files changed

+22
-51
lines changed

5 files changed

+22
-51
lines changed

‎contrib/mmts/arbiter.c

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,6 @@ static void MtmDisconnect(int node)
190190
MtmUnregisterSocket(sockets[node]);
191191
pg_closesocket(sockets[node],MtmUseRDMA);
192192
sockets[node]=-1;
193-
MtmOnNodeDisconnect(node+1);
194193
}
195194

196195
staticintMtmWaitSocket(intsd,boolforWrite,timestamp_ttimeoutMsec)
@@ -541,9 +540,6 @@ static void MtmOpenConnections()
541540
for (i=0;i<nNodes;i++) {
542541
if (i+1!=MtmNodeId&&i<Mtm->nAllNodes) {
543542
sockets[i]=MtmConnectSocket(i,Mtm->nodes[i].con.arbiterPort);
544-
if (sockets[i]<0) {
545-
MtmOnNodeDisconnect(i+1);
546-
}
547543
}
548544
}
549545
MtmStateProcessEvent(MTM_ARBITER_RECEIVER_START);
@@ -579,7 +575,6 @@ static bool MtmSendToNode(int node, void const* buf, int size)
579575
}
580576
sockets[node]=MtmConnectSocket(node,Mtm->nodes[node].con.arbiterPort);
581577
if (sockets[node]<0) {
582-
MtmOnNodeDisconnect(node+1);
583578
result= false;
584579
break;
585580
}

‎contrib/mmts/multimaster.c

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -218,15 +218,10 @@ static TransactionManager MtmTM =
218218

219219
charconst*constMtmNodeStatusMnem[]=
220220
{
221-
"Initialization",
222-
"Offline",
223-
"Connecting",
224-
"Online",
221+
"Disabled",
225222
"Recovery",
226223
"Recovered",
227-
"InMinor",
228-
"OutOfClique",
229-
"OutOfService"
224+
"Online"
230225
};
231226

232227
charconst*constMtmTxnStatusMnem[]=
@@ -1960,7 +1955,7 @@ void MtmPollStatusOfPreparedTransactionsForDisabledNode(int disabledNodeId)
19601955
MtmBroadcastPollMessage(ts);
19611956
}
19621957
}else {
1963-
MTM_LOG1("Skip transaction %s (%llu) with status %s gtid.node=%d gtid.xid=%llu votedMask=%llx",
1958+
MTM_LOG2("Skip transaction %s (%llu) with status %s gtid.node=%d gtid.xid=%llu votedMask=%llx",
19641959
ts->gid, (long64)ts->xid,MtmTxnStatusMnem[ts->status],ts->gtid.node, (long64)ts->gtid.xid,ts->votedMask);
19651960
}
19661961
}
@@ -2371,7 +2366,7 @@ static void MtmInitialize()
23712366
if (!found)
23722367
{
23732368
MemSet(Mtm,0,sizeof(MtmState)+sizeof(MtmNodeInfo)*(MtmMaxNodes-1));
2374-
Mtm->status=MTM_IN_MINORITY;//MTM_INITIALIZATION;
2369+
Mtm->status=MTM_DISABLED;//MTM_INITIALIZATION;
23752370
Mtm->recoverySlot=0;
23762371
Mtm->locks=GetNamedLWLockTranche(MULTIMASTER_NAME);
23772372
Mtm->csn=MtmGetCurrentTime();
@@ -2778,7 +2773,7 @@ _PG_init(void)
27782773
"Timeout in milliseconds of sending heartbeat messages",
27792774
"Period of broadcasting heartbeat messages by arbiter to all nodes",
27802775
&MtmHeartbeatSendTimeout,
2781-
1000,
2776+
200,
27822777
1,
27832778
INT_MAX,
27842779
PGC_BACKEND,
@@ -2793,7 +2788,7 @@ _PG_init(void)
27932788
"Timeout in milliseconds of receiving heartbeat messages",
27942789
"If no heartbeat message is received from node within this period, it assumed to be dead",
27952790
&MtmHeartbeatRecvTimeout,
2796-
10000,
2791+
1000,
27972792
1,
27982793
INT_MAX,
27992794
PGC_BACKEND,

‎contrib/mmts/multimaster.h

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -130,15 +130,12 @@ typedef enum
130130

131131
typedefenum
132132
{
133-
MTM_INITIALIZATION,/* Initial status */
134-
MTM_OFFLINE,/* Node is excluded from cluster */
135-
MTM_CONNECTED,/* Arbiter is established connections with other nodes */
136-
MTM_ONLINE,/* Ready to receive client's queries */
133+
MTM_DISABLED,/* Node disabled */
137134
MTM_RECOVERY,/* Node is in recovery process */
138-
MTM_RECOVERED,/* Node is recovered by is not yet switched to ONLINE because not all sender/receivers are restarted */
139-
MTM_IN_MINORITY,/* Node is out of quorum */
140-
MTM_OUT_OF_CLIQUE,/* Node is out of cluster by clique detector */
141-
MTM_OUT_OF_SERVICE/* Node is not available totocritical, non-recoverable error */
135+
MTM_RECOVERED,/* Node is recovered by is not yet switched to ONLINE because
136+
* not all sender/receivers are restarted
137+
*/
138+
MTM_ONLINE/* Readytoreceive client's queries */
142139
}MtmNodeStatus;
143140

144141
typedefenum

‎contrib/mmts/pglogical_receiver.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ pglogical_receiver_main(Datum main_arg)
404404
if (rc&WL_POSTMASTER_DEATH)
405405
proc_exit(1);
406406

407-
if (Mtm->status==MTM_OFFLINE|| (Mtm->status==MTM_RECOVERY&&Mtm->recoverySlot!=nodeId))
407+
if (Mtm->status==MTM_DISABLED|| (Mtm->status==MTM_RECOVERY&&Mtm->recoverySlot!=nodeId))
408408
{
409409
ereport(LOG, (MTM_ERRMSG("%s: restart WAL receiver because node was switched to %s mode",worker_proc,MtmNodeStatusMnem[Mtm->status])));
410410
break;

‎contrib/mmts/state.c

Lines changed: 10 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -60,14 +60,6 @@ MtmSetClusterStatus(MtmNodeStatus status)
6060
MTM_LOG1("[STATE] Switching status from %s to %s status",
6161
MtmNodeStatusMnem[Mtm->status],MtmNodeStatusMnem[status]);
6262

63-
/* Do some actions on specific status transitions */
64-
if (status==MTM_IN_MINORITY)
65-
{
66-
Mtm->recoverySlot=0;
67-
Mtm->pglogicalReceiverMask=0;
68-
Mtm->pglogicalSenderMask=0;
69-
}
70-
7163
Mtm->status=status;
7264
}
7365

@@ -88,27 +80,21 @@ MtmCheckState(void)
8880
maskToString(Mtm->pglogicalSenderMask,Mtm->nAllNodes),
8981
Mtm->nAllNodes);
9082

91-
/* ANY -> MTM_IN_MINORITY */
92-
if (nConnected<Mtm->nAllNodes/2+1)
83+
/* ANY -> MTM_DISABLED */
84+
if (nConnected<Mtm->nAllNodes/2+1||
85+
!BIT_CHECK(Mtm->clique,MtmNodeId-1) )
9386
{
9487
BIT_SET(Mtm->disabledNodeMask,MtmNodeId-1);
95-
MtmSetClusterStatus(MTM_IN_MINORITY);
96-
return;
97-
}
98-
99-
/* ANY -> CLIQUE_DISABLE */
100-
if (!BIT_CHECK(Mtm->clique,MtmNodeId-1))
101-
{
102-
/* Should be already disabled by clique detector */
103-
// Assert(BIT_CHECK(Mtm->disabledNodeMask, MtmNodeId-1));
104-
MtmSetClusterStatus(MTM_OUT_OF_CLIQUE);
88+
Mtm->recoverySlot=0;
89+
Mtm->pglogicalReceiverMask=0;
90+
Mtm->pglogicalSenderMask=0;
91+
MtmSetClusterStatus(MTM_DISABLED);
10592
return;
10693
}
10794

10895
switch (Mtm->status)
10996
{
110-
caseMTM_IN_MINORITY:
111-
caseMTM_OUT_OF_CLIQUE:
97+
caseMTM_DISABLED:
11298
if ( (nConnected >=Mtm->nAllNodes/2+1)&&/* majority */
11399
BIT_CHECK(Mtm->clique,MtmNodeId-1) )/* in clique */
114100
{
@@ -173,7 +159,7 @@ MtmStateProcessNeighborEvent(int node_id, MtmNeighborEvent ev)
173159

174160
caseMTM_NEIGHBOR_WAL_SENDER_START_RECOVERED:
175161
BIT_SET(Mtm->pglogicalSenderMask,node_id-1);
176-
MtmEnableNode(node_id);
162+
MtmEnableNode(node_id);/// XXXX ?
177163
break;
178164

179165
caseMTM_NEIGHBOR_RECOVERY_CAUGHTUP:
@@ -293,14 +279,12 @@ void MtmOnNodeDisconnect(int nodeId)
293279
if (BIT_CHECK(SELF_CONNECTIVITY_MASK,nodeId-1))
294280
return;
295281

296-
MtmDisableNode(nodeId);
297-
298282
MTM_LOG1("[STATE] Node %i: disconnected",nodeId);
299283

300284
MtmLock(LW_EXCLUSIVE);
301285
BIT_SET(SELF_CONNECTIVITY_MASK,nodeId-1);
302286
BIT_SET(Mtm->reconnectMask,nodeId-1);
303-
287+
// MtmDisableNode(nodeId);
304288
MtmCheckState();
305289
MtmUnlock();
306290

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp