Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 737e9a1

Browse files
committed
Correctly handle connect timeouts
1 parent 2135458, commit 737e9a1

File tree

3 files changed

+29
-38
lines changed

3 files changed

+29
-38
lines changed

‎contrib/mmts/arbiter.c‎

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -366,14 +366,16 @@ static void MtmCheckHeartbeat()
366366
}
367367

368368

369-
staticintMtmConnectSocket(charconst*host,intport,intmax_attempts)
369+
staticintMtmConnectSocket(charconst*host,intport,inttimeout)
370370
{
371371
structsockaddr_insock_inet;
372372
unsignedaddrs[MAX_ROUTES];
373373
unsignedi,n_addrs=sizeof(addrs) /sizeof(addrs[0]);
374374
MtmHandshakeMessagereq;
375375
MtmArbiterMessageresp;
376376
intsd;
377+
timestamp_tstart=MtmGetSystemTime();
378+
377379

378380
sock_inet.sin_family=AF_INET;
379381
sock_inet.sin_port=htons(port);
@@ -390,7 +392,10 @@ static int MtmConnectSocket(char const* host, int port, int max_attempts)
390392
if (sd<0) {
391393
elog(ERROR,"Arbiter failed to create socket: %d",errno);
392394
}
393-
fcntl(sd,F_SETFL,O_NONBLOCK);
395+
rc=fcntl(sd,F_SETFL,O_NONBLOCK);
396+
if (rc<0) {
397+
elog(ERROR,"Arbiter failed to switch socket to non-blocking mode: %d",errno);
398+
}
394399
busy_socket=sd;
395400
for (i=0;i<n_addrs;++i) {
396401
memcpy(&sock_inet.sin_addr,&addrs[i],sizeofsock_inet.sin_addr);
@@ -405,17 +410,19 @@ static int MtmConnectSocket(char const* host, int port, int max_attempts)
405410
if (rc==0) {
406411
break;
407412
}
408-
if (errno!=EINPROGRESS||max_attempts==0) {
413+
if (errno!=EINPROGRESS||start+MSEC_TO_USEC(timeout)<MtmGetSystemTime()) {
409414
elog(WARNING,"Arbiter failed to connect to %s:%d: error=%d",host,port,errno);
410415
busy_socket=-1;
416+
close(sd);
411417
return-1;
412418
}else {
413-
rc=MtmWaitSocket(sd, true,MtmConnectTimeout);
419+
rc=MtmWaitSocket(sd, true,MtmHeartbeatSendTimeout);
414420
if (rc==1) {
415421
socklen_toptlen=sizeof(int);
416422
if (getsockopt(sd,SOL_SOCKET,SO_ERROR, (void*)&rc,&optlen)<0) {
417423
elog(WARNING,"Arbiter failed to getsockopt for %s:%d: error=%d",host,port,errno);
418424
busy_socket=-1;
425+
close(sd);
419426
return-1;
420427
}
421428
if (rc==0) {
@@ -426,8 +433,8 @@ static int MtmConnectSocket(char const* host, int port, int max_attempts)
426433
}else {
427434
elog(WARNING,"Arbiter waiting socket to %s:%d: rc=%d, error=%d",host,port,rc,errno);
428435
}
429-
max_attempts-=1;
430-
MtmSleep(MSEC_TO_USEC(MtmConnectTimeout));
436+
close(sd);
437+
MtmSleep(MSEC_TO_USEC(MtmHeartbeatSendTimeout));
431438
}
432439
}
433440
MtmSetSocketOptions(sd);
@@ -479,7 +486,7 @@ static void MtmOpenConnections()
479486
}
480487
for (i=0;i<nNodes;i++) {
481488
if (i+1!=MtmNodeId&&i<Mtm->nAllNodes) {
482-
sockets[i]=MtmConnectSocket(Mtm->nodes[i].con.hostName,MtmArbiterPort+i+1,MtmConnectAttempts);
489+
sockets[i]=MtmConnectSocket(Mtm->nodes[i].con.hostName,MtmArbiterPort+i+1,MtmConnectTimeout);
483490
if (sockets[i]<0) {
484491
MtmOnNodeDisconnect(i+1);
485492
}
@@ -511,7 +518,7 @@ static bool MtmSendToNode(int node, void const* buf, int size)
511518
close(sockets[node]);
512519
sockets[node]=-1;
513520
}
514-
sockets[node]=MtmConnectSocket(Mtm->nodes[node].con.hostName,MtmArbiterPort+node+1,MtmReconnectAttempts);
521+
sockets[node]=MtmConnectSocket(Mtm->nodes[node].con.hostName,MtmArbiterPort+node+1,MtmReconnectTimeout);
515522
if (sockets[node]<0) {
516523
MtmOnNodeDisconnect(node+1);
517524
return false;

‎contrib/mmts/multimaster.c‎

Lines changed: 13 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -194,10 +194,9 @@ int MtmNodes;
194194
intMtmNodeId;
195195
intMtmReplicationNodeId;
196196
intMtmArbiterPort;
197-
intMtmConnectAttempts;
198197
intMtmConnectTimeout;
198+
intMtmReconnectTimeout;
199199
intMtmRaftPollDelay;
200-
intMtmReconnectAttempts;
201200
intMtmNodeDisableDelay;
202201
intMtmTransSpillThreshold;
203202
intMtmMaxNodes;
@@ -2031,9 +2030,9 @@ _PG_init(void)
20312030
DefineCustomIntVariable(
20322031
"multimaster.connect_timeout",
20332032
"Multimaster nodes connect timeout",
2034-
"Interval in millisecondsbetweenconnectionattempts",
2033+
"Interval in millisecondsfor establishingconnectionwith cluster node",
20352034
&MtmConnectTimeout,
2036-
1000,
2035+
10000,/* 10 seconds */
20372036
1,
20382037
INT_MAX,
20392038
PGC_BACKEND,
@@ -2044,11 +2043,11 @@ _PG_init(void)
20442043
);
20452044

20462045
DefineCustomIntVariable(
2047-
"multimaster.raft_poll_delay",
2048-
"Multimasterdelay of polling cluster state from Raftable after updating local node status",
2049-
"Timeout in millisecondsbefore polling state of nodes",
2050-
&MtmRaftPollDelay,
2051-
1000,
2046+
"multimaster.reconnect_timeout",
2047+
"Multimasternodes reconnect timeout",
2048+
"Interval in millisecondsfor establishing connection with cluster node",
2049+
&MtmReconnectTimeout,
2050+
5000,/* 5 seconds */
20522051
1,
20532052
INT_MAX,
20542053
PGC_BACKEND,
@@ -2059,11 +2058,11 @@ _PG_init(void)
20592058
);
20602059

20612060
DefineCustomIntVariable(
2062-
"multimaster.connect_attempts",
2063-
"Multimasternumber ofconnect attemts",
2064-
"Maximal number of attempt to establish connection with other node after which multimaster is give up",
2065-
&MtmConnectAttempts,
2066-
10,
2061+
"multimaster.raft_poll_delay",
2062+
"Multimasterdelay ofpolling cluster state from Raftable after updating local node status",
2063+
"Timeout in milliseconds before polling state of nodes",
2064+
&MtmRaftPollDelay,
2065+
1000,
20672066
1,
20682067
INT_MAX,
20692068
PGC_BACKEND,
@@ -2073,20 +2072,6 @@ _PG_init(void)
20732072
NULL
20742073
);
20752074

2076-
DefineCustomIntVariable(
2077-
"multimaster.reconnect_attempts",
2078-
"Multimaster number of reconnect attemts",
2079-
"Maximal number of attempt to reestablish connection with other node after which node is considered to be offline",
2080-
&MtmReconnectAttempts,
2081-
10,
2082-
1,
2083-
INT_MAX,
2084-
PGC_BACKEND,
2085-
0,
2086-
NULL,
2087-
NULL,
2088-
NULL
2089-
);
20902075

20912076
MtmSplitConnStrs();
20922077
MtmStartReceivers();

‎contrib/mmts/multimaster.h‎

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -215,9 +215,8 @@ extern int MtmReplicationNodeId;
215215
externintMtmNodes;
216216
externintMtmArbiterPort;
217217
externchar*MtmDatabaseName;
218-
externintMtmConnectAttempts;
219218
externintMtmConnectTimeout;
220-
externintMtmReconnectAttempts;
219+
externintMtmReconnectTimeout;
221220
externintMtmRaftPollDelay;
222221
externintMtmNodeDisableDelay;
223222
externintMtmTransSpillThreshold;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp