Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 2332ab5

Browse files
knizhnik authored and kelvich committed
Correctly handle connect timeouts
1 parent 4ac3eec, commit 2332ab5

File tree

3 files changed

+29
-38
lines changed

3 files changed

+29
-38
lines changed

‎arbiter.c

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -366,14 +366,16 @@ static void MtmCheckHeartbeat()
366366
}
367367

368368

369-
staticintMtmConnectSocket(charconst*host,intport,intmax_attempts)
369+
staticintMtmConnectSocket(charconst*host,intport,inttimeout)
370370
{
371371
structsockaddr_insock_inet;
372372
unsignedaddrs[MAX_ROUTES];
373373
unsignedi,n_addrs=sizeof(addrs) /sizeof(addrs[0]);
374374
MtmHandshakeMessagereq;
375375
MtmArbiterMessageresp;
376376
intsd;
377+
timestamp_tstart=MtmGetSystemTime();
378+
377379

378380
sock_inet.sin_family=AF_INET;
379381
sock_inet.sin_port=htons(port);
@@ -390,7 +392,10 @@ static int MtmConnectSocket(char const* host, int port, int max_attempts)
390392
if (sd<0) {
391393
elog(ERROR,"Arbiter failed to create socket: %d",errno);
392394
}
393-
fcntl(sd,F_SETFL,O_NONBLOCK);
395+
rc=fcntl(sd,F_SETFL,O_NONBLOCK);
396+
if (rc<0) {
397+
elog(ERROR,"Arbiter failed to switch socket to non-blocking mode: %d",errno);
398+
}
394399
busy_socket=sd;
395400
for (i=0;i<n_addrs;++i) {
396401
memcpy(&sock_inet.sin_addr,&addrs[i],sizeofsock_inet.sin_addr);
@@ -405,17 +410,19 @@ static int MtmConnectSocket(char const* host, int port, int max_attempts)
405410
if (rc==0) {
406411
break;
407412
}
408-
if (errno!=EINPROGRESS||max_attempts==0) {
413+
if (errno!=EINPROGRESS||start+MSEC_TO_USEC(timeout)<MtmGetSystemTime()) {
409414
elog(WARNING,"Arbiter failed to connect to %s:%d: error=%d",host,port,errno);
410415
busy_socket=-1;
416+
close(sd);
411417
return-1;
412418
}else {
413-
rc=MtmWaitSocket(sd, true,MtmConnectTimeout);
419+
rc=MtmWaitSocket(sd, true,MtmHeartbeatSendTimeout);
414420
if (rc==1) {
415421
socklen_toptlen=sizeof(int);
416422
if (getsockopt(sd,SOL_SOCKET,SO_ERROR, (void*)&rc,&optlen)<0) {
417423
elog(WARNING,"Arbiter failed to getsockopt for %s:%d: error=%d",host,port,errno);
418424
busy_socket=-1;
425+
close(sd);
419426
return-1;
420427
}
421428
if (rc==0) {
@@ -426,8 +433,8 @@ static int MtmConnectSocket(char const* host, int port, int max_attempts)
426433
}else {
427434
elog(WARNING,"Arbiter waiting socket to %s:%d: rc=%d, error=%d",host,port,rc,errno);
428435
}
429-
max_attempts-=1;
430-
MtmSleep(MSEC_TO_USEC(MtmConnectTimeout));
436+
close(sd);
437+
MtmSleep(MSEC_TO_USEC(MtmHeartbeatSendTimeout));
431438
}
432439
}
433440
MtmSetSocketOptions(sd);
@@ -479,7 +486,7 @@ static void MtmOpenConnections()
479486
}
480487
for (i=0;i<nNodes;i++) {
481488
if (i+1!=MtmNodeId&&i<Mtm->nAllNodes) {
482-
sockets[i]=MtmConnectSocket(Mtm->nodes[i].con.hostName,MtmArbiterPort+i+1,MtmConnectAttempts);
489+
sockets[i]=MtmConnectSocket(Mtm->nodes[i].con.hostName,MtmArbiterPort+i+1,MtmConnectTimeout);
483490
if (sockets[i]<0) {
484491
MtmOnNodeDisconnect(i+1);
485492
}
@@ -511,7 +518,7 @@ static bool MtmSendToNode(int node, void const* buf, int size)
511518
close(sockets[node]);
512519
sockets[node]=-1;
513520
}
514-
sockets[node]=MtmConnectSocket(Mtm->nodes[node].con.hostName,MtmArbiterPort+node+1,MtmReconnectAttempts);
521+
sockets[node]=MtmConnectSocket(Mtm->nodes[node].con.hostName,MtmArbiterPort+node+1,MtmReconnectTimeout);
515522
if (sockets[node]<0) {
516523
MtmOnNodeDisconnect(node+1);
517524
return false;

‎multimaster.c

Lines changed: 13 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -193,10 +193,9 @@ int MtmNodes;
193193
intMtmNodeId;
194194
intMtmReplicationNodeId;
195195
intMtmArbiterPort;
196-
intMtmConnectAttempts;
197196
intMtmConnectTimeout;
197+
intMtmReconnectTimeout;
198198
intMtmRaftPollDelay;
199-
intMtmReconnectAttempts;
200199
intMtmNodeDisableDelay;
201200
intMtmTransSpillThreshold;
202201
intMtmMaxNodes;
@@ -2030,9 +2029,9 @@ _PG_init(void)
20302029
DefineCustomIntVariable(
20312030
"multimaster.connect_timeout",
20322031
"Multimaster nodes connect timeout",
2033-
"Interval in millisecondsbetweenconnectionattempts",
2032+
"Interval in millisecondsfor establishingconnectionwith cluster node",
20342033
&MtmConnectTimeout,
2035-
1000,
2034+
10000,/* 10 seconds */
20362035
1,
20372036
INT_MAX,
20382037
PGC_BACKEND,
@@ -2043,11 +2042,11 @@ _PG_init(void)
20432042
);
20442043

20452044
DefineCustomIntVariable(
2046-
"multimaster.raft_poll_delay",
2047-
"Multimasterdelay of polling cluster state from Raftable after updating local node status",
2048-
"Timeout in millisecondsbefore polling state of nodes",
2049-
&MtmRaftPollDelay,
2050-
1000,
2045+
"multimaster.reconnect_timeout",
2046+
"Multimasternodes reconnect timeout",
2047+
"Interval in millisecondsfor establishing connection with cluster node",
2048+
&MtmReconnectTimeout,
2049+
5000,/* 5 seconds */
20512050
1,
20522051
INT_MAX,
20532052
PGC_BACKEND,
@@ -2058,11 +2057,11 @@ _PG_init(void)
20582057
);
20592058

20602059
DefineCustomIntVariable(
2061-
"multimaster.connect_attempts",
2062-
"Multimasternumber ofconnect attemts",
2063-
"Maximal number of attempt to establish connection with other node after which multimaster is give up",
2064-
&MtmConnectAttempts,
2065-
10,
2060+
"multimaster.raft_poll_delay",
2061+
"Multimasterdelay ofpolling cluster state from Raftable after updating local node status",
2062+
"Timeout in milliseconds before polling state of nodes",
2063+
&MtmRaftPollDelay,
2064+
1000,
20662065
1,
20672066
INT_MAX,
20682067
PGC_BACKEND,
@@ -2072,20 +2071,6 @@ _PG_init(void)
20722071
NULL
20732072
);
20742073

2075-
DefineCustomIntVariable(
2076-
"multimaster.reconnect_attempts",
2077-
"Multimaster number of reconnect attemts",
2078-
"Maximal number of attempt to reestablish connection with other node after which node is considered to be offline",
2079-
&MtmReconnectAttempts,
2080-
10,
2081-
1,
2082-
INT_MAX,
2083-
PGC_BACKEND,
2084-
0,
2085-
NULL,
2086-
NULL,
2087-
NULL
2088-
);
20892074

20902075
MtmSplitConnStrs();
20912076
MtmStartReceivers();

‎multimaster.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -215,9 +215,8 @@ extern int MtmReplicationNodeId;
215215
externintMtmNodes;
216216
externintMtmArbiterPort;
217217
externchar*MtmDatabaseName;
218-
externintMtmConnectAttempts;
219218
externintMtmConnectTimeout;
220-
externintMtmReconnectAttempts;
219+
externintMtmReconnectTimeout;
221220
externintMtmRaftPollDelay;
222221
externintMtmNodeDisableDelay;
223222
externintMtmTransSpillThreshold;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp