Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit3ec747a

Browse files
committed
referee version 2
1 parentabc8752 commit3ec747a

File tree

8 files changed

+218
-27
lines changed

8 files changed

+218
-27
lines changed

‎contrib/mmts/multimaster--1.0.sql

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,3 +151,27 @@ $$
151151
LANGUAGE plpgsql;
152152

153153
-- select mtm.alter_sequences();
154+
155+
-- referee stuff
156+
CREATETABLEIF NOT EXISTSmtm.referee_decision(keytextprimary keynot null, node_idint);
157+
158+
CREATE OR REPLACEFUNCTIONmtm.referee_get_winner(applicant_idint) RETURNSintAS
159+
$$
160+
DECLARE
161+
winner_idint;
162+
BEGIN
163+
insert intomtm.referee_decisionvalues ('winner', applicant_id);
164+
select node_id into winner_idfrommtm.referee_decisionwhere key='winner';
165+
return winner_id;
166+
EXCEPTION WHEN others THEN
167+
select node_id into winner_idfrommtm.referee_decisionwhere key='winner';
168+
return winner_id;
169+
END
170+
$$
171+
LANGUAGE plpgsql;
172+
173+
CREATE OR REPLACEFUNCTIONmtm.referee_clean() RETURNS voidAS
174+
$$
175+
deletefrommtm.referee_decisionwhere key='winner';
176+
$$
177+
LANGUAGE sql;

‎contrib/mmts/multimaster.c

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ bool MtmUseRDMA;
254254
boolMtmPreserveCommitOrder;
255255
boolMtmVolksWagenMode;/* Pretend to be normal postgres. This means skip some NOTICE's and use local sequences */
256256
boolMtmMajorNode;
257+
char*MtmRefereeConnStr;
257258

258259
staticchar*MtmConnStrs;
259260
staticchar*MtmRemoteFunctionsList;
@@ -2376,6 +2377,7 @@ static void MtmInitialize()
23762377
Mtm->nAllNodes=MtmNodes;
23772378
Mtm->disabledNodeMask= (((nodemask_t)1 <<MtmNodes)-1);
23782379
Mtm->clique=0;
2380+
Mtm->refereeGrant= false;
23792381
Mtm->stalledNodeMask=0;
23802382
Mtm->stoppedNodeMask=0;
23812383
Mtm->deadNodeMask=0;
@@ -2622,8 +2624,7 @@ static void MtmSplitConnStrs(void)
26222624
}
26232625
pfree(copy);
26242626
}
2625-
if (!MtmReferee)
2626-
{
2627+
26272628
if (MtmNodeId==INT_MAX) {
26282629
if (gethostname(buf,sizeofbuf)!=0) {
26292630
MTM_ELOG(ERROR,"Failed to get host name: %m");
@@ -2683,7 +2684,6 @@ static void MtmSplitConnStrs(void)
26832684
len=end-dbName;
26842685
MtmDatabaseName=pnstrdup(dbName,len);
26852686
}
2686-
}
26872687
MemoryContextSwitchTo(old_context);
26882688
}
26892689

@@ -2980,6 +2980,19 @@ _PG_init(void)
29802980
NULL
29812981
);
29822982

2983+
DefineCustomStringVariable(
2984+
"multimaster.referee_connstring",
2985+
"Referee connection string",
2986+
NULL,
2987+
&MtmRefereeConnStr,
2988+
"",
2989+
PGC_POSTMASTER,
2990+
0,
2991+
NULL,
2992+
NULL,
2993+
NULL
2994+
);
2995+
29832996
DefineCustomBoolVariable(
29842997
"multimaster.use_rdma",
29852998
"Use RDMA sockets",
@@ -3181,8 +3194,6 @@ _PG_init(void)
31813194

31823195
if (MtmReferee)
31833196
{
3184-
MtmSplitConnStrs();
3185-
MtmRefereeInitialize();
31863197
return;
31873198
}
31883199

‎contrib/mmts/multimaster.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,7 @@ typedef struct
287287
TransactionIdoldestXid;/* XID of oldest transaction visible by any active transaction (local or global) */
288288
nodemask_tdisabledNodeMask;/* Bitmask of disabled nodes */
289289
nodemask_tclique;/* Bitmask of nodes that are connected and we allowed to connect/send wal/receive wal with them */
290+
boolrefereeGrant;/* Referee allowed us to work with half of the nodes */
290291
nodemask_tdeadNodeMask;/* Bitmask of nodes considered as dead by referee */
291292
nodemask_trecoveredNodeMask;/* Bitmask of nodes recoverd after been reported as dead by referee */
292293
nodemask_tstalledNodeMask;/* Bitmask of stalled nodes (node with dropped replication slot which makes it not possible automatic recovery of such node) */
@@ -379,6 +380,7 @@ extern timestamp_t MtmRefreshClusterStatusSchedule;
379380
externMtmConnectionInfo*MtmConnections;
380381
externboolMtmMajorNode;
381382
externboolMtmBackgroundWorker;
383+
externchar*MtmRefereeConnStr;
382384

383385

384386
externvoidMtmArbiterInitialize(void);

‎contrib/mmts/state.c

Lines changed: 108 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ char const* const MtmEventMnem[] =
2525
"MTM_NONRECOVERABLE_ERROR"
2626
};
2727

28+
staticintMtmGetRefereeWinner(void);
2829

2930
// XXXX: allocate in context and clean it
3031
staticchar*
@@ -91,12 +92,14 @@ MtmCheckState(void)
9192
maskToString(Mtm->pglogicalReceiverMask,Mtm->nAllNodes),
9293
maskToString(Mtm->pglogicalSenderMask,Mtm->nAllNodes),
9394
Mtm->nAllNodes,
94-
MtmMajorNode);
95+
(MtmMajorNode||Mtm->refereeGrant));
9596

9697
isEnabledState=
97-
( (nConnected >=Mtm->nAllNodes/2+1)/* majority */
98-
|| (nConnected==Mtm->nAllNodes/2&&MtmMajorNode) )/* or half + major node */
99-
&&BIT_CHECK(Mtm->clique,MtmNodeId-1);/* in clique */
98+
( (nConnected >=Mtm->nAllNodes/2+1)/* majority */
99+
// XXXX: should we restrict major with two nodes setup?
100+
|| (nConnected==Mtm->nAllNodes/2&&MtmMajorNode)/* or half + major node */
101+
|| (nConnected==Mtm->nAllNodes/2&&Mtm->refereeGrant) )/* or half + referee */
102+
&&BIT_CHECK(Mtm->clique,MtmNodeId-1);/* in clique */
100103

101104
/* ANY -> MTM_DISABLED */
102105
if (!isEnabledState)
@@ -135,6 +138,12 @@ MtmCheckState(void)
135138
caseMTM_RECOVERED:
136139
if (nReceivers==nEnabled-1&&nSenders==nEnabled-1&&nEnabled==nConnected)
137140
{
141+
/*
142+
* It should be already cleaned by RECOVERY_CAUGHTUP, but
143+
* in major mode or with referee we can be working alone
144+
* so nobody will clean it.
145+
*/
146+
BIT_CLEAR(Mtm->originLockNodeMask,MtmNodeId-1);
138147
MtmSetClusterStatus(MTM_ONLINE);
139148
return;
140149
}
@@ -376,9 +385,35 @@ MtmRefreshClusterStatus()
376385
* Periodical check that we are still in RECOVERED state.
377386
* See comment to MTM_RECOVERED -> MTM_ONLINE transition in MtmCheckState()
378387
*/
379-
if (Mtm->status==MTM_RECOVERED)
380-
MtmCheckState();
388+
MtmCheckState();
389+
390+
/*
391+
* Check for referee decidion when pnly half of nodes are visible.
392+
*/
393+
if (MtmRefereeConnStr&&*MtmRefereeConnStr&& !Mtm->refereeGrant&&
394+
// XXXX visibility & ~clique?
395+
countZeroBits(SELF_CONNECTIVITY_MASK,Mtm->nAllNodes)==Mtm->nAllNodes/2)
396+
{
397+
intwinner_node_id=MtmGetRefereeWinner();
398+
if (winner_node_id!=-1&&
399+
// XXXX visibility & ~clique?
400+
!BIT_CHECK(SELF_CONNECTIVITY_MASK,winner_node_id-1))
401+
{
402+
MTM_LOG1("[STATE] Referee allowed to proceed with half of the nodes (winner_id = %d)",
403+
winner_node_id);
404+
Mtm->refereeGrant= true;
405+
406+
MtmLock(LW_EXCLUSIVE);
407+
MtmEnableNode(MtmNodeId);
408+
MtmCheckState();
409+
MtmUnlock();
410+
}
411+
}
381412

413+
414+
/*
415+
* Check for clique.
416+
*/
382417
MtmBuildConnectivityMatrix(matrix);
383418
newClique=MtmFindMaxClique(matrix,Mtm->nAllNodes,&cliqueSize);
384419

@@ -436,3 +471,70 @@ MtmRefreshClusterStatus()
436471
MtmCheckState();
437472
MtmUnlock();
438473
}
474+
475+
staticint
476+
MtmGetRefereeWinner(void)
477+
{
478+
intsocket_fd;
479+
PGconn*conn;
480+
PGresult*res;
481+
structtimevaltimeout= {5,0 };
482+
charsql[128];
483+
intwinner_node_id;
484+
485+
conn=PQconnectdb_safe(MtmRefereeConnStr);
486+
if (PQstatus(conn)!=CONNECTION_OK)
487+
{
488+
MTM_ELOG(WARNING,"Could not connect to referee (%s): %s",
489+
MtmRefereeConnStr,PQerrorMessage(conn));
490+
PQfinish(conn);
491+
return-1;
492+
}
493+
494+
socket_fd=PQsocket(conn);
495+
if (socket_fd<0)
496+
{
497+
MTM_ELOG(WARNING,"Referee socket is invalid");
498+
PQfinish(conn);
499+
return-1;
500+
}
501+
502+
if (setsockopt(socket_fd,SOL_SOCKET,SO_RCVTIMEO,
503+
(char*)&timeout,sizeof(timeout))<0)
504+
{
505+
MTM_ELOG(WARNING,"Could not set referee socket timeout: %s",
506+
strerror(errno));
507+
PQfinish(conn);
508+
return-1;
509+
}
510+
511+
sprintf(sql,"select mtm.referee_get_winner(%d)",MtmNodeId);
512+
res=PQexec(conn,sql);
513+
if (PQresultStatus(res)!=PGRES_TUPLES_OK||
514+
PQntuples(res)!=1||
515+
PQnfields(res)!=1)
516+
{
517+
MTM_ELOG(WARNING,"Refusing unexpected result (r=%d, n=%d, w=%d, k=%s) from referee.",
518+
PQresultStatus(res),PQntuples(res),PQnfields(res),PQgetvalue(res,0,0));
519+
PQclear(res);
520+
PQfinish(conn);
521+
return-1;
522+
}
523+
524+
winner_node_id=atoi(PQgetvalue(res,0,0));
525+
526+
if (winner_node_id<1||winner_node_id>Mtm->nAllNodes)
527+
{
528+
MTM_ELOG(WARNING,
529+
"Referee responded with node_id=%d, it's out of our node range",
530+
winner_node_id);
531+
PQclear(res);
532+
PQfinish(conn);
533+
return-1;
534+
}
535+
536+
MTM_LOG1("Got referee response, winner node_id=%d.",winner_node_id);
537+
/* Ok, we finally got it! */
538+
returnwinner_node_id;
539+
}
540+

‎contrib/mmts/tests2/docker-entrypoint.sh

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,21 +57,39 @@ if [ "$1" = 'postgres' ]; then
5757
max_wal_senders = 10
5858
shared_preload_libraries = 'multimaster'
5959
default_transaction_isolation = 'repeatable read'
60-
log_line_prefix = '%m: '
60+
log_line_prefix = '%m: '
61+
# log_statement = all
6162
6263
multimaster.workers = 4
6364
multimaster.max_workers = 16
6465
multimaster.max_nodes = 3
6566
multimaster.volkswagen_mode = 1
6667
multimaster.queue_size=52857600
6768
multimaster.ignore_tables_without_pk = 1
68-
multimaster.node_id =$NODE_ID
69-
multimaster.conn_strings = '$CONNSTRS'
70-
multimaster.major_node =$MAJOR
7169
multimaster.heartbeat_recv_timeout = 1100
7270
multimaster.heartbeat_send_timeout = 250
7371
EOF
7472

73+
if [-n"$NODE_ID" ];then
74+
echo"multimaster.node_id =$NODE_ID">>$PGDATA/postgresql.conf
75+
fi
76+
77+
if [-n"$CONNSTRS" ];then
78+
echo"multimaster.conn_strings = '$CONNSTRS'">>$PGDATA/postgresql.conf
79+
fi
80+
81+
if [-n"$MAJOR" ];then
82+
echo'multimaster.major_node = on'>>$PGDATA/postgresql.conf
83+
fi
84+
85+
if [-n"$REFEREE" ];then
86+
echo'multimaster.referee = on'>>$PGDATA/postgresql.conf
87+
fi
88+
89+
if [-n"$REFEREE_CONNSTR" ];then
90+
echo"multimaster.referee_connstring = '$REFEREE_CONNSTR'">>$PGDATA/postgresql.conf
91+
fi
92+
7593
cat$PGDATA/postgresql.conf
7694

7795
pg_ctl -D"$PGDATA" -m fast -w stop

‎contrib/mmts/tests2/lib/test_helper.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
importunittest
22
importtime
33
importdatetime
4+
importpsycopg2
45

56
TEST_WARMING_TIME=5
67
TEST_DURATION=10
@@ -50,6 +51,7 @@ def performFailure(self, failure):
5051
aggs_failure=self.client.get_aggregates()
5152

5253

54+
# time.sleep(10000)
5355
failure.stop()
5456

5557
print('Eliminate failure at ',datetime.datetime.utcnow())
@@ -59,3 +61,12 @@ def performFailure(self, failure):
5961
aggs=self.client.get_aggregates()
6062

6163
return (aggs_failure,aggs)
64+
65+
defnodeExecute(dsn,statements):
66+
con=psycopg2.connect(dsn)
67+
con.autocommit=True
68+
cur=con.cursor()
69+
forstatementinstatements:
70+
cur.execute(statement)
71+
cur.close()
72+
con.close()

‎contrib/mmts/tests2/support/two_nodes.yml

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ services:
1212
POSTGRES_USER:'pg'
1313
POSTGRES_DB:'regression'
1414
NODE_ID:1
15-
MAJOR:'off'
1615
CONNSTRS:>-
1716
dbname=regression user=pg host=node1,
1817
dbname=regression user=pg host=node2
18+
REFEREE_CONNSTR:'dbname=regression user=pg host=referee'
1919
ports:
2020
-"15432:5432"
2121

@@ -29,9 +29,23 @@ services:
2929
POSTGRES_USER:'pg'
3030
POSTGRES_DB:'regression'
3131
NODE_ID:2
32-
MAJOR:'off'
3332
CONNSTRS:>-
3433
dbname=regression user=pg host=node1,
3534
dbname=regression user=pg host=node2
35+
REFEREE_CONNSTR:'dbname=regression user=pg host=referee'
3636
ports:
3737
-"15433:5432"
38+
39+
referee:
40+
container_name:referee
41+
build:../..
42+
privileged:true
43+
ulimits:
44+
core:14294967296
45+
environment:
46+
POSTGRES_USER:'pg'
47+
POSTGRES_DB:'regression'
48+
NODE_ID:1
49+
REFEREE:'on'
50+
ports:
51+
-"15435:5432"

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp