Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitf639b4c

Browse files
committed
referee version 2
1 parent426b00d commitf639b4c

File tree

8 files changed

+218
-27
lines changed

8 files changed

+218
-27
lines changed

‎multimaster--1.0.sql

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,3 +151,27 @@ $$
151151
LANGUAGE plpgsql;
152152

153153
-- select mtm.alter_sequences();
154+
155+
-- referee stuff
156+
CREATETABLEIF NOT EXISTSmtm.referee_decision(keytextprimary keynot null, node_idint);
157+
158+
CREATE OR REPLACEFUNCTIONmtm.referee_get_winner(applicant_idint) RETURNSintAS
159+
$$
160+
DECLARE
161+
winner_idint;
162+
BEGIN
163+
insert intomtm.referee_decisionvalues ('winner', applicant_id);
164+
select node_id into winner_idfrommtm.referee_decisionwhere key='winner';
165+
return winner_id;
166+
EXCEPTION WHEN others THEN
167+
select node_id into winner_idfrommtm.referee_decisionwhere key='winner';
168+
return winner_id;
169+
END
170+
$$
171+
LANGUAGE plpgsql;
172+
173+
CREATE OR REPLACEFUNCTIONmtm.referee_clean() RETURNS voidAS
174+
$$
175+
deletefrommtm.referee_decisionwhere key='winner';
176+
$$
177+
LANGUAGE sql;

‎multimaster.c

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ bool MtmUseRDMA;
254254
boolMtmPreserveCommitOrder;
255255
boolMtmVolksWagenMode;/* Pretend to be normal postgres. This means skip some NOTICE's and use local sequences */
256256
boolMtmMajorNode;
257+
char*MtmRefereeConnStr;
257258

258259
staticchar*MtmConnStrs;
259260
staticchar*MtmRemoteFunctionsList;
@@ -2372,6 +2373,7 @@ static void MtmInitialize()
23722373
Mtm->nAllNodes=MtmNodes;
23732374
Mtm->disabledNodeMask= (((nodemask_t)1 <<MtmNodes)-1);
23742375
Mtm->clique=0;
2376+
Mtm->refereeGrant= false;
23752377
Mtm->stalledNodeMask=0;
23762378
Mtm->stoppedNodeMask=0;
23772379
Mtm->deadNodeMask=0;
@@ -2618,8 +2620,7 @@ static void MtmSplitConnStrs(void)
26182620
}
26192621
pfree(copy);
26202622
}
2621-
if (!MtmReferee)
2622-
{
2623+
26232624
if (MtmNodeId==INT_MAX) {
26242625
if (gethostname(buf,sizeofbuf)!=0) {
26252626
MTM_ELOG(ERROR,"Failed to get host name: %m");
@@ -2679,7 +2680,6 @@ static void MtmSplitConnStrs(void)
26792680
len=end-dbName;
26802681
MtmDatabaseName=pnstrdup(dbName,len);
26812682
}
2682-
}
26832683
MemoryContextSwitchTo(old_context);
26842684
}
26852685

@@ -2976,6 +2976,19 @@ _PG_init(void)
29762976
NULL
29772977
);
29782978

2979+
DefineCustomStringVariable(
2980+
"multimaster.referee_connstring",
2981+
"Referee connection string",
2982+
NULL,
2983+
&MtmRefereeConnStr,
2984+
"",
2985+
PGC_POSTMASTER,
2986+
0,
2987+
NULL,
2988+
NULL,
2989+
NULL
2990+
);
2991+
29792992
DefineCustomBoolVariable(
29802993
"multimaster.use_rdma",
29812994
"Use RDMA sockets",
@@ -3177,8 +3190,6 @@ _PG_init(void)
31773190

31783191
if (MtmReferee)
31793192
{
3180-
MtmSplitConnStrs();
3181-
MtmRefereeInitialize();
31823193
return;
31833194
}
31843195

‎multimaster.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,7 @@ typedef struct
287287
TransactionIdoldestXid;/* XID of oldest transaction visible by any active transaction (local or global) */
288288
nodemask_tdisabledNodeMask;/* Bitmask of disabled nodes */
289289
nodemask_tclique;/* Bitmask of nodes that are connected and we allowed to connect/send wal/receive wal with them */
290+
boolrefereeGrant;/* Referee allowed us to work with half of the nodes */
290291
nodemask_tdeadNodeMask;/* Bitmask of nodes considered as dead by referee */
291292
nodemask_trecoveredNodeMask;/* Bitmask of nodes recoverd after been reported as dead by referee */
292293
nodemask_tstalledNodeMask;/* Bitmask of stalled nodes (node with dropped replication slot which makes it not possible automatic recovery of such node) */
@@ -379,6 +380,7 @@ extern timestamp_t MtmRefreshClusterStatusSchedule;
379380
externMtmConnectionInfo*MtmConnections;
380381
externboolMtmMajorNode;
381382
externboolMtmBackgroundWorker;
383+
externchar*MtmRefereeConnStr;
382384

383385

384386
externvoidMtmArbiterInitialize(void);

‎state.c

Lines changed: 108 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ char const* const MtmEventMnem[] =
2525
"MTM_NONRECOVERABLE_ERROR"
2626
};
2727

28+
staticintMtmGetRefereeWinner(void);
2829

2930
// XXXX: allocate in context and clean it
3031
staticchar*
@@ -91,12 +92,14 @@ MtmCheckState(void)
9192
maskToString(Mtm->pglogicalReceiverMask,Mtm->nAllNodes),
9293
maskToString(Mtm->pglogicalSenderMask,Mtm->nAllNodes),
9394
Mtm->nAllNodes,
94-
MtmMajorNode);
95+
(MtmMajorNode||Mtm->refereeGrant));
9596

9697
isEnabledState=
97-
( (nConnected >=Mtm->nAllNodes/2+1)/* majority */
98-
|| (nConnected==Mtm->nAllNodes/2&&MtmMajorNode) )/* or half + major node */
99-
&&BIT_CHECK(Mtm->clique,MtmNodeId-1);/* in clique */
98+
( (nConnected >=Mtm->nAllNodes/2+1)/* majority */
99+
// XXXX: should we restrict major with two nodes setup?
100+
|| (nConnected==Mtm->nAllNodes/2&&MtmMajorNode)/* or half + major node */
101+
|| (nConnected==Mtm->nAllNodes/2&&Mtm->refereeGrant) )/* or half + referee */
102+
&&BIT_CHECK(Mtm->clique,MtmNodeId-1);/* in clique */
100103

101104
/* ANY -> MTM_DISABLED */
102105
if (!isEnabledState)
@@ -135,6 +138,12 @@ MtmCheckState(void)
135138
caseMTM_RECOVERED:
136139
if (nReceivers==nEnabled-1&&nSenders==nEnabled-1&&nEnabled==nConnected)
137140
{
141+
/*
142+
* It should be already cleaned by RECOVERY_CAUGHTUP, but
143+
* in major mode or with referee we can be working alone
144+
* so nobody will clean it.
145+
*/
146+
BIT_CLEAR(Mtm->originLockNodeMask,MtmNodeId-1);
138147
MtmSetClusterStatus(MTM_ONLINE);
139148
return;
140149
}
@@ -376,9 +385,35 @@ MtmRefreshClusterStatus()
376385
* Periodical check that we are still in RECOVERED state.
377386
* See comment to MTM_RECOVERED -> MTM_ONLINE transition in MtmCheckState()
378387
*/
379-
if (Mtm->status==MTM_RECOVERED)
380-
MtmCheckState();
388+
MtmCheckState();
389+
390+
/*
391+
* Check for referee decidion when pnly half of nodes are visible.
392+
*/
393+
if (MtmRefereeConnStr&&*MtmRefereeConnStr&& !Mtm->refereeGrant&&
394+
// XXXX visibility & ~clique?
395+
countZeroBits(SELF_CONNECTIVITY_MASK,Mtm->nAllNodes)==Mtm->nAllNodes/2)
396+
{
397+
intwinner_node_id=MtmGetRefereeWinner();
398+
if (winner_node_id!=-1&&
399+
// XXXX visibility & ~clique?
400+
!BIT_CHECK(SELF_CONNECTIVITY_MASK,winner_node_id-1))
401+
{
402+
MTM_LOG1("[STATE] Referee allowed to proceed with half of the nodes (winner_id = %d)",
403+
winner_node_id);
404+
Mtm->refereeGrant= true;
405+
406+
MtmLock(LW_EXCLUSIVE);
407+
MtmEnableNode(MtmNodeId);
408+
MtmCheckState();
409+
MtmUnlock();
410+
}
411+
}
381412

413+
414+
/*
415+
* Check for clique.
416+
*/
382417
MtmBuildConnectivityMatrix(matrix);
383418
newClique=MtmFindMaxClique(matrix,Mtm->nAllNodes,&cliqueSize);
384419

@@ -436,3 +471,70 @@ MtmRefreshClusterStatus()
436471
MtmCheckState();
437472
MtmUnlock();
438473
}
474+
475+
staticint
476+
MtmGetRefereeWinner(void)
477+
{
478+
intsocket_fd;
479+
PGconn*conn;
480+
PGresult*res;
481+
structtimevaltimeout= {5,0 };
482+
charsql[128];
483+
intwinner_node_id;
484+
485+
conn=PQconnectdb_safe(MtmRefereeConnStr);
486+
if (PQstatus(conn)!=CONNECTION_OK)
487+
{
488+
MTM_ELOG(WARNING,"Could not connect to referee (%s): %s",
489+
MtmRefereeConnStr,PQerrorMessage(conn));
490+
PQfinish(conn);
491+
return-1;
492+
}
493+
494+
socket_fd=PQsocket(conn);
495+
if (socket_fd<0)
496+
{
497+
MTM_ELOG(WARNING,"Referee socket is invalid");
498+
PQfinish(conn);
499+
return-1;
500+
}
501+
502+
if (setsockopt(socket_fd,SOL_SOCKET,SO_RCVTIMEO,
503+
(char*)&timeout,sizeof(timeout))<0)
504+
{
505+
MTM_ELOG(WARNING,"Could not set referee socket timeout: %s",
506+
strerror(errno));
507+
PQfinish(conn);
508+
return-1;
509+
}
510+
511+
sprintf(sql,"select mtm.referee_get_winner(%d)",MtmNodeId);
512+
res=PQexec(conn,sql);
513+
if (PQresultStatus(res)!=PGRES_TUPLES_OK||
514+
PQntuples(res)!=1||
515+
PQnfields(res)!=1)
516+
{
517+
MTM_ELOG(WARNING,"Refusing unexpected result (r=%d, n=%d, w=%d, k=%s) from referee.",
518+
PQresultStatus(res),PQntuples(res),PQnfields(res),PQgetvalue(res,0,0));
519+
PQclear(res);
520+
PQfinish(conn);
521+
return-1;
522+
}
523+
524+
winner_node_id=atoi(PQgetvalue(res,0,0));
525+
526+
if (winner_node_id<1||winner_node_id>Mtm->nAllNodes)
527+
{
528+
MTM_ELOG(WARNING,
529+
"Referee responded with node_id=%d, it's out of our node range",
530+
winner_node_id);
531+
PQclear(res);
532+
PQfinish(conn);
533+
return-1;
534+
}
535+
536+
MTM_LOG1("Got referee response, winner node_id=%d.",winner_node_id);
537+
/* Ok, we finally got it! */
538+
returnwinner_node_id;
539+
}
540+

‎tests2/docker-entrypoint.sh

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,21 +58,39 @@ if [ "$1" = 'postgres' ]; then
5858
max_wal_senders = 10
5959
shared_preload_libraries = 'multimaster'
6060
default_transaction_isolation = 'repeatable read'
61-
log_line_prefix = '%m: '
61+
log_line_prefix = '%m: '
62+
# log_statement = all
6263
6364
multimaster.workers = 4
6465
multimaster.max_workers = 16
6566
multimaster.max_nodes = 3
6667
multimaster.volkswagen_mode = 1
6768
multimaster.queue_size=52857600
6869
multimaster.ignore_tables_without_pk = 1
69-
multimaster.node_id =$NODE_ID
70-
multimaster.conn_strings = '$CONNSTRS'
71-
multimaster.major_node =$MAJOR
7270
multimaster.heartbeat_recv_timeout = 1100
7371
multimaster.heartbeat_send_timeout = 250
7472
EOF
7573

74+
if [-n"$NODE_ID" ];then
75+
echo"multimaster.node_id =$NODE_ID">>$PGDATA/postgresql.conf
76+
fi
77+
78+
if [-n"$CONNSTRS" ];then
79+
echo"multimaster.conn_strings = '$CONNSTRS'">>$PGDATA/postgresql.conf
80+
fi
81+
82+
if [-n"$MAJOR" ];then
83+
echo'multimaster.major_node = on'>>$PGDATA/postgresql.conf
84+
fi
85+
86+
if [-n"$REFEREE" ];then
87+
echo'multimaster.referee = on'>>$PGDATA/postgresql.conf
88+
fi
89+
90+
if [-n"$REFEREE_CONNSTR" ];then
91+
echo"multimaster.referee_connstring = '$REFEREE_CONNSTR'">>$PGDATA/postgresql.conf
92+
fi
93+
7694
cat$PGDATA/postgresql.conf
7795

7896
pg_ctl -D"$PGDATA" -m fast -w stop

‎tests2/lib/test_helper.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
importunittest
22
importtime
33
importdatetime
4+
importpsycopg2
45

56
TEST_WARMING_TIME=5
67
TEST_DURATION=10
@@ -50,6 +51,7 @@ def performFailure(self, failure):
5051
aggs_failure=self.client.get_aggregates()
5152

5253

54+
# time.sleep(10000)
5355
failure.stop()
5456

5557
print('Eliminate failure at ',datetime.datetime.utcnow())
@@ -59,3 +61,12 @@ def performFailure(self, failure):
5961
aggs=self.client.get_aggregates()
6062

6163
return (aggs_failure,aggs)
64+
65+
defnodeExecute(dsn,statements):
66+
con=psycopg2.connect(dsn)
67+
con.autocommit=True
68+
cur=con.cursor()
69+
forstatementinstatements:
70+
cur.execute(statement)
71+
cur.close()
72+
con.close()

‎tests2/support/two_nodes.yml

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ services:
1212
POSTGRES_USER:'pg'
1313
POSTGRES_DB:'regression'
1414
NODE_ID:1
15-
MAJOR:'off'
1615
CONNSTRS:>-
1716
dbname=regression user=pg host=node1,
1817
dbname=regression user=pg host=node2
18+
REFEREE_CONNSTR:'dbname=regression user=pg host=referee'
1919
ports:
2020
-"15432:5432"
2121

@@ -29,9 +29,23 @@ services:
2929
POSTGRES_USER:'pg'
3030
POSTGRES_DB:'regression'
3131
NODE_ID:2
32-
MAJOR:'off'
3332
CONNSTRS:>-
3433
dbname=regression user=pg host=node1,
3534
dbname=regression user=pg host=node2
35+
REFEREE_CONNSTR:'dbname=regression user=pg host=referee'
3636
ports:
3737
-"15433:5432"
38+
39+
referee:
40+
container_name:referee
41+
build:../..
42+
privileged:true
43+
ulimits:
44+
core:14294967296
45+
environment:
46+
POSTGRES_USER:'pg'
47+
POSTGRES_DB:'regression'
48+
NODE_ID:1
49+
REFEREE:'on'
50+
ports:
51+
-"15435:5432"

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp