Commit ab55d74

Fix multiple crasher bugs in partitioned-table replication logic.
apply_handle_tuple_routing(), having detected and reported that the
tuple it needed to update didn't exist, tried to update that tuple
anyway, leading to a null-pointer dereference.

logicalrep_partition_open() failed to ensure that the
LogicalRepPartMapEntry it built for a partition was fully independent
of that for the partition root, leading to trouble if the root entry
was later freed or rebuilt.

Meanwhile, on the publisher's side, pgoutput_change() sometimes
attempted to apply execute_attr_map_tuple() to a NULL tuple.

The first of these was reported by Sergey Bernikov in bug #17055;
I found the other two while developing some test cases for this
sadly under-tested code.

Diagnosis and patch for the first issue by Amit Langote; patches
for the others by me; new test cases by me.  Back-patch to v13
where this logic came in.

Discussion: https://postgr.es/m/17055-9ba800ec8522668b@postgresql.org
1 parent 4efcf47 · commit ab55d74

5 files changed: +146 −29 lines changed


src/backend/replication/logical/relation.c
Lines changed: 10 additions & 3 deletions

@@ -620,7 +620,9 @@ logicalrep_partmap_init(void)
  * logicalrep_partition_open
  *
  * Returned entry reuses most of the values of the root table's entry, save
- * the attribute map, which can be different for the partition.
+ * the attribute map, which can be different for the partition.  However,
+ * we must physically copy all the data, in case the root table's entry
+ * gets freed/rebuilt.
  *
  * Note there's no logicalrep_partition_close, because the caller closes the
  * component relation.
@@ -656,7 +658,7 @@ logicalrep_partition_open(LogicalRepRelMapEntry *root,
 
 	part_entry->partoid = partOid;
 
-	/* Remote relation is used as-is from the root entry. */
+	/* Remote relation is copied as-is from the root entry. */
 	entry = &part_entry->relmapentry;
 	entry->remoterel.remoteid = remoterel->remoteid;
 	entry->remoterel.nspname = pstrdup(remoterel->nspname);
@@ -699,7 +701,12 @@ logicalrep_partition_open(LogicalRepRelMapEntry *root,
 		}
 	}
 	else
-		entry->attrmap = attrmap;
+	{
+		/* Lacking copy_attmap, do this the hard way. */
+		entry->attrmap = make_attrmap(attrmap->maplen);
+		memcpy(entry->attrmap->attnums, attrmap->attnums,
+			   attrmap->maplen * sizeof(AttrNumber));
+	}
 
 	entry->updatable = root->updatable;
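
The else-branch above has to deep-copy the attribute map by hand; as its
comment notes, no ready-made copy helper exists at this point in the tree.
For illustration only, here is a minimal sketch of what such a helper could
look like. It assumes nothing beyond what the diff itself uses: an AttrMap
holding a maplen count plus an attnums array of AttrNumber, and
make_attrmap() as the allocator; the copy_attrmap() name is hypothetical.

#include <stdint.h>
#include <string.h>

typedef int16_t AttrNumber;		/* stand-in for PostgreSQL's int16 typedef */

typedef struct AttrMap
{
	AttrNumber *attnums;		/* per-attribute mapping entries */
	int			maplen;			/* number of entries */
} AttrMap;

/* assumed allocator: returns a map with attnums[maplen] ready to fill */
extern AttrMap *make_attrmap(int maplen);

/* Hypothetical helper: deep-copy an AttrMap so the partition's copy
 * survives even if the root entry's map is later freed or rebuilt. */
static AttrMap *
copy_attrmap(const AttrMap *src)
{
	AttrMap    *dst = make_attrmap(src->maplen);

	memcpy(dst->attnums, src->attnums,
		   src->maplen * sizeof(AttrNumber));
	return dst;
}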

src/backend/replication/logical/worker.c
Lines changed: 27 additions & 21 deletions

@@ -1477,12 +1477,13 @@ apply_handle_update_internal(ApplyExecutionData *edata,
 	else
 	{
 		/*
-		 * The tuple to be updated could not be found.
+		 * The tuple to be updated could not be found.  Do nothing except for
+		 * emitting a log message.
 		 *
-		 * TODO what to do here, change the log level to LOG perhaps?
+		 * XXX should this be promoted to ereport(LOG) perhaps?
 		 */
 		elog(DEBUG1,
-			 "logical replication did not find row for update "
+			 "logical replication did not find row to be updated "
 			 "in replication target relation \"%s\"",
 			 RelationGetRelationName(localrel));
 	}
@@ -1589,9 +1590,14 @@ apply_handle_delete_internal(ApplyExecutionData *edata,
 	}
 	else
 	{
-		/* The tuple to be deleted could not be found. */
+		/*
+		 * The tuple to be deleted could not be found.  Do nothing except for
+		 * emitting a log message.
+		 *
+		 * XXX should this be promoted to ereport(LOG) perhaps?
+		 */
 		elog(DEBUG1,
-			 "logical replication did not find row for delete "
+			 "logical replication did not find row to be deleted "
 			 "in replication target relation \"%s\"",
 			 RelationGetRelationName(localrel));
 	}
@@ -1728,30 +1734,30 @@ apply_handle_tuple_routing(ApplyExecutionData *edata,
 				found = FindReplTupleInLocalRel(estate, partrel,
 												&part_entry->remoterel,
 												remoteslot_part, &localslot);
-
-				oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
-				if (found)
-				{
-					/* Apply the update. */
-					slot_modify_data(remoteslot_part, localslot,
-									 part_entry,
-									 newtup);
-					MemoryContextSwitchTo(oldctx);
-				}
-				else
+				if (!found)
 				{
 					/*
-					 * The tuple to be updated could not be found.
+					 * The tuple to be updated could not be found.  Do nothing
+					 * except for emitting a log message.
 					 *
-					 * TODO what to do here, change the log level to LOG
-					 * perhaps?
+					 * XXX should this be promoted to ereport(LOG) perhaps?
 					 */
 					elog(DEBUG1,
-						 "logical replication did not find row for update "
-						 "in replication target relation \"%s\"",
+						 "logical replication did not find row to be updated "
+						 "in replication target relation's partition \"%s\"",
						 RelationGetRelationName(partrel));
+					return;
 				}
 
+				/*
+				 * Apply the update to the local tuple, putting the result in
+				 * remoteslot_part.
+				 */
+				oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+				slot_modify_data(remoteslot_part, localslot, part_entry,
+								 newtup);
+				MemoryContextSwitchTo(oldctx);
+
 				/*
 				 * Does the updated tuple still satisfy the current
 				 * partition's constraint?
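
The apply_handle_tuple_routing() hunk is the null-pointer fix itself: the
old code logged the failed lookup but then fell through and called
slot_modify_data() with the never-filled localslot. Restated as a minimal,
self-contained sketch (hypothetical find_row()/apply_update() names, not
PostgreSQL source), the bug class and the early-return fix look like this:

#include <stdio.h>
#include <stddef.h>

typedef struct Row
{
	int			id;
	int			value;
} Row;

/* Hypothetical lookup: returns the matching row, or NULL if none. */
static Row *
find_row(Row *rows, size_t n, int id)
{
	for (size_t i = 0; i < n; i++)
		if (rows[i].id == id)
			return &rows[i];
	return NULL;
}

static void
apply_update(Row *rows, size_t n, int id, int newvalue)
{
	Row		   *local = find_row(rows, n, id);

	if (local == NULL)
	{
		/* Pre-fix code reported the miss here but then fell through and
		 * dereferenced local anyway; the fix is this early return. */
		fprintf(stderr, "row to be updated not found (id=%d)\n", id);
		return;
	}

	local->value = newvalue;	/* safe: local is known to be non-NULL */
}

int
main(void)
{
	Row			rows[] = {{1, 10}, {2, 20}};

	apply_update(rows, 2, 2, 25);	/* updates row with id 2 */
	apply_update(rows, 2, 9, 99);	/* just logs; no crash */
	return 0;
}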

src/backend/replication/pgoutput/pgoutput.c
Lines changed: 5 additions & 2 deletions

@@ -612,8 +612,11 @@ pgoutput_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
 			/* Convert tuples if needed. */
 			if (relentry->map)
 			{
-				oldtuple = execute_attr_map_tuple(oldtuple, relentry->map);
-				newtuple = execute_attr_map_tuple(newtuple, relentry->map);
+				if (oldtuple)
+					oldtuple = execute_attr_map_tuple(oldtuple,
+													  relentry->map);
+				newtuple = execute_attr_map_tuple(newtuple,
+												  relentry->map);
 			}
 		}
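
The publisher-side fix is a plain NULL guard. For an update, the decoded
change carries an old tuple only when one was actually logged (broadly,
when the replica identity columns changed, or under REPLICA IDENTITY FULL);
otherwise oldtuple is NULL, and mapping it unconditionally crashed. The
shape of the fix, as a sketch with a hypothetical map_tuple() standing in
for execute_attr_map_tuple():

typedef struct HeapTupleData HeapTupleData;	/* opaque in this sketch */
typedef struct AttrMap AttrMap;				/* opaque in this sketch */

/* assumed converter: builds a new tuple laid out per map;
 * requires a non-NULL input tuple */
extern HeapTupleData *map_tuple(HeapTupleData *tuple, const AttrMap *map);

static void
convert_update_tuples(HeapTupleData **oldtuple, HeapTupleData **newtuple,
					  const AttrMap *map)
{
	/* the old tuple is optional, so guard it ... */
	if (*oldtuple)
		*oldtuple = map_tuple(*oldtuple, map);
	/* ... while an update always carries a new tuple */
	*newtuple = map_tuple(*newtuple, map);
}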

src/test/subscription/t/001_rep_changes.pl
Lines changed: 34 additions & 2 deletions

@@ -6,7 +6,7 @@
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 28;
+use Test::More tests => 31;
 
 # Initialize publisher node
 my $node_publisher = get_new_node('publisher');
@@ -118,7 +118,7 @@
 	"INSERT INTO tab_mixed VALUES (2, 'bar', 2.2)");
 
 $node_publisher->safe_psql('postgres',
-	"INSERT INTO tab_full_pk VALUES (1, 'foo')");
+	"INSERT INTO tab_full_pk VALUES (1, 'foo'), (2, 'baz')");
 
 $node_publisher->safe_psql('postgres',
 	"INSERT INTO tab_nothing VALUES (generate_series(1,20))");
@@ -297,6 +297,38 @@
 local|2.2|bar|2),
 	'update works with different column order and subscriber local values');
 
+$result = $node_subscriber->safe_psql('postgres',
+	"SELECT * FROM tab_full_pk ORDER BY a");
+is($result, qq(1|bar
+2|baz),
+	'update works with REPLICA IDENTITY FULL and a primary key');
+
+# Check that subscriber handles cases where update/delete target tuple
+# is missing.  We have to look for the DEBUG1 log messages about that,
+# so temporarily bump up the log verbosity.
+$node_subscriber->append_conf('postgresql.conf', "log_min_messages = debug1");
+$node_subscriber->reload;
+
+$node_subscriber->safe_psql('postgres', "DELETE FROM tab_full_pk");
+
+$node_publisher->safe_psql('postgres',
+	"UPDATE tab_full_pk SET b = 'quux' WHERE a = 1");
+$node_publisher->safe_psql('postgres', "DELETE FROM tab_full_pk WHERE a = 2");
+
+$node_publisher->wait_for_catchup('tap_sub');
+
+my $logfile = slurp_file($node_subscriber->logfile());
+ok( $logfile =~
+	  qr/logical replication did not find row to be updated in replication target relation "tab_full_pk"/,
+	'update target row is missing');
+ok( $logfile =~
+	  qr/logical replication did not find row to be deleted in replication target relation "tab_full_pk"/,
+	'delete target row is missing');
+
+$node_subscriber->append_conf('postgresql.conf',
+	"log_min_messages = warning");
+$node_subscriber->reload;
+
 # check behavior with toasted values
 
 $node_publisher->safe_psql('postgres',

src/test/subscription/t/013_partition.pl
Lines changed: 70 additions & 1 deletion

@@ -6,7 +6,7 @@
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 56;
+use Test::More tests => 62;
 
 # setup
 
@@ -347,6 +347,46 @@ BEGIN
 	$node_subscriber2->safe_psql('postgres', "SELECT a FROM tab1 ORDER BY 1");
 is($result, qq(), 'truncate of tab1 replicated');
 
+# Check that subscriber handles cases where update/delete target tuple
+# is missing.  We have to look for the DEBUG1 log messages about that,
+# so temporarily bump up the log verbosity.
+$node_subscriber1->append_conf('postgresql.conf',
+	"log_min_messages = debug1");
+$node_subscriber1->reload;
+
+$node_publisher->safe_psql('postgres',
+	"INSERT INTO tab1 VALUES (1, 'foo'), (4, 'bar'), (10, 'baz')");
+
+$node_publisher->wait_for_catchup('sub1');
+$node_publisher->wait_for_catchup('sub2');
+
+$node_subscriber1->safe_psql('postgres', "DELETE FROM tab1");
+
+$node_publisher->safe_psql('postgres',
+	"UPDATE tab1 SET b = 'quux' WHERE a = 4");
+$node_publisher->safe_psql('postgres', "DELETE FROM tab1");
+
+$node_publisher->wait_for_catchup('sub1');
+$node_publisher->wait_for_catchup('sub2');
+
+my $logfile = slurp_file($node_subscriber1->logfile());
+ok( $logfile =~
+	  qr/logical replication did not find row to be updated in replication target relation's partition "tab1_2_2"/,
+	'update target row is missing in tab1_2_2');
+ok( $logfile =~
+	  qr/logical replication did not find row to be deleted in replication target relation "tab1_1"/,
+	'delete target row is missing in tab1_1');
+ok( $logfile =~
+	  qr/logical replication did not find row to be deleted in replication target relation "tab1_2_2"/,
+	'delete target row is missing in tab1_2_2');
+ok( $logfile =~
+	  qr/logical replication did not find row to be deleted in replication target relation "tab1_def"/,
+	'delete target row is missing in tab1_def');
+
+$node_subscriber1->append_conf('postgresql.conf',
+	"log_min_messages = warning");
+$node_subscriber1->reload;
+
 # Tests for replication using root table identity and schema
 
 # publisher
@@ -650,3 +690,32 @@ BEGIN
 pub_tab2|3|yyy
 pub_tab2|5|zzz
 xxx_c|6|aaa), 'inserts into tab2 replicated');
+
+# Check that subscriber handles cases where update/delete target tuple
+# is missing.  We have to look for the DEBUG1 log messages about that,
+# so temporarily bump up the log verbosity.
+$node_subscriber1->append_conf('postgresql.conf',
+	"log_min_messages = debug1");
+$node_subscriber1->reload;
+
+$node_subscriber1->safe_psql('postgres', "DELETE FROM tab2");
+
+$node_publisher->safe_psql('postgres',
+	"UPDATE tab2 SET b = 'quux' WHERE a = 5");
+$node_publisher->safe_psql('postgres', "DELETE FROM tab2 WHERE a = 1");
+
+$node_publisher->wait_for_catchup('sub_viaroot');
+$node_publisher->wait_for_catchup('sub2');
+
+$logfile = slurp_file($node_subscriber1->logfile());
+ok( $logfile =~
+	  qr/logical replication did not find row to be updated in replication target relation's partition "tab2_1"/,
+	'update target row is missing in tab2_1');
+ok( $logfile =~
+	  qr/logical replication did not find row to be deleted in replication target relation "tab2_1"/,
+	'delete target row is missing in tab2_1');
+
+# No need for this until more tests are added.
+# $node_subscriber1->append_conf('postgresql.conf',
+#	"log_min_messages = warning");
+# $node_subscriber1->reload;
