Commit b270713

Fix multiple crasher bugs in partitioned-table replication logic.

apply_handle_tuple_routing(), having detected and reported that the
tuple it needed to update didn't exist, tried to update that tuple
anyway, leading to a null-pointer dereference.

logicalrep_partition_open() failed to ensure that the
LogicalRepPartMapEntry it built for a partition was fully independent
of that for the partition root, leading to trouble if the root entry
was later freed or rebuilt.

Meanwhile, on the publisher's side, pgoutput_change() sometimes
attempted to apply execute_attr_map_tuple() to a NULL tuple.

The first of these was reported by Sergey Bernikov in bug #17055;
I found the other two while developing some test cases for this
sadly under-tested code.

Diagnosis and patch for the first issue by Amit Langote; patches for
the others by me; new test cases by me.  Back-patch to v13 where this
logic came in.

Discussion: https://postgr.es/m/17055-9ba800ec8522668b@postgresql.org

1 parent 218b101 commit b270713
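
For readers skimming the commit message, the shape of the first crash is the
familiar "report the problem, then fall through and use the missing object
anyway" pattern. Below is a minimal, self-contained C sketch of that pattern
and of the fix (log and return early). It is not the actual PostgreSQL code:
the names Slot, find_local_tuple and apply_update are illustrative
assumptions; the real change is the "if (!found) ... return" restructuring
shown in the worker.c hunk further down.

    /* sketch.c -- illustrative only; none of these names exist in PostgreSQL */
    #include <stdio.h>
    #include <stddef.h>

    typedef struct Slot
    {
        int         key;
        const char *value;
    } Slot;

    /* Pretend local lookup: returns NULL when the row is missing locally. */
    static Slot *
    find_local_tuple(int key)
    {
        (void) key;             /* lookup elided in this sketch */
        return NULL;            /* simulate "row already gone on subscriber" */
    }

    static void
    apply_update(int key, const char *newval)
    {
        Slot       *localslot = find_local_tuple(key);

        if (localslot == NULL)
        {
            /* Fixed behavior: report the missing row, then bail out. */
            fprintf(stderr, "did not find row %d to be updated; skipping\n", key);
            return;
            /* The buggy flow kept going past the report and dereferenced localslot. */
        }

        /* Only reached when the row exists, so no NULL dereference. */
        localslot->value = newval;
        printf("updated row %d to %s\n", key, localslot->value);
    }

    int
    main(void)
    {
        apply_update(42, "quux");
        return 0;
    }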

5 files changed, +147 -29 lines changed

src/backend/replication/logical/relation.c

Lines changed: 10 additions & 3 deletions

@@ -589,7 +589,9 @@ logicalrep_partmap_init(void)
  * logicalrep_partition_open
  *
  * Returned entry reuses most of the values of the root table's entry, save
- * the attribute map, which can be different for the partition.
+ * the attribute map, which can be different for the partition.  However,
+ * we must physically copy all the data, in case the root table's entry
+ * gets freed/rebuilt.
  *
  * Note there's no logicalrep_partition_close, because the caller closes the
  * the component relation.
@@ -625,7 +627,7 @@ logicalrep_partition_open(LogicalRepRelMapEntry *root,
 
 	part_entry->partoid = partOid;
 
-	/* Remote relation is used as-is from the root entry. */
+	/* Remote relation is copied as-is from the root entry. */
 	entry = &part_entry->relmapentry;
 	entry->remoterel.remoteid = remoterel->remoteid;
 	entry->remoterel.nspname = pstrdup(remoterel->nspname);
@@ -668,7 +670,12 @@ logicalrep_partition_open(LogicalRepRelMapEntry *root,
 		}
 	}
 	else
-		entry->attrmap = attrmap;
+	{
+		/* Lacking copy_attmap, do this the hard way. */
+		entry->attrmap = make_attrmap(attrmap->maplen);
+		memcpy(entry->attrmap->attnums, attrmap->attnums,
+			   attrmap->maplen * sizeof(AttrNumber));
+	}
 
 	entry->updatable = root->updatable;
 
src/backend/replication/logical/worker.c

Lines changed: 28 additions & 21 deletions

@@ -898,12 +898,13 @@ apply_handle_update_internal(ResultRelInfo *relinfo,
 	else
 	{
 		/*
-		 * The tuple to be updated could not be found.
+		 * The tuple to be updated could not be found.  Do nothing except for
+		 * emitting a log message.
 		 *
-		 * TODO what to do here, change the log level to LOG perhaps?
+		 * XXX should this be promoted to ereport(LOG) perhaps?
 		 */
 		elog(DEBUG1,
-			 "logical replication did not find row for update "
+			 "logical replication did not find row to be updated "
 			 "in replication target relation \"%s\"",
 			 RelationGetRelationName(localrel));
 	}
@@ -1001,9 +1002,14 @@ apply_handle_delete_internal(ResultRelInfo *relinfo, EState *estate,
 	}
 	else
 	{
-		/* The tuple to be deleted could not be found. */
+		/*
+		 * The tuple to be deleted could not be found.  Do nothing except for
+		 * emitting a log message.
+		 *
+		 * XXX should this be promoted to ereport(LOG) perhaps?
+		 */
 		elog(DEBUG1,
-			 "logical replication could not find row for delete "
+			 "logical replication did not find row to be deleted "
 			 "in replication target relation \"%s\"",
 			 RelationGetRelationName(localrel));
 	}
@@ -1145,30 +1151,31 @@ apply_handle_tuple_routing(ApplyExecutionData *edata,
 				found = FindReplTupleInLocalRel(estate, partrel,
 												&part_entry->remoterel,
 												remoteslot_part, &localslot);
-
-				oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
-				if (found)
-				{
-					/* Apply the update. */
-					slot_modify_cstrings(remoteslot_part, localslot,
-										 part_entry,
-										 newtup->values, newtup->changed);
-					MemoryContextSwitchTo(oldctx);
-				}
-				else
+				if (!found)
 				{
 					/*
-					 * The tuple to be updated could not be found.
+					 * The tuple to be updated could not be found.  Do nothing
+					 * except for emitting a log message.
 					 *
-					 * TODO what to do here, change the log level to LOG
-					 * perhaps?
+					 * XXX should this be promoted to ereport(LOG) perhaps?
 					 */
 					elog(DEBUG1,
-						 "logical replication did not find row for update "
-						 "in replication target relation \"%s\"",
+						 "logical replication did not find row to be updated "
+						 "in replication target relation's partition \"%s\"",
 						 RelationGetRelationName(partrel));
+					return;
 				}
 
+				/*
+				 * Apply the update to the local tuple, putting the result in
+				 * remoteslot_part.
+				 */
+				oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+				slot_modify_cstrings(remoteslot_part, localslot,
+									 part_entry,
+									 newtup->values, newtup->changed);
+				MemoryContextSwitchTo(oldctx);
+
 				/*
 				 * Does the updated tuple still satisfy the current
 				 * partition's constraint?

src/backend/replication/pgoutput/pgoutput.c

Lines changed: 5 additions & 2 deletions

@@ -452,8 +452,11 @@ pgoutput_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
 			/* Convert tuples if needed. */
 			if (relentry->map)
 			{
-				oldtuple = execute_attr_map_tuple(oldtuple, relentry->map);
-				newtuple = execute_attr_map_tuple(newtuple, relentry->map);
+				if (oldtuple)
+					oldtuple = execute_attr_map_tuple(oldtuple,
+													  relentry->map);
+				newtuple = execute_attr_map_tuple(newtuple,
+												  relentry->map);
 			}
 		}
 
src/test/subscription/t/001_rep_changes.pl

Lines changed: 34 additions & 2 deletions

@@ -3,7 +3,7 @@
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 24;
+use Test::More tests => 27;
 
 # Initialize publisher node
 my $node_publisher = get_new_node('publisher');
@@ -115,7 +115,7 @@
 	"INSERT INTO tab_mixed VALUES (2, 'bar', 2.2)");
 
 $node_publisher->safe_psql('postgres',
-	"INSERT INTO tab_full_pk VALUES (1, 'foo')");
+	"INSERT INTO tab_full_pk VALUES (1, 'foo'), (2, 'baz')");
 
 $node_publisher->safe_psql('postgres',
 	"INSERT INTO tab_nothing VALUES (generate_series(1,20))");
@@ -197,6 +197,38 @@
 local|2.2|bar|2),
 	'update works with different column order and subscriber local values');
 
+$result = $node_subscriber->safe_psql('postgres',
+	"SELECT * FROM tab_full_pk ORDER BY a");
+is($result, qq(1|bar
+2|baz),
+	'update works with REPLICA IDENTITY FULL and a primary key');
+
+# Check that subscriber handles cases where update/delete target tuple
+# is missing.  We have to look for the DEBUG1 log messages about that,
+# so temporarily bump up the log verbosity.
+$node_subscriber->append_conf('postgresql.conf', "log_min_messages = debug1");
+$node_subscriber->reload;
+
+$node_subscriber->safe_psql('postgres', "DELETE FROM tab_full_pk");
+
+$node_publisher->safe_psql('postgres',
+	"UPDATE tab_full_pk SET b = 'quux' WHERE a = 1");
+$node_publisher->safe_psql('postgres', "DELETE FROM tab_full_pk WHERE a = 2");
+
+$node_publisher->wait_for_catchup('tap_sub');
+
+my $logfile = slurp_file($node_subscriber->logfile());
+ok($logfile =~
+	  qr/logical replication did not find row to be updated in replication target relation "tab_full_pk"/,
+	'update target row is missing');
+ok($logfile =~
+	  qr/logical replication did not find row to be deleted in replication target relation "tab_full_pk"/,
+	'delete target row is missing');
+
+$node_subscriber->append_conf('postgresql.conf',
+	"log_min_messages = warning");
+$node_subscriber->reload;
+
 # check behavior with toasted values
 
 $node_publisher->safe_psql('postgres',

src/test/subscription/t/013_partition.pl

Lines changed: 70 additions & 1 deletion

@@ -3,7 +3,7 @@
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 56;
+use Test::More tests => 62;
 
 # setup
 
@@ -344,6 +344,46 @@ BEGIN
   $node_subscriber2->safe_psql('postgres', "SELECT a FROM tab1 ORDER BY 1");
 is($result, qq(), 'truncate of tab1 replicated');
 
+# Check that subscriber handles cases where update/delete target tuple
+# is missing.  We have to look for the DEBUG1 log messages about that,
+# so temporarily bump up the log verbosity.
+$node_subscriber1->append_conf('postgresql.conf',
+	"log_min_messages = debug1");
+$node_subscriber1->reload;
+
+$node_publisher->safe_psql('postgres',
+	"INSERT INTO tab1 VALUES (1, 'foo'), (4, 'bar'), (10, 'baz')");
+
+$node_publisher->wait_for_catchup('sub1');
+$node_publisher->wait_for_catchup('sub2');
+
+$node_subscriber1->safe_psql('postgres', "DELETE FROM tab1");
+
+$node_publisher->safe_psql('postgres',
+	"UPDATE tab1 SET b = 'quux' WHERE a = 4");
+$node_publisher->safe_psql('postgres', "DELETE FROM tab1");
+
+$node_publisher->wait_for_catchup('sub1');
+$node_publisher->wait_for_catchup('sub2');
+
+my $logfile = slurp_file($node_subscriber1->logfile());
+ok($logfile =~
+	  qr/logical replication did not find row to be updated in replication target relation's partition "tab1_2_2"/,
+	'update target row is missing in tab1_2_2');
+ok($logfile =~
+	  qr/logical replication did not find row to be deleted in replication target relation "tab1_1"/,
+	'delete target row is missing in tab1_1');
+ok($logfile =~
+	  qr/logical replication did not find row to be deleted in replication target relation "tab1_2_2"/,
+	'delete target row is missing in tab1_2_2');
+ok($logfile =~
+	  qr/logical replication did not find row to be deleted in replication target relation "tab1_def"/,
+	'delete target row is missing in tab1_def');
+
+$node_subscriber1->append_conf('postgresql.conf',
+	"log_min_messages = warning");
+$node_subscriber1->reload;
+
 # Tests for replication using root table identity and schema
 
 # publisher
@@ -647,3 +687,32 @@ BEGIN
 pub_tab2|3|yyy
 pub_tab2|5|zzz
 xxx_c|6|aaa), 'inserts into tab2 replicated');
+
+# Check that subscriber handles cases where update/delete target tuple
+# is missing.  We have to look for the DEBUG1 log messages about that,
+# so temporarily bump up the log verbosity.
+$node_subscriber1->append_conf('postgresql.conf',
+	"log_min_messages = debug1");
+$node_subscriber1->reload;
+
+$node_subscriber1->safe_psql('postgres', "DELETE FROM tab2");
+
+$node_publisher->safe_psql('postgres',
+	"UPDATE tab2 SET b = 'quux' WHERE a = 5");
+$node_publisher->safe_psql('postgres', "DELETE FROM tab2 WHERE a = 1");
+
+$node_publisher->wait_for_catchup('sub_viaroot');
+$node_publisher->wait_for_catchup('sub2');
+
+$logfile = slurp_file($node_subscriber1->logfile());
+ok($logfile =~
+	  qr/logical replication did not find row to be updated in replication target relation's partition "tab2_1"/,
+	'update target row is missing in tab2_1');
+ok($logfile =~
+	  qr/logical replication did not find row to be deleted in replication target relation "tab2_1"/,
+	'delete target row is missing in tab2_1');
+
+# No need for this until more tests are added.
+# $node_subscriber1->append_conf('postgresql.conf',
+#	"log_min_messages = warning");
+# $node_subscriber1->reload;