Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit8149e9f

Browse files
committed
Properly detoast data in brin_form_tuple
brin_form_tuple failed to consider the values may be toasted, insertingthe toast pointer into the index. This may easily result in indexcorruption, as the toast data may be deleted and cleaned up by vacuum.The cleanup however does not care about indexes, leaving invalid toastpointers behind, which triggers errors like this: ERROR: missing chunk number 0 for toast value 16433 in pg_toast_16426A less severe consequence are inconsistent failures due to the index rowbeing too large, depending on whether brin_form_tuple operated on plainor toasted version of the row. For example CREATE TABLE t (val TEXT); INSERT INTO t VALUES ('... long value ...') CREATE INDEX idx ON t USING brin (val);would likely succeed, as the row would likely include toast pointer.Switching the order of INSERT and CREATE INDEX would likely fail: ERROR: index row size 8712 exceeds maximum 8152 for index "idx"because this happens before the row values are toasted.The bug exists since PostgreSQL 9.5 where BRIN indexes were introduced.So backpatch all the way back.Author: Tomas VondraReviewed-by: Alvaro HerreraBackpatch-through: 9.5Discussion:https://postgr.es/m/20201001184133.oq5uq75sb45pu3aw@developmentDiscussion:https://postgr.es/m/20201104010544.zexj52mlldagzowv%40development
1 parentf078110 commit8149e9f

File tree

3 files changed

+170
-1
lines changed

3 files changed

+170
-1
lines changed

‎src/backend/access/brin/brin_tuple.c

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include"access/brin_tuple.h"
3636
#include"access/tupdesc.h"
3737
#include"access/tupmacs.h"
38+
#include"access/tuptoaster.h"
3839
#include"utils/datum.h"
3940
#include"utils/memutils.h"
4041

@@ -100,6 +101,12 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple,
100101
Sizelen,
101102
hoff,
102103
data_len;
104+
inti;
105+
106+
#ifdefTOAST_INDEX_HACK
107+
Datum*untoasted_values;
108+
intnuntoasted=0;
109+
#endif
103110

104111
Assert(brdesc->bd_totalstored>0);
105112

@@ -108,6 +115,10 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple,
108115
phony_nullbitmap= (bits8*)
109116
palloc(sizeof(bits8)*BITMAPLEN(brdesc->bd_totalstored));
110117

118+
#ifdefTOAST_INDEX_HACK
119+
untoasted_values= (Datum*)palloc(sizeof(Datum)*brdesc->bd_totalstored);
120+
#endif
121+
111122
/*
112123
* Set up the values/nulls arrays for heap_fill_tuple
113124
*/
@@ -139,10 +150,83 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple,
139150
if (tuple->bt_columns[keyno].bv_hasnulls)
140151
anynulls= true;
141152

153+
/*
154+
* Now obtain the values of each stored datum. Note that some values
155+
* might be toasted, and we cannot rely on the original heap values
156+
* sticking around forever, so we must detoast them. Also try to
157+
* compress them.
158+
*/
142159
for (datumno=0;
143160
datumno<brdesc->bd_info[keyno]->oi_nstored;
144161
datumno++)
145-
values[idxattno++]=tuple->bt_columns[keyno].bv_values[datumno];
162+
{
163+
Datumvalue=tuple->bt_columns[keyno].bv_values[datumno];
164+
165+
#ifdefTOAST_INDEX_HACK
166+
167+
/* We must look at the stored type, not at the index descriptor. */
168+
TypeCacheEntry*atttype=brdesc->bd_info[keyno]->oi_typcache[datumno];
169+
170+
/* Do we need to free the value at the end? */
171+
boolfree_value= false;
172+
173+
/* For non-varlena types we don't need to do anything special */
174+
if (atttype->typlen!=-1)
175+
{
176+
values[idxattno++]=value;
177+
continue;
178+
}
179+
180+
/*
181+
* Do nothing if value is not of varlena type. We don't need to
182+
* care about NULL values here, thanks to bv_allnulls above.
183+
*
184+
* If value is stored EXTERNAL, must fetch it so we are not
185+
* depending on outside storage.
186+
*
187+
* XXX Is this actually true? Could it be that the summary is
188+
* NULL even for range with non-NULL data? E.g. degenerate bloom
189+
* filter may be thrown away, etc.
190+
*/
191+
if (VARATT_IS_EXTERNAL(DatumGetPointer(value)))
192+
{
193+
value=PointerGetDatum(heap_tuple_fetch_attr((structvarlena*)
194+
DatumGetPointer(value)));
195+
free_value= true;
196+
}
197+
198+
/*
199+
* If value is above size target, and is of a compressible datatype,
200+
* try to compress it in-line.
201+
*/
202+
if (!VARATT_IS_EXTENDED(DatumGetPointer(value))&&
203+
VARSIZE(DatumGetPointer(value))>TOAST_INDEX_TARGET&&
204+
(atttype->typstorage=='x'||atttype->typstorage=='m'))
205+
{
206+
Datumcvalue=toast_compress_datum(value);
207+
208+
if (DatumGetPointer(cvalue)!=NULL)
209+
{
210+
/* successful compression */
211+
if (free_value)
212+
pfree(DatumGetPointer(value));
213+
214+
value=cvalue;
215+
free_value= true;
216+
}
217+
}
218+
219+
/*
220+
* If we untoasted / compressed the value, we need to free it
221+
* after forming the index tuple.
222+
*/
223+
if (free_value)
224+
untoasted_values[nuntoasted++]=value;
225+
226+
#endif
227+
228+
values[idxattno++]=value;
229+
}
146230
}
147231

148232
/* Assert we did not overrun temp arrays */
@@ -194,6 +278,11 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple,
194278
pfree(nulls);
195279
pfree(phony_nullbitmap);
196280

281+
#ifdefTOAST_INDEX_HACK
282+
for (i=0;i<nuntoasted;i++)
283+
pfree(DatumGetPointer(untoasted_values[i]));
284+
#endif
285+
197286
/*
198287
* Now fill in the real null bitmasks. allnulls first.
199288
*/

‎src/test/regress/expected/brin.out

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,3 +507,44 @@ EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE b = 1;
507507
Filter: (b = 1)
508508
(2 rows)
509509

510+
-- make sure data are properly de-toasted in BRIN index
511+
CREATE TABLE brintest_3 (a text, b text, c text, d text);
512+
-- long random strings (~2000 chars each, so ~6kB for min/max on two
513+
-- columns) to trigger toasting
514+
WITH rand_value AS (SELECT string_agg(md5(i::text),'') AS val FROM generate_series(1,60) s(i))
515+
INSERT INTO brintest_3
516+
SELECT val, val, val, val FROM rand_value;
517+
CREATE INDEX brin_test_toast_idx ON brintest_3 USING brin (b, c);
518+
DELETE FROM brintest_3;
519+
-- We need to wait a bit for all transactions to complete, so that the
520+
-- vacuum actually removes the TOAST rows. Creating an index concurrently
521+
-- is a one way to achieve that, because it does exactly such wait.
522+
CREATE INDEX CONCURRENTLY brin_test_temp_idx ON brintest_3(a);
523+
DROP INDEX brin_test_temp_idx;
524+
-- vacuum the table, to discard TOAST data
525+
VACUUM brintest_3;
526+
-- retry insert with a different random-looking (but deterministic) value
527+
-- the value is different, and so should replace either min or max in the
528+
-- brin summary
529+
WITH rand_value AS (SELECT string_agg(md5((-i)::text),'') AS val FROM generate_series(1,60) s(i))
530+
INSERT INTO brintest_3
531+
SELECT val, val, val, val FROM rand_value;
532+
-- now try some queries, accessing the brin index
533+
SET enable_seqscan = off;
534+
EXPLAIN (COSTS OFF)
535+
SELECT * FROM brintest_3 WHERE b < '0';
536+
QUERY PLAN
537+
------------------------------------------------
538+
Bitmap Heap Scan on brintest_3
539+
Recheck Cond: (b < '0'::text)
540+
-> Bitmap Index Scan on brin_test_toast_idx
541+
Index Cond: (b < '0'::text)
542+
(4 rows)
543+
544+
SELECT * FROM brintest_3 WHERE b < '0';
545+
a | b | c | d
546+
---+---+---+---
547+
(0 rows)
548+
549+
DROP TABLE brintest_3;
550+
RESET enable_seqscan;

‎src/test/regress/sql/brin.sql

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,3 +460,42 @@ VACUUM ANALYZE brin_test;
460460
EXPLAIN (COSTS OFF)SELECT*FROM brin_testWHERE a=1;
461461
-- Ensure brin index is not used when values are not correlated
462462
EXPLAIN (COSTS OFF)SELECT*FROM brin_testWHERE b=1;
463+
464+
-- make sure data are properly de-toasted in BRIN index
465+
CREATETABLEbrintest_3 (atext, btext, ctext, dtext);
466+
467+
-- long random strings (~2000 chars each, so ~6kB for min/max on two
468+
-- columns) to trigger toasting
469+
WITH rand_valueAS (SELECT string_agg(md5(i::text),'')AS valFROM generate_series(1,60) s(i))
470+
INSERT INTO brintest_3
471+
SELECT val, val, val, valFROM rand_value;
472+
473+
CREATEINDEXbrin_test_toast_idxON brintest_3 USING brin (b, c);
474+
DELETEFROM brintest_3;
475+
476+
-- We need to wait a bit for all transactions to complete, so that the
477+
-- vacuum actually removes the TOAST rows. Creating an index concurrently
478+
-- is a one way to achieve that, because it does exactly such wait.
479+
CREATEINDEXCONCURRENTLY brin_test_temp_idxON brintest_3(a);
480+
DROPINDEX brin_test_temp_idx;
481+
482+
-- vacuum the table, to discard TOAST data
483+
VACUUM brintest_3;
484+
485+
-- retry insert with a different random-looking (but deterministic) value
486+
-- the value is different, and so should replace either min or max in the
487+
-- brin summary
488+
WITH rand_valueAS (SELECT string_agg(md5((-i)::text),'')AS valFROM generate_series(1,60) s(i))
489+
INSERT INTO brintest_3
490+
SELECT val, val, val, valFROM rand_value;
491+
492+
-- now try some queries, accessing the brin index
493+
SET enable_seqscan= off;
494+
495+
EXPLAIN (COSTS OFF)
496+
SELECT*FROM brintest_3WHERE b<'0';
497+
498+
SELECT*FROM brintest_3WHERE b<'0';
499+
500+
DROPTABLE brintest_3;
501+
RESET enable_seqscan;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp