Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitc8ba697

Browse files
committed
Fix logic bug in gistchoose and gistRelocateBuildBuffersOnSplit.
Every time the best-tuple-found-so-far changes, we need to reset allthe penalty values in which_grow[] to the penalties for the new besttuple. The old code failed to do this, resulting in inferior indexquality.The original patch from Alexander Korotkov was just two lines; I tookthe liberty of fleshing that out by adding a bunch of comments that Ihope will make this logic easier for others to understand than it wasfor me.
1 parentd1a4db8 commitc8ba697

File tree

2 files changed

+80
-8
lines changed

2 files changed

+80
-8
lines changed

‎src/backend/access/gist/gistbuildbuffers.c

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -625,8 +625,13 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
625625
}
626626

627627
/*
628-
* Loop through all index tuples on the buffer on the splitted page,
629-
* moving them to buffers on the new pages.
628+
* Loop through all index tuples on the buffer on the page being split,
629+
* moving them to buffers on the new pages. We try to move each tuple
630+
* the page that will result in the lowest penalty for the leading column
631+
* or, in the case of a tie, the lowest penalty for the earliest column
632+
* that is not tied.
633+
*
634+
* The guts of this loop are very similar to gistchoose().
630635
*/
631636
while (gistPopItupFromNodeBuffer(gfbb,&oldBuf,&itup))
632637
{
@@ -637,14 +642,18 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
637642
IndexTuplenewtup;
638643
RelocationBufferInfo*targetBufferInfo;
639644

640-
/*
641-
* Choose which page this tuple should go to.
642-
*/
643645
gistDeCompressAtt(giststate,r,
644646
itup,NULL, (OffsetNumber)0,entry,isnull);
645647

646648
which=-1;
647649
*which_grow=-1.0f;
650+
651+
/*
652+
* Loop over possible target pages. We'll exit early if we find an index key that
653+
* can accommodate the new key with no penalty on any column. sum_grow is used to
654+
* track this condition. It doesn't need to be exactly accurate, just >0 whenever
655+
* we want the loop to continue and equal to 0 when we want it to terminate.
656+
*/
648657
sum_grow=1.0f;
649658

650659
for (i=0;i<splitPagesCount&&sum_grow;i++)
@@ -653,6 +662,8 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
653662
RelocationBufferInfo*splitPageInfo=&relocationBuffersInfos[i];
654663

655664
sum_grow=0.0f;
665+
666+
/* Loop over index attributes. */
656667
for (j=0;j<r->rd_att->natts;j++)
657668
{
658669
floatusize;
@@ -664,16 +675,36 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
664675

665676
if (which_grow[j]<0||usize<which_grow[j])
666677
{
678+
/*
679+
* We get here in two cases. First, we may have just discovered that the
680+
* current tuple is the best one we've seen so far; that is, for the first
681+
* column for which the penalty is not equal to the best tuple seen so far,
682+
* this one has a lower penalty than the previously-seen one. But, when
683+
* a new best tuple is found, we must record the best penalty value for
684+
* all the remaining columns. We'll end up here for each remaining index
685+
* column in that case, too.
686+
*/
667687
which=i;
668688
which_grow[j]=usize;
669-
if (j<r->rd_att->natts-1&&i==0)
689+
if (j<r->rd_att->natts-1)
670690
which_grow[j+1]=-1;
671691
sum_grow+=which_grow[j];
672692
}
673693
elseif (which_grow[j]==usize)
694+
{
695+
/*
696+
* The current tuple is exactly as good for this column as the best tuple
697+
* seen so far. The next iteration of this loop will compare the next
698+
* column.
699+
*/
674700
sum_grow+=usize;
701+
}
675702
else
676703
{
704+
/*
705+
* The current tuple is worse for this column than the best tuple seen so
706+
* far. Skip the remaining columns and move on to the next tuple, if any.
707+
*/
677708
sum_grow=1;
678709
break;
679710
}

‎src/backend/access/gist/gistutil.c

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,12 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
363363
}
364364

365365
/*
366-
* find entry with lowest penalty
366+
* Search a page for the entry with lowest penalty.
367+
*
368+
* The index may have multiple columns, and there's a penalty value for each column.
369+
* The penalty associated with a column which appears earlier in the index definition is
370+
* strictly more important than the penalty of column which appears later in the index
371+
* definition.
367372
*/
368373
OffsetNumber
369374
gistchoose(Relationr,Pagep,IndexTupleit,/* it has compressed entry */
@@ -389,12 +394,28 @@ gistchoose(Relation r, Page p, IndexTuple it,/* it has compressed entry */
389394
Assert(maxoff >=FirstOffsetNumber);
390395
Assert(!GistPageIsLeaf(p));
391396

397+
/*
398+
* Loop over tuples on page.
399+
*
400+
* We'll exit early if we find an index key that can accommodate the new key with no
401+
* penalty on any column. sum_grow is used to track this condition. Normally, it is the
402+
* sum of the penalties we've seen for this column so far, which is not a very useful
403+
* quantity in general because the penalties for each column are only considered
404+
* independently, but all we really care about is whether or not it's greater than zero.
405+
* Since penalties can't be negative, the sum of the penalties will be greater than
406+
* zero if and only if at least one penalty was greater than zero. To make things just
407+
* a bit more complicated, we arbitrarily set sum_grow to 1.0 whenever we want to force
408+
* the at least one more iteration of this outer loop. Any non-zero value would serve
409+
* just as well.
410+
*/
392411
for (i=FirstOffsetNumber;i <=maxoff&&sum_grow;i=OffsetNumberNext(i))
393412
{
394413
intj;
395414
IndexTupleitup= (IndexTuple)PageGetItem(p,PageGetItemId(p,i));
396415

397416
sum_grow=0;
417+
418+
/* Loop over indexed attribtues. */
398419
for (j=0;j<r->rd_att->natts;j++)
399420
{
400421
Datumdatum;
@@ -409,16 +430,36 @@ gistchoose(Relation r, Page p, IndexTuple it,/* it has compressed entry */
409430

410431
if (which_grow[j]<0||usize<which_grow[j])
411432
{
433+
/*
434+
* We get here in two cases. First, we may have just discovered that the
435+
* current tuple is the best one we've seen so far; that is, for the first
436+
* column for which the penalty is not equal to the best tuple seen so far,
437+
* this one has a lower penalty than the previously-seen one. But, when
438+
* a new best tuple is found, we must record the best penalty value for
439+
* all the remaining columns. We'll end up here for each remaining index
440+
* column in that case, too.
441+
*/
412442
which=i;
413443
which_grow[j]=usize;
414-
if (j<r->rd_att->natts-1&&i==FirstOffsetNumber)
444+
if (j<r->rd_att->natts-1)
415445
which_grow[j+1]=-1;
416446
sum_grow+=which_grow[j];
417447
}
418448
elseif (which_grow[j]==usize)
449+
{
450+
/*
451+
* The current tuple is exactly as good for this column as the best tuple
452+
* seen so far. The next iteration of this loop will compare the next
453+
* column.
454+
*/
419455
sum_grow+=usize;
456+
}
420457
else
421458
{
459+
/*
460+
* The current tuple is worse for this column than the best tuple seen so
461+
* far. Skip the remaining columns and move on to the next tuple, if any.
462+
*/
422463
sum_grow=1;
423464
break;
424465
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp