NotificationsYou must be signed in to change notification settings
Fork6
Star31

Commit2a6ebe7

committed

Fix contrib/seg's GiST picksplit method.

This patch replaces Guttman's generalized split method with a simplesort-by-center-points algorithm. Since the data is only one-dimensionalwe don't really need the slow and none-too-stable Guttman method.This is in part a bug fix, since seg has the same size_alpha versussize_beta typo that was recently fixed in contrib/cube. It seemsprudent to apply this rather aggressive fix only in HEAD, though.Back branches will just get the typo fix.Alexander Korotkov, reviewed by Yeb Havinga

1 parent290f160 commit2a6ebe7Copy full SHA for 2a6ebe7

File tree

1 file changed

+78

-116

lines changed

contrib/seg
- seg.c

1 file changed

+78

-116

lines changed

`‎contrib/seg/seg.c`

Lines changed: 78 additions & 116 deletions

Original file line number	Diff line number	Diff line change
`@@ -33,6 +33,16 @@ extern void seg_scanner_finish(void);`
`33`	`33`	`extern int seg_yydebug;`
`34`	`34`	`*/`
`35`	`35`
	`36`	`+/*`
	`37`	`+ * Auxiliary data structure for picksplit method.`
	`38`	`+ */`
	`39`	`+typedefstruct`
	`40`	`+{`
	`41`	`+floatcenter;`
	`42`	`+OffsetNumberindex;`
	`43`	`+SEG*data;`
	`44`	`+}gseg_picksplit_item;`
	`45`	`+`
`36`	`46`	`/*`
`37`	`47`	`** Input/Output routines`
`38`	`48`	`*/`
`@@ -292,152 +302,104 @@ gseg_penalty(GISTENTRY origentry, GISTENTRY newentry, float *result)`
`292`	`302`	`return (result);`
`293`	`303`	`}`
`294`	`304`
	`305`	`+/*`
	`306`	`+ * Compare function for gseg_picksplit_item: sort by center.`
	`307`	`+ */`
	`308`	`+staticint`
	`309`	`+gseg_picksplit_item_cmp(constvoida,constvoidb)`
	`310`	`+{`
	`311`	`+constgseg_picksplit_itemi1= (constgseg_picksplit_item)a;`
	`312`	`+constgseg_picksplit_itemi2= (constgseg_picksplit_item)b;`
`295`	`313`
	`314`	`+if (i1->center<i2->center)`
	`315`	`+return-1;`
	`316`	`+elseif (i1->center==i2->center)`
	`317`	`+return0;`
	`318`	`+else`
	`319`	`+return1;`
	`320`	`+}`
`296`	`321`
`297`	`322`	`/*`
`298`		`-** The GiST PickSplit method for segments`
`299`		`-** We use Guttman's poly time split algorithm`
`300`		`-*/`
	`323`	`+ * The GiST PickSplit method for segments`
	`324`	`+ *`
	`325`	`+ * We used to use Guttman's split algorithm here, but since the data is 1-D`
	`326`	`+ * it's easier and more robust to just sort the segments by center-point and`
	`327`	`+ * split at the middle.`
	`328`	`+ */`
`301`	`329`	`GIST_SPLITVEC*`
`302`	`330`	`gseg_picksplit(GistEntryVector*entryvec,`
`303`	`331`	`GIST_SPLITVEC*v)`
`304`	`332`	`{`
`305`		`-OffsetNumberi,`
`306`		`-j;`
`307`		`-SEG*datum_alpha,`
`308`		`-*datum_beta;`
	`333`	`+inti;`
`309`	`334`	`SEG*datum_l,`
`310`		`-*datum_r;`
`311`		`-SEG*union_d,`
`312`		`-*union_dl,`
`313`		`-*union_dr;`
`314`		`-SEG*inter_d;`
`315`		`-boolfirsttime;`
`316`		`-floatsize_alpha,`
`317`		`-size_beta,`
`318`		`-size_union,`
`319`		`-size_inter;`
`320`		`-floatsize_waste,`
`321`		`-waste;`
`322`		`-floatsize_l,`
`323`		`-size_r;`
`324`		`-intnbytes;`
`325`		`-OffsetNumberseed_1=1,`
`326`		`-seed_2=2;`
	`335`	`+*datum_r,`
	`336`	`+*seg;`
	`337`	`+gseg_picksplit_item*sort_items;`
`327`	`338`	`OffsetNumber*left,`
`328`	`339`	`*right;`
`329`	`340`	`OffsetNumbermaxoff;`
	`341`	`+OffsetNumberfirstright;`
`330`	`342`
`331`	`343`	`#ifdefGIST_DEBUG`
`332`	`344`	`fprintf(stderr,"picksplit\n");`
`333`	`345`	`#endif`
`334`	`346`
`335`		`-maxoff=entryvec->n-2;`
`336`		`-nbytes= (maxoff+2)*sizeof(OffsetNumber);`
`337`		`-v->spl_left= (OffsetNumber*)palloc(nbytes);`
`338`		`-v->spl_right= (OffsetNumber*)palloc(nbytes);`
	`347`	`+/* Valid items in entryvec->vector[] are indexed 1..maxoff */`
	`348`	`+maxoff=entryvec->n-1;`
`339`	`349`
`340`		`-firsttime= true;`
`341`		`-waste=0.0;`
`342`		`-`
`343`		`-for (i=FirstOffsetNumber;i<maxoff;i=OffsetNumberNext(i))`
	`350`	`+/*`
	`351`	`+ * Prepare the auxiliary array and sort it.`
	`352`	`+ */`
	`353`	`+sort_items= (gseg_picksplit_item*)`
	`354`	`+palloc(maxoff*sizeof(gseg_picksplit_item));`
	`355`	`+for (i=1;i <=maxoff;i++)`
`344`	`356`	`{`
`345`		`-datum_alpha= (SEG*)DatumGetPointer(entryvec->vector[i].key);`
`346`		`-for (j=OffsetNumberNext(i);j <=maxoff;j=OffsetNumberNext(j))`
`347`		`-{`
`348`		`-datum_beta= (SEG*)DatumGetPointer(entryvec->vector[j].key);`
`349`		`-`
`350`		`-/* compute the wasted space by unioning these guys */`
`351`		`-/* size_waste = size_union - size_inter; */`
`352`		`-union_d=seg_union(datum_alpha,datum_beta);`
`353`		`-rt_seg_size(union_d,&size_union);`
`354`		`-inter_d=seg_inter(datum_alpha,datum_beta);`
`355`		`-rt_seg_size(inter_d,&size_inter);`
`356`		`-size_waste=size_union-size_inter;`
`357`		`-`
`358`		`-/*`
`359`		`- * are these a more promising split that what we've already seen?`
`360`		`- */`
`361`		`-if (size_waste>waste\|\|firsttime)`
`362`		`-{`
`363`		`-waste=size_waste;`
`364`		`-seed_1=i;`
`365`		`-seed_2=j;`
`366`		`-firsttime= false;`
`367`		`-}`
`368`		`-}`
	`357`	`+seg= (SEG*)DatumGetPointer(entryvec->vector[i].key);`
	`358`	`+/* center calculation is done this way to avoid possible overflow */`
	`359`	`+sort_items[i-1].center=seg->lower0.5f+seg->upper0.5f;`
	`360`	`+sort_items[i-1].index=i;`
	`361`	`+sort_items[i-1].data=seg;`
`369`	`362`	`}`
	`363`	`+qsort(sort_items,maxoff,sizeof(gseg_picksplit_item),`
	`364`	`+gseg_picksplit_item_cmp);`
`370`	`365`
	`366`	`+/* sort items below "firstright" will go into the left side */`
	`367`	`+firstright=maxoff /2;`
	`368`	`+`
	`369`	`+v->spl_left= (OffsetNumber)palloc(maxoffsizeof(OffsetNumber));`
	`370`	`+v->spl_right= (OffsetNumber)palloc(maxoffsizeof(OffsetNumber));`
`371`	`371`	`left=v->spl_left;`
`372`	`372`	`v->spl_nleft=0;`
`373`	`373`	`right=v->spl_right;`
`374`	`374`	`v->spl_nright=0;`
`375`	`375`
`376`		`-datum_alpha= (SEG*)DatumGetPointer(entryvec->vector[seed_1].key);`
`377`		`-datum_l=seg_union(datum_alpha,datum_alpha);`
`378`		`-rt_seg_size(datum_l,&size_l);`
`379`		`-datum_beta= (SEG*)DatumGetPointer(entryvec->vector[seed_2].key);`
`380`		`-datum_r=seg_union(datum_beta,datum_beta);`
`381`		`-rt_seg_size(datum_r,&size_r);`
`382`		`-`
`383`	`376`	`/*`
`384`		`- * Now split up the regions between the two seeds.An important property`
`385`		`- * of this split algorithm is that the split vector v has the indices of`
`386`		`- * items to be split in order in its left and right vectors. We exploit`
`387`		`- * this property by doing a merge in the code that actually splits the`
`388`		`- * page.`
`389`		`- *`
`390`		`- * For efficiency, we also place the new index tuple in this loop. This is`
`391`		`- * handled at the very end, when we have placed all the existing tuples`
`392`		`- * and i == maxoff + 1.`
	`377`	`+ * Emit segments to the left output page, and compute its bounding box.`
`393`	`378`	`*/`
`394`		`-`
`395`		`-maxoff=OffsetNumberNext(maxoff);`
`396`		`-for (i=FirstOffsetNumber;i <=maxoff;i=OffsetNumberNext(i))`
	`379`	`+datum_l= (SEG*)palloc(sizeof(SEG));`
	`380`	`+memcpy(datum_l,sort_items[0].data,sizeof(SEG));`
	`381`	`+*left++=sort_items[0].index;`
	`382`	`+v->spl_nleft++;`
	`383`	`+for (i=1;i<firstright;i++)`
`397`	`384`	`{`
`398`		`-/*`
`399`		`- * If we've already decided where to place this item, just put it on`
`400`		`- * the right list.Otherwise, we need to figure out which page needs`
`401`		`- * the least enlargement in order to store the item.`
`402`		`- */`
`403`		`-`
`404`		`-if (i==seed_1)`
`405`		`-{`
`406`		`-*left++=i;`
`407`		`-v->spl_nleft++;`
`408`		`-continue;`
`409`		`-}`
`410`		`-elseif (i==seed_2)`
`411`		`-{`
`412`		`-*right++=i;`
`413`		`-v->spl_nright++;`
`414`		`-continue;`
`415`		`-}`
`416`		`-`
`417`		`-/* okay, which page needs least enlargement? */`
`418`		`-datum_alpha= (SEG*)DatumGetPointer(entryvec->vector[i].key);`
`419`		`-union_dl=seg_union(datum_l,datum_alpha);`
`420`		`-union_dr=seg_union(datum_r,datum_alpha);`
`421`		`-rt_seg_size(union_dl,&size_alpha);`
`422`		`-rt_seg_size(union_dr,&size_beta);`
	`385`	`+datum_l=seg_union(datum_l,sort_items[i].data);`
	`386`	`+*left++=sort_items[i].index;`
	`387`	`+v->spl_nleft++;`
	`388`	`+}`
`423`	`389`
`424`		`-/* pick which page to add it to */`
`425`		`-if (size_alpha-size_l<size_beta-size_r)`
`426`		`-{`
`427`		`-datum_l=union_dl;`
`428`		`-size_l=size_alpha;`
`429`		`-*left++=i;`
`430`		`-v->spl_nleft++;`
`431`		`-}`
`432`		`-else`
`433`		`-{`
`434`		`-datum_r=union_dr;`
`435`		`-size_r=size_alpha;`
`436`		`-*right++=i;`
`437`		`-v->spl_nright++;`
`438`		`-}`
	`390`	`+/*`
	`391`	`+ * Likewise for the right page.`
	`392`	`+ */`
	`393`	`+datum_r= (SEG*)palloc(sizeof(SEG));`
	`394`	`+memcpy(datum_r,sort_items[firstright].data,sizeof(SEG));`
	`395`	`+*right++=sort_items[firstright].index;`
	`396`	`+v->spl_nright++;`
	`397`	`+for (i=firstright+1;i<maxoff;i++)`
	`398`	`+{`
	`399`	`+datum_r=seg_union(datum_r,sort_items[i].data);`
	`400`	`+*right++=sort_items[i].index;`
	`401`	`+v->spl_nright++;`
`439`	`402`	`}`
`440`		`-left=right=FirstOffsetNumber;/* sentinel value, see dosplit() */`
`441`	`403`
`442`	`404`	`v->spl_ldatum=PointerGetDatum(datum_l);`
`443`	`405`	`v->spl_rdatum=PointerGetDatum(datum_r);`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit2a6ebe7

File tree

1 file changed

1 file changed

`‎contrib/seg/seg.c`

0 commit comments