Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitc66e4f1

Browse files
committed
Improve GiST range-contained-by searches by adding a flag for empty ranges.
In the original implementation, a range-contained-by search had to scanthe entire index because an empty range could be lurking anywhere.Improve that by adding a flag to upper GiST entries that says whether therepresented subtree contains any empty ranges.Also, make a simple mod to the penalty function to discourage empty rangesfrom getting pushed into subtrees without any. This needs more work, andthe picksplit function should be taught about it too, but that code can beimproved without causing an on-disk compatibility break; so we'll leave itfor another day.Since we're breaking on-disk compatibility of range values anyway, I tookthe opportunity to reorganize the range flags bits; the unusedRANGE_xB_NULL bits are now adjacent, which might open the door for usingthem in some other way later.In passing, remove the GiST range opclass entry for <>, which doesn't seemlike it can really be indexed usefully.Alexander Korotkov, with some editorializing by Tom
1 parent08da2d2 commitc66e4f1

File tree

6 files changed

+118
-56
lines changed

6 files changed

+118
-56
lines changed

‎src/backend/utils/adt/rangetypes.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1622,6 +1622,24 @@ range_get_flags(RangeType *range)
16221622
return*((char*)range+VARSIZE(range)-1);
16231623
}
16241624

1625+
/*
1626+
* range_set_contain_empty: set the RANGE_CONTAIN_EMPTY bit in the value.
1627+
*
1628+
* This is only needed in GiST operations, so we don't include a provision
1629+
* for setting it in range_serialize; rather, this function must be applied
1630+
* afterwards.
1631+
*/
1632+
void
1633+
range_set_contain_empty(RangeType*range)
1634+
{
1635+
char*flagsp;
1636+
1637+
/* flag byte is datum's last byte */
1638+
flagsp= (char*)range+VARSIZE(range)-1;
1639+
1640+
*flagsp |=RANGE_CONTAIN_EMPTY;
1641+
}
1642+
16251643
/*
16261644
* This both serializes and canonicalizes (if applicable) the range.
16271645
* This should be used by most callers.

‎src/backend/utils/adt/rangetypes_gist.c

Lines changed: 85 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include"access/gist.h"
1818
#include"access/skey.h"
1919
#include"utils/builtins.h"
20+
#include"utils/datum.h"
2021
#include"utils/rangetypes.h"
2122

2223

@@ -32,7 +33,11 @@
3233
#defineRANGESTRAT_CONTAINED_BY8
3334
#defineRANGESTRAT_CONTAINS_ELEM16
3435
#defineRANGESTRAT_EQ18
35-
#defineRANGESTRAT_NE19
36+
37+
/* Copy a RangeType datum (hardwires typbyval and typlen for ranges...) */
38+
#definerangeCopy(r) \
39+
((RangeType *) DatumGetPointer(datumCopy(PointerGetDatum(r), \
40+
false, -1)))
3641

3742
/*
3843
* Auxiliary structure for picksplit method.
@@ -145,6 +150,16 @@ range_gist_penalty(PG_FUNCTION_ARGS)
145150

146151
subtype_diff=&typcache->rng_subdiff_finfo;
147152

153+
/*
154+
* If new is or contains empty, and orig doesn't, apply infinite penalty.
155+
* We really don't want to pollute an empty-free subtree with empties.
156+
*/
157+
if (RangeIsOrContainsEmpty(new)&& !RangeIsOrContainsEmpty(orig))
158+
{
159+
*penalty=get_float4_infinity();
160+
PG_RETURN_POINTER(penalty);
161+
}
162+
148163
/*
149164
* We want to compare the size of "orig" to size of "orig union new".
150165
* The penalty will be the sum of the reduction in the lower bound plus
@@ -163,30 +178,9 @@ range_gist_penalty(PG_FUNCTION_ARGS)
163178
}
164179
elseif (empty1)
165180
{
166-
if (lower2.infinite||upper2.infinite)
167-
{
168-
/* from empty to infinite */
169-
*penalty=get_float4_infinity();
170-
PG_RETURN_POINTER(penalty);
171-
}
172-
elseif (OidIsValid(subtype_diff->fn_oid))
173-
{
174-
/* from empty to upper2-lower2 */
175-
*penalty=DatumGetFloat8(FunctionCall2Coll(subtype_diff,
176-
typcache->rng_collation,
177-
upper2.val,
178-
lower2.val));
179-
/* upper2 must be >= lower2 */
180-
if (*penalty<0)
181-
*penalty=0;/* subtype_diff is broken */
182-
PG_RETURN_POINTER(penalty);
183-
}
184-
else
185-
{
186-
/* wild guess */
187-
*penalty=1.0;
188-
PG_RETURN_POINTER(penalty);
189-
}
181+
/* infinite penalty for pushing non-empty into all-empty subtree */
182+
*penalty=get_float4_infinity();
183+
PG_RETURN_POINTER(penalty);
190184
}
191185

192186
/* if orig isn't empty, s_union can't be either */
@@ -334,15 +328,27 @@ range_gist_picksplit(PG_FUNCTION_ARGS)
334328
Datum
335329
range_gist_same(PG_FUNCTION_ARGS)
336330
{
337-
/* Datumr1 =PG_GETARG_DATUM(0); */
338-
/* Datumr2 =PG_GETARG_DATUM(1); */
331+
RangeType*r1=PG_GETARG_RANGE(0);
332+
RangeType*r2=PG_GETARG_RANGE(1);
339333
bool*result= (bool*)PG_GETARG_POINTER(2);
340334

341335
/*
342-
* We can safely call range_eq using our fcinfo directly; it won't notice
343-
* the third argument. This allows it to use fn_extra for caching.
336+
* range_eq will ignore the RANGE_CONTAIN_EMPTY flag, so we have to
337+
* check that for ourselves. More generally, if the entries have been
338+
* properly normalized, then unequal flags bytes must mean unequal ranges
339+
* ... so let's just test all the flag bits at once.
344340
*/
345-
*result=DatumGetBool(range_eq(fcinfo));
341+
if (range_get_flags(r1)!=range_get_flags(r2))
342+
*result= false;
343+
else
344+
{
345+
/*
346+
* We can safely call range_eq using our fcinfo directly; it won't
347+
* notice the third argument. This allows it to use fn_extra for
348+
* caching.
349+
*/
350+
*result=DatumGetBool(range_eq(fcinfo));
351+
}
346352

347353
PG_RETURN_POINTER(result);
348354
}
@@ -356,27 +362,53 @@ range_gist_same(PG_FUNCTION_ARGS)
356362
/*
357363
* Return the smallest range that contains r1 and r2
358364
*
359-
* XXX would it be better to redefine range_union as working this way?
365+
* This differs from regular range_union in two critical ways:
366+
* 1. It won't throw an error for non-adjacent r1 and r2, but just absorb
367+
* the intervening values into the result range.
368+
* 2. We track whether any empty range has been union'd into the result,
369+
* so that contained_by searches can be indexed. Note that this means
370+
* that *all* unions formed within the GiST index must go through here.
360371
*/
361372
staticRangeType*
362373
range_super_union(TypeCacheEntry*typcache,RangeType*r1,RangeType*r2)
363374
{
375+
RangeType*result;
364376
RangeBoundlower1,
365377
lower2;
366378
RangeBoundupper1,
367379
upper2;
368380
boolempty1,
369381
empty2;
382+
charflags1,
383+
flags2;
370384
RangeBound*result_lower;
371385
RangeBound*result_upper;
372386

373387
range_deserialize(typcache,r1,&lower1,&upper1,&empty1);
374388
range_deserialize(typcache,r2,&lower2,&upper2,&empty2);
389+
flags1=range_get_flags(r1);
390+
flags2=range_get_flags(r2);
375391

376392
if (empty1)
393+
{
394+
/* We can return r2 as-is if it already is or contains empty */
395+
if (flags2& (RANGE_EMPTY |RANGE_CONTAIN_EMPTY))
396+
returnr2;
397+
/* Else we'd better copy it (modify-in-place isn't safe) */
398+
r2=rangeCopy(r2);
399+
range_set_contain_empty(r2);
377400
returnr2;
401+
}
378402
if (empty2)
403+
{
404+
/* We can return r1 as-is if it already is or contains empty */
405+
if (flags1& (RANGE_EMPTY |RANGE_CONTAIN_EMPTY))
406+
returnr1;
407+
/* Else we'd better copy it (modify-in-place isn't safe) */
408+
r1=rangeCopy(r1);
409+
range_set_contain_empty(r1);
379410
returnr1;
411+
}
380412

381413
if (range_cmp_bounds(typcache,&lower1,&lower2) <=0)
382414
result_lower=&lower1;
@@ -389,12 +421,19 @@ range_super_union(TypeCacheEntry *typcache, RangeType * r1, RangeType * r2)
389421
result_upper=&upper2;
390422

391423
/* optimization to avoid constructing a new range */
392-
if (result_lower==&lower1&&result_upper==&upper1)
424+
if (result_lower==&lower1&&result_upper==&upper1&&
425+
((flags1&RANGE_CONTAIN_EMPTY)|| !(flags2&RANGE_CONTAIN_EMPTY)))
393426
returnr1;
394-
if (result_lower==&lower2&&result_upper==&upper2)
427+
if (result_lower==&lower2&&result_upper==&upper2&&
428+
((flags2&RANGE_CONTAIN_EMPTY)|| !(flags1&RANGE_CONTAIN_EMPTY)))
395429
returnr2;
396430

397-
returnmake_range(typcache,result_lower,result_upper, false);
431+
result=make_range(typcache,result_lower,result_upper, false);
432+
433+
if ((flags1&RANGE_CONTAIN_EMPTY)|| (flags2&RANGE_CONTAIN_EMPTY))
434+
range_set_contain_empty(result);
435+
436+
returnresult;
398437
}
399438

400439
/*
@@ -484,21 +523,26 @@ range_gist_consistent_int(FmgrInfo *flinfo, StrategyNumber strategy,
484523
break;
485524
caseRANGESTRAT_CONTAINED_BY:
486525
/*
487-
*Ideally we'd apply range_overlaps here, but at present it
488-
*might fail to findempty ranges in the index, which should
489-
*be reported as being contained by anything. This needs work.
526+
*Empty ranges are contained by anything, so if key is or
527+
*contains anyempty ranges, we must descend into it. Otherwise,
528+
*descend only if key overlaps the query.
490529
*/
491-
return true;
530+
if (RangeIsOrContainsEmpty(key))
531+
return true;
532+
proc=range_overlaps;
492533
break;
493534
caseRANGESTRAT_CONTAINS_ELEM:
494535
proc=range_contains_elem;
495536
break;
496537
caseRANGESTRAT_EQ:
538+
/*
539+
* If query is empty, descend only if the key is or contains any
540+
* empty ranges. Otherwise, descend if key contains query.
541+
*/
542+
if (RangeIsEmpty(DatumGetRangeType(query)))
543+
returnRangeIsOrContainsEmpty(key);
497544
proc=range_contains;
498545
break;
499-
caseRANGESTRAT_NE:
500-
return true;
501-
break;
502546
default:
503547
elog(ERROR,"unrecognized range strategy: %d",strategy);
504548
proc=NULL;/* keep compiler quiet */
@@ -555,9 +599,6 @@ range_gist_consistent_leaf(FmgrInfo *flinfo, StrategyNumber strategy,
555599
caseRANGESTRAT_EQ:
556600
proc=range_eq;
557601
break;
558-
caseRANGESTRAT_NE:
559-
proc=range_ne;
560-
break;
561602
default:
562603
elog(ERROR,"unrecognized range strategy: %d",strategy);
563604
proc=NULL;/* keep compiler quiet */

‎src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/*yyyymmddN */
56-
#defineCATALOG_VERSION_NO201111241
56+
#defineCATALOG_VERSION_NO201111271
5757

5858
#endif

‎src/include/catalog/pg_amop.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -736,6 +736,5 @@ DATA(insert (3919 3831 3831 7 s3890 783 0 ));
736736
DATA(insert (3919383138318s38927830 ));
737737
DATA(insert (39193831228316s38897830 ));
738738
DATA(insert (39193831383118s38827830 ));
739-
DATA(insert (39193831383119s38837830 ));
740739

741740
#endif/* PG_AMOP_H */

‎src/include/utils/rangetypes.h

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,15 @@ typedef struct
3333
#defineRangeTypeGetOid(r) ((r)->rangetypid)
3434

3535
/* A range's flags byte contains these bits: */
36-
#defineRANGE_EMPTY0x01/* range is empty */
37-
#defineRANGE_LB_INC0x02/* lower bound is inclusive (vs exclusive) */
38-
#defineRANGE_LB_NULL0x04/* lower bound is null (NOT CURRENTLY USED) */
39-
#defineRANGE_LB_INF0x08/* lower bound is +/- infinity */
40-
#defineRANGE_UB_INC0x10/* upper bound is inclusive (vs exclusive) */
41-
#defineRANGE_UB_NULL0x20/* upper bound is null (NOT CURRENTLY USED) */
42-
#defineRANGE_UB_INF0x40/* upper bound is +/- infinity */
36+
#defineRANGE_EMPTY0x01/* range is empty */
37+
#defineRANGE_LB_INC0x02/* lower bound is inclusive */
38+
#defineRANGE_UB_INC0x04/* upper bound is inclusive */
39+
#defineRANGE_LB_INF0x08/* lower bound is -infinity */
40+
#defineRANGE_UB_INF0x10/* upper bound is +infinity */
41+
#defineRANGE_LB_NULL0x20/* lower bound is null (NOT USED) */
42+
#defineRANGE_UB_NULL0x40/* upper bound is null (NOT USED) */
43+
#defineRANGE_CONTAIN_EMPTY0x80/* marks a GiST internal-page entry whose
44+
* subtree contains some empty ranges */
4345

4446
#defineRANGE_HAS_LBOUND(flags) (!((flags) & (RANGE_EMPTY | \
4547
RANGE_LB_NULL | \
@@ -49,7 +51,9 @@ typedef struct
4951
RANGE_UB_NULL | \
5052
RANGE_UB_INF)))
5153

52-
#defineRangeIsEmpty(r) (range_get_flags(r) & RANGE_EMPTY)
54+
#defineRangeIsEmpty(r) ((range_get_flags(r) & RANGE_EMPTY) != 0)
55+
#defineRangeIsOrContainsEmpty(r) \
56+
((range_get_flags(r) & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY)) != 0)
5357

5458

5559
/* Internal representation of either bound of a range (not what's on disk) */
@@ -152,6 +156,7 @@ extern void range_deserialize(TypeCacheEntry *typcache, RangeType *range,
152156
RangeBound*lower,RangeBound*upper,
153157
bool*empty);
154158
externcharrange_get_flags(RangeType*range);
159+
externvoidrange_set_contain_empty(RangeType*range);
155160
externRangeType*make_range(TypeCacheEntry*typcache,RangeBound*lower,
156161
RangeBound*upper,boolempty);
157162
externintrange_cmp_bounds(TypeCacheEntry*typcache,RangeBound*b1,

‎src/test/regress/expected/opr_sanity.out

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1041,7 +1041,6 @@ ORDER BY 1, 2, 3;
10411041
783 | 15 | <->
10421042
783 | 16 | @>
10431043
783 | 18 | =
1044-
783 | 19 | <>
10451044
783 | 27 | @>
10461045
783 | 28 | <@
10471046
783 | 47 | @>
@@ -1054,7 +1053,7 @@ ORDER BY 1, 2, 3;
10541053
2742 | 2 | @@@
10551054
2742 | 3 | <@
10561055
2742 | 4 | =
1057-
(44 rows)
1056+
(43 rows)
10581057

10591058
-- Check that all opclass search operators have selectivity estimators.
10601059
-- This is not absolutely required, but it seems a reasonable thing

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp