Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitc6fbe6d

Browse files
committed
Add selectivity estimation functions for intarray operators.
Uriy Zhuravlev and Alexander Korotkov, reviewed by Jeff Janes, some cleanupby me.
1 parent4348738 commitc6fbe6d

File tree

7 files changed

+443
-17
lines changed

7 files changed

+443
-17
lines changed

‎contrib/intarray/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
MODULE_big = _int
44
OBJS = _int_bool.o _int_gist.o _int_op.o _int_tool.o\
5-
_intbig_gist.o _int_gin.o$(WIN32RES)
5+
_intbig_gist.o _int_gin.o_int_selfuncs.o$(WIN32RES)
66

77
EXTENSION = intarray
8-
DATA = intarray--1.0.sql intarray--unpackaged--1.0.sql
8+
DATA = intarray--1.1.sql intarray--1.0--1.1.sql intarray--unpackaged--1.0.sql
99
PGFILEDESC = "intarray - functions and operators for arrays of integers"
1010

1111
REGRESS = _int

‎contrib/intarray/_int_selfuncs.c

Lines changed: 341 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,341 @@
1+
/*-------------------------------------------------------------------------
2+
*
3+
* _int_selfuncs.c
4+
* Functions for selectivity estimation of intarray operators
5+
*
6+
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
7+
* Portions Copyright (c) 1994, Regents of the University of California
8+
*
9+
*
10+
* IDENTIFICATION
11+
* contrib/intarray/_int_selfuncs.c
12+
*
13+
*-------------------------------------------------------------------------
14+
*/
15+
#include"postgres.h"
16+
#include"_int.h"
17+
18+
#include"access/htup_details.h"
19+
#include"catalog/pg_operator.h"
20+
#include"catalog/pg_statistic.h"
21+
#include"catalog/pg_type.h"
22+
#include"utils/selfuncs.h"
23+
#include"utils/syscache.h"
24+
#include"utils/lsyscache.h"
25+
#include"miscadmin.h"
26+
27+
PG_FUNCTION_INFO_V1(_int_overlap_sel);
28+
PG_FUNCTION_INFO_V1(_int_contains_sel);
29+
PG_FUNCTION_INFO_V1(_int_contained_sel);
30+
PG_FUNCTION_INFO_V1(_int_overlap_joinsel);
31+
PG_FUNCTION_INFO_V1(_int_contains_joinsel);
32+
PG_FUNCTION_INFO_V1(_int_contained_joinsel);
33+
PG_FUNCTION_INFO_V1(_int_matchsel);
34+
35+
Datum_int_overlap_sel(PG_FUNCTION_ARGS);
36+
Datum_int_contains_sel(PG_FUNCTION_ARGS);
37+
Datum_int_contained_sel(PG_FUNCTION_ARGS);
38+
Datum_int_overlap_joinsel(PG_FUNCTION_ARGS);
39+
Datum_int_contains_joinsel(PG_FUNCTION_ARGS);
40+
Datum_int_contained_joinsel(PG_FUNCTION_ARGS);
41+
Datum_int_matchsel(PG_FUNCTION_ARGS);
42+
43+
44+
staticSelectivityint_query_opr_selec(ITEM*item,Datum*values,float4*freqs,
45+
intnmncelems,float4minfreq);
46+
staticintcompare_val_int4(constvoid*a,constvoid*b);
47+
48+
/*
49+
* Wrappers around the default array selectivity estimation functions.
50+
*
51+
* The default array selectivity operators for the @>, && and @< operators
52+
* work fine for integer arrays. However, if we tried to just use arraycontsel
53+
* and arracontjoinsel directly as the cost estimator functions for our
54+
* operators, they would not work as intended, because they look at the
55+
* operator's OID. Our operators behave exactly like the built-in anyarray
56+
* versions, but we must tell the cost estimator functions which built-in
57+
* operators they correspond to. These wrappers just replace the operator
58+
* OID with the corresponding built-in operator's OID, and call the built-in
59+
* function.
60+
*/
61+
62+
Datum
63+
_int_overlap_sel(PG_FUNCTION_ARGS)
64+
{
65+
PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
66+
PG_GETARG_DATUM(0),
67+
ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
68+
PG_GETARG_DATUM(2),
69+
PG_GETARG_DATUM(3)));
70+
}
71+
72+
Datum
73+
_int_contains_sel(PG_FUNCTION_ARGS)
74+
{
75+
PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
76+
PG_GETARG_DATUM(0),
77+
ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
78+
PG_GETARG_DATUM(2),
79+
PG_GETARG_DATUM(3)));
80+
}
81+
82+
Datum
83+
_int_contained_sel(PG_FUNCTION_ARGS)
84+
{
85+
PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
86+
PG_GETARG_DATUM(0),
87+
ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
88+
PG_GETARG_DATUM(2),
89+
PG_GETARG_DATUM(3)));
90+
}
91+
92+
Datum
93+
_int_overlap_joinsel(PG_FUNCTION_ARGS)
94+
{
95+
PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
96+
PG_GETARG_DATUM(0),
97+
ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
98+
PG_GETARG_DATUM(2),
99+
PG_GETARG_DATUM(3),
100+
PG_GETARG_DATUM(4)));
101+
}
102+
103+
Datum
104+
_int_contains_joinsel(PG_FUNCTION_ARGS)
105+
{
106+
PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
107+
PG_GETARG_DATUM(0),
108+
ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
109+
PG_GETARG_DATUM(2),
110+
PG_GETARG_DATUM(3),
111+
PG_GETARG_DATUM(4)));
112+
}
113+
114+
Datum
115+
_int_contained_joinsel(PG_FUNCTION_ARGS)
116+
{
117+
PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
118+
PG_GETARG_DATUM(0),
119+
ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
120+
PG_GETARG_DATUM(2),
121+
PG_GETARG_DATUM(3),
122+
PG_GETARG_DATUM(4)));
123+
}
124+
125+
126+
/*
127+
* _int_matchsel -- restriction selectivity function for intarray @@ query_int
128+
*/
129+
Datum
130+
_int_matchsel(PG_FUNCTION_ARGS)
131+
{
132+
PlannerInfo*root= (PlannerInfo*)PG_GETARG_POINTER(0);
133+
134+
List*args= (List*)PG_GETARG_POINTER(2);
135+
intvarRelid=PG_GETARG_INT32(3);
136+
VariableStatDatavardata;
137+
Node*other;
138+
boolvaronleft;
139+
Selectivityselec;
140+
QUERYTYPE*query;
141+
Datum*mcelems=NULL;
142+
float4*mcefreqs=NULL;
143+
intnmcelems=0;
144+
float4minfreq=0.0;
145+
float4nullfrac=0.0;
146+
Form_pg_statisticstats;
147+
Datum*values=NULL;
148+
intnvalues=0;
149+
float4*numbers=NULL;
150+
intnnumbers=0;
151+
152+
/*
153+
* If expression is not "variable @@ something" or "something @@ variable"
154+
* then punt and return a default estimate.
155+
*/
156+
if (!get_restriction_variable(root,args,varRelid,
157+
&vardata,&other,&varonleft))
158+
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
159+
160+
/*
161+
* Variable should be int[]. We don't support cases where variable is
162+
* query_int.
163+
*/
164+
if (vardata.vartype!=INT4ARRAYOID)
165+
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
166+
167+
/*
168+
* Can't do anything useful if the something is not a constant, either.
169+
*/
170+
if (!IsA(other,Const))
171+
{
172+
ReleaseVariableStats(vardata);
173+
PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
174+
}
175+
176+
/*
177+
* The "@@" operator is strict, so we can cope with NULL right away.
178+
*/
179+
if (((Const*)other)->constisnull)
180+
{
181+
ReleaseVariableStats(vardata);
182+
PG_RETURN_FLOAT8(0.0);
183+
}
184+
185+
/* The caller made sure the const is a query, so get it now */
186+
query=DatumGetQueryTypeP(((Const*)other)->constvalue);
187+
188+
/* Empty query matches nothing */
189+
if (query->size==0)
190+
{
191+
ReleaseVariableStats(vardata);
192+
return (Selectivity)0.0;
193+
}
194+
195+
/*
196+
* Get the statistics for the intarray column.
197+
*
198+
* We're interested in the Most-Common-Elements list, and the NULL
199+
* fraction.
200+
*/
201+
if (HeapTupleIsValid(vardata.statsTuple))
202+
{
203+
stats= (Form_pg_statistic)GETSTRUCT(vardata.statsTuple);
204+
nullfrac=stats->stanullfrac;
205+
206+
/*
207+
* For an int4 array, the default array type analyze function will
208+
* collect a Most Common Elements list, which is an array of int4s.
209+
*/
210+
if (get_attstatsslot(vardata.statsTuple,
211+
INT4OID,-1,
212+
STATISTIC_KIND_MCELEM,InvalidOid,
213+
NULL,
214+
&values,&nvalues,
215+
&numbers,&nnumbers))
216+
{
217+
/*
218+
* There should be three more Numbers than Values, because the
219+
* last three (for intarray) cells are taken for minimal, maximal
220+
* and nulls frequency. Punt if not.
221+
*/
222+
if (nnumbers==nvalues+3)
223+
{
224+
/* Grab the lowest frequency. */
225+
minfreq=numbers[nnumbers- (nnumbers-nvalues)];
226+
227+
mcelems=values;
228+
mcefreqs=numbers;
229+
nmcelems=nvalues;
230+
}
231+
}
232+
}
233+
234+
/* Process the logical expression in the query, using the stats */
235+
selec=int_query_opr_selec(GETQUERY(query)+query->size-1,
236+
mcelems,mcefreqs,nmcelems,minfreq);
237+
238+
/* MCE stats count only non-null rows, so adjust for null rows. */
239+
selec *= (1.0-nullfrac);
240+
241+
free_attstatsslot(INT4OID,values,nvalues,numbers,nnumbers);
242+
ReleaseVariableStats(vardata);
243+
244+
CLAMP_PROBABILITY(selec);
245+
246+
PG_RETURN_FLOAT8((float8)selec);
247+
}
248+
249+
/*
250+
* Estimate selectivity of single intquery operator
251+
*/
252+
staticSelectivity
253+
int_query_opr_selec(ITEM*item,Datum*mcelems,float4*mcefreqs,
254+
intnmcelems,float4minfreq)
255+
{
256+
Selectivityselec;
257+
258+
/* since this function recurses, it could be driven to stack overflow */
259+
check_stack_depth();
260+
261+
if (item->type==VAL)
262+
{
263+
Datum*searchres;
264+
265+
if (mcelems==NULL)
266+
return (Selectivity)DEFAULT_EQ_SEL;
267+
268+
searchres= (Datum*)bsearch(&item->val,mcelems,nmcelems,
269+
sizeof(Datum),compare_val_int4);
270+
if (searchres)
271+
{
272+
/*
273+
* The element is in MCELEM. Return precise selectivity (or at
274+
* least as precise as ANALYZE could find out).
275+
*/
276+
selec=mcefreqs[searchres-mcelems];
277+
}
278+
else
279+
{
280+
/*
281+
* The element is not in MCELEM. Punt, but assume that the
282+
* selectivity cannot be more than minfreq / 2.
283+
*/
284+
selec=Min(DEFAULT_EQ_SEL,minfreq /2);
285+
}
286+
}
287+
elseif (item->type==OPR)
288+
{
289+
/* Current query node is an operator */
290+
Selectivitys1,
291+
s2;
292+
293+
s1=int_query_opr_selec(item-1,mcelems,mcefreqs,nmcelems,
294+
minfreq);
295+
switch (item->val)
296+
{
297+
case (int32)'!':
298+
selec=1.0-s1;
299+
break;
300+
301+
case (int32)'&':
302+
s2=int_query_opr_selec(item+item->left,mcelems,mcefreqs,
303+
nmcelems,minfreq);
304+
selec=s1*s2;
305+
break;
306+
307+
case (int32)'|':
308+
s2=int_query_opr_selec(item+item->left,mcelems,mcefreqs,
309+
nmcelems,minfreq);
310+
selec=s1+s2-s1*s2;
311+
break;
312+
313+
default:
314+
elog(ERROR,"unrecognized operator: %d",item->val);
315+
selec=0;/* keep compiler quiet */
316+
break;
317+
}
318+
}
319+
else
320+
{
321+
elog(ERROR,"unrecognized int query item type: %u",item->type);
322+
selec=0;/* keep compiler quiet */
323+
}
324+
325+
/* Clamp intermediate results to stay sane despite roundoff error */
326+
CLAMP_PROBABILITY(selec);
327+
328+
returnselec;
329+
}
330+
331+
/*
332+
* Comparison function for binary search in mcelem array.
333+
*/
334+
staticint
335+
compare_val_int4(constvoid*a,constvoid*b)
336+
{
337+
int32key=*(int32*)a;
338+
constDatum*t= (constDatum*)b;
339+
340+
returnkey-DatumGetInt32(*t);
341+
}

‎contrib/intarray/expected/_int.out

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,7 @@ SELECT '1&(2&(4&(5|!6)))'::query_int;
368368

369369
CREATE TABLE test__int( a int[] );
370370
\copy test__int from 'data/test__int.data'
371+
ANALYZE test__int;
371372
SELECT count(*) from test__int WHERE a && '{23,50}';
372373
count
373374
-------

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp