Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commite80252d

Browse files
committed
Add width_bucket(anyelement, anyarray).
This provides a convenient method of classifying input values into bucketsthat are not necessarily equal-width. It works on any sortable data type.The choice of function name is a bit debatable, perhaps, but showing thatthere's a relationship to the SQL standard's width_bucket() function seemsmore attractive than the other proposals.Petr Jelinek, reviewed by Pavel Stehule
1 parent220bb39 commite80252d

File tree

7 files changed

+458
-12
lines changed

7 files changed

+458
-12
lines changed

‎doc/src/sgml/func.sgml

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -901,25 +901,40 @@
901901
<indexterm>
902902
<primary>width_bucket</primary>
903903
</indexterm>
904-
<literal><function>width_bucket(<parameter>op</parameter> <type>numeric</type>, <parameter>b1</parameter> <type>numeric</type>, <parameter>b2</parameter> <type>numeric</type>, <parameter>count</parameter> <type>int</type>)</function></literal>
905-
</entry>
904+
<literal><function>width_bucket(<parameter>operand</parameter> <type>dp</type>, <parameter>b1</parameter> <type>dp</type>, <parameter>b2</parameter> <type>dp</type>, <parameter>count</parameter> <type>int</type>)</function></literal></entry>
906905
<entry><type>int</type></entry>
907-
<entry>return the bucket to which <parameter>operand</> would
908-
be assigned in an equidepth histogram with <parameter>count</>
909-
buckets, in the range <parameter>b1</> to <parameter>b2</></entry>
906+
<entry>return the bucket number to which <parameter>operand</> would
907+
be assigned in a histogram having <parameter>count</> equal-width
908+
buckets spanning the range <parameter>b1</> to <parameter>b2</>;
909+
returns <literal>0</> or <literal><parameter>count</>+1</literal> for
910+
an input outside the range</entry>
910911
<entry><literal>width_bucket(5.35, 0.024, 10.06, 5)</literal></entry>
911912
<entry><literal>3</literal></entry>
912913
</row>
913914

914915
<row>
915-
<entry><literal><function>width_bucket(<parameter>op</parameter> <type>dp</type>, <parameter>b1</parameter> <type>dp</type>, <parameter>b2</parameter> <type>dp</type>, <parameter>count</parameter> <type>int</type>)</function></literal></entry>
916+
<entry><literal><function>width_bucket(<parameter>operand</parameter> <type>numeric</type>, <parameter>b1</parameter> <type>numeric</type>, <parameter>b2</parameter> <type>numeric</type>, <parameter>count</parameter> <type>int</type>)</function></literal></entry>
916917
<entry><type>int</type></entry>
917-
<entry>return the bucket to which <parameter>operand</> would
918-
be assigned in an equidepth histogram with <parameter>count</>
919-
buckets, in the range <parameter>b1</> to <parameter>b2</></entry>
918+
<entry>return the bucket number to which <parameter>operand</> would
919+
be assigned in a histogram having <parameter>count</> equal-width
920+
buckets spanning the range <parameter>b1</> to <parameter>b2</>;
921+
returns <literal>0</> or <literal><parameter>count</>+1</literal> for
922+
an input outside the range</entry>
920923
<entry><literal>width_bucket(5.35, 0.024, 10.06, 5)</literal></entry>
921924
<entry><literal>3</literal></entry>
922925
</row>
926+
927+
<row>
928+
<entry><literal><function>width_bucket(<parameter>operand</parameter> <type>anyelement</type>, <parameter>thresholds</parameter> <type>anyarray</type>)</function></literal></entry>
929+
<entry><type>int</type></entry>
930+
<entry>return the bucket number to which <parameter>operand</> would
931+
be assigned given an array listing the lower bounds of the buckets;
932+
returns <literal>0</> for an input less than the first lower bound;
933+
the <parameter>thresholds</> array <emphasis>must be sorted</>,
934+
smallest first, or unexpected results will be obtained</entry>
935+
<entry><literal>width_bucket(now(), array['yesterday', 'today', 'tomorrow']::timestamptz[])</literal></entry>
936+
<entry><literal>2</literal></entry>
937+
</row>
923938
</tbody>
924939
</tgroup>
925940
</table>

‎src/backend/utils/adt/arrayfuncs.c

Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@
1515
#include"postgres.h"
1616

1717
#include<ctype.h>
18+
#include<math.h>
1819

1920
#include"access/htup_details.h"
21+
#include"catalog/pg_type.h"
2022
#include"funcapi.h"
2123
#include"libpq/pqformat.h"
2224
#include"utils/array.h"
@@ -130,6 +132,15 @@ static ArrayType *array_replace_internal(ArrayType *array,
130132
Datumreplace,boolreplace_isnull,
131133
boolremove,Oidcollation,
132134
FunctionCallInfofcinfo);
135+
staticintwidth_bucket_array_float8(Datumoperand,ArrayType*thresholds);
136+
staticintwidth_bucket_array_fixed(Datumoperand,
137+
ArrayType*thresholds,
138+
Oidcollation,
139+
TypeCacheEntry*typentry);
140+
staticintwidth_bucket_array_variable(Datumoperand,
141+
ArrayType*thresholds,
142+
Oidcollation,
143+
TypeCacheEntry*typentry);
133144

134145

135146
/*
@@ -5502,3 +5513,235 @@ array_replace(PG_FUNCTION_ARGS)
55025513
fcinfo);
55035514
PG_RETURN_ARRAYTYPE_P(array);
55045515
}
5516+
5517+
/*
5518+
* Implements width_bucket(anyelement, anyarray).
5519+
*
5520+
* 'thresholds' is an array containing lower bound values for each bucket;
5521+
* these must be sorted from smallest to largest, or bogus results will be
5522+
* produced. If N thresholds are supplied, the output is from 0 to N:
5523+
* 0 is for inputs < first threshold, N is for inputs >= last threshold.
5524+
*/
5525+
Datum
5526+
width_bucket_array(PG_FUNCTION_ARGS)
5527+
{
5528+
Datumoperand=PG_GETARG_DATUM(0);
5529+
ArrayType*thresholds=PG_GETARG_ARRAYTYPE_P(1);
5530+
Oidcollation=PG_GET_COLLATION();
5531+
Oidelement_type=ARR_ELEMTYPE(thresholds);
5532+
intresult;
5533+
5534+
/* Check input */
5535+
if (ARR_NDIM(thresholds)>1)
5536+
ereport(ERROR,
5537+
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
5538+
errmsg("thresholds must be one-dimensional array")));
5539+
5540+
if (array_contains_nulls(thresholds))
5541+
ereport(ERROR,
5542+
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
5543+
errmsg("thresholds array must not contain NULLs")));
5544+
5545+
/* We have a dedicated implementation for float8 data */
5546+
if (element_type==FLOAT8OID)
5547+
result=width_bucket_array_float8(operand,thresholds);
5548+
else
5549+
{
5550+
TypeCacheEntry*typentry;
5551+
5552+
/* Cache information about the input type */
5553+
typentry= (TypeCacheEntry*)fcinfo->flinfo->fn_extra;
5554+
if (typentry==NULL||
5555+
typentry->type_id!=element_type)
5556+
{
5557+
typentry=lookup_type_cache(element_type,
5558+
TYPECACHE_CMP_PROC_FINFO);
5559+
if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
5560+
ereport(ERROR,
5561+
(errcode(ERRCODE_UNDEFINED_FUNCTION),
5562+
errmsg("could not identify a comparison function for type %s",
5563+
format_type_be(element_type))));
5564+
fcinfo->flinfo->fn_extra= (void*)typentry;
5565+
}
5566+
5567+
/*
5568+
* We have separate implementation paths for fixed- and variable-width
5569+
* types, since indexing the array is a lot cheaper in the first case.
5570+
*/
5571+
if (typentry->typlen>0)
5572+
result=width_bucket_array_fixed(operand,thresholds,
5573+
collation,typentry);
5574+
else
5575+
result=width_bucket_array_variable(operand,thresholds,
5576+
collation,typentry);
5577+
}
5578+
5579+
/* Avoid leaking memory when handed toasted input. */
5580+
PG_FREE_IF_COPY(thresholds,1);
5581+
5582+
PG_RETURN_INT32(result);
5583+
}
5584+
5585+
/*
5586+
* width_bucket_array for float8 data.
5587+
*/
5588+
staticint
5589+
width_bucket_array_float8(Datumoperand,ArrayType*thresholds)
5590+
{
5591+
float8op=DatumGetFloat8(operand);
5592+
float8*thresholds_data;
5593+
intleft;
5594+
intright;
5595+
5596+
/*
5597+
* Since we know the array contains no NULLs, we can just index it
5598+
* directly.
5599+
*/
5600+
thresholds_data= (float8*)ARR_DATA_PTR(thresholds);
5601+
5602+
left=0;
5603+
right=ArrayGetNItems(ARR_NDIM(thresholds),ARR_DIMS(thresholds));
5604+
5605+
/*
5606+
* If the probe value is a NaN, it's greater than or equal to all possible
5607+
* threshold values (including other NaNs), so we need not search. Note
5608+
* that this would give the same result as searching even if the array
5609+
* contains multiple NaNs (as long as they're correctly sorted), since the
5610+
* loop logic will find the rightmost of multiple equal threshold values.
5611+
*/
5612+
if (isnan(op))
5613+
returnright;
5614+
5615+
/* Find the bucket */
5616+
while (left<right)
5617+
{
5618+
intmid= (left+right) /2;
5619+
5620+
if (isnan(thresholds_data[mid])||op<thresholds_data[mid])
5621+
right=mid;
5622+
else
5623+
left=mid+1;
5624+
}
5625+
5626+
returnleft;
5627+
}
5628+
5629+
/*
5630+
* width_bucket_array for generic fixed-width data types.
5631+
*/
5632+
staticint
5633+
width_bucket_array_fixed(Datumoperand,
5634+
ArrayType*thresholds,
5635+
Oidcollation,
5636+
TypeCacheEntry*typentry)
5637+
{
5638+
char*thresholds_data;
5639+
inttyplen=typentry->typlen;
5640+
booltypbyval=typentry->typbyval;
5641+
FunctionCallInfoDatalocfcinfo;
5642+
intleft;
5643+
intright;
5644+
5645+
/*
5646+
* Since we know the array contains no NULLs, we can just index it
5647+
* directly.
5648+
*/
5649+
thresholds_data= (char*)ARR_DATA_PTR(thresholds);
5650+
5651+
InitFunctionCallInfoData(locfcinfo,&typentry->cmp_proc_finfo,2,
5652+
collation,NULL,NULL);
5653+
5654+
/* Find the bucket */
5655+
left=0;
5656+
right=ArrayGetNItems(ARR_NDIM(thresholds),ARR_DIMS(thresholds));
5657+
while (left<right)
5658+
{
5659+
intmid= (left+right) /2;
5660+
char*ptr;
5661+
int32cmpresult;
5662+
5663+
ptr=thresholds_data+mid*typlen;
5664+
5665+
locfcinfo.arg[0]=operand;
5666+
locfcinfo.arg[1]=fetch_att(ptr,typbyval,typlen);
5667+
locfcinfo.argnull[0]= false;
5668+
locfcinfo.argnull[1]= false;
5669+
locfcinfo.isnull= false;
5670+
5671+
cmpresult=DatumGetInt32(FunctionCallInvoke(&locfcinfo));
5672+
5673+
if (cmpresult<0)
5674+
right=mid;
5675+
else
5676+
left=mid+1;
5677+
}
5678+
5679+
returnleft;
5680+
}
5681+
5682+
/*
5683+
* width_bucket_array for generic variable-width data types.
5684+
*/
5685+
staticint
5686+
width_bucket_array_variable(Datumoperand,
5687+
ArrayType*thresholds,
5688+
Oidcollation,
5689+
TypeCacheEntry*typentry)
5690+
{
5691+
char*thresholds_data;
5692+
inttyplen=typentry->typlen;
5693+
booltypbyval=typentry->typbyval;
5694+
chartypalign=typentry->typalign;
5695+
FunctionCallInfoDatalocfcinfo;
5696+
intleft;
5697+
intright;
5698+
5699+
thresholds_data= (char*)ARR_DATA_PTR(thresholds);
5700+
5701+
InitFunctionCallInfoData(locfcinfo,&typentry->cmp_proc_finfo,2,
5702+
collation,NULL,NULL);
5703+
5704+
/* Find the bucket */
5705+
left=0;
5706+
right=ArrayGetNItems(ARR_NDIM(thresholds),ARR_DIMS(thresholds));
5707+
while (left<right)
5708+
{
5709+
intmid= (left+right) /2;
5710+
char*ptr;
5711+
inti;
5712+
int32cmpresult;
5713+
5714+
/* Locate mid'th array element by advancing from left element */
5715+
ptr=thresholds_data;
5716+
for (i=left;i<mid;i++)
5717+
{
5718+
ptr=att_addlength_pointer(ptr,typlen,ptr);
5719+
ptr= (char*)att_align_nominal(ptr,typalign);
5720+
}
5721+
5722+
locfcinfo.arg[0]=operand;
5723+
locfcinfo.arg[1]=fetch_att(ptr,typbyval,typlen);
5724+
locfcinfo.argnull[0]= false;
5725+
locfcinfo.argnull[1]= false;
5726+
locfcinfo.isnull= false;
5727+
5728+
cmpresult=DatumGetInt32(FunctionCallInvoke(&locfcinfo));
5729+
5730+
if (cmpresult<0)
5731+
right=mid;
5732+
else
5733+
{
5734+
left=mid+1;
5735+
5736+
/*
5737+
* Move the thresholds pointer to match new "left" index, so we
5738+
* don't have to seek over those elements again. This trick
5739+
* ensures we do only O(N) array indexing work, not O(N^2).
5740+
*/
5741+
ptr=att_addlength_pointer(ptr,typlen,ptr);
5742+
thresholds_data= (char*)att_align_nominal(ptr,typalign);
5743+
}
5744+
}
5745+
5746+
returnleft;
5747+
}

‎src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/*yyyymmddN */
56-
#defineCATALOG_VERSION_NO201408281
56+
#defineCATALOG_VERSION_NO201409091
5757

5858
#endif

‎src/include/catalog/pg_proc.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -514,7 +514,7 @@ DATA(insert OID = 308 ( float84le PGNSP PGUID 12 1 0 0 0 f f f t t f i 2 0
514514
DATA(insertOID=309 (float84gtPGNSPPGUID121000fffttfi2016"701 700"_null__null__null__null_float84gt_null__null__null_ ));
515515
DATA(insertOID=310 (float84gePGNSPPGUID121000fffttfi2016"701 700"_null__null__null__null_float84ge_null__null__null_ ));
516516
DATA(insertOID=320 (width_bucketPGNSPPGUID121000fffftfi4023"701 701 701 23"_null__null__null__null_width_bucket_float8_null__null__null_ ));
517-
DESCR("bucket number of operand inequidepth histogram");
517+
DESCR("bucket number of operand inequal-width histogram");
518518

519519
DATA(insertOID=311 (float8PGNSPPGUID121000fffftfi10701"700"_null__null__null__null_ftod_null__null__null_ ));
520520
DESCR("convert float4 to float8");
@@ -885,6 +885,8 @@ DATA(insert OID = 2334 ( array_agg_finalfn PGNSP PGUID 12 1 0 0 0 f f f f f f
885885
DESCR("aggregate final function");
886886
DATA(insertOID=2335 (array_aggPGNSPPGUID121000tfffffi102277"2283"_null__null__null__null_aggregate_dummy_null__null__null_ ));
887887
DESCR("concatenate aggregate input into an array");
888+
DATA(insertOID=3218 (width_bucketPGNSPPGUID121000fffftfi2023"2283 2277"_null__null__null__null_width_bucket_array_null__null__null_ ));
889+
DESCR("bucket number of operand given a sorted array of bucket lower bounds");
888890
DATA(insertOID=3816 (array_typanalyzePGNSPPGUID121000fffftfs1016"2281"_null__null__null__null_array_typanalyze_null__null__null_ ));
889891
DESCR("array typanalyze");
890892
DATA(insertOID=3817 (arraycontselPGNSPPGUID121000fffftfs40701"2281 26 2281 23"_null__null__null__null_arraycontsel_null__null__null_ ));
@@ -2301,7 +2303,7 @@ DESCR("trunc(x/y)");
23012303
DATA(insertOID=1980 (numeric_div_truncPGNSPPGUID121000fffftfi201700"1700 1700"_null__null__null__null_numeric_div_trunc_null__null__null_ ));
23022304
DESCR("trunc(x/y)");
23032305
DATA(insertOID=2170 (width_bucketPGNSPPGUID121000fffftfi4023"1700 1700 1700 23"_null__null__null__null_width_bucket_numeric_null__null__null_ ));
2304-
DESCR("bucket number of operand inequidepth histogram");
2306+
DESCR("bucket number of operand inequal-width histogram");
23052307

23062308
DATA(insertOID=1747 (time_pl_intervalPGNSPPGUID121000fffftfi201083"1083 1186"_null__null__null__null_time_pl_interval_null__null__null_ ));
23072309
DATA(insertOID=1748 (time_mi_intervalPGNSPPGUID121000fffftfi201083"1083 1186"_null__null__null__null_time_mi_interval_null__null__null_ ));

‎src/include/utils/array.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ extern Datum array_fill_with_lower_bounds(PG_FUNCTION_ARGS);
214214
externDatumarray_unnest(PG_FUNCTION_ARGS);
215215
externDatumarray_remove(PG_FUNCTION_ARGS);
216216
externDatumarray_replace(PG_FUNCTION_ARGS);
217+
externDatumwidth_bucket_array(PG_FUNCTION_ARGS);
217218

218219
externDatumarray_ref(ArrayType*array,intnSubscripts,int*indx,
219220
intarraytyplen,intelmlen,boolelmbyval,charelmalign,

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp