Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit888f2ea

Browse files
committed
Add array_sample() and array_shuffle() functions.
These are useful in Monte Carlo applications.Martin Kalcher, reviewed/adjusted by Daniel Gustafsson and myselfDiscussion:https://postgr.es/m/9d160a44-7675-51e8-60cf-6d64b76db831@aboutsource.net
1 parentcd82e5c commit888f2ea

File tree

6 files changed

+284
-2
lines changed

6 files changed

+284
-2
lines changed

‎doc/src/sgml/func.sgml

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16053,7 +16053,7 @@ SELECT js,
1605316053
js IS JSON ARRAY "array?"
1605416054
FROM (VALUES
1605516055
('123'), ('"abc"'), ('{"a": "b"}'), ('[1,2]'),('abc')) foo(js);
16056-
js | json? | scalar? | object? | array?
16056+
js | json? | scalar? | object? | array?
1605716057
------------+-------+---------+---------+--------
1605816058
123 | t | t | f | f
1605916059
"abc" | t | t | f | f
@@ -18777,6 +18777,48 @@ SELECT NULLIF(value, '(none)') ...
1877718777
</para></entry>
1877818778
</row>
1877918779

18780+
<row>
18781+
<entry role="func_table_entry"><para role="func_signature">
18782+
<indexterm>
18783+
<primary>array_sample</primary>
18784+
</indexterm>
18785+
<function>array_sample</function> ( <parameter>array</parameter> <type>anyarray</type>, <parameter>n</parameter> <type>integer</type> )
18786+
<returnvalue>anyarray</returnvalue>
18787+
</para>
18788+
<para>
18789+
Returns an array of <parameter>n</parameter> items randomly selected
18790+
from <parameter>array</parameter>. <parameter>n</parameter> may not
18791+
exceed the length of <parameter>array</parameter>'s first dimension.
18792+
If <parameter>array</parameter> is multi-dimensional,
18793+
an <quote>item</quote> is a slice having a given first subscript.
18794+
</para>
18795+
<para>
18796+
<literal>array_sample(ARRAY[1,2,3,4,5,6], 3)</literal>
18797+
<returnvalue>{2,6,1}</returnvalue>
18798+
</para>
18799+
<para>
18800+
<literal>array_sample(ARRAY[[1,2],[3,4],[5,6]], 2)</literal>
18801+
<returnvalue>{{5,6},{1,2}}</returnvalue>
18802+
</para></entry>
18803+
</row>
18804+
18805+
<row>
18806+
<entry role="func_table_entry"><para role="func_signature">
18807+
<indexterm>
18808+
<primary>array_shuffle</primary>
18809+
</indexterm>
18810+
<function>array_shuffle</function> ( <type>anyarray</type> )
18811+
<returnvalue>anyarray</returnvalue>
18812+
</para>
18813+
<para>
18814+
Randomly shuffles the first dimension of the array.
18815+
</para>
18816+
<para>
18817+
<literal>array_shuffle(ARRAY[[1,2],[3,4],[5,6]])</literal>
18818+
<returnvalue>{{5,6},{1,2},{3,4}}</returnvalue>
18819+
</para></entry>
18820+
</row>
18821+
1878018822
<row>
1878118823
<entry role="func_table_entry"><para role="func_signature">
1878218824
<indexterm id="function-array-to-string">

‎src/backend/utils/adt/array_userfuncs.c

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include"catalog/pg_type.h"
1616
#include"libpq/pqformat.h"
1717
#include"common/int.h"
18+
#include"common/pg_prng.h"
1819
#include"port/pg_bitutils.h"
1920
#include"utils/array.h"
2021
#include"utils/datum.h"
@@ -1525,3 +1526,168 @@ array_positions(PG_FUNCTION_ARGS)
15251526

15261527
PG_RETURN_DATUM(makeArrayResult(astate,CurrentMemoryContext));
15271528
}
1529+
1530+
/*
1531+
* array_shuffle_n
1532+
*Return a copy of array with n randomly chosen items.
1533+
*
1534+
* The number of items must not exceed the size of the first dimension of the
1535+
* array. We preserve the first dimension's lower bound if keep_lb,
1536+
* else it's set to 1. Lower-order dimensions are preserved in any case.
1537+
*
1538+
* NOTE: it would be cleaner to look up the elmlen/elmbval/elmalign info
1539+
* from the system catalogs, given only the elmtyp. However, the caller is
1540+
* in a better position to cache this info across multiple calls.
1541+
*/
1542+
staticArrayType*
1543+
array_shuffle_n(ArrayType*array,intn,boolkeep_lb,
1544+
Oidelmtyp,TypeCacheEntry*typentry)
1545+
{
1546+
ArrayType*result;
1547+
intndim,
1548+
*dims,
1549+
*lbs,
1550+
nelm,
1551+
nitem,
1552+
rdims[MAXDIM],
1553+
rlbs[MAXDIM];
1554+
int16elmlen;
1555+
boolelmbyval;
1556+
charelmalign;
1557+
Datum*elms,
1558+
*ielms;
1559+
bool*nuls,
1560+
*inuls;
1561+
1562+
ndim=ARR_NDIM(array);
1563+
dims=ARR_DIMS(array);
1564+
lbs=ARR_LBOUND(array);
1565+
1566+
elmlen=typentry->typlen;
1567+
elmbyval=typentry->typbyval;
1568+
elmalign=typentry->typalign;
1569+
1570+
/* If the target array is empty, exit fast */
1571+
if (ndim<1||dims[0]<1||n<1)
1572+
returnconstruct_empty_array(elmtyp);
1573+
1574+
deconstruct_array(array,elmtyp,elmlen,elmbyval,elmalign,
1575+
&elms,&nuls,&nelm);
1576+
1577+
nitem=dims[0];/* total number of items */
1578+
nelm /=nitem;/* number of elements per item */
1579+
1580+
Assert(n <=nitem);/* else it's caller error */
1581+
1582+
/*
1583+
* Shuffle array using Fisher-Yates algorithm. Scan the array and swap
1584+
* current item (nelm datums starting at ielms) with a randomly chosen
1585+
* later item (nelm datums starting at jelms) in each iteration. We can
1586+
* stop once we've done n iterations; then first n items are the result.
1587+
*/
1588+
ielms=elms;
1589+
inuls=nuls;
1590+
for (inti=0;i<n;i++)
1591+
{
1592+
intj= (int)pg_prng_uint64_range(&pg_global_prng_state,i,nitem-1)*nelm;
1593+
Datum*jelms=elms+j;
1594+
bool*jnuls=nuls+j;
1595+
1596+
/* Swap i'th and j'th items; advance ielms/inuls to next item */
1597+
for (intk=0;k<nelm;k++)
1598+
{
1599+
Datumelm=*ielms;
1600+
boolnul=*inuls;
1601+
1602+
*ielms++=*jelms;
1603+
*inuls++=*jnuls;
1604+
*jelms++=elm;
1605+
*jnuls++=nul;
1606+
}
1607+
}
1608+
1609+
/* Set up dimensions of the result */
1610+
memcpy(rdims,dims,ndim*sizeof(int));
1611+
memcpy(rlbs,lbs,ndim*sizeof(int));
1612+
rdims[0]=n;
1613+
if (!keep_lb)
1614+
rlbs[0]=1;
1615+
1616+
result=construct_md_array(elms,nuls,ndim,rdims,rlbs,
1617+
elmtyp,elmlen,elmbyval,elmalign);
1618+
1619+
pfree(elms);
1620+
pfree(nuls);
1621+
1622+
returnresult;
1623+
}
1624+
1625+
/*
1626+
* array_shuffle
1627+
*
1628+
* Returns an array with the same dimensions as the input array, with its
1629+
* first-dimension elements in random order.
1630+
*/
1631+
Datum
1632+
array_shuffle(PG_FUNCTION_ARGS)
1633+
{
1634+
ArrayType*array=PG_GETARG_ARRAYTYPE_P(0);
1635+
ArrayType*result;
1636+
Oidelmtyp;
1637+
TypeCacheEntry*typentry;
1638+
1639+
/*
1640+
* There is no point in shuffling empty arrays or arrays with less than
1641+
* two items.
1642+
*/
1643+
if (ARR_NDIM(array)<1||ARR_DIMS(array)[0]<2)
1644+
PG_RETURN_ARRAYTYPE_P(array);
1645+
1646+
elmtyp=ARR_ELEMTYPE(array);
1647+
typentry= (TypeCacheEntry*)fcinfo->flinfo->fn_extra;
1648+
if (typentry==NULL||typentry->type_id!=elmtyp)
1649+
{
1650+
typentry=lookup_type_cache(elmtyp,0);
1651+
fcinfo->flinfo->fn_extra= (void*)typentry;
1652+
}
1653+
1654+
result=array_shuffle_n(array,ARR_DIMS(array)[0], true,elmtyp,typentry);
1655+
1656+
PG_RETURN_ARRAYTYPE_P(result);
1657+
}
1658+
1659+
/*
1660+
* array_sample
1661+
*
1662+
* Returns an array of n randomly chosen first-dimension elements
1663+
* from the input array.
1664+
*/
1665+
Datum
1666+
array_sample(PG_FUNCTION_ARGS)
1667+
{
1668+
ArrayType*array=PG_GETARG_ARRAYTYPE_P(0);
1669+
intn=PG_GETARG_INT32(1);
1670+
ArrayType*result;
1671+
Oidelmtyp;
1672+
TypeCacheEntry*typentry;
1673+
intnitem;
1674+
1675+
nitem= (ARR_NDIM(array)<1) ?0 :ARR_DIMS(array)[0];
1676+
1677+
if (n<0||n>nitem)
1678+
ereport(ERROR,
1679+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1680+
errmsg("sample size must be between 0 and %d",nitem)));
1681+
1682+
elmtyp=ARR_ELEMTYPE(array);
1683+
typentry= (TypeCacheEntry*)fcinfo->flinfo->fn_extra;
1684+
if (typentry==NULL||typentry->type_id!=elmtyp)
1685+
{
1686+
typentry=lookup_type_cache(elmtyp,0);
1687+
fcinfo->flinfo->fn_extra= (void*)typentry;
1688+
}
1689+
1690+
result=array_shuffle_n(array,n, false,elmtyp,typentry);
1691+
1692+
PG_RETURN_ARRAYTYPE_P(result);
1693+
}

‎src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,6 @@
5757
*/
5858

5959
/*yyyymmddN */
60-
#defineCATALOG_VERSION_NO202304051
60+
#defineCATALOG_VERSION_NO202304071
6161

6262
#endif

‎src/include/catalog/pg_proc.dat

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1717,6 +1717,12 @@
17171717
{ oid => '6172', descr => 'remove last N elements of array',
17181718
proname => 'trim_array', prorettype => 'anyarray',
17191719
proargtypes => 'anyarray int4', prosrc => 'trim_array' },
1720+
{ oid => '8464', descr => 'shuffle array',
1721+
proname => 'array_shuffle', provolatile => 'v', prorettype => 'anyarray',
1722+
proargtypes => 'anyarray', prosrc => 'array_shuffle' },
1723+
{ oid => '8465', descr => 'take samples from array',
1724+
proname => 'array_sample', provolatile => 'v', prorettype => 'anyarray',
1725+
proargtypes => 'anyarray int4', prosrc => 'array_sample' },
17201726
{ oid => '3816', descr => 'array typanalyze',
17211727
proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
17221728
proargtypes => 'internal', prosrc => 'array_typanalyze' },

‎src/test/regress/expected/arrays.out

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2472,3 +2472,57 @@ SELECT trim_array(ARRAY[1, 2, 3], 10); -- fail
24722472
ERROR: number of elements to trim must be between 0 and 3
24732473
SELECT trim_array(ARRAY[]::int[], 1); -- fail
24742474
ERROR: number of elements to trim must be between 0 and 0
2475+
-- array_shuffle
2476+
SELECT array_shuffle('{1,2,3,4,5,6}'::int[]) <@ '{1,2,3,4,5,6}';
2477+
?column?
2478+
----------
2479+
t
2480+
(1 row)
2481+
2482+
SELECT array_shuffle('{1,2,3,4,5,6}'::int[]) @> '{1,2,3,4,5,6}';
2483+
?column?
2484+
----------
2485+
t
2486+
(1 row)
2487+
2488+
SELECT array_dims(array_shuffle('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[]));
2489+
array_dims
2490+
-------------
2491+
[-1:2][2:3]
2492+
(1 row)
2493+
2494+
SELECT array_dims(array_shuffle('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[]));
2495+
array_dims
2496+
-----------------
2497+
[1:3][1:2][1:2]
2498+
(1 row)
2499+
2500+
-- array_sample
2501+
SELECT array_sample('{1,2,3,4,5,6}'::int[], 3) <@ '{1,2,3,4,5,6}';
2502+
?column?
2503+
----------
2504+
t
2505+
(1 row)
2506+
2507+
SELECT array_length(array_sample('{1,2,3,4,5,6}'::int[], 3), 1);
2508+
array_length
2509+
--------------
2510+
3
2511+
(1 row)
2512+
2513+
SELECT array_dims(array_sample('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[], 3));
2514+
array_dims
2515+
------------
2516+
[1:3][2:3]
2517+
(1 row)
2518+
2519+
SELECT array_dims(array_sample('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2));
2520+
array_dims
2521+
-----------------
2522+
[1:2][1:2][1:2]
2523+
(1 row)
2524+
2525+
SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
2526+
ERROR: sample size must be between 0 and 6
2527+
SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
2528+
ERROR: sample size must be between 0 and 6

‎src/test/regress/sql/arrays.sql

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -761,3 +761,17 @@ FROM
761761
SELECT trim_array(ARRAY[1,2,3],-1);-- fail
762762
SELECT trim_array(ARRAY[1,2,3],10);-- fail
763763
SELECT trim_array(ARRAY[]::int[],1);-- fail
764+
765+
-- array_shuffle
766+
SELECT array_shuffle('{1,2,3,4,5,6}'::int[])<@'{1,2,3,4,5,6}';
767+
SELECT array_shuffle('{1,2,3,4,5,6}'::int[]) @>'{1,2,3,4,5,6}';
768+
SELECT array_dims(array_shuffle('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[]));
769+
SELECT array_dims(array_shuffle('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[]));
770+
771+
-- array_sample
772+
SELECT array_sample('{1,2,3,4,5,6}'::int[],3)<@'{1,2,3,4,5,6}';
773+
SELECT array_length(array_sample('{1,2,3,4,5,6}'::int[],3),1);
774+
SELECT array_dims(array_sample('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[],3));
775+
SELECT array_dims(array_sample('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[],2));
776+
SELECT array_sample('{1,2,3,4,5,6}'::int[],-1);-- fail
777+
SELECT array_sample('{1,2,3,4,5,6}'::int[],7);--fail

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp