Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitb34e37b

Browse files
committed
Add sortsupport routines for text.
This provides a small but worthwhile speedup when sorting text, at least in cases to which the sortsupport machinery applies. Robert Haas and Peter Geoghegan
1 parent a4287a6, commit b34e37b

File tree

5 files changed

+215
-11
lines changed

5 files changed

+215
-11
lines changed

‎src/backend/utils/adt/varlena.c

Lines changed: 210 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@
2828
#include"utils/builtins.h"
2929
#include"utils/bytea.h"
3030
#include"utils/lsyscache.h"
31+
#include"utils/memutils.h"
3132
#include"utils/pg_locale.h"
33+
#include"utils/sortsupport.h"
3234

3335

3436
/* GUC variable */
@@ -50,12 +52,32 @@ typedef struct
5052
intskiptable[256];/* skip distance for given mismatched char */
5153
}TextPositionState;
5254

/*
 * Working state for the locale-aware text sort comparator.
 *
 * buf1 and buf2 are scratch buffers that receive NUL-terminated copies of the
 * two strings being compared; buflen1 and buflen2 hold the allocated size of
 * the corresponding buffer.  The comparator enlarges a buffer whenever a
 * string (plus its terminator) does not fit.
 */
typedef struct
{
    char       *buf1;           /* 1st string */
    char       *buf2;           /* 2nd string */
    int         buflen1;        /* allocated size of buf1 */
    int         buflen2;        /* allocated size of buf2 */
#ifdef HAVE_LOCALE_T
    pg_locale_t locale;         /* if zero, plain strcoll() is used */
#endif
} TextSortSupport;
65+
/*
 * This should be large enough that most strings will fit, but small enough
 * that we feel comfortable putting it on the stack.  It is also the initial
 * size of the scratch buffers allocated for text sort support.
 */
#define TEXTBUFLEN 1024
71+
5372
#defineDatumGetUnknownP(X)((unknown *) PG_DETOAST_DATUM(X))
5473
#defineDatumGetUnknownPCopy(X)((unknown *) PG_DETOAST_DATUM_COPY(X))
5574
#definePG_GETARG_UNKNOWN_P(n)DatumGetUnknownP(PG_GETARG_DATUM(n))
5675
#definePG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
5776
#definePG_RETURN_UNKNOWN_P(x)PG_RETURN_POINTER(x)
5877

78+
staticvoidbtsortsupport_worker(SortSupportssup,Oidcollid);
79+
staticintbttextfastcmp_c(Datumx,Datumy,SortSupportssup);
80+
staticintbttextfastcmp_locale(Datumx,Datumy,SortSupportssup);
5981
staticint32text_length(Datumstr);
6082
statictext*text_catenate(text*t1,text*t2);
6183
statictext*text_substring(Datumstr,
@@ -1356,10 +1378,8 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
13561378
}
13571379
else
13581380
{
1359-
#defineSTACKBUFLEN1024
1360-
1361-
chara1buf[STACKBUFLEN];
1362-
chara2buf[STACKBUFLEN];
1381+
chara1buf[TEXTBUFLEN];
1382+
chara2buf[TEXTBUFLEN];
13631383
char*a1p,
13641384
*a2p;
13651385

@@ -1393,24 +1413,24 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
13931413
inta2len;
13941414
intr;
13951415

1396-
if (len1 >=STACKBUFLEN /2)
1416+
if (len1 >=TEXTBUFLEN /2)
13971417
{
13981418
a1len=len1*2+2;
13991419
a1p=palloc(a1len);
14001420
}
14011421
else
14021422
{
1403-
a1len=STACKBUFLEN;
1423+
a1len=TEXTBUFLEN;
14041424
a1p=a1buf;
14051425
}
1406-
if (len2 >=STACKBUFLEN /2)
1426+
if (len2 >=TEXTBUFLEN /2)
14071427
{
14081428
a2len=len2*2+2;
14091429
a2p=palloc(a2len);
14101430
}
14111431
else
14121432
{
1413-
a2len=STACKBUFLEN;
1433+
a2len=TEXTBUFLEN;
14141434
a2p=a2buf;
14151435
}
14161436

@@ -1475,11 +1495,11 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
14751495
}
14761496
#endif/* WIN32 */
14771497

1478-
if (len1 >=STACKBUFLEN)
1498+
if (len1 >=TEXTBUFLEN)
14791499
a1p= (char*)palloc(len1+1);
14801500
else
14811501
a1p=a1buf;
1482-
if (len2 >=STACKBUFLEN)
1502+
if (len2 >=TEXTBUFLEN)
14831503
a2p= (char*)palloc(len2+1);
14841504
else
14851505
a2p=a2buf;
@@ -1683,6 +1703,186 @@ bttextcmp(PG_FUNCTION_ARGS)
16831703
PG_RETURN_INT32(result);
16841704
}
16851705

1706+
Datum
1707+
bttextsortsupport(PG_FUNCTION_ARGS)
1708+
{
1709+
SortSupportssup= (SortSupport)PG_GETARG_POINTER(0);
1710+
Oidcollid=ssup->ssup_collation;
1711+
MemoryContextoldcontext;
1712+
1713+
oldcontext=MemoryContextSwitchTo(ssup->ssup_cxt);
1714+
1715+
btsortsupport_worker(ssup,collid);
1716+
1717+
MemoryContextSwitchTo(oldcontext);
1718+
1719+
PG_RETURN_VOID();
1720+
}
1721+
1722+
staticvoid
1723+
btsortsupport_worker(SortSupportssup,Oidcollid)
1724+
{
1725+
TextSortSupport*tss;
1726+
1727+
/*
1728+
* If LC_COLLATE = C, we can make things quite a bit faster by using
1729+
* memcmp() rather than strcoll(). To minimize the per-comparison
1730+
* overhead, we make this decision just once for the whole sort.
1731+
*/
1732+
if (lc_collate_is_c(collid))
1733+
{
1734+
ssup->comparator=bttextfastcmp_c;
1735+
return;
1736+
}
1737+
1738+
/*
1739+
* WIN32 requires complex hacks when the database encoding is UTF-8 (except
1740+
* when using the "C" collation). For now, we don't optimize that case.
1741+
*/
1742+
#ifdefWIN32
1743+
if (GetDatabaseEncoding()==PG_UTF8)
1744+
return;
1745+
#endif
1746+
1747+
/*
1748+
* We may need a collation-sensitive comparison. To make things faster,
1749+
* we'll figure out the collation based on the locale id and cache the
1750+
* result. Also, since strxfrm()/strcoll() require NUL-terminated inputs,
1751+
* prepare one or two palloc'd buffers to use as temporary workspace. In
1752+
* the ad-hoc comparison case we only use palloc'd buffers when we need
1753+
* more space than we're comfortable allocating on the stack, but here we
1754+
* can keep the buffers around for the whole sort, so it makes sense to
1755+
* allocate them once and use them unconditionally.
1756+
*/
1757+
tss=palloc(sizeof(TextSortSupport));
1758+
#ifdefHAVE_LOCALE_T
1759+
tss->locale=0;
1760+
#endif
1761+
1762+
if (collid!=DEFAULT_COLLATION_OID)
1763+
{
1764+
if (!OidIsValid(collid))
1765+
{
1766+
/*
1767+
* This typically means that the parser could not resolve a
1768+
* conflict of implicit collations, so report it that way.
1769+
*/
1770+
ereport(ERROR,
1771+
(errcode(ERRCODE_INDETERMINATE_COLLATION),
1772+
errmsg("could not determine which collation to use for string comparison"),
1773+
errhint("Use the COLLATE clause to set the collation explicitly.")));
1774+
}
1775+
#ifdefHAVE_LOCALE_T
1776+
tss->locale=pg_newlocale_from_collation(collid);
1777+
#endif
1778+
}
1779+
1780+
tss->buf1=palloc(TEXTBUFLEN);
1781+
tss->buflen1=TEXTBUFLEN;
1782+
tss->buf2=palloc(TEXTBUFLEN);
1783+
tss->buflen2=TEXTBUFLEN;
1784+
1785+
ssup->ssup_extra=tss;
1786+
ssup->comparator=bttextfastcmp_locale;
1787+
}
1788+
1789+
/*
1790+
* sortsupport comparison func (for C locale case)
1791+
*/
1792+
staticint
1793+
bttextfastcmp_c(Datumx,Datumy,SortSupportssup)
1794+
{
1795+
text*arg1=DatumGetTextPP(x);
1796+
text*arg2=DatumGetTextPP(y);
1797+
char*a1p,
1798+
*a2p;
1799+
intlen1,
1800+
len2,
1801+
result;
1802+
1803+
a1p=VARDATA_ANY(arg1);
1804+
a2p=VARDATA_ANY(arg2);
1805+
1806+
len1=VARSIZE_ANY_EXHDR(arg1);
1807+
len2=VARSIZE_ANY_EXHDR(arg2);
1808+
1809+
result=memcmp(a1p,a2p,Min(len1,len2));
1810+
if ((result==0)&& (len1!=len2))
1811+
result= (len1<len2) ?-1 :1;
1812+
1813+
/* We can't afford to leak memory here. */
1814+
if (PointerGetDatum(arg1)!=x)
1815+
pfree(arg1);
1816+
if (PointerGetDatum(arg2)!=y)
1817+
pfree(arg2);
1818+
1819+
returnresult;
1820+
}
1821+
1822+
/*
1823+
* sortsupport comparison func (for locale case)
1824+
*/
1825+
staticint
1826+
bttextfastcmp_locale(Datumx,Datumy,SortSupportssup)
1827+
{
1828+
text*arg1=DatumGetTextPP(x);
1829+
text*arg2=DatumGetTextPP(y);
1830+
TextSortSupport*tss= (TextSortSupport*)ssup->ssup_extra;
1831+
1832+
/* working state */
1833+
char*a1p,
1834+
*a2p;
1835+
intlen1,
1836+
len2,
1837+
result;
1838+
1839+
a1p=VARDATA_ANY(arg1);
1840+
a2p=VARDATA_ANY(arg2);
1841+
1842+
len1=VARSIZE_ANY_EXHDR(arg1);
1843+
len2=VARSIZE_ANY_EXHDR(arg2);
1844+
1845+
if (len1 >=tss->buflen1)
1846+
{
1847+
pfree(tss->buf1);
1848+
tss->buflen1=Max(len1+1,Min(tss->buflen1*2,MaxAllocSize));
1849+
tss->buf1=MemoryContextAlloc(ssup->ssup_cxt,tss->buflen1);
1850+
}
1851+
if (len2 >=tss->buflen2)
1852+
{
1853+
pfree(tss->buf2);
1854+
tss->buflen1=Max(len2+1,Min(tss->buflen2*2,MaxAllocSize));
1855+
tss->buf2=MemoryContextAlloc(ssup->ssup_cxt,tss->buflen2);
1856+
}
1857+
1858+
memcpy(tss->buf1,a1p,len1);
1859+
tss->buf1[len1]='\0';
1860+
memcpy(tss->buf2,a2p,len2);
1861+
tss->buf2[len2]='\0';
1862+
1863+
#ifdefHAVE_LOCALE_T
1864+
if (tss->locale)
1865+
result=strcoll_l(tss->buf1,tss->buf2,tss->locale);
1866+
else
1867+
#endif
1868+
result=strcoll(tss->buf1,tss->buf2);
1869+
1870+
/*
1871+
* In some locales strcoll() can claim that nonidentical strings are equal.
1872+
* Believing that would be bad news for a number of reasons, so we follow
1873+
* Perl's lead and sort "equal" strings according to strcmp().
1874+
*/
1875+
if (result==0)
1876+
result=strcmp(tss->buf1,tss->buf2);
1877+
1878+
/* We can't afford to leak memory here. */
1879+
if (PointerGetDatum(arg1)!=x)
1880+
pfree(arg1);
1881+
if (PointerGetDatum(arg2)!=y)
1882+
pfree(arg2);
1883+
1884+
returnresult;
1885+
}
16861886

16871887
Datum
16881888
text_larger(PG_FUNCTION_ARGS)

‎src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/*yyyymmddN */
56-
#defineCATALOG_VERSION_NO201407151
56+
#defineCATALOG_VERSION_NO201408141
5757

5858
#endif

‎src/include/catalog/pg_amproc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ DATA(insert (1989 26 26 1 356 ));
122122
DATA(insert (1989262623134 ));
123123
DATA(insert (199130301404 ));
124124
DATA(insert (199425251360 ));
125+
DATA(insert (1994252523255 ));
125126
DATA(insert (19961083108311107 ));
126127
DATA(insert (20001266126611358 ));
127128
DATA(insert (20021562156211672 ));

‎src/include/catalog/pg_proc.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,8 @@ DATA(insert OID = 3135 ( btnamesortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i
614614
DESCR("sort support");
615615
DATA(insertOID=360 (bttextcmpPGNSPPGUID121000fffftfi2023"25 25"_null__null__null__null_bttextcmp_null__null__null_ ));
616616
DESCR("less-equal-greater");
617+
DATA(insertOID=3255 (bttextsortsupportPGNSPPGUID121000fffftfi102278"2281"_null__null__null__null_bttextsortsupport_null__null__null_ ));
618+
DESCR("sort support");
617619
DATA(insertOID=377 (cash_cmpPGNSPPGUID121000fffftfi2023"790 790"_null__null__null__null_cash_cmp_null__null__null_ ));
618620
DESCR("less-equal-greater");
619621
DATA(insertOID=380 (btreltimecmpPGNSPPGUID121000fffftfi2023"703 703"_null__null__null__null_btreltimecmp_null__null__null_ ));

‎src/include/utils/builtins.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,7 @@ extern Datum bttintervalcmp(PG_FUNCTION_ARGS);
316316
externDatumbtcharcmp(PG_FUNCTION_ARGS);
317317
externDatumbtnamecmp(PG_FUNCTION_ARGS);
318318
externDatumbttextcmp(PG_FUNCTION_ARGS);
319+
externDatumbttextsortsupport(PG_FUNCTION_ARGS);
319320

320321
/*
321322
*Per-opclass sort support functions for new btrees. Like the

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp