Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit0e57b4d

Browse files
committed
Speed up text sorts where the same strings occur multiple times.
Cache strxfrm() blobs across calls made to the text SortSupportabbreviation routine. This can speed up sorting if the same stringneeds to be abbreviated many times in a row.Also, cache the result of the previous strcoll() comparison, so thatif we're asked to compare the same strings agin, we do need to callstrcoll() again.Perhaps surprisingly, these optimizations don't seem to hurt even whenthey don't help. memcmp() is really cheap compared to strcoll() orstrxfrm().Peter Geoghegan, reviewed by me.
1 parentbfb54ff commit0e57b4d

File tree

1 file changed

+71
-4
lines changed

1 file changed

+71
-4
lines changed

‎src/backend/utils/adt/varlena.c

Lines changed: 71 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ typedef struct
6262
char*buf2;/* 2nd string, or abbreviation strxfrm() buf */
6363
intbuflen1;
6464
intbuflen2;
65+
intlast_len1;/* Length of last buf1 string/strxfrm() blob */
66+
intlast_len2;/* Length of last buf2 string/strxfrm() blob */
67+
intlast_returned;/* Last comparison result (cache) */
6568
boolcollate_c;
6669
hyperLogLogStateabbr_card;/* Abbreviated key cardinality state */
6770
hyperLogLogStatefull_card;/* Full key cardinality state */
@@ -1832,9 +1835,20 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
18321835
tss->buflen1=TEXTBUFLEN;
18331836
tss->buf2=palloc(TEXTBUFLEN);
18341837
tss->buflen2=TEXTBUFLEN;
1838+
/* Start with invalid values */
1839+
tss->last_len1=-1;
1840+
tss->last_len2=-1;
18351841
#ifdefHAVE_LOCALE_T
18361842
tss->locale=locale;
18371843
#endif
1844+
/*
1845+
* To avoid somehow confusing a strxfrm() blob and an original string
1846+
* within bttextfastcmp_locale(), initialize last returned cache to a
1847+
* sentinel value. A platform-specific actual strcoll() return value
1848+
* of INT_MIN seems unlikely, but if that occurs it will not cause
1849+
* wrong answers.
1850+
*/
1851+
tss->last_returned=INT_MIN;
18381852
tss->collate_c=collate_c;
18391853
ssup->ssup_extra=tss;
18401854

@@ -1897,6 +1911,7 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
18971911
{
18981912
text*arg1=DatumGetTextPP(x);
18991913
text*arg2=DatumGetTextPP(y);
1914+
boolarg1_match;
19001915
TextSortSupport*tss= (TextSortSupport*)ssup->ssup_extra;
19011916

19021917
/* working state */
@@ -1915,6 +1930,11 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
19151930
/* Fast pre-check for equality, as discussed in varstr_cmp() */
19161931
if (len1==len2&&memcmp(a1p,a2p,len1)==0)
19171932
{
1933+
/*
1934+
* No change in buf1 or buf2 contents, so avoid changing last_len1 or
1935+
* last_len2. Existing contents of buffers might still be used by next
1936+
* call.
1937+
*/
19181938
result=0;
19191939
gotodone;
19201940
}
@@ -1932,10 +1952,43 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
19321952
tss->buf2=MemoryContextAlloc(ssup->ssup_cxt,tss->buflen2);
19331953
}
19341954

1935-
memcpy(tss->buf1,a1p,len1);
1936-
tss->buf1[len1]='\0';
1937-
memcpy(tss->buf2,a2p,len2);
1938-
tss->buf2[len2]='\0';
1955+
/*
1956+
* We're likely to be asked to compare the same strings repeatedly, and
1957+
* memcmp() is so much cheaper than strcoll() that it pays to try to cache
1958+
* comparisons, even though in general there is no reason to think that
1959+
* that will work out (every text datum may be unique). Caching does not
1960+
* slow things down measurably when it doesn't work out, and can speed
1961+
* things up by rather a lot when it does. In part, this is because the
1962+
* memcmp() compares data from cachelines that are needed in L1 cache even
1963+
* when the last comparison's result cannot be reused.
1964+
*/
1965+
arg1_match= true;
1966+
if (len1!=tss->last_len1||memcmp(tss->buf1,a1p,len1)!=0)
1967+
{
1968+
arg1_match= false;
1969+
memcpy(tss->buf1,a1p,len1);
1970+
tss->buf1[len1]='\0';
1971+
tss->last_len1=len1;
1972+
}
1973+
1974+
/*
1975+
* If we're comparing the same two strings as last time, we can return the
1976+
* same answer without calling strcoll() again. This is more likely than
1977+
* it seems (at least with moderate to low cardinality sets), because
1978+
* quicksort compares the same pivot against many values.
1979+
*/
1980+
if (len2!=tss->last_len2||memcmp(tss->buf2,a2p,len2)!=0)
1981+
{
1982+
memcpy(tss->buf2,a2p,len2);
1983+
tss->buf2[len2]='\0';
1984+
tss->last_len2=len2;
1985+
}
1986+
elseif (arg1_match&&tss->last_returned!=INT_MIN)
1987+
{
1988+
/* Use result cached following last actual strcoll() call */
1989+
result=tss->last_returned;
1990+
gotodone;
1991+
}
19391992

19401993
#ifdefHAVE_LOCALE_T
19411994
if (tss->locale)
@@ -1952,6 +2005,8 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
19522005
if (result==0)
19532006
result=strcmp(tss->buf1,tss->buf2);
19542007

2008+
/* Cache result, perhaps saving an expensive strcoll() call next time */
2009+
tss->last_returned=result;
19552010
done:
19562011
/* We can't afford to leak memory here. */
19572012
if (PointerGetDatum(arg1)!=x)
@@ -2034,9 +2089,19 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
20342089
tss->buf1=palloc(tss->buflen1);
20352090
}
20362091

2092+
/* Might be able to reuse strxfrm() blob from last call */
2093+
if (tss->last_len1==len&&
2094+
memcmp(tss->buf1,authoritative_data,len)==0)
2095+
{
2096+
memcpy(pres,tss->buf2,Min(sizeof(Datum),tss->last_len2));
2097+
/* No change affecting cardinality, so no hashing required */
2098+
gotodone;
2099+
}
2100+
20372101
/* Just like strcoll(), strxfrm() expects a NUL-terminated string */
20382102
memcpy(tss->buf1,authoritative_data,len);
20392103
tss->buf1[len]='\0';
2104+
tss->last_len1=len;
20402105

20412106
for (;;)
20422107
{
@@ -2048,6 +2113,7 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
20482113
#endif
20492114
bsize=strxfrm(tss->buf2,tss->buf1,tss->buflen2);
20502115

2116+
tss->last_len2=bsize;
20512117
if (bsize<tss->buflen2)
20522118
break;
20532119

@@ -2105,6 +2171,7 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
21052171

21062172
addHyperLogLog(&tss->abbr_card,hash);
21072173

2174+
done:
21082175
/*
21092176
* Byteswap on little-endian machines.
21102177
*

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp