@@ -62,6 +62,9 @@ typedef struct
6262char * buf2 ;/* 2nd string, or abbreviation strxfrm() buf */
6363int buflen1 ;
6464int buflen2 ;
65+ int last_len1 ;/* Length of last buf1 string/strxfrm() blob */
66+ int last_len2 ;/* Length of last buf2 string/strxfrm() blob */
67+ int last_returned ;/* Last comparison result (cache) */
6568bool collate_c ;
6669hyperLogLogState abbr_card ;/* Abbreviated key cardinality state */
6770hyperLogLogState full_card ;/* Full key cardinality state */
@@ -1832,9 +1835,20 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
18321835tss -> buflen1 = TEXTBUFLEN ;
18331836tss -> buf2 = palloc (TEXTBUFLEN );
18341837tss -> buflen2 = TEXTBUFLEN ;
1838+ /* Start with invalid values */
1839+ tss -> last_len1 = -1 ;
1840+ tss -> last_len2 = -1 ;
18351841#ifdef HAVE_LOCALE_T
18361842tss -> locale = locale ;
18371843#endif
1844+ /*
1845+ * To avoid somehow confusing a strxfrm() blob and an original string
1846+ * within bttextfastcmp_locale(), initialize last returned cache to a
1847+ * sentinel value. A platform-specific actual strcoll() return value
1848+ * of INT_MIN seems unlikely, but if that occurs it will not cause
1849+ * wrong answers.
1850+ */
1851+ tss -> last_returned = INT_MIN ;
18381852tss -> collate_c = collate_c ;
18391853ssup -> ssup_extra = tss ;
18401854
@@ -1897,6 +1911,7 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
18971911{
18981912text * arg1 = DatumGetTextPP (x );
18991913text * arg2 = DatumGetTextPP (y );
1914+ bool arg1_match ;
19001915TextSortSupport * tss = (TextSortSupport * )ssup -> ssup_extra ;
19011916
19021917/* working state */
@@ -1915,6 +1930,11 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
19151930/* Fast pre-check for equality, as discussed in varstr_cmp() */
19161931if (len1 == len2 && memcmp (a1p ,a2p ,len1 )== 0 )
19171932{
1933+ /*
1934+ * No change in buf1 or buf2 contents, so avoid changing last_len1 or
1935+ * last_len2. Existing contents of buffers might still be used by next
1936+ * call.
1937+ */
19181938result = 0 ;
19191939gotodone ;
19201940}
@@ -1932,10 +1952,43 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
19321952tss -> buf2 = MemoryContextAlloc (ssup -> ssup_cxt ,tss -> buflen2 );
19331953}
19341954
1935- memcpy (tss -> buf1 ,a1p ,len1 );
1936- tss -> buf1 [len1 ]= '\0' ;
1937- memcpy (tss -> buf2 ,a2p ,len2 );
1938- tss -> buf2 [len2 ]= '\0' ;
1955+ /*
1956+ * We're likely to be asked to compare the same strings repeatedly, and
1957+ * memcmp() is so much cheaper than strcoll() that it pays to try to cache
1958+ * comparisons, even though in general there is no reason to think that
1959+ * that will work out (every text datum may be unique). Caching does not
1960+ * slow things down measurably when it doesn't work out, and can speed
1961+ * things up by rather a lot when it does. In part, this is because the
1962+ * memcmp() compares data from cachelines that are needed in L1 cache even
1963+ * when the last comparison's result cannot be reused.
1964+ */
1965+ arg1_match = true;
1966+ if (len1 != tss -> last_len1 || memcmp (tss -> buf1 ,a1p ,len1 )!= 0 )
1967+ {
1968+ arg1_match = false;
1969+ memcpy (tss -> buf1 ,a1p ,len1 );
1970+ tss -> buf1 [len1 ]= '\0' ;
1971+ tss -> last_len1 = len1 ;
1972+ }
1973+
1974+ /*
1975+ * If we're comparing the same two strings as last time, we can return the
1976+ * same answer without calling strcoll() again. This is more likely than
1977+ * it seems (at least with moderate to low cardinality sets), because
1978+ * quicksort compares the same pivot against many values.
1979+ */
1980+ if (len2 != tss -> last_len2 || memcmp (tss -> buf2 ,a2p ,len2 )!= 0 )
1981+ {
1982+ memcpy (tss -> buf2 ,a2p ,len2 );
1983+ tss -> buf2 [len2 ]= '\0' ;
1984+ tss -> last_len2 = len2 ;
1985+ }
1986+ else if (arg1_match && tss -> last_returned != INT_MIN )
1987+ {
1988+ /* Use result cached following last actual strcoll() call */
1989+ result = tss -> last_returned ;
1990+ gotodone ;
1991+ }
19391992
19401993#ifdef HAVE_LOCALE_T
19411994if (tss -> locale )
@@ -1952,6 +2005,8 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
19522005if (result == 0 )
19532006result = strcmp (tss -> buf1 ,tss -> buf2 );
19542007
2008+ /* Cache result, perhaps saving an expensive strcoll() call next time */
2009+ tss -> last_returned = result ;
19552010done :
19562011/* We can't afford to leak memory here. */
19572012if (PointerGetDatum (arg1 )!= x )
@@ -2034,9 +2089,19 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
20342089tss -> buf1 = palloc (tss -> buflen1 );
20352090}
20362091
2092+ /* Might be able to reuse strxfrm() blob from last call */
2093+ if (tss -> last_len1 == len &&
2094+ memcmp (tss -> buf1 ,authoritative_data ,len )== 0 )
2095+ {
2096+ memcpy (pres ,tss -> buf2 ,Min (sizeof (Datum ),tss -> last_len2 ));
2097+ /* No change affecting cardinality, so no hashing required */
2098+ gotodone ;
2099+ }
2100+
20372101/* Just like strcoll(), strxfrm() expects a NUL-terminated string */
20382102memcpy (tss -> buf1 ,authoritative_data ,len );
20392103tss -> buf1 [len ]= '\0' ;
2104+ tss -> last_len1 = len ;
20402105
20412106for (;;)
20422107{
@@ -2048,6 +2113,7 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
20482113#endif
20492114bsize = strxfrm (tss -> buf2 ,tss -> buf1 ,tss -> buflen2 );
20502115
2116+ tss -> last_len2 = bsize ;
20512117if (bsize < tss -> buflen2 )
20522118break ;
20532119
@@ -2105,6 +2171,7 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
21052171
21062172addHyperLogLog (& tss -> abbr_card ,hash );
21072173
2174+ done :
21082175/*
21092176 * Byteswap on little-endian machines.
21102177 *