Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitc64d0cd

Browse files
committed
Use perfect hashing, instead of binary search, for keyword lookup.
We've been speculating for a long time that hash-based keyword lookupought to be faster than binary search, but up to now we hadn't founda suitable tool for generating the hash function. Joerg Sonnenbergerprovided the inspiration, and sample code, to show us that rolling ourown generator wasn't a ridiculous idea. Hence, do that.The method used here requires a lookup table of approximately 4 bytesper keyword, but that's less than what we saved in the predecessor commitafb0d07, so it's not a big problem. The time savings is indeedsignificant: preliminary testing suggests that the total time for rawparsing (flex + bison phases) drops by ~20%.Patch by me, but it owes its existence to Joerg Sonnenberger;thanks also to John Naylor for review.Discussion:https://postgr.es/m/20190103163340.GA15803@britannica.bec.de
1 parent5d59a6c commitc64d0cd

File tree

14 files changed

+516
-107
lines changed

14 files changed

+516
-107
lines changed

‎src/common/Makefile

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,11 @@ OBJS_FRONTEND = $(OBJS_COMMON) fe_memutils.o file_utils.o restricted_token.o
6363
OBJS_SHLIB =$(OBJS_FRONTEND:%.o=%_shlib.o)
6464
OBJS_SRV =$(OBJS_COMMON:%.o=%_srv.o)
6565

66+
# where to find gen_keywordlist.pl and subsidiary files
67+
TOOLSDIR =$(top_srcdir)/src/tools
68+
GEN_KEYWORDLIST =$(PERL) -I$(TOOLSDIR)$(TOOLSDIR)/gen_keywordlist.pl
69+
GEN_KEYWORDLIST_DEPS =$(TOOLSDIR)/gen_keywordlist.pl$(TOOLSDIR)/PerfectHash.pm
70+
6671
all: libpgcommon.a libpgcommon_shlib.a libpgcommon_srv.a
6772

6873
distprep: kwlist_d.h
@@ -118,8 +123,8 @@ libpgcommon_srv.a: $(OBJS_SRV)
118123
$(CC)$(CFLAGS)$(subst -DFRONTEND,,$(CPPFLAGS)) -c$< -o$@
119124

120125
# generate SQL keyword lookup table to be included into keywords*.o.
121-
kwlist_d.h:$(top_srcdir)/src/include/parser/kwlist.h$(top_srcdir)/src/tools/gen_keywordlist.pl
122-
$(PERL)$(top_srcdir)/src/tools/gen_keywordlist.pl --extern$<
126+
kwlist_d.h:$(top_srcdir)/src/include/parser/kwlist.h$(GEN_KEYWORDLIST_DEPS)
127+
$(GEN_KEYWORDLIST) --extern$<
123128

124129
# Dependencies of keywords*.o need to be managed explicitly to make sure
125130
# that you don't get broken parsing code, even in a non-enable-depend build.

‎src/common/kwlookup.c

Lines changed: 32 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -35,60 +35,51 @@
3535
* receive a different case-normalization mapping.
3636
*/
3737
int
38-
ScanKeywordLookup(constchar*text,
38+
ScanKeywordLookup(constchar*str,
3939
constScanKeywordList*keywords)
4040
{
41-
intlen,
42-
i;
43-
charword[NAMEDATALEN];
44-
constchar*kw_string;
45-
constuint16*kw_offsets;
46-
constuint16*low;
47-
constuint16*high;
48-
49-
len=strlen(text);
41+
size_tlen;
42+
inth;
43+
constchar*kw;
5044

45+
/*
46+
* Reject immediately if too long to be any keyword. This saves useless
47+
* hashing and downcasing work on long strings.
48+
*/
49+
len=strlen(str);
5150
if (len>keywords->max_kw_len)
52-
return-1;/* too long to be any keyword */
53-
54-
/* We assume all keywords are shorter than NAMEDATALEN. */
55-
Assert(len<NAMEDATALEN);
51+
return-1;
5652

5753
/*
58-
* Apply an ASCII-only downcasing. We must not use tolower() since it may
59-
* produce the wrong translation in some locales (eg, Turkish).
54+
* Compute the hash function. We assume it was generated to produce
55+
* case-insensitive results. Since it's a perfect hash, we need only
56+
* match to the specific keyword it identifies.
6057
*/
61-
for (i=0;i<len;i++)
62-
{
63-
charch=text[i];
58+
h=keywords->hash(str,len);
6459

65-
if (ch >='A'&&ch <='Z')
66-
ch+='a'-'A';
67-
word[i]=ch;
68-
}
69-
word[len]='\0';
60+
/* An out-of-range result implies no match */
61+
if (h<0||h >=keywords->num_keywords)
62+
return-1;
7063

7164
/*
72-
* Now do a binary search using plain strcmp() comparison.
65+
* Compare character-by-character to see if we have a match, applying an
66+
* ASCII-only downcasing to the input characters. We must not use
67+
* tolower() since it may produce the wrong translation in some locales
68+
* (eg, Turkish).
7369
*/
74-
kw_string=keywords->kw_string;
75-
kw_offsets=keywords->kw_offsets;
76-
low=kw_offsets;
77-
high=kw_offsets+ (keywords->num_keywords-1);
78-
while (low <=high)
70+
kw=GetScanKeyword(h,keywords);
71+
while (*str!='\0')
7972
{
80-
constuint16*middle;
81-
intdifference;
73+
charch=*str++;
8274

83-
middle=low+ (high-low) /2;
84-
difference=strcmp(kw_string+*middle,word);
85-
if (difference==0)
86-
returnmiddle-kw_offsets;
87-
elseif (difference<0)
88-
low=middle+1;
89-
else
90-
high=middle-1;
75+
if (ch >='A'&&ch <='Z')
76+
ch+='a'-'A';
77+
if (ch!=*kw++)
78+
return-1;
9179
}
80+
if (*kw!='\0')
81+
return-1;
9282

93-
return-1;
83+
/* Success! */
84+
returnh;
9485
}

‎src/include/common/kwlookup.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
#ifndefKWLOOKUP_H
1515
#defineKWLOOKUP_H
1616

17+
/* Hash function used by ScanKeywordLookup */
18+
typedefint (*ScanKeywordHashFunc) (constvoid*key,size_tkeylen);
19+
1720
/*
1821
* This struct contains the data needed by ScanKeywordLookup to perform a
1922
* search within a set of keywords. The contents are typically generated by
@@ -23,6 +26,7 @@ typedef struct ScanKeywordList
2326
{
2427
constchar*kw_string;/* all keywords in order, separated by \0 */
2528
constuint16*kw_offsets;/* offsets to the start of each keyword */
29+
ScanKeywordHashFunchash;/* perfect hash function for keywords */
2630
intnum_keywords;/* number of keywords */
2731
intmax_kw_len;/* length of longest keyword */
2832
}ScanKeywordList;

‎src/include/parser/kwlist.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,7 @@
2121
/*
2222
* List of keyword (name, token-value, category) entries.
2323
*
24-
* !!WARNING!!: This list must be sorted by ASCII name, because binary
25-
* search is used to locate entries.
24+
* Note: gen_keywordlist.pl requires the entries to appear in ASCII order.
2625
*/
2726

2827
/* name, value, category */

‎src/interfaces/ecpg/preproc/Makefile

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,10 @@ OBJS=preproc.o pgc.o type.o ecpg.o output.o parser.o \
2828
keywords.o c_keywords.o ecpg_keywords.o typename.o descriptor.o variable.o\
2929
$(WIN32RES)
3030

31-
GEN_KEYWORDLIST =$(top_srcdir)/src/tools/gen_keywordlist.pl
31+
# where to find gen_keywordlist.pl and subsidiary files
32+
TOOLSDIR =$(top_srcdir)/src/tools
33+
GEN_KEYWORDLIST =$(PERL) -I$(TOOLSDIR)$(TOOLSDIR)/gen_keywordlist.pl
34+
GEN_KEYWORDLIST_DEPS =$(TOOLSDIR)/gen_keywordlist.pl$(TOOLSDIR)/PerfectHash.pm
3235

3336
# Suppress parallel build to avoid a bug in GNU make 3.82
3437
# (see comments in ../Makefile)
@@ -56,11 +59,11 @@ preproc.y: ../../../backend/parser/gram.y parse.pl ecpg.addons ecpg.header ecpg.
5659
$(PERL)$(srcdir)/check_rules.pl$(srcdir)$<
5760

5861
# generate keyword headers
59-
c_kwlist_d.h: c_kwlist.h$(GEN_KEYWORDLIST)
60-
$(PERL)$(GEN_KEYWORDLIST) --varname ScanCKeywords$<
62+
c_kwlist_d.h: c_kwlist.h$(GEN_KEYWORDLIST_DEPS)
63+
$(GEN_KEYWORDLIST) --varname ScanCKeywords --no-case-fold$<
6164

62-
ecpg_kwlist_d.h: ecpg_kwlist.h$(GEN_KEYWORDLIST)
63-
$(PERL)$(GEN_KEYWORDLIST) --varname ScanECPGKeywords$<
65+
ecpg_kwlist_d.h: ecpg_kwlist.h$(GEN_KEYWORDLIST_DEPS)
66+
$(GEN_KEYWORDLIST) --varname ScanECPGKeywords$<
6467

6568
# Force these dependencies to be known even without dependency info built:
6669
ecpg_keywords.oc_keywords.okeywords.opreproc.opgc.oparser.o: preproc.h

‎src/interfaces/ecpg/preproc/c_keywords.c

Lines changed: 24 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99
*/
1010
#include"postgres_fe.h"
1111

12-
#include<ctype.h>
13-
1412
#include"preproc_extern.h"
1513
#include"preproc.h"
1614

@@ -32,39 +30,38 @@ static const uint16 ScanCKeywordTokens[] = {
3230
*
3331
* Returns the token value of the keyword, or -1 if no match.
3432
*
35-
* Do abinary search using plain strcmp() comparison. This is much like
33+
* Do ahash search using plain strcmp() comparison. This is much like
3634
* ScanKeywordLookup(), except we want case-sensitive matching.
3735
*/
3836
int
39-
ScanCKeywordLookup(constchar*text)
37+
ScanCKeywordLookup(constchar*str)
4038
{
41-
constchar*kw_string;
42-
constuint16*kw_offsets;
43-
constuint16*low;
44-
constuint16*high;
39+
size_tlen;
40+
inth;
41+
constchar*kw;
42+
43+
/*
44+
* Reject immediately if too long to be any keyword. This saves useless
45+
* hashing work on long strings.
46+
*/
47+
len=strlen(str);
48+
if (len>ScanCKeywords.max_kw_len)
49+
return-1;
4550

46-
if (strlen(text)>ScanCKeywords.max_kw_len)
47-
return-1;/* too long to be any keyword */
51+
/*
52+
* Compute the hash function. Since it's a perfect hash, we need only
53+
* match to the specific keyword it identifies.
54+
*/
55+
h=ScanCKeywords_hash_func(str,len);
4856

49-
kw_string=ScanCKeywords.kw_string;
50-
kw_offsets=ScanCKeywords.kw_offsets;
51-
low=kw_offsets;
52-
high=kw_offsets+ (ScanCKeywords.num_keywords-1);
57+
/* An out-of-range result implies no match */
58+
if (h<0||h >=ScanCKeywords.num_keywords)
59+
return-1;
5360

54-
while (low <=high)
55-
{
56-
constuint16*middle;
57-
intdifference;
61+
kw=GetScanKeyword(h,&ScanCKeywords);
5862

59-
middle=low+ (high-low) /2;
60-
difference=strcmp(kw_string+*middle,text);
61-
if (difference==0)
62-
returnScanCKeywordTokens[middle-kw_offsets];
63-
elseif (difference<0)
64-
low=middle+1;
65-
else
66-
high=middle-1;
67-
}
63+
if (strcmp(kw,str)==0)
64+
returnScanCKeywordTokens[h];
6865

6966
return-1;
7067
}

‎src/interfaces/ecpg/preproc/c_kwlist.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@
2020
/*
2121
* List of (keyword-name, keyword-token-value) pairs.
2222
*
23-
* !!WARNING!!: This list must be sorted by ASCII name, because binary
24-
* search is used to locate entries.
23+
* Note: gen_keywordlist.pl requires the entries to appear in ASCII order.
2524
*/
2625

2726
/* name, value */

‎src/interfaces/ecpg/preproc/ecpg_kwlist.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@
2020
/*
2121
* List of (keyword-name, keyword-token-value) pairs.
2222
*
23-
* !!WARNING!!: This list must be sorted by ASCII name, because binary
24-
* search is used to locate entries.
23+
* Note: gen_keywordlist.pl requires the entries to appear in ASCII order.
2524
*/
2625

2726
/* name, value */

‎src/pl/plpgsql/src/Makefile

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,10 @@ REGRESS_OPTS = --dbname=$(PL_TESTDB)
2929
REGRESS = plpgsql_call plpgsql_control plpgsql_domain plpgsql_record\
3030
plpgsql_cache plpgsql_transaction plpgsql_trigger plpgsql_varprops
3131

32-
GEN_KEYWORDLIST =$(top_srcdir)/src/tools/gen_keywordlist.pl
32+
# where to find gen_keywordlist.pl and subsidiary files
33+
TOOLSDIR =$(top_srcdir)/src/tools
34+
GEN_KEYWORDLIST =$(PERL) -I$(TOOLSDIR)$(TOOLSDIR)/gen_keywordlist.pl
35+
GEN_KEYWORDLIST_DEPS =$(TOOLSDIR)/gen_keywordlist.pl$(TOOLSDIR)/PerfectHash.pm
3336

3437
all: all-lib
3538

@@ -76,11 +79,11 @@ plerrcodes.h: $(top_srcdir)/src/backend/utils/errcodes.txt generate-plerrcodes.p
7679
$(PERL)$(srcdir)/generate-plerrcodes.pl$<>$@
7780

7881
# generate keyword headers for the scanner
79-
pl_reserved_kwlist_d.h: pl_reserved_kwlist.h$(GEN_KEYWORDLIST)
80-
$(PERL)$(GEN_KEYWORDLIST) --varname ReservedPLKeywords$<
82+
pl_reserved_kwlist_d.h: pl_reserved_kwlist.h$(GEN_KEYWORDLIST_DEPS)
83+
$(GEN_KEYWORDLIST) --varname ReservedPLKeywords$<
8184

82-
pl_unreserved_kwlist_d.h: pl_unreserved_kwlist.h$(GEN_KEYWORDLIST)
83-
$(PERL)$(GEN_KEYWORDLIST) --varname UnreservedPLKeywords$<
85+
pl_unreserved_kwlist_d.h: pl_unreserved_kwlist.h$(GEN_KEYWORDLIST_DEPS)
86+
$(GEN_KEYWORDLIST) --varname UnreservedPLKeywords$<
8487

8588

8689
check: submake

‎src/pl/plpgsql/src/pl_reserved_kwlist.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,9 @@
2020
/*
2121
* List of (keyword-name, keyword-token-value) pairs.
2222
*
23-
* Be careful not to put the same wordin both lists.
23+
* Be careful not to put the same wordinto pl_unreserved_kwlist.h.
2424
*
25-
* !!WARNING!!: This list must be sorted by ASCII name, because binary
26-
* search is used to locate entries.
25+
* Note: gen_keywordlist.pl requires the entries to appear in ASCII order.
2726
*/
2827

2928
/* name, value */

‎src/pl/plpgsql/src/pl_unreserved_kwlist.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,10 @@
2020
/*
2121
* List of (keyword-name, keyword-token-value) pairs.
2222
*
23-
* Be careful not to put the same wordin both lists. Also be sure that
24-
* pl_gram.y's unreserved_keyword production agrees with this list.
23+
* Be careful not to put the same wordinto pl_reserved_kwlist.h. Also be
24+
*sure thatpl_gram.y's unreserved_keyword production agrees with this list.
2525
*
26-
* !!WARNING!!: This list must be sorted by ASCII name, because binary
27-
* search is used to locate entries.
26+
* Note: gen_keywordlist.pl requires the entries to appear in ASCII order.
2827
*/
2928

3029
/* name, value */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp