Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit80f8eb7

Browse files
committed
Use perfect hash for NFC and NFKC Unicode Normalization quick check
This makes the normalization quick check about 30% faster for NFC and50% faster for NFKC than the binary search used previously. The hashlookup reuses the existing array of bit fields used for the binarysearch to get the quick check property and is generated as part of "makeupdate-unicode" in src/common/unicode/.Author: John NaylorReviewed-by: Mark Dilger, Michael PaquierDiscussion:https://postgr.es/m/CACPNZCt4fbJ0_bGrN5QPt34N4whv=mszM0LMVQdoa2rC9UMRXA@mail.gmail.com
1 parent85d08b8 commit80f8eb7

File tree

5 files changed

+1681
-24
lines changed

5 files changed

+1681
-24
lines changed

‎src/common/unicode/generate-unicode_normprops_table.pl

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
use strict;
1010
use warnings;
1111

12+
use FindBin;
13+
use lib"$FindBin::RealBin/../../tools/";
14+
use PerfectHash;
15+
1216
my%data;
1317

1418
print
@@ -18,13 +22,25 @@
1822
#include "common/unicode_norm.h"
1923
2024
/*
21-
* We use a bit field here to save space.
25+
* Normalization quick check entry for codepoint. We use a bit field
26+
* here to save space.
2227
*/
2328
typedef struct
2429
{
2530
unsigned int codepoint:21;
2631
signed intquickcheck:4;/* really UnicodeNormalizationQC */
27-
}pg_unicode_normprops;
32+
} pg_unicode_normprops;
33+
34+
/* Typedef for hash function on quick check table */
35+
typedef int (*qc_hash_func) (const void *key);
36+
37+
/* Information for quick check lookup with perfect hash function */
38+
typedef struct
39+
{
40+
const pg_unicode_normprops *normprops;
41+
qc_hash_funchash;
42+
intnum_normprops;
43+
} pg_unicode_norminfo;
2844
EOS
2945

3046
foreachmy$line (<ARGV>)
@@ -66,6 +82,7 @@
6682
"static const pg_unicode_normprops UnicodeNormProps_${prop}[] = {\n";
6783

6884
my%subdata = %{$data{$prop} };
85+
my@cp_packed;
6986
foreachmy$cp (sort {$a<=>$b }keys%subdata)
7087
{
7188
my$qc;
@@ -82,7 +99,27 @@
8299
die;
83100
}
84101
printf"\t{0x%04X,%s},\n",$cp,$qc;
102+
103+
# Save the bytes as a string in network order.
104+
push@cp_packed,pack('N',$cp);
85105
}
86106

87107
print"};\n";
108+
109+
# Emit the definition of the perfect hash function.
110+
my$funcname =$prop .'_hash_func';
111+
my$f = PerfectHash::generate_hash_function(\@cp_packed,$funcname,
112+
fixed_key_length=> 4);
113+
printf"\n/* Perfect hash function for%s */",$prop;
114+
print"\nstatic$f\n";
115+
116+
# Emit the structure that wraps the hash lookup information into
117+
# one variable.
118+
printf"/* Hash lookup information for%s */",$prop;
119+
printf"\nstatic const pg_unicode_norminfo";
120+
printf"UnicodeNormInfo_%s = {\n",$prop;
121+
printf"\tUnicodeNormProps_%s,\n",$prop;
122+
printf"\t%s,\n",$funcname;
123+
printf"\t%d\n",scalar@cp_packed;
124+
printf"};\n";
88125
}

‎src/common/unicode_norm.c

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -465,15 +465,32 @@ get_canonical_class(pg_wchar ch)
465465
returnentry->comb_class;
466466
}
467467

468-
staticint
469-
qc_compare(constvoid*p1,constvoid*p2)
468+
staticconstpg_unicode_normprops*
469+
qc_hash_lookup(pg_wcharch,constpg_unicode_norminfo*norminfo)
470470
{
471-
uint32v1,
472-
v2;
471+
inth;
472+
uint32hashkey;
473473

474-
v1= ((constpg_unicode_normprops*)p1)->codepoint;
475-
v2= ((constpg_unicode_normprops*)p2)->codepoint;
476-
return (v1-v2);
474+
/*
475+
* Compute the hash function. The hash key is the codepoint with the bytes
476+
* in network order.
477+
*/
478+
hashkey=htonl(ch);
479+
h=norminfo->hash(&hashkey);
480+
481+
/* An out-of-range result implies no match */
482+
if (h<0||h >=norminfo->num_normprops)
483+
returnNULL;
484+
485+
/*
486+
* Since it's a perfect hash, we need only match to the specific codepoint
487+
* it identifies.
488+
*/
489+
if (ch!=norminfo->normprops[h].codepoint)
490+
returnNULL;
491+
492+
/* Success! */
493+
return&norminfo->normprops[h];
477494
}
478495

479496
/*
@@ -482,26 +499,15 @@ qc_compare(const void *p1, const void *p2)
482499
staticUnicodeNormalizationQC
483500
qc_is_allowed(UnicodeNormalizationFormform,pg_wcharch)
484501
{
485-
pg_unicode_normpropskey;
486-
pg_unicode_normprops*found=NULL;
487-
488-
key.codepoint=ch;
502+
constpg_unicode_normprops*found=NULL;
489503

490504
switch (form)
491505
{
492506
caseUNICODE_NFC:
493-
found=bsearch(&key,
494-
UnicodeNormProps_NFC_QC,
495-
lengthof(UnicodeNormProps_NFC_QC),
496-
sizeof(pg_unicode_normprops),
497-
qc_compare);
507+
found=qc_hash_lookup(ch,&UnicodeNormInfo_NFC_QC);
498508
break;
499509
caseUNICODE_NFKC:
500-
found=bsearch(&key,
501-
UnicodeNormProps_NFKC_QC,
502-
lengthof(UnicodeNormProps_NFKC_QC),
503-
sizeof(pg_unicode_normprops),
504-
qc_compare);
510+
found=qc_hash_lookup(ch,&UnicodeNormInfo_NFKC_QC);
505511
break;
506512
default:
507513
Assert(false);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp