Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitf52636c

Browse files
committed
Improve table search speed through lookups
Prior to this change table search would have to do a binary search overabout 1000 entries which resulted in around 10 memory loads on average.In this commit we reduce the search space by doing a pre-lookup in agenerated table to get a smaller (often zero-length) slice of the fullsorted range list. On average this gives us just one entry of the rangelist to perform binary search on, which reduces the average number ofmemory loads to 2.
1 parent243af2c commitf52636c

File tree

2 files changed

+486
-26
lines changed

2 files changed

+486
-26
lines changed

‎scripts/unicode.py

Lines changed: 76 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -274,13 +274,36 @@ def emit_break_module(f, break_table, break_cats, name):
274274
pub enum %sCat {
275275
"""% (name,Name,Name))
276276

277+
# We don't want the lookup table to be too large so choose a reasonable
278+
# cutoff. 0x20000 is selected because most of the range table entries are
279+
# within the interval of [0x0, 0x20000]
280+
lookup_value_cutoff=0x20000
281+
282+
# Length of lookup table. It has to be a divisor of `lookup_value_cutoff`.
283+
lookup_table_len=0x400
284+
285+
lookup_interval=round(lookup_value_cutoff/lookup_table_len)
286+
287+
# Lookup table is a mapping from `character code / lookup_interval` to
288+
# the index in the range table that covers the `character code`.
289+
lookup_table= [0]*lookup_table_len
290+
j=0
291+
foriinrange(0,lookup_table_len):
292+
lookup_from=i*lookup_interval
293+
whilej<len(break_table):
294+
(_,entry_to,_)=break_table[j]
295+
ifentry_to>=lookup_from:
296+
break
297+
j+=1
298+
lookup_table[i]=j
299+
277300
break_cats.append("Any")
278301
break_cats.sort()
279302
forcatinbreak_cats:
280303
f.write((" %sC_"%Name[0])+cat+",\n")
281304
f.write(""" }
282305
283-
fn bsearch_range_value_table(c: char, r: &'static [(char, char, %sCat)]) -> (u32,u32, %sCat) {
306+
fn bsearch_range_value_table(c: char, r: &'static [(char, char, %sCat)]) -> (Option<u32>, Option<u32>, %sCat) {
284307
use core::cmp::Ordering::{Equal, Less, Greater};
285308
match r.binary_search_by(|&(lo, hi, _)| {
286309
if lo <= c && c <= hi { Equal }
@@ -289,23 +312,70 @@ def emit_break_module(f, break_table, break_cats, name):
289312
}) {
290313
Ok(idx) => {
291314
let (lower, upper, cat) = r[idx];
292-
(lower as u32,upper as u32, cat)
315+
(Some(lower as u32), Some(upper as u32), cat)
293316
}
294317
Err(idx) => {
295318
(
296-
if idx > 0 { r[idx-1].1 as u32 + 1 } else {0 },
297-
r.get(idx).map(|c|c.0 as u32 - 1).unwrap_or(core::u32::MAX),
319+
if idx > 0 {Some(r[idx-1].1 as u32 + 1) } else {None },
320+
r.get(idx).map(|c|c.0 as u32 - 1),
298321
%sC_Any,
299322
)
300323
}
301324
}
302325
}
303326
304327
pub fn %s_category(c: char) -> (u32, u32, %sCat) {
305-
bsearch_range_value_table(c, %s_cat_table)
328+
// Perform a quick O(1) lookup in a precomputed table to determine
329+
// the slice of the range table to search in.
330+
let lookup_interval = 0x%x;
331+
let idx = (c as u32 / lookup_interval) as usize;
332+
let range = %s_cat_lookup.get(idx..(idx + 2)).map_or(
333+
// If the `idx` is outside of the precomputed table - use the slice
334+
// starting from the last covered index in the precomputed table and
335+
// ending with the length of the range table.
336+
%d..%d,
337+
|r| (r[0] as usize)..((r[1] + 1) as usize)
338+
);
339+
let (lower, upper, cat) = bsearch_range_value_table(c, &%s_cat_table[range]);
340+
341+
(
342+
lower.unwrap_or_else(|| {
343+
if idx == 0 {
344+
0
345+
} else {
346+
// Use an entry just before the lookup index to find the lower
347+
// bound for Any category.
348+
let i = *%s_cat_lookup.get(idx - 1).unwrap_or(&%d) as usize;
349+
%s_cat_table[i].1 as u32
350+
}
351+
}),
352+
upper.unwrap_or_else(|| {
353+
%s_cat_lookup.get(idx + 1).map_or_else(|| {
354+
// If idx was outside of the lookup table - upper bound is either
355+
// already found in the ranges table, or has to be a u32::MAX.
356+
core::u32::MAX
357+
}, |&i| {
358+
// Otherwise use an entry just after the lookup index to find the
359+
// lower bound for Any category.
360+
%s_cat_table[i as usize].0 as u32
361+
})
362+
}),
363+
cat
364+
)
306365
}
307366
308-
"""% (Name,Name,Name[0],name,Name,name))
367+
"""% (Name,Name,Name[0],name,Name,lookup_interval,name,j,len(break_table),name,name,j,name,name,name))
368+
369+
iflen(break_table)<=0xff:
370+
lookup_type="u8"
371+
eliflen(break_table)<=0xffff:
372+
lookup_type="u16"
373+
else:
374+
lookup_type="u32"
375+
376+
emit_table(f,"%s_cat_lookup"%name,lookup_table,"&'static [%s]"%lookup_type,
377+
pfun=lambdax:"%d"%x,
378+
is_pub=False,is_const=True)
309379

310380
emit_table(f,"%s_cat_table"%name,break_table,"&'static [(char, char, %sCat)]"%Name,
311381
pfun=lambdax:"(%s,%s,%sC_%s)"% (escape_char(x[0]),escape_char(x[1]),Name[0],x[2]),

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp