Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Improve table search speed through lookups#112

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to ourterms of service andprivacy statement. We’ll occasionally send you account related emails.

Already on GitHub?Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes fromall commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 57 additions & 5 deletionsscripts/unicode.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -274,13 +274,36 @@ def emit_break_module(f, break_table, break_cats, name):
pub enum %sCat {
""" % (name, Name, Name))

# We don't want the lookup table to be too large so choose a reasonable
# cutoff. 0x20000 is selected because most of the range table entries are
# within the interval of [0x0, 0x20000]
lookup_value_cutoff = 0x20000

# Length of lookup table. It has to be a divisor of `lookup_value_cutoff`.
lookup_table_len = 0x400

lookup_interval = round(lookup_value_cutoff / lookup_table_len)

# Lookup table is a mapping from `character code / lookup_interval` to
# the index in the range table that covers the `character code`.
lookup_table = [0] * lookup_table_len
j = 0
for i in range(0, lookup_table_len):
lookup_from = i * lookup_interval
while j < len(break_table):
(_, entry_to, _) = break_table[j]
if entry_to >= lookup_from:
break
j += 1
lookup_table[i] = j

break_cats.append("Any")
break_cats.sort()
for cat in break_cats:
f.write((" %sC_" % Name[0]) + cat + ",\n")
f.write(""" }

fn bsearch_range_value_table(c: char, r: &'static [(char, char, %sCat)]) -> (u32, u32, %sCat) {
fn bsearch_range_value_table(c: char, r: &'static [(char, char, %sCat)], default_lower: u32, default_upper: u32) -> (u32, u32, %sCat) {
use core::cmp::Ordering::{Equal, Less, Greater};
match r.binary_search_by(|&(lo, hi, _)| {
if lo <= c && c <= hi { Equal }
Expand All@@ -293,19 +316,48 @@ def emit_break_module(f, break_table, break_cats, name):
}
Err(idx) => {
(
if idx > 0 { r[idx-1].1 as u32 + 1 } else {0 },
r.get(idx).map(|c|c.0 as u32 - 1).unwrap_or(core::u32::MAX),
if idx > 0 { r[idx-1].1 as u32 + 1 } else {default_lower },
r.get(idx).map(|c|c.0 as u32 - 1).unwrap_or(default_upper),
%sC_Any,
)
}
}
}

pub fn %s_category(c: char) -> (u32, u32, %sCat) {
bsearch_range_value_table(c, %s_cat_table)
// Perform a quick O(1) lookup in a precomputed table to determine
// the slice of the range table to search in.
let lookup_interval = 0x%x;
let idx = (c as u32 / lookup_interval) as usize;
let range = %s_cat_lookup.get(idx..(idx + 2)).map_or(
// If the `idx` is outside of the precomputed table - use the slice
// starting from the last covered index in the precomputed table and
// ending with the length of the range table.
%d..%d,
|r| (r[0] as usize)..((r[1] + 1) as usize)
);

// Compute pessimistic default lower and upper bounds on the category.
// If character doesn't map to any range and there is no adjacent range
// in the table slice - these bounds has to apply.
let lower = idx as u32 * lookup_interval;
let upper = lower + lookup_interval - 1;
bsearch_range_value_table(c, &%s_cat_table[range], lower, upper)
}

""" % (Name, Name, Name[0], name, Name, name))
""" % (Name, Name, Name[0], name, Name, lookup_interval, name, j, len(break_table), name))


if len(break_table) <= 0xff:
lookup_type = "u8"
elif len(break_table) <= 0xffff:
lookup_type = "u16"
else:
lookup_type = "u32"

emit_table(f, "%s_cat_lookup" % name, lookup_table, "&'static [%s]" % lookup_type,
pfun=lambda x: "%d" % x,
is_pub=False, is_const=True)

emit_table(f, "%s_cat_table" % name, break_table, "&'static [(char, char, %sCat)]" % Name,
pfun=lambda x: "(%s,%s,%sC_%s)" % (escape_char(x[0]), escape_char(x[1]), Name[0], x[2]),
Expand Down
Loading

[8]ページ先頭

©2009-2025 Movatter.jp