Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit a18eb9c

Browse files
committed
Clean up the interface, add simple tests.
1 parent 741303d, commit a18eb9c

File tree

6 files changed

+107
-197
lines changed

6 files changed

+107
-197
lines changed

‎scripts/unicode.py‎

Lines changed: 20 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -239,7 +239,7 @@ def is_codepoint_identifier_allowed(c, identifier_allowed):
239239
returnTrue
240240
returnFalse
241241

242-
defload_rustc_mixedscript_confusables(f,identifier_allowed,scripts):
242+
defload_potential_mixedscript_confusables(f,identifier_allowed,scripts):
243243
# First, load all confusables data from confusables.txt
244244
confusables=load_confusables(f)
245245

@@ -248,15 +248,6 @@ def load_rustc_mixedscript_confusables(f, identifier_allowed, scripts):
248248
# seen as substitutes to itself. So if the confusables.txt says A -> C, B -> C,
249249
# and implicitly C -> C, it means A <-> B, A <-> C, B <-> C are confusable.
250250

251-
# here we first make a dict that contains all As and Bs whose corresponding C is single code point.
252-
seekup_map= {}
253-
foriteminconfusables:
254-
d_proto_list=item[1]
255-
d_source=item[0]
256-
assert(len(d_proto_list)>0)
257-
iflen(d_proto_list)==1:
258-
seekup_map[escape_char(d_source)]=d_proto_list
259-
260251
# Here we're dividing all confusable lhs and rhs(prototype) operands of the substitution into equivalence classes.
261252
# Principally we'll be using the rhs operands as the representative element of its equivalence classes.
262253
# However some rhs operands are single code point, while some others are not.
@@ -275,9 +266,8 @@ def load_rustc_mixedscript_confusables(f, identifier_allowed, scripts):
275266
ifd_protonotincodepoint_map:
276267
codepoint_map[d_proto]= []
277268
# when we create a new equivalence class, we'll check whether the representative element should be collected.
278-
# i.e. if it is not subject to substituion, and not restricted from identifier usage,
279-
# we collect it into the equivalence class.
280-
ifd_protonotinseekup_mapandis_codepoint_identifier_allowed(d_proto_list[0],identifier_allowed):
269+
# i.e. if it is not restricted from identifier usage, we collect it into the equivalence class.
270+
ifis_codepoint_identifier_allowed(d_proto_list[0],identifier_allowed):
281271
codepoint_map[d_proto].append(d_proto_list[0])
282272
# we collect the original code point to be substituted into this list.
283273
codepoint_map[d_proto].append(d_source)
@@ -562,23 +552,20 @@ def emit_confusable_detection_module(f):
562552
defescape_script_constant(name,longforms):
563553
return"Script::"+longforms[name].strip()
564554

565-
defemit_rustc_mixed_script_confusable_detection(f):
566-
f.write("pub modrustc_mixed_script_confusable_detection {")
555+
defemit_potiential_mixed_script_confusable(f):
556+
f.write("pub modpotential_mixed_script_confusable {")
567557
f.write("""
568-
use unicode_script::Script;
569-
570558
#[inline]
571-
pub fnis_rustc_mixed_script_confusable(c: char) ->Option<Script> {
559+
pub fnpotential_mixed_script_confusable(c: char) ->bool {
572560
match c as usize {
573-
_ => super::util::bsearch_value_table(c, CONFUSABLES)
561+
_ => super::util::bsearch_table(c, CONFUSABLES)
574562
}
575563
}
576-
577564
""")
578565
identifier_status_table=load_properties("IdentifierStatus.txt")
579-
longforms,scripts=load_scripts("Scripts.txt")
566+
_,scripts=load_scripts("Scripts.txt")
580567
identifier_allowed=identifier_status_table['Allowed']
581-
(mixedscript_confusable,mixedscript_confusable_unresolved)=load_rustc_mixedscript_confusables("confusables.txt",identifier_allowed,scripts)
568+
(mixedscript_confusable,mixedscript_confusable_unresolved)=load_potential_mixedscript_confusables("confusables.txt",identifier_allowed,scripts)
582569
debug=False
583570
ifdebug==True:
584571
debug_emit_mixedscript_confusable(f,mixedscript_confusable,"mixedscript_confusable",scripts)
@@ -589,16 +576,21 @@ def emit_rustc_mixed_script_confusable_detection(f):
589576
source=pair[0]
590577
confusable_table.append((source,script))
591578
confusable_table.sort(key=lambdaw:w[0])
592-
emit_table(f,"CONFUSABLES",confusable_table,"&'static [(char, Script)]",is_pub=False,
593-
pfun=lambdax:"(%s,%s)"%(escape_char(x[0]),escape_script_constant(x[1],longforms)))
579+
emit_table(f,"CONFUSABLES",confusable_table,"&'static [char]",is_pub=False,
580+
pfun=lambdax:"%s"%escape_char(x[0]))
594581
f.write("}\n\n")
595582

596583

597584
defemit_util_mod(f):
598585
f.write("""
599586
pub mod util {
600587
use core::result::Result::{Ok, Err};
601-
588+
589+
#[inline]
590+
pub fn bsearch_table(c: char, r: &'static [char]) -> bool {
591+
r.binary_search(&c).is_ok()
592+
}
593+
602594
#[inline]
603595
pub fn bsearch_value_table<T: Copy>(c: char, r: &'static [(char, T)]) -> Option<T> {
604596
match r.binary_search_by_key(&c, |&(k, _)| k) {
@@ -609,7 +601,7 @@ def emit_util_mod(f):
609601
Err(_) => None
610602
}
611603
}
612-
604+
613605
#[inline]
614606
pub fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
615607
use core::cmp::Ordering::{Equal, Less, Greater};
@@ -619,7 +611,7 @@ def emit_util_mod(f):
619611
else { Greater }
620612
}).is_ok()
621613
}
622-
614+
623615
pub fn bsearch_range_value_table<T: Copy>(c: char, r: &'static [(char, char, T)]) -> Option<T> {
624616
use core::cmp::Ordering::{Equal, Less, Greater};
625617
match r.binary_search_by(|&(lo, hi, _)| {
@@ -660,4 +652,4 @@ def emit_util_mod(f):
660652
### confusable_detection module
661653
emit_confusable_detection_module(rf)
662654
### mixed_script_confusable_detection module
663-
emit_rustc_mixed_script_confusable_detection(rf)
655+
emit_potiential_mixed_script_confusable(rf)

‎src/lib.rs‎

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,12 @@ pub mod confusable_detection;
6262
pubmod general_security_profile;
6363
pubmod mixed_script;
6464
pubmod restriction_level;
65-
pubmod rustc_mixed_script_confusable_detection;
6665

6766
pubuse confusable_detection::skeleton;
6867
pubuse general_security_profile::GeneralSecurityProfile;
68+
pubuse mixed_script::is_potential_mixed_script_confusable_char;
6969
pubuse mixed_script::MixedScript;
7070
pubuse restriction_level::{RestrictionLevel,RestrictionLevelDetection};
71-
pubuse rustc_mixed_script_confusable_detection::rustc_mixed_script_confusable_codepoint;
7271

7372
#[rustfmt::skip]
7473
pub(crate)mod tables;

‎src/mixed_script.rs‎

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,3 +130,17 @@ impl MixedScript for &'_ str {
130130
self.into()
131131
}
132132
}
133+
134+
/// Check if a character is considered a potential mixed-script confusable.
135+
///
136+
/// If the specified character is not restricted from use for identifiers,
137+
/// this function returns whether it is considered mixed script confusable
138+
/// with another character that is not restricted from use for identifiers.
139+
///
140+
/// If the specified character is restricted from use for identifiers,
141+
/// the return value is unspecified.
142+
pubfnis_potential_mixed_script_confusable_char(c:char) ->bool{
143+
usecrate::tables::potential_mixed_script_confusable::potential_mixed_script_confusable;
144+
145+
potential_mixed_script_confusable(c)
146+
}

‎src/rustc_mixed_script_confusable_detection.rs‎

Lines changed: 0 additions & 17 deletions
This file was deleted.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp