Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 0ae055e

Browse files
authored
Merge pull request #7 from unicode-rs/identifiertype
Support Identifier Type
2 parents f35d6b6 + 6dc688d, commit 0ae055e

File tree

3 files changed

+1611
-50
lines changed

3 files changed

+1611
-50
lines changed

‎scripts/unicode.py‎

Lines changed: 92 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -47,37 +47,39 @@ def fetch(f):
4747
sys.stderr.write("cannot load %s\n"%f)
4848
exit(1)
4949

50-
# load identifier status data
51-
defload_identifier_status():
52-
f="IdentifierStatus.txt"
50+
# Implementation from unicode-segmentation
51+
defload_properties(f,interestingprops=None):
5352
fetch(f)
54-
statuses=[]
55-
re1=re.compile("^([0-9A-F]+)+; +(\w+)")
56-
re2=re.compile("^([0-9A-F]+)\.\.([0-9A-F]+)+; +(\w+)")
53+
props={}
54+
re1=re.compile(r"^ *([0-9A-F]+)*; *(\w+)")
55+
re2=re.compile(r"^ *([0-9A-F]+)\.\.([0-9A-F]+)*; *(\w+)")
5756

58-
forlineinfileinput.input(f):
57+
forlineinfileinput.input(os.path.basename(f)):
58+
prop=None
5959
d_lo=0
6060
d_hi=0
61-
cat=None
6261
m=re1.match(line)
6362
ifm:
6463
d_lo=m.group(1)
6564
d_hi=m.group(1)
66-
cat=m.group(2)
65+
prop=m.group(2).strip()
6766
else:
6867
m=re2.match(line)
6968
ifm:
7069
d_lo=m.group(1)
7170
d_hi=m.group(2)
72-
cat=m.group(3)
71+
prop=m.group(3).strip()
7372
else:
7473
continue
75-
ifcat!="Allowed":
74+
ifinterestingpropsandpropnotininterestingprops:
7675
continue
7776
d_lo=int(d_lo,16)
7877
d_hi=int(d_hi,16)
79-
statuses.append((d_lo,d_hi))
80-
returnstatuses
78+
ifpropnotinprops:
79+
props[prop]= []
80+
props[prop].append((d_lo,d_hi))
81+
82+
returnprops
8183

8284
defformat_table_content(f,content,indent):
8385
line=" "*indent
@@ -115,41 +117,95 @@ def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
115117
format_table_content(f,data,8)
116118
f.write("\n ];\n\n")
117119

118-
defemit_identifier_status_module(f,statuses_table):
119-
f.write("pub modidentifier_status {")
120+
defemit_identifier_module(f):
121+
f.write("pub modidentifier {")
120122
f.write("""
121-
use core::result::Result::{Ok, Err};
122123
124+
#[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd, Debug)]
125+
#[allow(non_camel_case_types)]
126+
/// https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type
127+
pub enum IdentifierType {
128+
// Restricted
129+
Not_Character,
130+
Deprecated,
131+
Default_Ignorable,
132+
Not_NFKC,
133+
Not_XID,
134+
Exclusion,
135+
Obsolete,
136+
Technical,
137+
Uncommon_Use,
138+
Limited_Use,
139+
140+
// Allowed
141+
Inclusion,
142+
Recommended
143+
}
123144
#[inline]
124-
fn bsearch_range_value_table(c: char, r: &'static [(char, char)]) -> bool {
125-
use core::cmp::Ordering::{Equal, Less, Greater};
126-
match r.binary_search_by(|&(lo, hi)| {
127-
if lo <= c && c <= hi { Equal }
128-
else if hi < c { Less }
129-
else { Greater }
130-
}) {
131-
Ok(_) => true,
132-
Err(_) => false
145+
pub fn identifier_status_allowed(c: char) -> bool {
146+
// FIXME: do we want to special case ASCII here?
147+
match c as usize {
148+
_ => super::util::bsearch_range_table(c, IDENTIFIER_STATUS)
133149
}
134150
}
135-
""")
136151
137-
f.write("""
138152
#[inline]
139-
pub fnidentifier_status_allowed(c: char) ->bool {
153+
pub fnidentifier_type(c: char) ->Option<IdentifierType> {
140154
// FIXME: do we want to special case ASCII here?
141155
match c as usize {
142-
_ => bsearch_range_value_table(c,identifier_status_table)
156+
_ =>super::util::bsearch_range_value_table(c,IDENTIFIER_TYPE)
143157
}
144158
}
145-
146159
""")
147160

148-
f.write(" // identifier status table.\n")
149-
emit_table(f,"identifier_status_table",statuses_table,"&'static [(char, char)]",is_pub=False,
161+
f.write(" // Identifier status table:\n")
162+
identifier_status_table=load_properties("IdentifierStatus.txt")
163+
emit_table(f,"IDENTIFIER_STATUS",identifier_status_table['Allowed'],"&'static [(char, char)]",is_pub=False,
150164
pfun=lambdax:"(%s,%s)"% (escape_char(x[0]),escape_char(x[1])))
165+
identifier_type=load_properties("IdentifierType.txt")
166+
type_table= []
167+
fortyinidentifier_type:
168+
type_table.extend([(x,y,ty)for (x,y)inidentifier_type[ty]])
169+
170+
type_table.sort(key=lambdaw:w[0])
171+
172+
emit_table(f,"IDENTIFIER_TYPE",type_table,"&'static [(char, char, IdentifierType)]",is_pub=False,
173+
pfun=lambdax:"(%s,%s, IdentifierType::%s)"% (escape_char(x[0]),escape_char(x[1]),x[2]))
151174
f.write("}\n\n")
152175

176+
defemit_util_mod(f):
177+
f.write("""
178+
pub mod util {
179+
use core::result::Result::{Ok, Err};
180+
#[inline]
181+
pub fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
182+
use core::cmp::Ordering::{Equal, Less, Greater};
183+
r.binary_search_by(|&(lo,hi)| {
184+
if lo <= c && c <= hi { Equal }
185+
else if hi < c { Less }
186+
else { Greater }
187+
}).is_ok()
188+
}
189+
190+
pub fn bsearch_range_value_table<T: Copy>(c: char, r: &'static [(char, char, T)]) -> Option<T> {
191+
use core::cmp::Ordering::{Equal, Less, Greater};
192+
match r.binary_search_by(|&(lo, hi, _)| {
193+
if lo <= c && c <= hi { Equal }
194+
else if hi < c { Less }
195+
else { Greater }
196+
}) {
197+
Ok(idx) => {
198+
let (_, _, cat) = r[idx];
199+
Some(cat)
200+
}
201+
Err(_) => None
202+
}
203+
}
204+
205+
}
206+
207+
""")
208+
153209
if__name__=="__main__":
154210
r="tables.rs"
155211
ifos.path.exists(r):
@@ -164,6 +220,7 @@ def emit_identifier_status_module(f, statuses_table):
164220
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
165221
166222
"""%UNICODE_VERSION)
167-
### identifier status module
168-
identifier_status_table=load_identifier_status()
169-
emit_identifier_status_module(rf,identifier_status_table)
223+
224+
emit_util_mod(rf)
225+
### identifier module
226+
emit_identifier_module(rf)

‎src/general_security_profile.rs‎

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,23 @@
11
//! Utilities for working with the [General Security Profile](https://www.unicode.org/reports/tr39/#General_Security_Profile)
22
//! for identifiers
33
4-
usecrate::tables::identifier_statusas is;
4+
usecrate::tables::identifier;
5+
6+
pubuse identifier::IdentifierType;
57

68
/// Methods for determining characters not restricted from use for identifiers.
79
pubtraitGeneralSecurityProfile{
810
/// Returns whether the character is not restricted from use for identifiers.
911
fnidentifier_allowed(self) ->bool;
12+
13+
/// Returns the [identifier type](https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type)
14+
fnidentifier_type(self) ->Option<IdentifierType>;
1015
}
1116

1217
implGeneralSecurityProfileforchar{
1318
#[inline]
14-
fnidentifier_allowed(self) ->bool{ is::identifier_status_allowed(self)}
15-
}
16-
17-
implGeneralSecurityProfilefor&'_str{
19+
fnidentifier_allowed(self) ->bool{ identifier::identifier_status_allowed(self)}
1820
#[inline]
19-
fnidentifier_allowed(self) ->bool{self.chars().all(is::identifier_status_allowed)}
21+
fnidentifier_type(self) ->Option<IdentifierType>{ identifier::identifier_type(self)}
22+
2023
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp