Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f35d6b6

Browse files
authored
Merge pull request #6 from unicode-rs/mixed
Add mixed-script detection
2 parents 03c709c + 633ee3e commit f35d6b6

File tree

5 files changed

+166
-21
lines changed

5 files changed

+166
-21
lines changed

‎Cargo.toml‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ according to Unicode Technical Standard #39 rules.
1616
exclude = ["target/*","Cargo.lock" ]
1717

1818
[dependencies]
19+
unicode-script = {git ="https://github.com/unicode-rs/unicode-script",default-features =false }
1920
std = {version ="1.0",package ="rustc-std-workspace-std",optional =true }
2021
core = {version ="1.0",package ="rustc-std-workspace-core",optional =true }
2122
compiler_builtins = {version ="0.1",optional =true }

‎src/general_security_profile.rs‎

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//! Utilities for working with the [General Security Profile](https://www.unicode.org/reports/tr39/#General_Security_Profile)
2+
//! for identifiers
3+
4+
usecrate::tables::identifier_statusas is;
5+
6+
/// Methods for determining characters not restricted from use for identifiers.
pub trait GeneralSecurityProfile {
    /// Returns whether the character is not restricted from use for identifiers.
    ///
    /// For string-like implementors the answer covers the whole value: it is
    /// `true` only if every character in it is allowed.
    fn identifier_allowed(self) -> bool;
}
11+
12+
implGeneralSecurityProfileforchar{
13+
#[inline]
14+
fnidentifier_allowed(self) ->bool{ is::identifier_status_allowed(self)}
15+
}
16+
17+
implGeneralSecurityProfilefor&'_str{
18+
#[inline]
19+
fnidentifier_allowed(self) ->bool{self.chars().all(is::identifier_status_allowed)}
20+
}

‎src/lib.rs‎

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
//! ```rust
1616
//! extern crate unicode_security;
1717
//!
18-
//! use unicode_security::IdentifierStatusChar;
18+
//! use unicode_security::GeneralSecurityProfile;
1919
//!
2020
//! fn main() {
2121
//! let ch = 'µ'; // U+00B5 MICRO SIGN
@@ -55,21 +55,16 @@ extern crate std;
5555
#[cfg(feature ="bench")]
5656
externcrate test;
5757

58-
use tables::identifier_statusas is;
5958
pubuse tables::UNICODE_VERSION;
6059

61-
mod tables;
60+
pubmod mixed_script;
61+
pubmod general_security_profile;
6262

63-
#[cfg(test)]
64-
mod tests;
63+
pubuse mixed_script::MixedScript;
64+
pubuse general_security_profile::GeneralSecurityProfile;
6565

66-
/// Methods for determining characters not restricted from use for identifiers.
67-
pubtraitUnicodeIdentifierStatus{
68-
/// Returns whether the character is not restricted from use for identifiers.
69-
fnidentifier_allowed(self) ->bool;
70-
}
66+
#[rustfmt::skip]
67+
pub(crate)mod tables;
7168

72-
implUnicodeIdentifierStatusforchar{
73-
#[inline]
74-
fnidentifier_allowed(self) ->bool{ is::identifier_status_allowed(self)}
75-
}
69+
#[cfg(test)]
70+
mod tests;

‎src/mixed_script.rs‎

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
//! [Mixed-script detection](https://www.unicode.org/reports/tr39/#Mixed_Script_Detection)
2+
3+
use unicode_script::{Script,ScriptExtension};
4+
5+
/// An Augmented script set, as defined by UTS 39
6+
///
7+
/// https://www.unicode.org/reports/tr39/#def-augmented-script-set
8+
pubstructAugmentedScriptSet{
9+
/// The base ScriptExtension value
10+
pubbase:ScriptExtension,
11+
/// Han With Bopomofo
12+
pubhanb:bool,
13+
/// Japanese
14+
pubjpan:bool,
15+
/// Korean
16+
pubkore:bool,
17+
}
18+
19+
implFrom<ScriptExtension>forAugmentedScriptSet{
20+
fnfrom(ext:ScriptExtension) ->Self{
21+
letmut hanb =false;
22+
letmut jpan =false;
23+
letmut kore =false;
24+
25+
if ext ==ScriptExtension::Single(Script::Common) ||
26+
ext ==ScriptExtension::Single(Script::Inherited) ||
27+
ext.contains_script(Script::Han){
28+
hanb =true;
29+
jpan =true;
30+
kore =true;
31+
}else{
32+
if ext.contains_script(Script::Hiragana) || ext.contains_script(Script::Katakana){
33+
jpan =true;
34+
}
35+
36+
if ext.contains_script(Script::Hangul){
37+
kore =true;
38+
}
39+
40+
if ext.contains_script(Script::Bopomofo){
41+
hanb =true;
42+
}
43+
}
44+
Self{
45+
base: ext,
46+
hanb, jpan, kore
47+
}
48+
}
49+
}
50+
51+
implFrom<char>forAugmentedScriptSet{
52+
fnfrom(c:char) ->Self{
53+
AugmentedScriptSet::for_char(c)
54+
}
55+
}
56+
57+
implFrom<&'_str>forAugmentedScriptSet{
58+
fnfrom(s:&'_str) ->Self{
59+
AugmentedScriptSet::for_str(s)
60+
}
61+
}
62+
63+
implDefaultforAugmentedScriptSet{
64+
fndefault() ->Self{
65+
AugmentedScriptSet{
66+
base:ScriptExtension::Single(Script::Common),
67+
hanb:true,
68+
jpan:true,
69+
kore:true,
70+
}
71+
}
72+
}
73+
74+
implAugmentedScriptSet{
75+
/// Intersect this set with another
76+
pubfnintersect(mutself,other:Self) ->Self{
77+
self.base =self.base.intersect(other.base);
78+
self.hanb =self.hanb && other.hanb;
79+
self.jpan =self.jpan && other.jpan;
80+
self.kore =self.kore && other.kore;
81+
self
82+
}
83+
84+
/// Check if the set is empty
85+
pubfnis_empty(&self) ->bool{
86+
self.base.is_empty() && !self.hanb && !self.jpan && !self.kore
87+
}
88+
89+
/// Check if the set is "All" (Common or Inherited)
90+
pubfnis_all(&self) ->bool{
91+
self.base ==ScriptExtension::Single(Script::Common) ||
92+
self.base ==ScriptExtension::Single(Script::Inherited)
93+
}
94+
95+
/// Construct an AugmentedScriptSet for a given character
96+
pubfnfor_char(c:char) ->Self{
97+
ScriptExtension::from(c).into()
98+
}
99+
100+
/// Find the [resolved script set](https://www.unicode.org/reports/tr39/#def-resolved-script-set) of a given string
101+
pubfnfor_str(s:&str) ->Self{
102+
letmut set =AugmentedScriptSet::default();
103+
for chin s.chars(){
104+
set = set.intersect(ch.into())
105+
}
106+
set
107+
}
108+
}
109+
110+
/// Extension trait for [mixed-script detection](https://www.unicode.org/reports/tr39/#Mixed_Script_Detection)
111+
pubtraitMixedScript{
112+
/// Check if a string is [single-script](https://www.unicode.org/reports/tr39/#def-single-script)
113+
///
114+
/// Note that a single-script string may still contain multiple Script properties!
115+
fnis_single_script(self) ->bool;
116+
117+
/// Find the [resolved script set](https://www.unicode.org/reports/tr39/#def-resolved-script-set) of a given string
118+
fnresolve_script_set(self) ->AugmentedScriptSet;
119+
}
120+
121+
implMixedScriptfor&'_str{
122+
fnis_single_script(self) ->bool{
123+
!AugmentedScriptSet::for_str(self).is_empty()
124+
}
125+
126+
fnresolve_script_set(self) ->AugmentedScriptSet{
127+
self.into()
128+
}
129+
}

‎src/tests.rs‎

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,19 @@
1010

1111
#[test]
1212
fntest_char(){
13-
usesuper::IdentifierStatusChar;
14-
assert_eq!(IdentifierStatusChar::identifier_allowed('A'),true);
13+
usecrate::GeneralSecurityProfile;
14+
assert_eq!(GeneralSecurityProfile::identifier_allowed('A'),true);
1515
assert_eq!('A'.identifier_allowed(),true);
16-
assert_eq!(IdentifierStatusChar::identifier_allowed('0'),true);
16+
assert_eq!(GeneralSecurityProfile::identifier_allowed('0'),true);
1717
assert_eq!('0'.identifier_allowed(),true);
18-
assert_eq!(IdentifierStatusChar::identifier_allowed('_'),true);
18+
assert_eq!(GeneralSecurityProfile::identifier_allowed('_'),true);
1919
assert_eq!('_'.identifier_allowed(),true);
20-
assert_eq!(IdentifierStatusChar::identifier_allowed('\x00'),false);
20+
assert_eq!(GeneralSecurityProfile::identifier_allowed('\x00'),false);
2121
assert_eq!('\x00'.identifier_allowed(),false);
2222
// U+00B5 MICRO SIGN
23-
assert_eq!(IdentifierStatusChar::identifier_allowed('µ'),false);
23+
assert_eq!(GeneralSecurityProfile::identifier_allowed('µ'),false);
2424
assert_eq!('µ'.identifier_allowed(),false);
2525
// U+2160 ROMAN NUMERAL ONE
26-
assert_eq!(IdentifierStatusChar::identifier_allowed('Ⅰ'),false);
26+
assert_eq!(GeneralSecurityProfile::identifier_allowed('Ⅰ'),false);
2727
assert_eq!('Ⅰ'.identifier_allowed(),false);
2828
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp