Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 72d3684

Browse files
committed
Add mixed-script detection
1 parent 03c709c, commit 72d3684

File tree

3 files changed

+124
-0
lines changed

3 files changed

+124
-0
lines changed

‎Cargo.toml‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ according to Unicode Technical Standard #39 rules.
1616
exclude = ["target/*","Cargo.lock" ]
1717

1818
[dependencies]
19+
unicode-script = {git ="https://github.com/unicode-rs/unicode-script",default-features =false }
1920
std = {version ="1.0",package ="rustc-std-workspace-std",optional =true }
2021
core = {version ="1.0",package ="rustc-std-workspace-core",optional =true }
2122
compiler_builtins = {version ="0.1",optional =true }

‎src/lib.rs‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ extern crate test;
5858
use tables::identifier_statusas is;
5959
pubuse tables::UNICODE_VERSION;
6060

61+
pubmod mixed;
62+
6163
mod tables;
6264

6365
#[cfg(test)]

‎src/mixed.rs‎

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
//! [Mixed-script detection](https://www.unicode.org/reports/tr39/#Mixed_Script_Detection)
2+
3+
use unicode_script::{Script,ScriptExtension};
4+
5+
/// An Augmented script set, as defined by UTS 39
6+
///
7+
/// https://www.unicode.org/reports/tr39/#def-augmented-script-set
8+
pubstructAugmentedScriptSet{
9+
/// The base ScriptExtension value
10+
pubbase:ScriptExtension,
11+
/// Han With Bopomofo
12+
pubhanb:bool,
13+
/// Japanese
14+
pubjpan:bool,
15+
/// Korean
16+
pubkore:bool,
17+
}
18+
19+
implFrom<ScriptExtension>forAugmentedScriptSet{
20+
fnfrom(ext:ScriptExtension) ->Self{
21+
letmut hanb =false;
22+
letmut jpan =false;
23+
letmut kore =false;
24+
25+
if ext.contains_script(Script::Han){
26+
hanb =true;
27+
jpan =true;
28+
kore =true;
29+
}else{
30+
if ext.contains_script(Script::Hiragana) || ext.contains_script(Script::Katakana){
31+
jpan =true;
32+
}
33+
34+
if ext.contains_script(Script::Hangul){
35+
kore =true;
36+
}
37+
38+
if ext.contains_script(Script::Bopomofo){
39+
hanb =true;
40+
}
41+
}
42+
Self{
43+
base: ext,
44+
hanb, jpan, kore
45+
}
46+
}
47+
}
48+
49+
implFrom<char>forAugmentedScriptSet{
50+
fnfrom(c:char) ->Self{
51+
AugmentedScriptSet::for_char(c)
52+
}
53+
}
54+
55+
implFrom<&'_str>forAugmentedScriptSet{
56+
fnfrom(s:&'_str) ->Self{
57+
AugmentedScriptSet::for_str(s)
58+
}
59+
}
60+
61+
implDefaultforAugmentedScriptSet{
62+
fndefault() ->Self{
63+
AugmentedScriptSet{
64+
base:ScriptExtension::Single(Script::Common),
65+
hanb:true,
66+
jpan:true,
67+
kore:true,
68+
}
69+
}
70+
}
71+
72+
implAugmentedScriptSet{
73+
/// Intersect this set with another
74+
pubfnintersect(mutself,other:Self) ->Self{
75+
self.base =self.base.intersect(other.base);
76+
self.hanb =self.hanb && other.hanb;
77+
self.jpan =self.jpan && other.jpan;
78+
self.kore =self.kore && other.kore;
79+
self
80+
}
81+
82+
/// Check if the set is empty
83+
pubfnis_empty(&self) ->bool{
84+
self.base.is_empty() && !self.hanb && !self.jpan && !self.kore
85+
}
86+
87+
/// Construct an AugmentedScriptSet for a given character
88+
pubfnfor_char(c:char) ->Self{
89+
ScriptExtension::from(c).into()
90+
}
91+
92+
/// Find the [resolved script set](https://www.unicode.org/reports/tr39/#def-resolved-script-set) of a given string
93+
pubfnfor_str(s:&str) ->Self{
94+
letmut set =AugmentedScriptSet::default();
95+
for chin s.chars(){
96+
set = set.intersect(ch.into())
97+
}
98+
set
99+
}
100+
}
101+
102+
/// Extension trait for [mixed-script detection](https://www.unicode.org/reports/tr39/#Mixed_Script_Detection)
103+
pubtraitUnicodeMixedScript{
104+
/// Check if a string is [single-script](https://www.unicode.org/reports/tr39/#def-single-script)
105+
///
106+
/// Note that a single-script string may still contain multiple Script properties!
107+
fnis_single_script(self) ->bool;
108+
109+
/// Find the [resolved script set](https://www.unicode.org/reports/tr39/#def-resolved-script-set) of a given string
110+
fnresolve_script_set(self) ->AugmentedScriptSet;
111+
}
112+
113+
implUnicodeMixedScriptfor&'_str{
114+
fnis_single_script(self) ->bool{
115+
!AugmentedScriptSet::for_str(self).is_empty()
116+
}
117+
118+
fnresolve_script_set(self) ->AugmentedScriptSet{
119+
self.into()
120+
}
121+
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp