Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 4d58f18

Browse files
committed
Update grapheme segmentation to Unicode 11
1 parent 504ba99 commit 4d58f18

File tree

2 files changed

+14
-11
lines changed

2 files changed

+14
-11
lines changed

‎src/grapheme.rs‎

Lines changed: 13 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -147,8 +147,8 @@ enum GraphemeState {
147147
// The codepoint after is a Regional Indicator Symbol, so a boundary iff
148148
// it is preceded by an even number of RIS codepoints. (GB12, GB13)
149149
Regional,
150-
// The codepoint after is in the E_Modifier category, so whether it's a boundary
151-
// depends on pre-context according to GB10.
150+
// The codepoint after is Extended_Pictographic,
151+
// so whether it's a boundary depends on pre-context according to GB11.
152152
Emoji,
153153
}
154154

@@ -239,11 +239,7 @@ fn check_pair(before: GraphemeCat, after: GraphemeCat) -> PairResult {
239239
(_,GC_ZWJ) =>NotBreak,// GB9
240240
(_,GC_SpacingMark) =>Extended,// GB9a
241241
(GC_Prepend, _) =>Extended,// GB9b
242-
(GC_E_Base,GC_E_Modifier) =>NotBreak,// GB10
243-
(GC_E_Base_GAZ,GC_E_Modifier) =>NotBreak,// GB10
244-
(GC_Extend,GC_E_Modifier) =>Emoji,// GB10
245-
(GC_ZWJ,GC_Glue_After_Zwj) =>NotBreak,// GB11
246-
(GC_ZWJ,GC_E_Base_GAZ) =>NotBreak,// GB11
242+
(GC_ZWJ,GC_Extended_Pictographic) =>Emoji,// GB11
247243
(GC_Regional_Indicator,GC_Regional_Indicator) =>Regional,// GB12, GB13
248244
(_, _) =>Break,// GB999
249245
}
@@ -415,10 +411,17 @@ impl GraphemeCursor {
415411

416412
fnhandle_emoji(&mutself,chunk:&str,chunk_start:usize){
417413
use tables::graphemeas gr;
418-
for chin chunk.chars().rev(){
414+
letmut iter = chunk.chars().rev();
415+
ifletSome(ch) = iter.next(){
416+
if gr::grapheme_category(ch) != gr::GC_ZWJ{
417+
self.decide(true);
418+
return;
419+
}
420+
}
421+
for chin iter{
419422
match gr::grapheme_category(ch){
420423
gr::GC_Extend =>(),
421-
gr::GC_E_Base | gr::GC_E_Base_GAZ =>{
424+
gr::GC_Extended_Pictographic =>{
422425
self.decide(false);
423426
return;
424427
}
@@ -484,7 +487,7 @@ impl GraphemeCursor {
484487
letmut need_pre_context =true;
485488
matchself.cat_after.unwrap(){
486489
gr::GC_Regional_Indicator =>self.state =GraphemeState::Regional,
487-
gr::GC_E_Modifier =>self.state =GraphemeState::Emoji,
490+
gr::GC_Extended_Pictographic =>self.state =GraphemeState::Emoji,
488491
_ => need_pre_context =self.cat_before.is_none(),
489492
}
490493
if need_pre_context{

‎src/test.rs‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ fn test_graphemes() {
3737
// cartwheel emoji followed by two fitzpatrick skin tone modifiers
3838
// (test case from issue #19)
3939
("\u{1F938}\u{1F3FE}\u{1F3FE}",
40-
&["\u{1F938}\u{1F3FE}","\u{1F3FE}"]),
40+
&["\u{1F938}\u{1F3FE}\u{1F3FE}"]),
4141
];
4242

4343
for&(s, g)inTEST_SAME.iter().chain(EXTRA_SAME){

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp