Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 4347629

Browse files
Don't handle noncharacters differently than other unassigned codepoints
1 parent 0b13808 commit 4347629

File tree

4 files changed

+4
-31
lines changed

4 files changed

+4
-31
lines changed

‎scripts/unicode.py‎

Lines changed: 1 addition & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -106,7 +106,7 @@ def _load_unicode_data(self):
106106

107107
# Characters that cannot be part of a combining character sequence:
108108
# control characters, format characters other than ZWJ and ZWNJ,
109-
# the line and paragraph separators, and noncharacters.
109+
# and the line and paragraph separators.
110110
self.not_in_ccs= []
111111

112112
assigned_start=0;
@@ -147,14 +147,6 @@ def _load_unicode_data(self):
147147

148148
self.general_category_public_assigned.append((assigned_start,prev_char_int))
149149

150-
# Mark noncharacters as nongraphic
151-
for i in range(0xFDD0, 0xFDF0):
152-
self.not_in_ccs.append(i)
153-
for prefix in range(0, 0x11):
154-
shifted=prefix<<16
155-
self.not_in_ccs.append(shifted|0xFFFE)
156-
self.not_in_ccs.append(shifted|0xFFFF)
157-
158150
self.not_in_ccs.sort()
159151

160152
def _load_default_ignorable_marks(self):

‎src/correct_ccs.rs‎

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,8 +41,7 @@ impl CcsKind {
4141
/// [defective combining character sequences](https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#I6.1.36487)
4242
/// by inserting U+00A0 NO-BREAK SPACE in front of them.
4343
///
44-
/// For the purposes of this iterator, private use characters,
45-
/// as well as unassigned codepoints other than noncharacters,
44+
/// For the purposes of this iterator, private use characters and unassigned codepoints
4645
/// are considered valid base characters,
4746
/// so combining character sequences that follow such will not be modified.
4847
///

‎src/lib.rs‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -139,7 +139,7 @@ pub trait UnicodeNormalization<I: Iterator<Item = char>> {
139139
/// with the correct advance width,
140140
/// in diverse contexts (for example, when printed to a terminal).
141141
///
142-
/// Sequences following a private use character or an unassigned codepoint that is not a noncharacter
142+
/// Sequences following a private use character or an unassigned codepoint
143143
/// are not corrected. Additionally, combining character sequences consisting entirely of
144144
/// [default-ignorable code points](https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#I8.1.40715)
145145
/// are also left untouched. Handling this last case may require the iterator

‎src/tables.rs‎

Lines changed: 1 addition & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -20973,33 +20973,15 @@ pub fn not_in_ccs(c: char) -> bool {
2097320973
| '\u{2028}'..='\u{202E}'
2097420974
| '\u{2060}'..='\u{2064}'
2097520975
| '\u{2066}'..='\u{206F}'
20976-
| '\u{FDD0}'..='\u{FDEF}'
2097720976
| '\u{FEFF}'
2097820977
| '\u{FFF9}'..='\u{FFFB}'
20979-
| '\u{FFFE}'..='\u{FFFF}'
2098020978
| '\u{110BD}'
2098120979
| '\u{110CD}'
2098220980
| '\u{13430}'..='\u{1343F}'
2098320981
| '\u{1BCA0}'..='\u{1BCA3}'
2098420982
| '\u{1D173}'..='\u{1D17A}'
20985-
| '\u{1FFFE}'..='\u{1FFFF}'
20986-
| '\u{2FFFE}'..='\u{2FFFF}'
20987-
| '\u{3FFFE}'..='\u{3FFFF}'
20988-
| '\u{4FFFE}'..='\u{4FFFF}'
20989-
| '\u{5FFFE}'..='\u{5FFFF}'
20990-
| '\u{6FFFE}'..='\u{6FFFF}'
20991-
| '\u{7FFFE}'..='\u{7FFFF}'
20992-
| '\u{8FFFE}'..='\u{8FFFF}'
20993-
| '\u{9FFFE}'..='\u{9FFFF}'
20994-
| '\u{AFFFE}'..='\u{AFFFF}'
20995-
| '\u{BFFFE}'..='\u{BFFFF}'
20996-
| '\u{CFFFE}'..='\u{CFFFF}'
20997-
| '\u{DFFFE}'..='\u{DFFFF}'
2099820983
| '\u{E0001}'
20999-
| '\u{E0020}'..='\u{E007F}'
21000-
| '\u{EFFFE}'..='\u{EFFFF}'
21001-
| '\u{FFFFE}'..='\u{FFFFF}'
21002-
| '\u{10FFFE}'..='\u{10FFFF}' => true,
20984+
| '\u{E0020}'..='\u{E007F}' => true,
2100320985
_ => false,
2100420986
}
2100520987
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp