Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 79687ca

Browse files
authored
Merge pull request #100 from unicode-rs/safety-comments
Add safety comments
2 parents 71a54fa + a97388a, commit 79687ca

File tree

1 file changed

+26
-3
lines changed

1 file changed

+26
-3
lines changed

‎src/normalize.rs

Lines changed: 26 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,7 @@ where
7171
}
7272

7373
#[inline]
74+
#[allow(unsafe_code)]
7475
fndecompose<D,F>(c:char,decompose_char:D,mutemit_char:F)
7576
where
7677
D:Fn(char) ->Option<&'static[char]>,
@@ -84,7 +85,10 @@ where
8485

8586
// Perform decomposition for Hangul
8687
ifis_hangul_syllable(c){
87-
decompose_hangul(c, emit_char);
88+
// Safety: Hangul Syllables invariant checked by is_hangul_syllable above
89+
unsafe{
90+
decompose_hangul(c, emit_char);
91+
}
8892
return;
8993
}
9094

@@ -127,27 +131,37 @@ const T_LAST: u32 = T_BASE + T_COUNT - 1;
127131
// i.e. `T_BASE + 1 ..= T_LAST`.
128132
constT_FIRST:u32 =T_BASE +1;
129133

134+
// Safety-usable invariant: This ensures that c is a precomposed Hangul syllable (U+AC00..=U+D7A3), which lies inside the Hangul Syllables block (U+AC00..U+D7AF)
130135
pub(crate)fnis_hangul_syllable(c:char) ->bool{
136+
// Safety: This checks the half-open range from 0xAC00 (S_BASE, inclusive) to 0xD7A4 (S_BASE + S_COUNT, exclusive), upholding the safety-usable invariant
131137
(casu32) >=S_BASE &&(casu32) <(S_BASE +S_COUNT)
132138
}
133139

134140
// Decompose a precomposed Hangul syllable
135-
#[allow(unsafe_code)]
141+
// Safety: `s` MUST be a valid Hangul Syllable character, between U+AC00..U+D7AF
142+
#[allow(unsafe_code, unused_unsafe)]
136143
#[inline(always)]
137-
fndecompose_hangul<F>(s:char,mutemit_char:F)
144+
unsafefndecompose_hangul<F>(s:char,mutemit_char:F)
138145
where
139146
F:FnMut(char),
140147
{
148+
// This will be at most 0x2baf, the size of the Hangul Syllables block
141149
let s_index = sasu32 -S_BASE;
150+
// This will be at most 0x2baf / (21 * 28), 19
142151
let l_index = s_index /N_COUNT;
143152
unsafe{
153+
// Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800)
144154
emit_char(char::from_u32_unchecked(L_BASE + l_index));
145155

156+
// Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21
146157
let v_index =(s_index %N_COUNT) /T_COUNT;
158+
// Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800)
147159
emit_char(char::from_u32_unchecked(V_BASE + v_index));
148160

161+
// Safety: This will be at most T_COUNT - 1 (27)
149162
let t_index = s_index %T_COUNT;
150163
if t_index >0{
164+
// Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800)
151165
emit_char(char::from_u32_unchecked(T_BASE + t_index));
152166
}
153167
}
@@ -173,14 +187,23 @@ fn compose_hangul(a: char, b: char) -> Option<char> {
173187
match(a, b){
174188
// Compose a leading consonant and a vowel together into an LV_Syllable
175189
(L_BASE..=L_LAST,V_BASE..=V_LAST) =>{
190+
// Safety: based on the above bounds, l_index will be less than or equal to L_COUNT (19)
191+
// and v_index will be <= V_COUNT (21)
176192
let l_index = a -L_BASE;
177193
let v_index = b -V_BASE;
194+
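// Safety: The match ranges are inclusive, so (assuming L_LAST and V_LAST are defined like T_LAST, i.e. BASE + COUNT - 1)
// l_index <= 18 and v_index <= 20. With N_COUNT = 21 * 28 = 588 and T_COUNT = 28 this will be <= 18 * 588 + 20 * 28, which is 11144 (0x2B88).
// Note the looser bounds of 19 and 21 would NOT be sufficient: 19 * 588 + 21 * 28 = 11760 would land in the surrogate range.
178195
let lv_index = l_index*N_COUNT + v_index*T_COUNT;
196+
// Safety: This is between 0xAC00 and 0xD788, which are in range for Hangul Syllables (U+AC00..U+D7AF) and also in range
197+
// for BMP unicode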
179198
let s =S_BASE + lv_index;
199+
// Safety: We've verified this is in-range
180200
Some(unsafe{ char::from_u32_unchecked(s)})
181201
}
182202
// Compose an LV_Syllable and a trailing consonant into an LVT_Syllable
183203
(S_BASE..=S_LAST,T_FIRST..=T_LAST)if(a -S_BASE) %T_COUNT ==0 =>{
204+
// Safety: a is between 0xAC00 and (0xAC00 + 19 * 21 * 28). b - T_BASE is between 1 and 27 (b is in T_FIRST..=T_LAST).
205+
// Adding a number 1 to 27 to a number that is at largest 0xD7A4 will not go out of bounds to 0xD800 (where the
206+
// surrogates start), so this is safe.
184207
Some(unsafe{ char::from_u32_unchecked(a +(b -T_BASE))})
185208
}
186209
_ =>None,

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp