Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Add safety comments#100

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub?Sign in to your account

Merged
Manishearth merged 1 commit into master from safety-comments
May 7, 2024
Merged
Changes fromall commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions src/normalize.rs
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -71,6 +71,7 @@ where
}

#[inline]
#[allow(unsafe_code)]
fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F)
where
D: Fn(char) -> Option<&'static [char]>,
Expand All@@ -84,7 +85,10 @@ where

// Perform decomposition for Hangul
if is_hangul_syllable(c) {
decompose_hangul(c, emit_char);
// Safety: Hangul Syllables invariant checked by is_hangul_syllable above
unsafe {
decompose_hangul(c, emit_char);
}
return;
}

Expand DownExpand Up@@ -127,27 +131,37 @@ const T_LAST: u32 = T_BASE + T_COUNT - 1;
// i.e. `T_BASE + 1 ..= T_LAST`.
const T_FIRST: u32 = T_BASE + 1;

// Safety-usable invariant: a `true` result guarantees that `c` is a precomposed Hangul syllable,
// i.e. its code point lies in S_BASE..S_BASE + S_COUNT (0xAC00 up to, but excluding, 0xD7A4),
// which is inside the Hangul Syllables block (U+AC00..U+D7AF).
pub(crate) fn is_hangul_syllable(c: char) -> bool {
    // Safety: the half-open range tested here is exactly the interval promised by the invariant.
    let code_point = u32::from(c);
    (S_BASE..S_BASE + S_COUNT).contains(&code_point)
}

// Decompose a precomposed Hangul syllable
#[allow(unsafe_code)]
// Safety: `s` MUST be a valid Hangul Syllable character, between U+AC00..U+D7AF
#[allow(unsafe_code, unused_unsafe)]
#[inline(always)]
fn decompose_hangul<F>(s: char, mut emit_char: F)
unsafe fn decompose_hangul<F>(s: char, mut emit_char: F)
where
F: FnMut(char),
{
// This will be at most 0x2baf, the size of the Hangul Syllables block
let s_index = s as u32 - S_BASE;
// This will be at most 0x2baf / (21 * 28), 19
let l_index = s_index / N_COUNT;
unsafe {
// Safety: L_BASE (0x1100) plus at most 19 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800)
emit_char(char::from_u32_unchecked(L_BASE + l_index));

// Safety: This will be at most (N_COUNT - 1) / T_COUNT = (V*T - 1) / T, which gives us an upper bound of V_COUNT = 21
let v_index = (s_index % N_COUNT) / T_COUNT;
// Safety: V_BASE (0x1161) plus at most 21 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800)
emit_char(char::from_u32_unchecked(V_BASE + v_index));

// Safety: This will be at most T_COUNT - 1 (27)
let t_index = s_index % T_COUNT;
if t_index > 0 {
// Safety: T_BASE (0x11A7) plus at most 27 is still going to be in range for a valid Unicode code point in the BMP (< 0xD800)
emit_char(char::from_u32_unchecked(T_BASE + t_index));
}
}
Expand All@@ -173,14 +187,23 @@ fn compose_hangul(a: char, b: char) -> Option<char> {
match (a, b) {
// Compose a leading consonant and a vowel together into an LV_Syllable
(L_BASE..=L_LAST, V_BASE..=V_LAST) => {
// Safety: based on the above bounds, l_index will be at most L_COUNT - 1 (18)
// and v_index will be at most V_COUNT - 1 (20), given that L_LAST and V_LAST are the inclusive
// last code points (BASE + COUNT - 1, as for T_LAST)
let l_index = a - L_BASE;
let v_index = b - V_BASE;
// Safety: With N_COUNT = 21 * 28 = 588 and T_COUNT = 28, this will be <= 18 * 588 + 20 * 28, which is 11144 (0x2B88).
let lv_index = l_index * N_COUNT + v_index * T_COUNT;
// Safety: This is between 0xAC00 and 0xD788 (S_BASE + 11144), which are in range for Hangul Syllables (U+AC00..U+D7AF) and also in range
// for BMP unicode (below 0xD800, where the surrogates start)
let s = S_BASE + lv_index;
// Safety: We've verified this is in-range
Some(unsafe { char::from_u32_unchecked(s) })
}
// Compose an LV_Syllable and a trailing consonant into an LVT_Syllable
(S_BASE..=S_LAST, T_FIRST..=T_LAST) if (a - S_BASE) % T_COUNT == 0 => {
// Safety: a is between 0xAC00 and (0xAC00 + 19 * 21 * 28). b is in T_FIRST..=T_LAST, so b - T_BASE is between 1 and 27 (T_COUNT - 1).
// Adding a number 1 to 27 to a number that is at largest 0xD7A4 will not go out of bounds to 0xD800 (where the
// surrogates start), so this is safe.
Some(unsafe { char::from_u32_unchecked(a + (b - T_BASE)) })
}
_ => None,
Expand Down

[8]ページ先頭

©2009-2025 Movatter.jp