Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Add mixed-script detection#6

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Manishearth merged 3 commits into master from mixed
Jan 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -16,6 +16,7 @@ according to Unicode Technical Standard #39 rules.
exclude = [ "target/*", "Cargo.lock" ]

[dependencies]
unicode-script = { git = "https://github.com/unicode-rs/unicode-script", default-features = false }
std = { version = "1.0", package = "rustc-std-workspace-std", optional = true }
core = { version = "1.0", package = "rustc-std-workspace-core", optional = true }
compiler_builtins = { version = "0.1", optional = true }
Expand Down
20 changes: 20 additions & 0 deletions src/general_security_profile.rs
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
//! Utilities for working with the [General Security Profile](https://www.unicode.org/reports/tr39/#General_Security_Profile)
//! for identifiers

use crate::tables::identifier_status as is;

/// Methods for determining characters not restricted from use for identifiers.
///
/// Implemented in this module for `char` and for `&str`.
pub trait GeneralSecurityProfile {
/// Returns whether the value is not restricted from use for identifiers.
///
/// For a `char` this checks that single character; for a `&str` it holds
/// only when every character of the string is allowed.
fn identifier_allowed(self) -> bool;
}

impl GeneralSecurityProfile for char {
    /// Looks this character up via the `identifier_status` tables and reports
    /// whether it is allowed in identifiers.
    #[inline]
    fn identifier_allowed(self) -> bool {
        let ch = self;
        is::identifier_status_allowed(ch)
    }
}

impl GeneralSecurityProfile for &'_ str {
    /// Reports whether every character of the string is allowed in identifiers.
    ///
    /// Scanning stops at the first disallowed character. A string with no
    /// characters has nothing to reject, so it is reported as allowed.
    #[inline]
    fn identifier_allowed(self) -> bool {
        for ch in self.chars() {
            if !is::identifier_status_allowed(ch) {
                return false;
            }
        }
        true
    }
}
23 changes: 9 additions & 14 deletions src/lib.rs
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -15,7 +15,7 @@
//! ```rust
//! extern crate unicode_security;
//!
//! use unicode_security::IdentifierStatusChar;
//! use unicode_security::GeneralSecurityProfile;
//!
//! fn main() {
//! let ch = 'µ'; // U+00B5 MICRO SIGN
Expand DownExpand Up@@ -55,21 +55,16 @@ extern crate std;
#[cfg(feature = "bench")]
extern crate test;

use tables::identifier_status as is;
pub use tables::UNICODE_VERSION;

mod tables;
pub mod mixed_script;
pub mod general_security_profile;

#[cfg(test)]
mod tests;
pub use mixed_script::MixedScript;
pub use general_security_profile::GeneralSecurityProfile;

/// Methods for determining characters not restricted from use for identifiers.
pub trait UnicodeIdentifierStatus {
/// Returns whether the character is not restricted from use for identifiers.
fn identifier_allowed(self) -> bool;
}
#[rustfmt::skip]
pub(crate) mod tables;

impl UnicodeIdentifierStatus for char {
#[inline]
fn identifier_allowed(self) -> bool { is::identifier_status_allowed(self) }
}
#[cfg(test)]
mod tests;
129 changes: 129 additions & 0 deletions src/mixed_script.rs
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
//! [Mixed-script detection](https://www.unicode.org/reports/tr39/#Mixed_Script_Detection)

use unicode_script::{Script, ScriptExtension};

/// An augmented script set, as defined by UTS 39.
///
/// Combines a plain `ScriptExtension` with three additional membership flags
/// (`hanb`, `jpan`, `kore`) for the combined East Asian writing systems.
///
/// See <https://www.unicode.org/reports/tr39/#def-augmented-script-set>.
pub struct AugmentedScriptSet {
/// The base `ScriptExtension` value this set was built from.
pub base: ScriptExtension,
/// Whether the set includes Hanb (Han with Bopomofo).
pub hanb: bool,
/// Whether the set includes Jpan (Japanese).
pub jpan: bool,
/// Whether the set includes Kore (Korean).
pub kore: bool,
}

impl From<ScriptExtension> for AugmentedScriptSet {
    /// Augments a `ScriptExtension` with the Hanb / Jpan / Kore flags.
    ///
    /// * Common, Inherited, and anything containing Han set all three flags.
    /// * Otherwise Hiragana or Katakana sets `jpan`, Hangul sets `kore`, and
    ///   Bopomofo sets `hanb`.
    ///
    /// The extension itself is stored unchanged in `base`.
    fn from(ext: ScriptExtension) -> Self {
        // Extensions that belong to every one of the augmented scripts.
        let in_every_cjk_set = ext == ScriptExtension::Single(Script::Common)
            || ext == ScriptExtension::Single(Script::Inherited)
            || ext.contains_script(Script::Han);

        // Short-circuiting on `in_every_cjk_set` means the per-script lookups
        // below only run when the blanket rule above did not apply.
        let jpan = in_every_cjk_set
            || ext.contains_script(Script::Hiragana)
            || ext.contains_script(Script::Katakana);
        let kore = in_every_cjk_set || ext.contains_script(Script::Hangul);
        let hanb = in_every_cjk_set || ext.contains_script(Script::Bopomofo);

        Self {
            base: ext,
            hanb,
            jpan,
            kore,
        }
    }
}

impl From<char> for AugmentedScriptSet {
fn from(c: char) -> Self {
AugmentedScriptSet::for_char(c)
}
}

impl From<&'_ str> for AugmentedScriptSet {
fn from(s: &'_ str) -> Self {
AugmentedScriptSet::for_str(s)
}
}

impl Default for AugmentedScriptSet {
    /// The starting point for intersections: a `Common` base with the
    /// `hanb`, `jpan` and `kore` flags all set.
    fn default() -> Self {
        // Converting `Common` through `From<ScriptExtension>` keeps it as the
        // base and turns on every augmented flag.
        Self::from(ScriptExtension::Single(Script::Common))
    }
}

impl AugmentedScriptSet {
/// Intersect this set with another
pub fn intersect(mut self, other: Self) -> Self {
self.base = self.base.intersect(other.base);
self.hanb = self.hanb && other.hanb;
self.jpan = self.jpan && other.jpan;
self.kore = self.kore && other.kore;
self
}

/// Check if the set is empty
pub fn is_empty(&self) -> bool {
self.base.is_empty() && ! self.hanb && !self.jpan && !self.kore
}

/// Check if the set is "All" (Common or Inherited)
pub fn is_all(&self) -> bool {
self.base == ScriptExtension::Single(Script::Common) ||
self.base == ScriptExtension::Single(Script::Inherited)
}

/// Construct an AugmentedScriptSet for a given character
pub fn for_char(c: char) -> Self {
ScriptExtension::from(c).into()
}

/// Find the [resolved script set](https://www.unicode.org/reports/tr39/#def-resolved-script-set) of a given string
pub fn for_str(s: &str) -> Self {
let mut set = AugmentedScriptSet::default();
for ch in s.chars() {
set = set.intersect(ch.into())
}
set
}
}

/// Extension trait for [mixed-script detection](https://www.unicode.org/reports/tr39/#Mixed_Script_Detection)
///
/// Implemented in this module for `&str`.
pub trait MixedScript {
/// Check if a string is [single-script](https://www.unicode.org/reports/tr39/#def-single-script)
///
/// Note that a single-script string may still contain multiple Script properties!
fn is_single_script(self) -> bool;

/// Find the [resolved script set](https://www.unicode.org/reports/tr39/#def-resolved-script-set) of a given string
///
/// The result is returned as an `AugmentedScriptSet`.
fn resolve_script_set(self) -> AugmentedScriptSet;
}

impl MixedScript for &'_ str {
    /// A string counts as single-script when its resolved script set is
    /// not empty.
    fn is_single_script(self) -> bool {
        let resolved = AugmentedScriptSet::for_str(self);
        !resolved.is_empty()
    }

    /// Resolves the string's script set via the `From<&str>` conversion.
    fn resolve_script_set(self) -> AugmentedScriptSet {
        AugmentedScriptSet::from(self)
    }
}
14 changes: 7 additions & 7 deletions src/tests.rs
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -10,19 +10,19 @@

#[test]
fn test_char() {
use super::IdentifierStatusChar;
assert_eq!(IdentifierStatusChar::identifier_allowed('A'), true);
use crate::GeneralSecurityProfile;
assert_eq!(GeneralSecurityProfile::identifier_allowed('A'), true);
assert_eq!('A'.identifier_allowed(), true);
assert_eq!(IdentifierStatusChar::identifier_allowed('0'), true);
assert_eq!(GeneralSecurityProfile::identifier_allowed('0'), true);
assert_eq!('0'.identifier_allowed(), true);
assert_eq!(IdentifierStatusChar::identifier_allowed('_'), true);
assert_eq!(GeneralSecurityProfile::identifier_allowed('_'), true);
assert_eq!('_'.identifier_allowed(), true);
assert_eq!(IdentifierStatusChar::identifier_allowed('\x00'), false);
assert_eq!(GeneralSecurityProfile::identifier_allowed('\x00'), false);
assert_eq!('\x00'.identifier_allowed(), false);
// U+00B5 MICRO SIGN
assert_eq!(IdentifierStatusChar::identifier_allowed('µ'), false);
assert_eq!(GeneralSecurityProfile::identifier_allowed('µ'), false);
assert_eq!('µ'.identifier_allowed(), false);
// U+2160 ROMAN NUMERAL ONE
assert_eq!(IdentifierStatusChar::identifier_allowed('Ⅰ'), false);
assert_eq!(GeneralSecurityProfile::identifier_allowed('Ⅰ'), false);
assert_eq!('Ⅰ'.identifier_allowed(), false);
}

[8]ページ先頭

©2009-2025 Movatter.jp