Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Support Identifier Type #7

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
crlf0710 merged 3 commits into master from identifiertype
Jan 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 92 additions & 35 deletions scripts/unicode.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up @@ -47,37 +47,39 @@ def fetch(f):
sys.stderr.write("cannot load %s\n" % f)
exit(1)

# load identifier status data
def load_identifier_status():
f = "IdentifierStatus.txt"
# Implementation from unicode-segmentation
def load_properties(f, interestingprops = None):
fetch(f)
statuses =[]
re1 = re.compile("^([0-9A-F]+)+; +(\w+)")
re2 = re.compile("^([0-9A-F]+)\.\.([0-9A-F]+)+; +(\w+)")
props ={}
re1 = re.compile(r"^ *([0-9A-F]+)*; *(\w+)")
re2 = re.compile(r"^ *([0-9A-F]+)\.\.([0-9A-F]+)*; *(\w+)")

for line in fileinput.input(f):
for line in fileinput.input(os.path.basename(f)):
prop = None
d_lo = 0
d_hi = 0
cat = None
m = re1.match(line)
if m:
d_lo = m.group(1)
d_hi = m.group(1)
cat = m.group(2)
prop = m.group(2).strip()
else:
m = re2.match(line)
if m:
d_lo = m.group(1)
d_hi = m.group(2)
cat = m.group(3)
prop = m.group(3).strip()
else:
continue
if cat != "Allowed":
if interestingprops and prop not in interestingprops:
continue
d_lo = int(d_lo, 16)
d_hi = int(d_hi, 16)
statuses.append((d_lo, d_hi))
return statuses
if prop not in props:
props[prop] = []
props[prop].append((d_lo, d_hi))

return props

def format_table_content(f, content, indent):
line = " "*indent
Expand Down Expand Up @@ -115,41 +117,95 @@ def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
format_table_content(f, data, 8)
f.write("\n ];\n\n")

def emit_identifier_status_module(f, statuses_table):
f.write("pub mod identifier_status {")
def emit_identifier_module(f):
f.write("pub mod identifier {")
f.write("""
use core::result::Result::{Ok, Err};

#[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd, Debug)]
#[allow(non_camel_case_types)]
/// https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type
pub enum IdentifierType {
// Restricted
Not_Character,
Deprecated,
Default_Ignorable,
Not_NFKC,
Not_XID,
Exclusion,
Obsolete,
Technical,
Uncommon_Use,
Limited_Use,

// Allowed
Inclusion,
Recommended
}
#[inline]
fn bsearch_range_value_table(c: char, r: &'static [(char, char)]) -> bool {
use core::cmp::Ordering::{Equal, Less, Greater};
match r.binary_search_by(|&(lo, hi)| {
if lo <= c && c <= hi { Equal }
else if hi < c { Less }
else { Greater }
}) {
Ok(_) => true,
Err(_) => false
pub fn identifier_status_allowed(c: char) -> bool {
// FIXME: do we want to special case ASCII here?
match c as usize {
_ => super::util::bsearch_range_table(c, IDENTIFIER_STATUS)
}
}
""")

f.write("""
#[inline]
pub fn identifier_status_allowed(c: char) -> bool {
pub fn identifier_type(c: char) -> Option<IdentifierType> {
// FIXME: do we want to special case ASCII here?
match c as usize {
_ => bsearch_range_value_table(c, identifier_status_table)
_ => super::util::bsearch_range_value_table(c, IDENTIFIER_TYPE)
}
}

""")

f.write(" // identifier status table.\n")
emit_table(f, "identifier_status_table", statuses_table, "&'static [(char, char)]", is_pub=False,
f.write(" // Identifier status table:\n")
identifier_status_table = load_properties("IdentifierStatus.txt")
emit_table(f, "IDENTIFIER_STATUS", identifier_status_table['Allowed'], "&'static [(char, char)]", is_pub=False,
pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1])))
identifier_type = load_properties("IdentifierType.txt")
type_table = []
for ty in identifier_type:
type_table.extend([(x, y, ty) for (x, y) in identifier_type[ty]])

type_table.sort(key=lambda w: w[0])

emit_table(f, "IDENTIFIER_TYPE", type_table, "&'static [(char, char, IdentifierType)]", is_pub=False,
pfun=lambda x: "(%s,%s, IdentifierType::%s)" % (escape_char(x[0]), escape_char(x[1]), x[2]))
f.write("}\n\n")

def emit_util_mod(f):
f.write("""
pub mod util {
use core::result::Result::{Ok, Err};
#[inline]
pub fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
use core::cmp::Ordering::{Equal, Less, Greater};
r.binary_search_by(|&(lo,hi)| {
if lo <= c && c <= hi { Equal }
else if hi < c { Less }
else { Greater }
}).is_ok()
}

pub fn bsearch_range_value_table<T: Copy>(c: char, r: &'static [(char, char, T)]) -> Option<T> {
use core::cmp::Ordering::{Equal, Less, Greater};
match r.binary_search_by(|&(lo, hi, _)| {
if lo <= c && c <= hi { Equal }
else if hi < c { Less }
else { Greater }
}) {
Ok(idx) => {
let (_, _, cat) = r[idx];
Some(cat)
}
Err(_) => None
}
}

}

""")

if __name__ == "__main__":
r = "tables.rs"
if os.path.exists(r):
Expand All @@ -164,6 +220,7 @@ def emit_identifier_status_module(f, statuses_table):
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);

""" % UNICODE_VERSION)
### identifier status module
identifier_status_table = load_identifier_status()
emit_identifier_status_module(rf, identifier_status_table)

emit_util_mod(rf)
### identifier module
emit_identifier_module(rf)
15 changes: 9 additions & 6 deletions src/general_security_profile.rs
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,23 @@
//! Utilities for working with the [General Security Profile](https://www.unicode.org/reports/tr39/#General_Security_Profile)
//! for identifiers

use crate::tables::identifier_status as is;
use crate::tables::identifier;

pub use identifier::IdentifierType;

/// Methods for determining characters not restricted from use for identifiers.
pub trait GeneralSecurityProfile {
/// Returns whether the character is not restricted from use for identifiers.
fn identifier_allowed(self) -> bool;

/// Returns the [identifier type](https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type)
fn identifier_type(self) -> Option<IdentifierType>;
}

impl GeneralSecurityProfile for char {
#[inline]
fn identifier_allowed(self) -> bool { is::identifier_status_allowed(self) }
}

impl GeneralSecurityProfile for &'_ str {
fn identifier_allowed(self) -> bool { identifier::identifier_status_allowed(self) }
#[inline]
fn identifier_allowed(self) -> bool { self.chars().all(is::identifier_status_allowed) }
fn identifier_type(self) -> Option<IdentifierType> { identifier::identifier_type(self) }

}
Loading

[8]ページ先頭

©2009-2025 Movatter.jp