Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 74c8394

Browse files
authored
Merge pull request #42 from Jules-Bertholet/more-normalization-tests
Add more canonical equivalence tests
2 parents 8092f84 + decf378, commit 74c8394

File tree

5 files changed

+19199
-28
lines changed

5 files changed

+19199
-28
lines changed

‎.github/workflows/rust.yml‎

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ jobs:
1717
build:
1818
runs-on:ubuntu-latest
1919
steps:
20-
-uses:actions/checkout@v3
20+
-uses:actions/checkout@v4
2121
-name:Build
2222
run:cargo build --verbose
2323
-name:Run tests
@@ -28,14 +28,15 @@ jobs:
2828
run:cargo fmt --check
2929
-name:Check clippy
3030
run:cargo clippy --lib --tests
31+
3132
regen:
3233
runs-on:ubuntu-latest
3334
steps:
34-
-uses:actions/checkout@v3
35+
-uses:actions/checkout@v4
3536
-uses:actions/setup-python@v5
3637
with:
3738
python-version:'3.12'
3839
-name:Regen
39-
run:cd scripts && python3 unicode.py
40+
run: rm tests/NormalizationTest.txt && cd scripts && python3 unicode.py
4041
-name:Diff
41-
run:diff src/tables.rs scripts/tables.rs
42+
run: git update-index --refresh && git diff-index --quiet HEAD --

‎Cargo.toml‎

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,9 +31,6 @@ std = { version = "1.0", package = "rustc-std-workspace-std", optional = true }
3131
core = {version ="1.0",package ="rustc-std-workspace-core",optional =true }
3232
compiler_builtins = {version ="0.1",optional =true }
3333

34-
[dev-dependencies]
35-
unicode-normalization ="0.1.23"
36-
3734
[features]
3835
default = []
3936
rustc-dep-of-std = ['std','core','compiler_builtins']

‎scripts/unicode.py‎

Lines changed: 23 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -27,9 +27,13 @@
2727
importos
2828
importre
2929
importsys
30+
importurllib.request
3031
fromcollectionsimportdefaultdict
3132
fromitertoolsimportbatched
3233

34+
UNICODE_VERSION="15.1.0"
35+
"""The version of the Unicode data files to download."""
36+
3337
NUM_CODEPOINTS=0x110000
3438
"""An upper bound for which `range(0, NUM_CODEPOINTS)` contains Unicode's codespace."""
3539

@@ -61,24 +65,28 @@ class OffsetType(enum.IntEnum):
6165
6266
If this is edited, you must ensure that `emit_module` reflects your changes."""
6367

64-
MODULE_FILENAME="tables.rs"
65-
"""The filename of the emitted Rust module (will be created in the working directory)"""
68+
MODULE_PATH="../src/tables.rs"
69+
"""The path of the emitted Rust module (relative to the working directory)"""
6670

6771
Codepoint=int
6872
BitPos=int
6973

7074

71-
deffetch_open(filename:str):
75+
deffetch_open(filename:str,local_prefix:str=""):
7276
"""Opens `filename` and return its corresponding file object. If `filename` isn't on disk,
73-
fetches it from `http://www.unicode.org/Public/UNIDATA/`. Exits with code 1 on failure.
77+
fetches it from `https://www.unicode.org/Public/`. Exits with code 1 on failure.
7478
"""
7579
basename=os.path.basename(filename)
76-
ifnotos.path.exists(basename):
77-
os.system(f"curl -O http://www.unicode.org/Public/UNIDATA/{filename}")
80+
localname=os.path.join(local_prefix,basename)
81+
ifnotos.path.exists(localname):
82+
urllib.request.urlretrieve(
83+
f"https://www.unicode.org/Public/{UNICODE_VERSION}/ucd/{filename}",
84+
localname,
85+
)
7886
try:
79-
returnopen(basename,encoding="utf-8")
87+
returnopen(localname,encoding="utf-8")
8088
exceptOSError:
81-
sys.stderr.write(f"cannot load{basename}")
89+
sys.stderr.write(f"cannot load{localname}")
8290
sys.exit(1)
8391

8492

@@ -637,7 +645,7 @@ def emit_module(
637645
module.write("}\n")
638646

639647

640-
defmain(module_filename:str):
648+
defmain(module_path:str):
641649
"""Obtain character data from the latest version of Unicode, transform it into a multi-level
642650
lookup table for character width, and write a Rust module utilizing that table to
643651
`module_filename`.
@@ -677,6 +685,9 @@ def main(module_filename: str):
677685
emoji_variations=load_variation_sequences()
678686
variation_table=make_variation_sequence_table(emoji_variations,width_map)
679687

688+
# Download normalization test file for use by tests
689+
fetch_open("NormalizationTest.txt","../tests/")
690+
680691
print("------------------------")
681692
total_size=0
682693
fori,tableinenumerate(tables):
@@ -692,9 +703,9 @@ def main(module_filename: str):
692703
print("------------------------")
693704
print(f" Total size:{total_size} bytes")
694705

695-
emit_module(module_filename,version,tables,variation_table)
696-
print(f'Wrote to "{module_filename}"')
706+
emit_module(module_path,version,tables,variation_table)
707+
print(f'Wrote to "{module_path}"')
697708

698709

699710
if__name__=="__main__":
700-
main(MODULE_FILENAME)
711+
main(MODULE_PATH)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp