NotificationsYou must be signed in to change notification settings
Fork32
Star278

Commit74c8394

authored

Merge pull request #42 from Jules-Bertholet/more-normalization-tests

Add more canonical equivalence tests

2 parents 8092f84 + decf378, commit 74c8394 (Copy full SHA for 74c8394)

File tree

5 files changed

+19199

-28

lines changed

.github/workflows
- rust.yml
Cargo.toml
scripts
- unicode.py
tests
- NormalizationTest.txt
- tests.rs

5 files changed

+19199

-28

lines changed

`‎.github/workflows/rust.yml‎`

Lines changed: 5 additions & 4 deletions

Original file line number	Diff line number	Diff line change
`@@ -17,7 +17,7 @@ jobs:`
`17`	`17`	`build:`
`18`	`18`	`runs-on:ubuntu-latest`
`19`	`19`	`steps:`
`20`		`- -uses:actions/checkout@v3`
	`20`	`+ -uses:actions/checkout@v4`
`21`	`21`	`-name:Build`
`22`	`22`	`run:cargo build --verbose`
`23`	`23`	`-name:Run tests`
`@@ -28,14 +28,15 @@ jobs:`
`28`	`28`	`run:cargo fmt --check`
`29`	`29`	`-name:Check clippy`
`30`	`30`	`run:cargo clippy --lib --tests`
	`31`	`+`
`31`	`32`	`regen:`
`32`	`33`	`runs-on:ubuntu-latest`
`33`	`34`	`steps:`
`34`		`- -uses:actions/checkout@v3`
	`35`	`+ -uses:actions/checkout@v4`
`35`	`36`	`-uses:actions/setup-python@v5`
`36`	`37`	`with:`
`37`	`38`	`python-version:'3.12'`
`38`	`39`	`-name:Regen`
`39`		`-run:cd scripts && python3 unicode.py`
	`40`	`+run: rm tests/NormalizationTest.txt && cd scripts && python3 unicode.py`
`40`	`41`	`-name:Diff`
`41`		`-run:diff src/tables.rs scripts/tables.rs`
	`42`	`+run: git update-index --refresh && git diff-index --quiet HEAD --`

`‎Cargo.toml‎`

Lines changed: 0 additions & 3 deletions

Original file line number	Diff line number	Diff line change
`@@ -31,9 +31,6 @@ std = { version = "1.0", package = "rustc-std-workspace-std", optional = true }`
`31`	`31`	`core = {version ="1.0",package ="rustc-std-workspace-core",optional =true }`
`32`	`32`	`compiler_builtins = {version ="0.1",optional =true }`
`33`	`33`
`34`		`-[dev-dependencies]`
`35`		`-unicode-normalization ="0.1.23"`
`36`		`-`
`37`	`34`	`[features]`
`38`	`35`	`default = []`
`39`	`36`	`rustc-dep-of-std = ['std','core','compiler_builtins']`

`‎scripts/unicode.py‎`

Lines changed: 23 additions & 12 deletions

Original file line number	Diff line number	Diff line change
`@@ -27,9 +27,13 @@`
`27`	`27`	`import os`
`28`	`28`	`import re`
`29`	`29`	`import sys`
	`30`	`+import urllib.request`
`30`	`31`	`from collections import defaultdict`
`31`	`32`	`from itertools import batched`
`32`	`33`
	`34`	`+UNICODE_VERSION="15.1.0"`
	`35`	`+"""The version of the Unicode data files to download."""`
	`36`	`+`
`33`	`37`	`NUM_CODEPOINTS=0x110000`
`34`	`38`	"""An upper bound for which `range(0, NUM_CODEPOINTS)` contains Unicode's codespace."""
`35`	`39`
`@@ -61,24 +65,28 @@ class OffsetType(enum.IntEnum):`
`61`	`65`
`62`	`66`	If this is edited, you must ensure that `emit_module` reflects your changes."""
`63`	`67`
`64`		`-MODULE_FILENAME="tables.rs"`
`65`		`-"""The filename of the emitted Rust module (will be created in the working directory)"""`
	`68`	`+MODULE_PATH="../src/tables.rs"`
	`69`	`+"""The path of the emitted Rust module (relative to the working directory)"""`
`66`	`70`
`67`	`71`	`Codepoint=int`
`68`	`72`	`BitPos=int`
`69`	`73`
`70`	`74`
`71`		`-deffetch_open(filename:str):`
	`75`	`+deffetch_open(filename:str,local_prefix:str=""):`
`72`	`76`	"""Opens `filename` and return its corresponding file object. If `filename` isn't on disk,
`73`		- fetches it from `http://www.unicode.org/Public/UNIDATA/`. Exits with code 1 on failure.
	`77`	+ fetches it from `https://www.unicode.org/Public/`. Exits with code 1 on failure.
`74`	`78`	`"""`
`75`	`79`	`basename=os.path.basename(filename)`
`76`		`-ifnotos.path.exists(basename):`
`77`		`-os.system(f"curl -O http://www.unicode.org/Public/UNIDATA/{filename}")`
	`80`	`+localname=os.path.join(local_prefix,basename)`
	`81`	`+ifnotos.path.exists(localname):`
	`82`	`+urllib.request.urlretrieve(`
	`83`	`+f"https://www.unicode.org/Public/{UNICODE_VERSION}/ucd/{filename}",`
	`84`	`+localname,`
	`85`	`+ )`
`78`	`86`	`try:`
`79`		`-returnopen(basename,encoding="utf-8")`
	`87`	`+returnopen(localname,encoding="utf-8")`
`80`	`88`	`exceptOSError:`
`81`		`-sys.stderr.write(f"cannot load{basename}")`
	`89`	`+sys.stderr.write(f"cannot load{localname}")`
`82`	`90`	`sys.exit(1)`
`83`	`91`
`84`	`92`
`@@ -637,7 +645,7 @@ def emit_module(`
`637`	`645`	`module.write("}\n")`
`638`	`646`
`639`	`647`
`640`		`-defmain(module_filename:str):`
	`648`	`+defmain(module_path:str):`
`641`	`649`	`"""Obtain character data from the latest version of Unicode, transform it into a multi-level`
`642`	`650`	`lookup table for character width, and write a Rust module utilizing that table to`
`643`	`651`	`module_filename`.
`@@ -677,6 +685,9 @@ def main(module_filename: str):`
`677`	`685`	`emoji_variations=load_variation_sequences()`
`678`	`686`	`variation_table=make_variation_sequence_table(emoji_variations,width_map)`
`679`	`687`
	`688`	`+# Download normalization test file for use by tests`
	`689`	`+fetch_open("NormalizationTest.txt","../tests/")`
	`690`	`+`
`680`	`691`	`print("------------------------")`
`681`	`692`	`total_size=0`
`682`	`693`	`fori,tableinenumerate(tables):`
`@@ -692,9 +703,9 @@ def main(module_filename: str):`
`692`	`703`	`print("------------------------")`
`693`	`704`	`print(f" Total size:{total_size} bytes")`
`694`	`705`
`695`		`-emit_module(module_filename,version,tables,variation_table)`
`696`		`-print(f'Wrote to "{module_filename}"')`
	`706`	`+emit_module(module_path,version,tables,variation_table)`
	`707`	`+print(f'Wrote to "{module_path}"')`
`697`	`708`
`698`	`709`
`699`	`710`	`if__name__=="__main__":`
`700`		`-main(MODULE_FILENAME)`
	`711`	`+main(MODULE_PATH)`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit74c8394

File tree

5 files changed

5 files changed

`‎.github/workflows/rust.yml‎`

`‎Cargo.toml‎`

`‎scripts/unicode.py‎`

0 commit comments