Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Treat emoji presentation sequences as fullwidth#35

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Jules-Bertholet wants to merge 13 commits into unicode-rs:master from Jules-Bertholet:emoji-presentation
Closed
Show file tree
Hide file tree
Changes fromall commits
Commits
Show all changes
13 commits
Select commitHold shift + click to select a range
130f3fd
Treat emoji presentation sequences as fullwidth
Jules-BertholetFeb 10, 2024
6bd8215
emoji presentation: store single codepoints instead of ranges
Jules-BertholetFeb 14, 2024
a4d25a9
Use a better datastructure
Jules-BertholetFeb 14, 2024
51a8417
Document exact width rules
Jules-BertholetFeb 14, 2024
5d8bc25
Add more CI checks
Jules-BertholetFeb 14, 2024
6beb76f
Add emoji benchmark
Jules-BertholetFeb 16, 2024
ad55481
Address review comments
Jules-BertholetMar 5, 2024
4f80b57
Use `match` instead of array for first level of tree
Jules-BertholetMar 5, 2024
d944bdd
Spuriously treat certain always-wide characters as eligible for emoji…
Jules-BertholetMar 5, 2024
a8b2fab
Align `EMOJI_PRESENTATION_LEAVES` to 128 bytes
Jules-BertholetMar 5, 2024
a5066aa
Convert tests into integration tests
Jules-BertholetApr 22, 2024
5e8bf9b
Update docs to mention `Grapheme_Extend`
Jules-BertholetApr 22, 2024
46a6067
Update unicode.py comment to match new rules
Jules-BertholetApr 22, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .github/workflows/rust.yml
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -7,7 +7,11 @@ on:
branches: [ "master" ]

env:
CARGO_INCREMENTAL: 0
CARGO_TERM_COLOR: always
RUST_BACKTRACE: 1
RUSTFLAGS: -D warnings
RUSTDOCFLAGS: -D warnings

jobs:
build:
Expand All@@ -18,10 +22,19 @@ jobs:
run: cargo build --verbose
- name: Run tests
run: cargo test --verbose
- name: Build docs
run: cargo doc
- name: Check formatting
run: cargo fmt --check
- name: Check clippy
run: cargo clippy --lib --tests
regen:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Regen
run: cd scripts && python3 unicode.py
- name: Diff
Expand Down
1 change: 1 addition & 0 deletions .gitignore
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -3,3 +3,4 @@ Cargo.lock
scripts/tmp
scripts/*.txt
scripts/*.rs
bench_data/*
10 changes: 6 additions & 4 deletions Cargo.toml
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -2,20 +2,23 @@

name = "unicode-width"
version = "0.1.11"
authors = ["kwantam <kwantam@gmail.com>", "Manish Goregaokar <manishsmail@gmail.com>"]

authors = [
"kwantam <kwantam@gmail.com>",
"Manish Goregaokar <manishsmail@gmail.com>",
]
homepage = "https://github.com/unicode-rs/unicode-width"
repository = "https://github.com/unicode-rs/unicode-width"
documentation = "https://unicode-rs.github.io/unicode-width"
license = "MIT/Apache-2.0"
keywords = ["text", "width", "unicode"]
readme = "README.md"
edition = "2021"
description = """
Determine displayed width of `char` and `str` types
according to Unicode Standard Annex #11 rules.
"""

exclude = ["target/*", "Cargo.lock"]
exclude = ["target/*", "Cargo.lock"]

[dependencies]
std = { version = "1.0", package = "rustc-std-workspace-std", optional = true }
Expand All@@ -27,7 +30,6 @@ unicode-normalization = "0.1.23"

[features]
default = []
bench = []
rustc-dep-of-std = ['std', 'core', 'compiler_builtins']

# Legacy, now a no-op
Expand Down
113 changes: 113 additions & 0 deletions benches/benches.rs
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![feature(test)]

extern crate test;

use std::iter;

use test::Bencher;

use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};

/// Benchmarks `UnicodeWidthChar::width`, invoked with fully qualified trait syntax,
/// over a string of 4096 ASCII `'a'` characters.
///
/// Each per-character result is passed through `test::black_box` so the call
/// cannot be optimized away.
#[bench]
fn cargo(b: &mut Bencher) {
    let ascii: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in ascii.chars() {
            let width = UnicodeWidthChar::width(ch);
            test::black_box(width);
        }
    });
}

/// Benchmarks the `width` method called with method syntax (`c.width()`) over a
/// string of 4096 ASCII `'a'` characters.
///
/// Each per-character result is passed through `test::black_box` so the call
/// cannot be optimized away.
#[bench]
fn stdlib(b: &mut Bencher) {
    let ascii: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in ascii.chars() {
            let width = ch.width();
            test::black_box(width);
        }
    });
}

/// Benchmarks `simple_width_if` (the `if`-based ASCII fast path) over a string
/// of 4096 ASCII `'a'` characters.
///
/// Each per-character result is passed through `test::black_box` so the call
/// cannot be optimized away.
#[bench]
fn simple_if(b: &mut Bencher) {
    let ascii: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in ascii.chars() {
            let width = simple_width_if(ch);
            test::black_box(width);
        }
    });
}

/// Benchmarks `simple_width_match` (the `match`-based ASCII fast path) over a
/// string of 4096 ASCII `'a'` characters.
///
/// Each per-character result is passed through `test::black_box` so the call
/// cannot be optimized away.
#[bench]
fn simple_match(b: &mut Bencher) {
    let ascii: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in ascii.chars() {
            let width = simple_width_match(ch);
            test::black_box(width);
        }
    });
}

/// Width lookup with a hand-written ASCII fast path expressed as nested `if`s.
///
/// Returns `Some(1)` for code points 32..=126, `Some(0)` for NUL (0), and `None`
/// for the remaining code points below 127 (1..=31). Everything from 127 upward
/// is delegated to `UnicodeWidthChar::width`.
#[inline]
fn simple_width_if(c: char) -> Option<usize> {
let cu = c as u32;
// Fast path: code points below 127 are answered without the general lookup.
if cu < 127 {
if cu > 31 {
// 32..=126
Some(1)
} else if cu == 0 {
// NUL
Some(0)
} else {
// 1..=31
None
}
} else {
// 127 and above: defer to the general implementation.
UnicodeWidthChar::width(c)
}
}

/// Width lookup with a hand-written ASCII fast path expressed as a guarded `match`.
///
/// Returns `Some(0)` for NUL (0), `None` for code points 0x01..=0x1f, and
/// `Some(1)` for code points 0x20..=0x7e. Everything from 0x7f upward is
/// delegated to `UnicodeWidthChar::width`.
#[inline]
fn simple_width_match(c: char) -> Option<usize> {
// Arms are tried in order, so each guard only sees values the earlier arms rejected.
match c as u32 {
cu if cu == 0 => Some(0),
cu if cu < 0x20 => None,
cu if cu < 0x7f => Some(1),
// 0x7f and above: defer to the general implementation.
_ => UnicodeWidthChar::width(c),
}
}

/// Benchmarks `UnicodeWidthStr::width` over the contents of `bench_data/enwik8`.
///
/// To benchmark, download & unzip `enwik8` from <https://data.deepai.org/enwik8.zip>.
/// If the file cannot be read as a UTF-8 string, the benchmark runs on an empty
/// string instead of failing.
#[bench]
fn enwik8(b: &mut Bencher) {
    let corpus = std::fs::read_to_string("bench_data/enwik8").unwrap_or_default();

    b.iter(|| {
        let width = UnicodeWidthStr::width(corpus.as_str());
        test::black_box(width)
    });
}

/// Benchmarks `UnicodeWidthStr::width` over the contents of
/// `bench_data/jawiki-20240201-pages-articles-multistream-index.txt`.
///
/// To benchmark, download & extract
/// `jawiki-20240201-pages-articles-multistream-index.txt` from
/// <https://dumps.wikimedia.org/jawiki/20240201/jawiki-20240201-pages-articles-multistream-index.txt.bz2>.
/// If the file cannot be read as a UTF-8 string, the benchmark runs on an empty
/// string instead of failing.
#[bench]
fn jawiki(b: &mut Bencher) {
    let corpus =
        std::fs::read_to_string("bench_data/jawiki-20240201-pages-articles-multistream-index.txt")
            .unwrap_or_default();

    b.iter(|| {
        let width = UnicodeWidthStr::width(corpus.as_str());
        test::black_box(width)
    });
}

/// Benchmarks `UnicodeWidthStr::width` over the contents of
/// `bench_data/emoji-style.txt`.
///
/// To benchmark, download emoji-style.txt from
/// <https://www.unicode.org/emoji/charts/emoji-style.txt>.
/// If the file cannot be read as a UTF-8 string, the benchmark runs on an empty
/// string instead of failing.
#[bench]
fn emoji(b: &mut Bencher) {
    let corpus = std::fs::read_to_string("bench_data/emoji-style.txt").unwrap_or_default();

    b.iter(|| {
        let width = UnicodeWidthStr::width(corpus.as_str());
        test::black_box(width)
    });
}
Loading

[8]ページ先頭

©2009-2025 Movatter.jp