Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings
/rariPublic

fix(fix-flaws): handle UTF-8 characters and HTML entities#395

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
caugner wants to merge 8 commits into main
base:main
Choose a base branch
Loading
fromfix-flaws
Open
Show file tree
Hide file tree
Changes from1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
NextNext commit
fix: correct byte/character inconsistency in column position handling
Fix critical bug where byte offsets and character positions were mixed throughout the codebase, causing incorrect position reporting for content with multi-byte UTF-8 characters (emojis, accented characters, etc.).

Changes:
- Add position_utils module with byte ↔ character conversion functions
- Fix render.rs bug mixing character count with byte offset in end_col
- Convert Issue byte columns to DisplayIssue character columns for output
- Update actual_offset to convert character positions back to bytes
- Improve char boundary checking with proper warnings
- Document all position fields as bytes (internal) or characters (display)
- Verify Comrak uses byte-based sourcepos (1-based)
- Add comprehensive UTF-8 tests with emojis and accented characters

All 153 existing tests pass. The fix ensures correct position handling throughout: tree-sitter/Comrak (bytes) → Issue (bytes) → DisplayIssue (characters) → file operations (bytes).
  • Loading branch information
@caugner
caugner committed Nov 26, 2025
commit ef4b52248cee2d3ba552cdfc0dfbf0c191822855
48 changes: 41 additions & 7 deletions crates/rari-doc/src/issues.rs
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -15,6 +15,7 @@ use tracing_subscriber::Layer;
use tracing_subscriber::registry::LookupSpan;

use crate::pages::page::{Page, PageLike};
use crate::position_utils::byte_to_char_column;

pub static ISSUE_COUNTER_F: OnceLock<fn() -> i64> = OnceLock::new();
static ISSUE_COUNTER: AtomicI64 = AtomicI64::new(0);
Expand All@@ -27,13 +28,21 @@ pub(crate) fn get_issue_counter_f() -> i64 {
ISSUE_COUNTER.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
}

/// Internal representation of an issue detected during build.
///
/// This struct stores position information in **byte offsets** (from tree-sitter and comrak),
/// which are later converted to character positions in `DisplayIssue` for user-facing output.
#[derive(Debug, Clone, Serialize)]
pub struct Issue {
pub req: u64,
pub ic: i64,
/// Column in BYTES from start of line (from tree-sitter or comrak sourcepos)
pub col: i64,
/// Line number (1-based)
pub line: i64,
/// End column in BYTES from start of line
pub end_col: i64,
/// End line number (1-based)
pub end_line: i64,
pub file: String,
pub ignore: bool,
Expand DownExpand Up@@ -226,6 +235,10 @@ pub enum Additional {
None,
}

/// User-facing representation of an issue for display and JSON output.
///
/// This struct stores position information in **character positions** for proper display
/// in editors and user interfaces. The positions are converted from byte offsets in `Issue`.
#[derive(Serialize, Deserialize, Debug, Default, Clone, JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct DisplayIssue {
Expand All@@ -234,9 +247,13 @@ pub struct DisplayIssue {
pub suggestion: Option<String>,
pub fixable: Option<bool>,
pub fixed: bool,
/// Line number (1-based)
pub line: Option<i64>,
/// Column in CHARACTERS from start of line (1-based, user-facing)
pub column: Option<i64>,
/// End line number (1-based)
pub end_line: Option<i64>,
/// End column in CHARACTERS from start of line (1-based, user-facing)
pub end_column: Option<i64>,
pub source_context: Option<String>,
pub filepath: Option<String>,
Expand DownExpand Up@@ -314,22 +331,39 @@ pub type DisplayIssues = BTreeMap<&'static str, Vec<DIssue>>;
impl DIssue {
pub fn from_issue(issue: Issue, page: &Page) -> Option<Self> {
if let Ok(id) = usize::try_from(issue.ic) {
// Convert byte columns to character columns for user-facing display
let (char_col, char_end_col) = if issue.line != 0 && issue.col != 0 {
// Get the line content (adjust for frontmatter offset)
let line_idx =
(issue.line.saturating_sub(1) as usize).saturating_sub(page.fm_offset());
if let Some(line_content) = page.content().lines().nth(line_idx) {
let char_col = byte_to_char_column(line_content, issue.col as usize) as i64 + 1; // +1 for 1-based
let char_end_col = if issue.end_col != 0 {
byte_to_char_column(line_content, issue.end_col as usize) as i64 + 1
} else {
0
};
(char_col, char_end_col)
} else {
// Fallback: if we can't get the line, use byte positions (legacy behavior)
(issue.col, issue.end_col)
}
} else {
(issue.col, issue.end_col)
};

let mut di = DisplayIssue {
id: id as i64,
column: if issue.col == 0 {
None
} else {
Some(issue.col)
},
column: if char_col == 0 { None } else { Some(char_col) },
line: if issue.line == 0 {
None
} else {
Some(issue.line)
},
end_column: if issue.end_col == 0 {
end_column: if char_end_col == 0 {
None
} else {
Some(issue.end_col)
Some(char_end_col)
},
end_line: if issue.end_line == 0 {
None
Expand Down
2 changes: 2 additions & 0 deletions crates/rari-doc/src/lib.rs
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -16,6 +16,7 @@
//! - `html`: Manages HTML rendering and processing.
//! - `pages`: Handles the creation and management of documentation pages.
//! - `percent`: Utilities for percent encodings.
//! - `position_utils`: Utilities for converting between byte offsets and character positions.
//! - `reader`: Defines traits and implementations for reading pages.
//! - `redirects`: Manages URL redirects within the documentation.
//! - `resolve`: Handles path and URL resolution.
Expand DownExpand Up@@ -46,6 +47,7 @@ pub mod html;
pub mod issues;
pub mod pages;
pub mod percent;
pub mod position_utils;
pub mod reader;
pub mod redirects;
pub mod resolve;
Expand Down
Loading

[8]ページ先頭

©2009-2025 Movatter.jp