Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f2d3e4c

Browse files
committed
fix JS regex literal parsing in char classes
1 parent 3931a96 commit f2d3e4c

File tree

3 files changed

+93
-6
lines changed

3 files changed

+93
-6
lines changed

‎compiler/syntax/src/res_scanner.ml‎

Lines changed: 71 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -580,9 +580,53 @@ let scan_regex scanner =
580580
bring_buf_up_to_date~start_offset:last_char_offset;
581581
Buffer.contents buf)
582582
in
583-
letrecscan()=
583+
(* Look ahead from a given absolute offset to see if a valid class closer
584+
exists on the same line.
585+
Semantics:
586+
- Applies BOS rules: an initial '^' does not count as content; the
587+
very first ']' after '[' or after '[^' is treated as literal.
588+
- Skips escaped characters (\\.) while scanning.
589+
- Returns true only if a subsequent unescaped ']' (after some content)
590+
is found before a line break or EOF.*)
591+
lethas_valid_class_closer_ahead~from_offset=
592+
let src= scanner.srcin
593+
let len=String.length srcin
594+
let i=ref (from_offset+1)in
595+
(* start scanning after current '['*)
596+
let bos=reftruein
597+
letrecloop()=
598+
if!i>= lenthenfalse
599+
else
600+
matchString.unsafe_get src!iwith
601+
|'\n'|'\r' ->false
602+
|'\\' ->
603+
if!i+1< lenthen (
604+
i:=!i+2;
605+
loop())
606+
elsefalse
607+
|'^'when!bos ->
608+
incr i;
609+
loop()
610+
|']'when!bos ->
611+
(* Leading ']' is literal content; after that, we're no longer at BOS.*)
612+
bos:=false;
613+
incr i;
614+
loop()
615+
|']' ->true
616+
|_ ->
617+
bos:=false;
618+
incr i;
619+
loop()
620+
in
621+
loop()
622+
in
623+
624+
(* Scan until closing '/' that is not inside a character class. Only enter
625+
character-class mode when a valid ']' is present ahead (same line).
626+
Track beginning-of-class to allow a leading ']' (or leading '^' then ']').*)
627+
letrecscan~in_class~class_at_bos=
584628
match scanner.chwith
585-
|'/' ->
629+
|'/'whennot in_class->
586630
let last_char_offset= scanner.offsetin
587631
next scanner;
588632
let pattern= result~first_char_offset~last_char_offsetin
@@ -606,12 +650,34 @@ let scan_regex scanner =
606650
|'\\' ->
607651
next scanner;
608652
next scanner;
609-
scan()
653+
(* Escapes count as content when inside a class; clear BOS.*)
654+
scan~in_class~class_at_bos:(if in_classthenfalseelse class_at_bos)
655+
|'['whennot in_class ->
656+
(* Only enter a character class if a closing ']' exists ahead on the
657+
same line. Otherwise treat '[' as a normal char.*)
658+
if has_valid_class_closer_ahead~from_offset:scanner.offsetthen (
659+
next scanner;
660+
scan~in_class:true~class_at_bos:true)
661+
else (
662+
next scanner;
663+
scan~in_class~class_at_bos)
664+
|'^'when in_class&& class_at_bos ->
665+
(* Leading caret does not count as content.*)
666+
next scanner;
667+
scan~in_class~class_at_bos:true
668+
|']'when in_class&& class_at_bos ->
669+
(* First ']' after '[' or '[^' is literal, not a closer.*)
670+
next scanner;
671+
scan~in_class~class_at_bos:false
672+
|']'when in_class ->
673+
(* Leave character class.*)
674+
next scanner;
675+
scan~in_class:false~class_at_bos:false
610676
|_ ->
611677
next scanner;
612-
scan()
678+
scan~in_class~class_at_bos:(if in_classthenfalseelse class_at_bos)
613679
in
614-
let pattern, flags= scan()in
680+
let pattern, flags= scan~in_class:false~class_at_bos:falsein
615681
let end_pos= position scannerin
616682
(start_pos, end_pos,Token.Regex (pattern, flags))
617683

‎tests/syntax_tests/data/parsing/grammar/expressions/expected/regex.res.txt‎

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -499,4 +499,12 @@ let re = [%re {js|/^a*?$/|js}]
499499
let re = [%re {js|/^((a)c)?(ab)$/|js}]
500500
let re = [%re {js|/^([ab]*?)(?=(b)?)c/|js}]
501501
let re = [%re {js|/^([ab]*?)(?!(b))c/|js}]
502-
let re = [%re {js|/^([ab]*?)(?<!(a))c/|js}]
502+
let re = [%re {js|/^([ab]*?)(?<!(a))c/|js}]
503+
let re = [%re {js|/\.[^/.]+$/|js}]
504+
let re = [%re {js|/[]/]/|js}]
505+
let re = [%re {js|/[^]]/|js}]
506+
let re = [%re {js|/[/]/|js}]
507+
let re = [%re {js|/[]]/|js}]
508+
let re = [%re {js|/[\]]/|js}]
509+
let re = [%re {js|/[[]]/|js}]
510+
let re = [%re {js|/[^]/]/|js}]

‎tests/syntax_tests/data/parsing/grammar/expressions/regex.res‎

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -607,3 +607,16 @@ let re = /^((a)c)?(ab)$/
607607
letre=/^([ab]*?)(?=(b)?)c/
608608
letre=/^([ab]*?)(?!(b))c/
609609
letre=/^([ab]*?)(?<!(a))c/
610+
611+
letre=/\.[^/.]+$/
612+
613+
// Leading ']' is literal; '/' inside class must not terminate
614+
letre=/[]/]/
615+
letre=/[^]]/
616+
letre=/[/]/
617+
618+
// Additional leading ']' edge cases
619+
letre=/[]]/
620+
letre=/[\]]/
621+
letre=/[[]]/
622+
letre=/[^]/]/

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp