Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit d4aa857

Browse files
authored
gh-102856: Clean some of the PEP 701 tokenizer implementation (#103634)
1 parent 5f7d68e commit d4aa857

File tree

2 files changed

+67
-74
lines changed

2 files changed

+67
-74
lines changed

‎Parser/tokenizer.c

Lines changed: 65 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -11,11 +11,6 @@
1111
#include"tokenizer.h"
1212
#include"errcode.h"
1313

14-
#include"unicodeobject.h"
15-
#include"bytesobject.h"
16-
#include"fileobject.h"
17-
#include"abstract.h"
18-
1914
/* Alternate tab spacing */
2015
#defineALTTABSIZE 1
2116

@@ -43,6 +38,8 @@
4338
tok->lineno++; \
4439
tok->col_offset = 0;
4540

41+
#defineINSIDE_FSTRING(tok) (tok->tok_mode_stack_index > 0)
42+
#defineINSIDE_FSTRING_EXPR(tok) (tok->curly_bracket_expr_start_depth >= 0)
4643
#ifdefPy_DEBUG
4744
staticinlinetokenizer_mode*TOK_GET_MODE(structtok_state*tok) {
4845
assert(tok->tok_mode_stack_index >=0);
@@ -54,15 +51,9 @@ static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
5451
assert(tok->tok_mode_stack_index<MAXLEVEL);
5552
return&(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
5653
}
57-
staticinlineint*TOK_GET_BRACKET_MARK(tokenizer_mode*mode) {
58-
assert(mode->bracket_mark_index >=0);
59-
assert(mode->bracket_mark_index<MAX_EXPR_NESTING);
60-
return&(mode->bracket_mark[mode->bracket_mark_index]);
61-
}
6254
#else
6355
#defineTOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
6456
#defineTOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
65-
#defineTOK_GET_BRACKET_MARK(mode) (&(mode->bracket_mark[mode->bracket_mark_index]))
6657
#endif
6758

6859
/* Forward */
@@ -398,20 +389,7 @@ update_fstring_expr(struct tok_state *tok, char cur)
398389
tokenizer_mode*tok_mode=TOK_GET_MODE(tok);
399390

400391
switch (cur) {
401-
case'{':
402-
if (tok_mode->last_expr_buffer!=NULL) {
403-
PyMem_Free(tok_mode->last_expr_buffer);
404-
}
405-
tok_mode->last_expr_buffer=PyMem_Malloc(size);
406-
if (tok_mode->last_expr_buffer==NULL) {
407-
tok->done=E_NOMEM;
408-
return0;
409-
}
410-
tok_mode->last_expr_size=size;
411-
tok_mode->last_expr_end=-1;
412-
strncpy(tok_mode->last_expr_buffer,tok->cur,size);
413-
break;
414-
case0:
392+
case0:
415393
if (!tok_mode->last_expr_buffer||tok_mode->last_expr_end >=0) {
416394
return1;
417395
}
@@ -421,23 +399,38 @@ update_fstring_expr(struct tok_state *tok, char cur)
421399
);
422400
if (new_buffer==NULL) {
423401
PyMem_Free(tok_mode->last_expr_buffer);
424-
tok->done=E_NOMEM;
425-
return0;
402+
gotoerror;
426403
}
427404
tok_mode->last_expr_buffer=new_buffer;
428405
strncpy(tok_mode->last_expr_buffer+tok_mode->last_expr_size,tok->cur,size);
429406
tok_mode->last_expr_size+=size;
430407
break;
408+
case'{':
409+
if (tok_mode->last_expr_buffer!=NULL) {
410+
PyMem_Free(tok_mode->last_expr_buffer);
411+
}
412+
tok_mode->last_expr_buffer=PyMem_Malloc(size);
413+
if (tok_mode->last_expr_buffer==NULL) {
414+
gotoerror;
415+
}
416+
tok_mode->last_expr_size=size;
417+
tok_mode->last_expr_end=-1;
418+
strncpy(tok_mode->last_expr_buffer,tok->cur,size);
419+
break;
431420
case'}':
432421
case'!':
433422
case':':
434423
if (tok_mode->last_expr_end==-1) {
435424
tok_mode->last_expr_end=strlen(tok->start);
436425
}
437426
break;
427+
default:
428+
Py_UNREACHABLE();
438429
}
439-
440430
return1;
431+
error:
432+
tok->done=E_NOMEM;
433+
return0;
441434
}
442435

443436
staticvoid
@@ -1766,7 +1759,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
17661759
/* Skip comment, unless it's a type comment */
17671760
if (c=='#') {
17681761

1769-
if (tok->tok_mode_stack_index>0) {
1762+
if (INSIDE_FSTRING(tok)) {
17701763
returnMAKE_TOKEN(syntaxerror(tok,"f-string expression part cannot include '#'"));
17711764
}
17721765

@@ -2208,32 +2201,31 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
22082201

22092202
p_start=tok->start;
22102203
p_end=tok->cur;
2211-
tokenizer_mode*current_tok=TOK_NEXT_MODE(tok);
2212-
current_tok->kind=TOK_FSTRING_MODE;
2213-
current_tok->f_string_quote=quote;
2214-
current_tok->f_string_quote_size=quote_size;
2215-
current_tok->f_string_start=tok->start;
2216-
current_tok->f_string_multi_line_start=tok->line_start;
2217-
current_tok->last_expr_buffer=NULL;
2218-
current_tok->last_expr_size=0;
2219-
current_tok->last_expr_end=-1;
2204+
tokenizer_mode*the_current_tok=TOK_NEXT_MODE(tok);
2205+
the_current_tok->kind=TOK_FSTRING_MODE;
2206+
the_current_tok->f_string_quote=quote;
2207+
the_current_tok->f_string_quote_size=quote_size;
2208+
the_current_tok->f_string_start=tok->start;
2209+
the_current_tok->f_string_multi_line_start=tok->line_start;
2210+
the_current_tok->last_expr_buffer=NULL;
2211+
the_current_tok->last_expr_size=0;
2212+
the_current_tok->last_expr_end=-1;
22202213

22212214
switch (*tok->start) {
22222215
case'F':
22232216
case'f':
2224-
current_tok->f_string_raw=tolower(*(tok->start+1))=='r';
2217+
the_current_tok->f_string_raw=tolower(*(tok->start+1))=='r';
22252218
break;
22262219
case'R':
22272220
case'r':
2228-
current_tok->f_string_raw=1;
2221+
the_current_tok->f_string_raw=1;
22292222
break;
22302223
default:
22312224
Py_UNREACHABLE();
22322225
}
22332226

2234-
current_tok->bracket_stack=0;
2235-
current_tok->bracket_mark[0]=0;
2236-
current_tok->bracket_mark_index=-1;
2227+
the_current_tok->curly_bracket_depth=0;
2228+
the_current_tok->curly_bracket_expr_start_depth=-1;
22372229
returnMAKE_TOKEN(FSTRING_START);
22382230
}
22392231

@@ -2282,15 +2274,15 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
22822274
intstart=tok->lineno;
22832275
tok->lineno=tok->first_lineno;
22842276

2285-
if (tok->tok_mode_stack_index>0) {
2277+
if (INSIDE_FSTRING(tok)) {
22862278
/* When we are in an f-string, before raising the
22872279
* unterminated string literal error, check whether
22882280
* the initial quote matches the f-string's quotes
22892281
* and if it is, then this must be a missing '}' token
22902282
* so raise the proper error */
2291-
tokenizer_mode*current_tok=TOK_GET_MODE(tok);
2292-
if (current_tok->f_string_quote==quote&&
2293-
current_tok->f_string_quote_size==quote_size) {
2283+
tokenizer_mode*the_current_tok=TOK_GET_MODE(tok);
2284+
if (the_current_tok->f_string_quote==quote&&
2285+
the_current_tok->f_string_quote_size==quote_size) {
22942286
returnMAKE_TOKEN(syntaxerror(tok,"f-string: expecting '}'",start));
22952287
}
22962288
}
@@ -2339,18 +2331,17 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
23392331

23402332
/* Punctuation character */
23412333
intis_punctuation= (c==':'||c=='}'||c=='!'||c=='{');
2342-
if (is_punctuation&&tok->tok_mode_stack_index>0&&current_tok->bracket_mark_index >=0) {
2343-
intmark=*TOK_GET_BRACKET_MARK(current_tok);
2344-
/* This code block gets executed before the bracket_stack is incremented
2334+
if (is_punctuation&&INSIDE_FSTRING(tok)&&INSIDE_FSTRING_EXPR(current_tok)) {
2335+
/* This code block gets executed before the curly_bracket_depth is incremented
23452336
* by the `{` case, so to ensure that we are on the 0th level, we need
23462337
* to adjust it manually */
2347-
intcursor=current_tok->bracket_stack- (c!='{');
2338+
intcursor=current_tok->curly_bracket_depth- (c!='{');
23482339

23492340
if (cursor==0&& !update_fstring_expr(tok,c)) {
23502341
returnMAKE_TOKEN(ENDMARKER);
23512342
}
23522343

2353-
if (c==':'&&cursor==mark) {
2344+
if (c==':'&&cursor==current_tok->curly_bracket_expr_start_depth) {
23542345
current_tok->kind=TOK_FSTRING_MODE;
23552346
p_start=tok->start;
23562347
p_end=tok->cur;
@@ -2390,16 +2381,15 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
23902381
tok->parenlinenostack[tok->level]=tok->lineno;
23912382
tok->parencolstack[tok->level]= (int)(tok->start-tok->line_start);
23922383
tok->level++;
2393-
2394-
if (tok->tok_mode_stack_index>0) {
2395-
current_tok->bracket_stack++;
2384+
if (INSIDE_FSTRING(tok)) {
2385+
current_tok->curly_bracket_depth++;
23962386
}
23972387
break;
23982388
case')':
23992389
case']':
24002390
case'}':
24012391
if (!tok->level) {
2402-
if (tok->tok_mode_stack_index>0&& !current_tok->bracket_stack&&c=='}') {
2392+
if (INSIDE_FSTRING(tok)&& !current_tok->curly_bracket_depth&&c=='}') {
24032393
returnMAKE_TOKEN(syntaxerror(tok,"f-string: single '}' is not allowed"));
24042394
}
24052395
returnMAKE_TOKEN(syntaxerror(tok,"unmatched '%c'",c));
@@ -2415,10 +2405,10 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
24152405
nested expression, then instead of matching a different
24162406
syntactical construct with it; we'll throw an unmatched
24172407
parentheses error. */
2418-
if (tok->tok_mode_stack_index>0&&opening=='{') {
2419-
assert(current_tok->bracket_stack >=0);
2420-
intprevious_bracket=current_tok->bracket_stack-1;
2421-
if (previous_bracket==*TOK_GET_BRACKET_MARK(current_tok)) {
2408+
if (INSIDE_FSTRING(tok)&&opening=='{') {
2409+
assert(current_tok->curly_bracket_depth >=0);
2410+
intprevious_bracket=current_tok->curly_bracket_depth-1;
2411+
if (previous_bracket==current_tok->curly_bracket_expr_start_depth) {
24222412
returnMAKE_TOKEN(syntaxerror(tok,"f-string: unmatched '%c'",c));
24232413
}
24242414
}
@@ -2436,14 +2426,16 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
24362426
}
24372427
}
24382428

2439-
if (tok->tok_mode_stack_index>0) {
2440-
current_tok->bracket_stack--;
2441-
if (c=='}'&&current_tok->bracket_stack==*TOK_GET_BRACKET_MARK(current_tok)) {
2442-
current_tok->bracket_mark_index--;
2429+
if (INSIDE_FSTRING(tok)) {
2430+
current_tok->curly_bracket_depth--;
2431+
if (c=='}'&&current_tok->curly_bracket_depth==current_tok->curly_bracket_expr_start_depth) {
2432+
current_tok->curly_bracket_expr_start_depth--;
24432433
current_tok->kind=TOK_FSTRING_MODE;
24442434
}
24452435
}
24462436
break;
2437+
default:
2438+
break;
24472439
}
24482440

24492441
if (!Py_UNICODE_ISPRINTABLE(c)) {
@@ -2479,11 +2471,10 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
24792471

24802472
if ((start_char=='{'&&peek1!='{')|| (start_char=='}'&&peek1!='}')) {
24812473
if (start_char=='{') {
2482-
current_tok->bracket_mark_index++;
2483-
if (current_tok->bracket_mark_index >=MAX_EXPR_NESTING) {
2474+
current_tok->curly_bracket_expr_start_depth++;
2475+
if (current_tok->curly_bracket_expr_start_depth >=MAX_EXPR_NESTING) {
24842476
returnMAKE_TOKEN(syntaxerror(tok,"f-string: expressions nested too deeply"));
24852477
}
2486-
*TOK_GET_BRACKET_MARK(current_tok)=current_tok->bracket_stack;
24872478
}
24882479
TOK_GET_MODE(tok)->kind=TOK_REGULAR_MODE;
24892480
returntok_get_normal_mode(tok,current_tok,token);
@@ -2544,17 +2535,20 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
25442535
end_quote_size=0;
25452536
}
25462537

2547-
intin_format_spec=current_tok->last_expr_end!=-1&&current_tok->bracket_mark_index >=0;
2538+
intin_format_spec= (
2539+
current_tok->last_expr_end!=-1
2540+
&&
2541+
INSIDE_FSTRING_EXPR(current_tok)
2542+
);
25482543
if (c=='{') {
25492544
intpeek=tok_nextc(tok);
25502545
if (peek!='{'||in_format_spec) {
25512546
tok_backup(tok,peek);
25522547
tok_backup(tok,c);
2553-
current_tok->bracket_mark_index++;
2554-
if (current_tok->bracket_mark_index >=MAX_EXPR_NESTING) {
2548+
current_tok->curly_bracket_expr_start_depth++;
2549+
if (current_tok->curly_bracket_expr_start_depth >=MAX_EXPR_NESTING) {
25552550
returnMAKE_TOKEN(syntaxerror(tok,"f-string: expressions nested too deeply"));
25562551
}
2557-
*TOK_GET_BRACKET_MARK(current_tok)=current_tok->bracket_stack;
25582552
TOK_GET_MODE(tok)->kind=TOK_REGULAR_MODE;
25592553
p_start=tok->start;
25602554
p_end=tok->cur;

‎Parser/tokenizer.h

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -43,9 +43,8 @@ enum tokenizer_mode_kind_t {
4343
typedefstruct_tokenizer_mode {
4444
enumtokenizer_mode_kind_tkind;
4545

46-
intbracket_stack;
47-
intbracket_mark[MAX_EXPR_NESTING];
48-
intbracket_mark_index;
46+
intcurly_bracket_depth;
47+
intcurly_bracket_expr_start_depth;
4948

5049
charf_string_quote;
5150
intf_string_quote_size;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp