Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitc8c74ad

Browse files
committed
Get rid of O(N^2) script-parsing overhead in pgbench.
pgbench wants to record the starting line number of each commandin its scripts. It was computing that by scanning from the scriptstart and counting newlines, so that O(N^2) work had to be donefor an N-command script. In a script with 50K lines, this addsup to about 10 seconds on my machine.To add insult to injury, the results were subtly wrong, becauseexpr_scanner_offset() scanned to find the NUL that flex insertsat the end of the current token --- and before the first yylexcall, no such NUL has been inserted. So we ended by computing thescript's last line number not its first one. This was visible onlyin case of \gset at the start of a script, which perhaps accountsfor the lack of complaints.To fix, steal an idea from plpgsql and track the current lexerending position and line count as we advance through the script.(It's a bit simpler than plpgsql since we can't need to back up.)Also adjust a couple of other places that were invoking scansfrom script start when they didn't really need to. I made a newpsqlscan function psql_scan_get_location() that replaces bothexpr_scanner_offset() and expr_scanner_get_lineno(), since inpractice expr_scanner_get_lineno() was only being invoked to findthe line number of the current lexer end position.Reported-by: Daniel Vérité <daniel@manitou-mail.org>Author: Tom Lane <tgl@sss.pgh.pa.us>Discussion:https://postgr.es/m/84a8a89e-adb8-47a9-9d34-c13f7150ee45@manitou-mail.org
1 parente167191 commitc8c74ad

File tree

6 files changed

+89
-55
lines changed

6 files changed

+89
-55
lines changed

‎src/bin/pgbench/exprscan.l

Lines changed: 21 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -271,10 +271,14 @@ void
271271
expr_yyerror_more(yyscan_t yyscanner,constchar *message,constchar *more)
272272
{
273273
PsqlScanState state =yyget_extra(yyscanner);
274-
interror_detection_offset =expr_scanner_offset(state) -1;
274+
intlineno;
275+
interror_detection_offset;
275276
YYSTYPElval;
276277
char *full_line;
277278

279+
psql_scan_get_location(state, &lineno, &error_detection_offset);
280+
error_detection_offset--;
281+
278282
/*
279283
* While parsing an expression, we may not have collected the whole line
280284
* yet from the input source. Lex till EOL so we can report whole line.
@@ -289,7 +293,6 @@ expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
289293
/* Extract the line, trimming trailing newline if any */
290294
full_line =expr_scanner_get_substring(state,
291295
expr_start_offset,
292-
expr_scanner_offset(state),
293296
true);
294297

295298
syntax_error(expr_source, expr_lineno, full_line, expr_command,
@@ -336,12 +339,15 @@ expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset)
336339
/* And lex. */
337340
lexresult =yylex(&lval, state->scanner);
338341

339-
/*
340-
* Save start offset of word, if any. We could do this more efficiently,
341-
* but for now this seems fine.
342-
*/
342+
/* Save start offset of word, if any. */
343343
if (lexresult)
344-
*offset =expr_scanner_offset(state) - word_buf->len;
344+
{
345+
intlineno;
346+
intend_offset;
347+
348+
psql_scan_get_location(state, &lineno, &end_offset);
349+
*offset = end_offset - word_buf->len;
350+
}
345351
else
346352
*offset = -1;
347353

@@ -404,35 +410,25 @@ expr_scanner_finish(yyscan_t yyscanner)
404410
}
405411

406412
/*
407-
* Get offset from start of string to end of current lexer token.
413+
* Get a malloc'd copy of the lexer input string from start_offset
414+
* to end of current lexer token. If chomp is true, drop any trailing
415+
* newline(s).
408416
*
409417
* We rely on the knowledge that flex modifies the scan buffer by storing
410418
* a NUL at the end of the current token (yytext). Note that this might
411419
* not work quite right if we were parsing a sub-buffer, but since pgbench
412-
* never invokes that functionality, it doesn't matter.
413-
*/
414-
int
415-
expr_scanner_offset(PsqlScanState state)
416-
{
417-
returnstrlen(state->scanbuf);
418-
}
419-
420-
/*
421-
* Get a malloc'd copy of the lexer input string from start_offset
422-
* to just before end_offset. If chomp is true, drop any trailing
423-
* newline(s).
420+
* never invokes that functionality, it doesn't matter. Also, this will
421+
* give the wrong answer (the whole remainder of the input) if called
422+
* before any yylex() call has been done.
424423
*/
425424
char *
426425
expr_scanner_get_substring(PsqlScanState state,
427-
int start_offset,int end_offset,
426+
int start_offset,
428427
bool chomp)
429428
{
430429
char *result;
431430
constchar *scanptr = state->scanbuf + start_offset;
432-
intslen = end_offset - start_offset;
433-
434-
Assert(slen >=0);
435-
Assert(end_offset <=strlen(state->scanbuf));
431+
size_tslen =strlen(scanptr);
436432

437433
if (chomp)
438434
{
@@ -447,22 +443,3 @@ expr_scanner_get_substring(PsqlScanState state,
447443

448444
return result;
449445
}
450-
451-
/*
452-
* Get the line number associated with the given string offset
453-
* (which must not be past the end of where we've lexed to).
454-
*/
455-
int
456-
expr_scanner_get_lineno(PsqlScanState state,int offset)
457-
{
458-
intlineno =1;
459-
constchar *p = state->scanbuf;
460-
461-
while (*p && offset >0)
462-
{
463-
if (*p =='\n')
464-
lineno++;
465-
p++, offset--;
466-
}
467-
return lineno;
468-
}

‎src/bin/pgbench/pgbench.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5690,8 +5690,8 @@ process_backslash_command(PsqlScanState sstate, const char *source)
56905690
initPQExpBuffer(&word_buf);
56915691

56925692
/* Remember location of the backslash */
5693-
start_offset=expr_scanner_offset(sstate)-1;
5694-
lineno=expr_scanner_get_lineno(sstate,start_offset);
5693+
psql_scan_get_location(sstate,&lineno,&start_offset);
5694+
start_offset--;
56955695

56965696
/* Collect first word of command */
56975697
if (!expr_lex_one_word(sstate,&word_buf,&word_offset))
@@ -5747,7 +5747,6 @@ process_backslash_command(PsqlScanState sstate, const char *source)
57475747
my_command->first_line=
57485748
expr_scanner_get_substring(sstate,
57495749
start_offset,
5750-
expr_scanner_offset(sstate),
57515750
true);
57525751

57535752
expr_scanner_finish(yyscanner);
@@ -5777,7 +5776,6 @@ process_backslash_command(PsqlScanState sstate, const char *source)
57775776
my_command->first_line=
57785777
expr_scanner_get_substring(sstate,
57795778
start_offset,
5780-
expr_scanner_offset(sstate),
57815779
true);
57825780

57835781
if (my_command->meta==META_SLEEP)
@@ -5952,8 +5950,6 @@ ParseScript(const char *script, const char *desc, int weight)
59525950
PQExpBufferDataline_buf;
59535951
intalloc_num;
59545952
intindex;
5955-
intlineno;
5956-
intstart_offset;
59575953

59585954
#defineCOMMANDS_ALLOC_NUM 128
59595955
alloc_num=COMMANDS_ALLOC_NUM;
@@ -5977,20 +5973,22 @@ ParseScript(const char *script, const char *desc, int weight)
59775973
* stdstrings should be true, which is a bit riskier.
59785974
*/
59795975
psql_scan_setup(sstate,script,strlen(script),0, true);
5980-
start_offset=expr_scanner_offset(sstate)-1;
59815976

59825977
initPQExpBuffer(&line_buf);
59835978

59845979
index=0;
59855980

59865981
for (;;)
59875982
{
5983+
intlineno;
5984+
intstart_offset;
59885985
PsqlScanResultsr;
59895986
promptStatus_tprompt;
59905987
Command*command=NULL;
59915988

59925989
resetPQExpBuffer(&line_buf);
5993-
lineno=expr_scanner_get_lineno(sstate,start_offset);
5990+
5991+
psql_scan_get_location(sstate,&lineno,&start_offset);
59945992

59955993
sr=psql_scan(sstate,&line_buf,&prompt);
59965994

‎src/bin/pgbench/pgbench.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -149,11 +149,9 @@ extern yyscan_t expr_scanner_init(PsqlScanState state,
149149
constchar*source,intlineno,intstart_offset,
150150
constchar*command);
151151
externvoidexpr_scanner_finish(yyscan_tyyscanner);
152-
externintexpr_scanner_offset(PsqlScanStatestate);
153152
externchar*expr_scanner_get_substring(PsqlScanStatestate,
154-
intstart_offset,intend_offset,
153+
intstart_offset,
155154
boolchomp);
156-
externintexpr_scanner_get_lineno(PsqlScanStatestate,intoffset);
157155

158156
externvoidsyntax_error(constchar*source,intlineno,constchar*line,
159157
constchar*command,constchar*msg,

‎src/fe_utils/psqlscan.l

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1079,6 +1079,10 @@ psql_scan_setup(PsqlScanState state,
10791079
/* Set lookaside data in case we have to map unsafe encoding */
10801080
state->curline = state->scanbuf;
10811081
state->refline = state->scanline;
1082+
1083+
/* Initialize state for psql_scan_get_location() */
1084+
state->cur_line_no =0;/* yylex not called yet */
1085+
state->cur_line_ptr = state->scanbuf;
10821086
}
10831087

10841088
/*
@@ -1136,6 +1140,10 @@ psql_scan(PsqlScanState state,
11361140
/* And lex. */
11371141
lexresult =yylex(NULL, state->scanner);
11381142

1143+
/* Notify psql_scan_get_location() that a yylex call has been made. */
1144+
if (state->cur_line_no ==0)
1145+
state->cur_line_no =1;
1146+
11391147
/*
11401148
* Check termination state and return appropriate result info.
11411149
*/
@@ -1311,6 +1319,52 @@ psql_scan_in_quote(PsqlScanState state)
13111319
state->start_state != xqs;
13121320
}
13131321

1322+
/*
1323+
* Return the current scanning location (end+1 of last scanned token),
1324+
* as a line number counted from 1 and an offset from string start.
1325+
*
1326+
* This considers only the outermost input string, and therefore is of
1327+
* limited use for programs that use psqlscan_push_new_buffer().
1328+
*
1329+
* It would be a bit easier probably to use "%option yylineno" to count
1330+
* lines, but the flex manual says that has a performance cost, and only
1331+
* a minority of programs using psqlscan have need for this functionality.
1332+
* So we implement it ourselves without adding overhead to the lexer itself.
1333+
*/
1334+
void
1335+
psql_scan_get_location(PsqlScanState state,
1336+
int *lineno,int *offset)
1337+
{
1338+
constchar *line_end;
1339+
1340+
/*
1341+
* We rely on flex's having stored a NUL after the current token in
1342+
* scanbuf. Therefore we must specially handle the state before yylex()
1343+
* has been called, when obviously that won't have happened yet.
1344+
*/
1345+
if (state->cur_line_no ==0)
1346+
{
1347+
*lineno =1;
1348+
*offset =0;
1349+
return;
1350+
}
1351+
1352+
/*
1353+
* Advance cur_line_no/cur_line_ptr past whatever has been lexed so far.
1354+
* Doing this prevents repeated calls from being O(N^2) for long inputs.
1355+
*/
1356+
while ((line_end =strchr(state->cur_line_ptr,'\n')) !=NULL)
1357+
{
1358+
state->cur_line_no++;
1359+
state->cur_line_ptr = line_end +1;
1360+
}
1361+
state->cur_line_ptr +=strlen(state->cur_line_ptr);
1362+
1363+
/* Report current location. */
1364+
*lineno = state->cur_line_no;
1365+
*offset = state->cur_line_ptr - state->scanbuf;
1366+
}
1367+
13141368
/*
13151369
* Push the given string onto the stack of stuff to scan.
13161370
*

‎src/include/fe_utils/psqlscan.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,7 @@ extern void psql_scan_reselect_sql_lexer(PsqlScanState state);
8787

8888
externboolpsql_scan_in_quote(PsqlScanStatestate);
8989

90+
externvoidpsql_scan_get_location(PsqlScanStatestate,
91+
int*lineno,int*offset);
92+
9093
#endif/* PSQLSCAN_H */

‎src/include/fe_utils/psqlscan_int.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,10 @@ typedef struct PsqlScanStateData
104104
constchar*curline;/* actual flex input string for cur buf */
105105
constchar*refline;/* original data for cur buffer */
106106

107+
/* status for psql_scan_get_location() */
108+
intcur_line_no;/* current line#, or 0 if no yylex done */
109+
constchar*cur_line_ptr;/* points into cur_line_no'th line in scanbuf */
110+
107111
/*
108112
* All this state lives across successive input lines, until explicitly
109113
* reset by psql_scan_reset. start_state is adopted by yylex() on entry,

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp