Commit e35cc3b

pgbench: Use COPY for client-side data generation
This commit switches the client-side data generation from INSERT queries to COPY for the two tables pgbench_branches and pgbench_tellers. pgbench_accounts was already using COPY.

COPY is a better interface for bulk loading or high-latency connections (this point can be countered with the option for server-side data generation, but client-side remains the default), and measurements have shown that using it for these two other tables leads to improvements during initialization. I did not notice slowdowns at large scale numbers on a local setup either, as most of the work happens for the accounts table.

Previously, COPY was only used for the pgbench_accounts table because its amount of data was much larger than that of the two other tables. The code is refactored so that all three tables use the same code path to execute the COPY queries, with a callback to build data rows.

Author: Tristan Partin
Discussion: https://postgr.es/m/CSTU5P82ONZ1.19XFUGHMXHBRY@c3po
1 parent 29836df · commit e35cc3b
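For readers less familiar with libpq's COPY interface, the sketch below shows the general shape of what the commit does: open one COPY ... FROM STDIN per table and stream text-format rows produced by a per-table callback, instead of issuing one INSERT per row. This is a simplified, standalone illustration rather than pgbench's code: it uses the newer PQputCopyData()/PQputCopyEnd() calls where pgbench uses PQputline()/PQendcopy(), and the connection string, table name, and row count are placeholders.

```c
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <libpq-fe.h>

/* Callback that formats one COPY text-format row into buf. */
typedef int (*row_builder) (char *buf, size_t buflen, int64_t curr);

/* pgbench_branches-style row: bid, bbalance = 0, filler = NULL (\N). */
static int
branch_row(char *buf, size_t buflen, int64_t curr)
{
    return snprintf(buf, buflen, "%" PRId64 "\t0\t\\N\n", curr + 1);
}

/* Stream 'total' rows into 'table' with COPY FROM STDIN; returns 0 on success. */
static int
copy_table(PGconn *con, const char *table, int64_t total, row_builder build_row)
{
    char        stmt[256];
    char        row[256];
    PGresult   *res;

    snprintf(stmt, sizeof(stmt), "copy %s from stdin", table);
    res = PQexec(con, stmt);
    if (PQresultStatus(res) != PGRES_COPY_IN)
    {
        fprintf(stderr, "COPY failed: %s", PQerrorMessage(con));
        PQclear(res);
        return 1;
    }
    PQclear(res);

    for (int64_t k = 0; k < total; k++)
    {
        int         len = build_row(row, sizeof(row), k);

        if (PQputCopyData(con, row, len) != 1)
        {
            fprintf(stderr, "PQputCopyData failed: %s", PQerrorMessage(con));
            return 1;
        }
    }

    if (PQputCopyEnd(con, NULL) != 1)
    {
        fprintf(stderr, "PQputCopyEnd failed: %s", PQerrorMessage(con));
        return 1;
    }

    /* Collect the COPY's final command status. */
    res = PQgetResult(con);
    if (PQresultStatus(res) != PGRES_COMMAND_OK)
    {
        fprintf(stderr, "COPY did not complete: %s", PQerrorMessage(con));
        PQclear(res);
        return 1;
    }
    PQclear(res);
    return 0;
}

int
main(void)
{
    /* Placeholder connection string. */
    PGconn     *con = PQconnectdb("dbname=bench");

    if (PQstatus(con) != CONNECTION_OK)
    {
        fprintf(stderr, "connection failed: %s", PQerrorMessage(con));
        return 1;
    }

    /* Load a handful of branch rows the same way pgbench now loads all three tables. */
    int         rc = copy_table(con, "pgbench_branches", 10, branch_row);

    PQfinish(con);
    return rc;
}
```

Streaming rows this way costs one statement and one result per table rather than one per row, which is why the commit message singles out bulk loading and high-latency connections. Assuming the client headers are installed, something like `cc demo.c -I"$(pg_config --includedir)" -L"$(pg_config --libdir)" -lpq` builds it.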

File tree

2 files changed: +98 -66 lines changed


doc/src/sgml/ref/pgbench.sgml

Lines changed: 5 additions & 4 deletions
@@ -231,10 +231,11 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
     extensively through a <command>COPY</command>.
     <command>pgbench</command> uses the FREEZE option with version 14 or later
     of <productname>PostgreSQL</productname> to speed up
-    subsequent <command>VACUUM</command>, unless partitions are enabled.
-    Using <literal>g</literal> causes logging to print one message
-    every 100,000 rows while generating data for the
-    <structname>pgbench_accounts</structname> table.
+    subsequent <command>VACUUM</command>, except on the
+    <literal>pgbench_accounts</literal> table if partitions are
+    enabled.  Using <literal>g</literal> causes logging to
+    print one message every 100,000 rows while generating data for all
+    tables.
    </para>
    <para>
    With <literal>G</literal> (server-side data generation),
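The FREEZE behavior described in this documentation change boils down to a check on the server version and on whether pgbench_accounts is partitioned. Below is a minimal sketch of that rule, not pgbench's exact code: the helper name copy_format_for() is invented here, and `partitioned` stands in for pgbench's --partitions option being nonzero.

```c
#include <string.h>
#include <libpq-fe.h>

/*
 * Pick a COPY statement template: FREEZE on server version 14 or later,
 * except when loading a partitioned pgbench_accounts (hypothetical helper).
 */
const char *
copy_format_for(PGconn *con, const char *table, int partitioned)
{
    if (PQserverVersion(con) >= 140000 &&
        (strcmp(table, "pgbench_accounts") != 0 || !partitioned))
        return "copy %s from stdin with (freeze on)";

    return "copy %s from stdin";
}
```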

src/bin/pgbench/pgbench.c

Lines changed: 93 additions & 62 deletions
@@ -835,6 +835,8 @@ static void add_socket_to_set(socket_set *sa, int fd, int idx);
 static int wait_on_socket_set(socket_set *sa, int64 usecs);
 static bool socket_has_input(socket_set *sa, int fd, int idx);

+/* callback used to build rows for COPY during data loading */
+typedef void (*initRowMethod) (PQExpBufferData *sql, int64 curr);

 /* callback functions for our flex lexer */
 static const PsqlScanCallbacks pgbench_callbacks = {
@@ -4859,17 +4861,45 @@ initTruncateTables(PGconn *con)
                      "pgbench_tellers");
 }

-/*
- * Fill the standard tables with some data generated and sent from the client
- */
 static void
-initGenerateDataClientSide(PGconn *con)
+initBranch(PQExpBufferData *sql, int64 curr)
 {
-    PQExpBufferData sql;
+    /* "filler" column uses NULL */
+    printfPQExpBuffer(sql,
+                      INT64_FORMAT "\t0\t\\N\n",
+                      curr + 1);
+}
+
+static void
+initTeller(PQExpBufferData *sql, int64 curr)
+{
+    /* "filler" column uses NULL */
+    printfPQExpBuffer(sql,
+                      INT64_FORMAT "\t" INT64_FORMAT "\t0\t\\N\n",
+                      curr + 1, curr / ntellers + 1);
+}
+
+static void
+initAccount(PQExpBufferData *sql, int64 curr)
+{
+    /* "filler" column defaults to blank padded empty string */
+    printfPQExpBuffer(sql,
+                      INT64_FORMAT "\t" INT64_FORMAT "\t0\t\n",
+                      curr + 1, curr / naccounts + 1);
+}
+
+static void
+initPopulateTable(PGconn *con, const char *table, int64 base,
+                  initRowMethod init_row)
+{
+    int         n;
+    int         k;
+    int         chars = 0;
     PGresult   *res;
-    int         i;
-    int64       k;
-    char       *copy_statement;
+    PQExpBufferData sql;
+    char        copy_statement[256];
+    const char *copy_statement_fmt = "copy %s from stdin";
+    int64       total = base * scale;

     /* used to track elapsed time and estimate of the remaining time */
     pg_time_usec_t start;
@@ -4878,50 +4908,24 @@ initGenerateDataClientSide(PGconn *con)
     /* Stay on the same line if reporting to a terminal */
     char        eol = isatty(fileno(stderr)) ? '\r' : '\n';

-    fprintf(stderr, "generating data (client-side)...\n");
-
-    /*
-     * we do all of this in one transaction to enable the backend's
-     * data-loading optimizations
-     */
-    executeStatement(con, "begin");
-
-    /* truncate away any old data */
-    initTruncateTables(con);
-
     initPQExpBuffer(&sql);

     /*
-     * fill branches, tellers, accounts in that order in case foreign keys
-     * already exist
+     * Use COPY with FREEZE on v14 and later for all the tables except
+     * pgbench_accounts when it is partitioned.
      */
-    for (i = 0; i < nbranches * scale; i++)
+    if (PQserverVersion(con) >= 140000)
     {
-        /* "filler" column defaults to NULL */
-        printfPQExpBuffer(&sql,
-                          "insert into pgbench_branches(bid,bbalance) values(%d,0)",
-                          i + 1);
-        executeStatement(con, sql.data);
+        if (strcmp(table, "pgbench_accounts") != 0 ||
+            partitions == 0)
+            copy_statement_fmt = "copy %s from stdin with (freeze on)";
     }

-    for (i = 0; i < ntellers * scale; i++)
-    {
-        /* "filler" column defaults to NULL */
-        printfPQExpBuffer(&sql,
-                          "insert into pgbench_tellers(tid,bid,tbalance) values (%d,%d,0)",
-                          i + 1, i / ntellers + 1);
-        executeStatement(con, sql.data);
-    }
-
-    /*
-     * accounts is big enough to be worth using COPY and tracking runtime
-     */
-
-    /* use COPY with FREEZE on v14 and later without partitioning */
-    if (partitions == 0 && PQserverVersion(con) >= 140000)
-        copy_statement = "copy pgbench_accounts from stdin with (freeze on)";
-    else
-        copy_statement = "copy pgbench_accounts from stdin";
+    n = pg_snprintf(copy_statement, sizeof(copy_statement), copy_statement_fmt, table);
+    if (n >= sizeof(copy_statement))
+        pg_fatal("invalid buffer size: must be at least %d characters long", n);
+    else if (n == -1)
+        pg_fatal("invalid format string");

     res = PQexec(con, copy_statement);

@@ -4931,14 +4935,11 @@ initGenerateDataClientSide(PGconn *con)

     start = pg_time_now();

-    for (k = 0; k < (int64) naccounts * scale; k++)
+    for (k = 0; k < total; k++)
     {
         int64       j = k + 1;

-        /* "filler" column defaults to blank padded empty string */
-        printfPQExpBuffer(&sql,
-                          INT64_FORMAT "\t" INT64_FORMAT "\t0\t\n",
-                          j, k / naccounts + 1);
+        init_row(&sql, k);
         if (PQputline(con, sql.data))
             pg_fatal("PQputline failed");

@@ -4952,41 +4953,71 @@ initGenerateDataClientSide(PGconn *con)
         if ((!use_quiet) && (j % 100000 == 0))
         {
             double      elapsed_sec = PG_TIME_GET_DOUBLE(pg_time_now() - start);
-            double      remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
+            double      remaining_sec = ((double) total - j) * elapsed_sec / j;

-            fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s)%c",
-                    j, (int64) naccounts * scale,
-                    (int) (((int64) j * 100) / (naccounts * (int64) scale)),
-                    elapsed_sec, remaining_sec, eol);
+            chars = fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) of %s done (elapsed %.2f s, remaining %.2f s)%c",
+                            j, total,
+                            (int) ((j * 100) / total),
+                            table, elapsed_sec, remaining_sec, eol);
         }
         /* let's not call the timing for each row, but only each 100 rows */
         else if (use_quiet && (j % 100 == 0))
         {
             double      elapsed_sec = PG_TIME_GET_DOUBLE(pg_time_now() - start);
-            double      remaining_sec = ((double) scale * naccounts - j) * elapsed_sec / j;
+            double      remaining_sec = ((double) total - j) * elapsed_sec / j;

             /* have we reached the next interval (or end)? */
-            if ((j == scale * naccounts) || (elapsed_sec >= log_interval * LOG_STEP_SECONDS))
+            if ((j == total) || (elapsed_sec >= log_interval * LOG_STEP_SECONDS))
             {
-                fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) done (elapsed %.2f s, remaining %.2f s)%c",
-                        j, (int64) naccounts * scale,
-                        (int) (((int64) j * 100) / (naccounts * (int64) scale)), elapsed_sec, remaining_sec, eol);
+                chars = fprintf(stderr, INT64_FORMAT " of " INT64_FORMAT " tuples (%d%%) of %s done (elapsed %.2f s, remaining %.2f s)%c",
+                                j, total,
+                                (int) ((j * 100) / total),
+                                table, elapsed_sec, remaining_sec, eol);

                 /* skip to the next interval */
                 log_interval = (int) ceil(elapsed_sec / LOG_STEP_SECONDS);
             }
         }
     }

-    if (eol != '\n')
-        fputc('\n', stderr);    /* Need to move to next line */
+    if (chars != 0 && eol != '\n')
+        fprintf(stderr, "%*c\r", chars - 1, ' ');   /* Clear the current line */

     if (PQputline(con, "\\.\n"))
         pg_fatal("very last PQputline failed");
     if (PQendcopy(con))
         pg_fatal("PQendcopy failed");

     termPQExpBuffer(&sql);
+}
+
+/*
+ * Fill the standard tables with some data generated and sent from the client.
+ *
+ * The filler column is NULL in pgbench_branches and pgbench_tellers, and is
+ * a blank-padded string in pgbench_accounts.
+ */
+static void
+initGenerateDataClientSide(PGconn *con)
+{
+    fprintf(stderr, "generating data (client-side)...\n");
+
+    /*
+     * we do all of this in one transaction to enable the backend's
+     * data-loading optimizations
+     */
+    executeStatement(con, "begin");
+
+    /* truncate away any old data */
+    initTruncateTables(con);
+
+    /*
+     * fill branches, tellers, accounts in that order in case foreign keys
+     * already exist
+     */
+    initPopulateTable(con, "pgbench_branches", nbranches, initBranch);
+    initPopulateTable(con, "pgbench_tellers", ntellers, initTeller);
+    initPopulateTable(con, "pgbench_accounts", naccounts, initAccount);

     executeStatement(con, "commit");
 }
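A side effect of routing all three tables through initPopulateTable() is that wiring in another client-side-loaded table becomes a matter of writing one more row callback and one more call, assuming the table itself is created elsewhere during initialization. The fragment below is purely hypothetical and not part of this commit; the table pgbench_example and its column layout are invented, while printfPQExpBuffer(), INT64_FORMAT, and initPopulateTable() are the same symbols used in the diff above.

```c
/* Hypothetical row builder following the new initRowMethod signature. */
static void
initExample(PQExpBufferData *sql, int64 curr)
{
    /* two integer columns plus a NULL "filler", in COPY text format */
    printfPQExpBuffer(sql,
                      INT64_FORMAT "\t0\t\\N\n",
                      curr + 1);
}

/* ...registered next to the existing calls in initGenerateDataClientSide(): */
initPopulateTable(con, "pgbench_example", nbranches, initExample);
```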
