Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 0a0727c

Browse files
committed
Improve performance of binary COPY FROM through better buffering.
At least on Linux and macOS, fread() turns out to have far higher per-call overhead than one could wish. Reading 64KB of data at a time and then parceling it out with our own memcpy logic makes binary COPY from a file significantly faster --- around 30% in simple testing for cases with narrow text columns (on Linux ... even more on macOS). In binary COPY from frontend, there's no per-call fread(), and this patch introduces an extra layer of memcpy'ing, but it still manages to eke out a small win. Apparently, the control-logic overhead in CopyGetData() is enough to be worth avoiding for small fetches. Bharath Rupireddy and Amit Langote, reviewed by Vignesh C, cosmetic tweaks by me. Discussion: https://postgr.es/m/CALj2ACU5Bz06HWLwqSzNMN=Gupoj6Rcn_QVC+k070V4em9wu=A@mail.gmail.com
1 parent 8a37951 commit 0a0727c

File tree

1 file changed

+83
-35
lines changed

1 file changed

+83
-35
lines changed

‎src/backend/commands/copy.c

Lines changed: 83 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -187,15 +187,15 @@ typedef struct CopyStateData
187187
TransitionCaptureState*transition_capture;
188188

189189
/*
190-
* These variables are used to reduce overhead in textual COPY FROM.
190+
* These variables are used to reduce overhead in COPY FROM.
191191
*
192192
* attribute_buf holds the separated, de-escaped text for each field of
193193
* the current line. The CopyReadAttributes functions return arrays of
194194
* pointers into this buffer. We avoid palloc/pfree overhead by re-using
195195
* the buffer on each cycle.
196196
*
197-
* (In binary COPY FROM, attribute_buf holds the binary data for the
198-
* current field, while the other variables are not used.)
197+
* In binary COPY FROM, attribute_buf holds the binary data for the
198+
* current field, but the usage is otherwise similar.
199199
*/
200200
StringInfoDataattribute_buf;
201201

@@ -209,23 +209,27 @@ typedef struct CopyStateData
209209
* input cycle is first to read the whole line into line_buf, convert it
210210
* to server encoding there, and then extract the individual attribute
211211
* fields into attribute_buf. line_buf is preserved unmodified so that we
212-
* can display it in error messages if appropriate.
212+
* can display it in error messages if appropriate. (In binary mode,
213+
* line_buf is not used.)
213214
*/
214215
StringInfoDataline_buf;
215216
boolline_buf_converted;/* converted to server encoding? */
216217
boolline_buf_valid;/* contains the row being processed? */
217218

218219
/*
219220
* Finally, raw_buf holds raw data read from the data source (file or
220-
* client connection). CopyReadLine parses this data sufficiently to
221-
* locate line boundaries, then transfers the data to line_buf and
222-
* converts it. Note: we guarantee that there is a \0 at
223-
* raw_buf[raw_buf_len].
221+
* client connection). In text mode, CopyReadLine parses this data
222+
* sufficiently to locate line boundaries, then transfers the data to
223+
* line_buf and converts it. In binary mode, CopyReadBinaryData fetches
224+
* appropriate amounts of data from this buffer. In both modes, we
225+
* guarantee that there is a \0 at raw_buf[raw_buf_len].
224226
*/
225227
#defineRAW_BUF_SIZE 65536/* we palloc RAW_BUF_SIZE+1 bytes */
226228
char*raw_buf;
227229
intraw_buf_index;/* next byte to process */
228230
intraw_buf_len;/* total # of bytes stored */
231+
/* Shorthand for number of unconsumed bytes available in raw_buf */
232+
#defineRAW_BUF_BYTES(cstate) ((cstate)->raw_buf_len - (cstate)->raw_buf_index)
229233
}CopyStateData;
230234

231235
/* DestReceiver for COPY (query) TO */
@@ -394,6 +398,8 @@ static void CopySendInt32(CopyState cstate, int32 val);
394398
staticboolCopyGetInt32(CopyStatecstate,int32*val);
395399
staticvoidCopySendInt16(CopyStatecstate,int16val);
396400
staticboolCopyGetInt16(CopyStatecstate,int16*val);
401+
staticboolCopyLoadRawBuf(CopyStatecstate);
402+
staticintCopyReadBinaryData(CopyStatecstate,char*dest,intnbytes);
397403

398404

399405
/*
@@ -723,7 +729,7 @@ CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
723729
/*
724730
* CopySendInt32 sends an int32 in network byte order
725731
*/
726-
staticvoid
732+
staticinlinevoid
727733
CopySendInt32(CopyStatecstate,int32val)
728734
{
729735
uint32buf;
@@ -737,12 +743,12 @@ CopySendInt32(CopyState cstate, int32 val)
737743
*
738744
* Returns true if OK, false if EOF
739745
*/
740-
staticbool
746+
staticinlinebool
741747
CopyGetInt32(CopyStatecstate,int32*val)
742748
{
743749
uint32buf;
744750

745-
if (CopyGetData(cstate,&buf,sizeof(buf),sizeof(buf))!=sizeof(buf))
751+
if (CopyReadBinaryData(cstate,(char*)&buf,sizeof(buf))!=sizeof(buf))
746752
{
747753
*val=0;/* suppress compiler warning */
748754
return false;
@@ -754,7 +760,7 @@ CopyGetInt32(CopyState cstate, int32 *val)
754760
/*
755761
* CopySendInt16 sends an int16 in network byte order
756762
*/
757-
staticvoid
763+
staticinlinevoid
758764
CopySendInt16(CopyStatecstate,int16val)
759765
{
760766
uint16buf;
@@ -766,12 +772,12 @@ CopySendInt16(CopyState cstate, int16 val)
766772
/*
767773
* CopyGetInt16 reads an int16 that appears in network byte order
768774
*/
769-
staticbool
775+
staticinlinebool
770776
CopyGetInt16(CopyStatecstate,int16*val)
771777
{
772778
uint16buf;
773779

774-
if (CopyGetData(cstate,&buf,sizeof(buf),sizeof(buf))!=sizeof(buf))
780+
if (CopyReadBinaryData(cstate,(char*)&buf,sizeof(buf))!=sizeof(buf))
775781
{
776782
*val=0;/* suppress compiler warning */
777783
return false;
@@ -786,26 +792,20 @@ CopyGetInt16(CopyState cstate, int16 *val)
786792
*
787793
* Returns true if able to obtain at least one more byte, else false.
788794
*
789-
* If raw_buf_index < raw_buf_len, the unprocessed bytes are transferred
790-
* down to the start of the buffer and then we load more data after that.
791-
* This case is used only when a frontend multibyte character crosses a
792-
* bufferload boundary.
795+
* If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
796+
* of the buffer and then we load more data after that. This case occurs only
797+
* when a multibyte character crosses a bufferload boundary.
793798
*/
794799
staticbool
795800
CopyLoadRawBuf(CopyStatecstate)
796801
{
797-
intnbytes;
802+
intnbytes=RAW_BUF_BYTES(cstate);
798803
intinbytes;
799804

800-
if (cstate->raw_buf_index<cstate->raw_buf_len)
801-
{
802-
/* Copy down the unprocessed data */
803-
nbytes=cstate->raw_buf_len-cstate->raw_buf_index;
805+
/* Copy down the unprocessed data if any. */
806+
if (nbytes>0)
804807
memmove(cstate->raw_buf,cstate->raw_buf+cstate->raw_buf_index,
805808
nbytes);
806-
}
807-
else
808-
nbytes=0;/* no data need be saved */
809809

810810
inbytes=CopyGetData(cstate,cstate->raw_buf+nbytes,
811811
1,RAW_BUF_SIZE-nbytes);
@@ -816,6 +816,54 @@ CopyLoadRawBuf(CopyState cstate)
816816
return (inbytes>0);
817817
}
818818

819+
/*
820+
* CopyReadBinaryData
821+
*
822+
* Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf
823+
* and writes them to 'dest'. Returns the number of bytes read (which
824+
* would be less than 'nbytes' only if we reach EOF).
825+
*/
826+
staticint
827+
CopyReadBinaryData(CopyStatecstate,char*dest,intnbytes)
828+
{
829+
intcopied_bytes=0;
830+
831+
if (RAW_BUF_BYTES(cstate) >=nbytes)
832+
{
833+
/* Enough bytes are present in the buffer. */
834+
memcpy(dest,cstate->raw_buf+cstate->raw_buf_index,nbytes);
835+
cstate->raw_buf_index+=nbytes;
836+
copied_bytes=nbytes;
837+
}
838+
else
839+
{
840+
/*
841+
* Not enough bytes in the buffer, so must read from the file. Need
842+
* to loop since 'nbytes' could be larger than the buffer size.
843+
*/
844+
do
845+
{
846+
intcopy_bytes;
847+
848+
/* Load more data if buffer is empty. */
849+
if (RAW_BUF_BYTES(cstate)==0)
850+
{
851+
if (!CopyLoadRawBuf(cstate))
852+
break;/* EOF */
853+
}
854+
855+
/* Transfer some bytes. */
856+
copy_bytes=Min(nbytes-copied_bytes,RAW_BUF_BYTES(cstate));
857+
memcpy(dest,cstate->raw_buf+cstate->raw_buf_index,copy_bytes);
858+
cstate->raw_buf_index+=copy_bytes;
859+
dest+=copy_bytes;
860+
copied_bytes+=copy_bytes;
861+
}while (copied_bytes<nbytes);
862+
}
863+
864+
returncopied_bytes;
865+
}
866+
819867

820868
/*
821869
* DoCopy executes the SQL COPY statement
@@ -3366,17 +3414,17 @@ BeginCopyFrom(ParseState *pstate,
33663414
cstate->cur_attval=NULL;
33673415

33683416
/*
3369-
* Set up variables to avoid per-attribute overhead. attribute_buf is
3370-
* used in both text and binary modes, but we use line_buf and raw_buf
3417+
* Set up variables to avoid per-attribute overhead. attribute_buf and
3418+
* raw_buf are used in both text and binary modes, but we use line_buf
33713419
* only in text mode.
33723420
*/
33733421
initStringInfo(&cstate->attribute_buf);
3422+
cstate->raw_buf= (char*)palloc(RAW_BUF_SIZE+1);
3423+
cstate->raw_buf_index=cstate->raw_buf_len=0;
33743424
if (!cstate->binary)
33753425
{
33763426
initStringInfo(&cstate->line_buf);
33773427
cstate->line_buf_converted= false;
3378-
cstate->raw_buf= (char*)palloc(RAW_BUF_SIZE+1);
3379-
cstate->raw_buf_index=cstate->raw_buf_len=0;
33803428
}
33813429

33823430
/* Assign range table, we'll need it in CopyFrom. */
@@ -3527,7 +3575,7 @@ BeginCopyFrom(ParseState *pstate,
35273575
int32tmp;
35283576

35293577
/* Signature */
3530-
if (CopyGetData(cstate,readSig,11,11)!=11||
3578+
if (CopyReadBinaryData(cstate,readSig,11)!=11||
35313579
memcmp(readSig,BinarySignature,11)!=0)
35323580
ereport(ERROR,
35333581
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
@@ -3555,7 +3603,7 @@ BeginCopyFrom(ParseState *pstate,
35553603
/* Skip extension header, if present */
35563604
while (tmp-->0)
35573605
{
3558-
if (CopyGetData(cstate,readSig,1,1)!=1)
3606+
if (CopyReadBinaryData(cstate,readSig,1)!=1)
35593607
ereport(ERROR,
35603608
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
35613609
errmsg("invalid COPY file header (wrong length)")));
@@ -3771,7 +3819,7 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext,
37713819
chardummy;
37723820

37733821
if (cstate->copy_dest!=COPY_OLD_FE&&
3774-
CopyGetData(cstate,&dummy,1,1)>0)
3822+
CopyReadBinaryData(cstate,&dummy,1)>0)
37753823
ereport(ERROR,
37763824
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
37773825
errmsg("received copy data after EOF marker")));
@@ -4744,8 +4792,8 @@ CopyReadBinaryAttribute(CopyState cstate, FmgrInfo *flinfo,
47444792
resetStringInfo(&cstate->attribute_buf);
47454793

47464794
enlargeStringInfo(&cstate->attribute_buf,fld_size);
4747-
if (CopyGetData(cstate,cstate->attribute_buf.data,
4748-
fld_size,fld_size)!=fld_size)
4795+
if (CopyReadBinaryData(cstate,cstate->attribute_buf.data,
4796+
fld_size)!=fld_size)
47494797
ereport(ERROR,
47504798
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
47514799
errmsg("unexpected EOF in COPY data")));

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp