Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit0eceaaf

Browse files
committed
Modify COPY FROM to match the null-value string against the column value
before it is de-backslashed, not after. This allows the null string \Nto be reliably distinguished from the data value \N (which must berepresented as \\N). Per bug report from Manfred Koizar ... but it'samazing this hasn't been reported before ...Also, be consistent about encoding conversion for null string: the formspecified in the command is in the server encoding, but what is sentto/from client must be in client encoding. This never worked quiteright before either.
1 parent351adb8 commit0eceaaf

File tree

2 files changed

+62
-30
lines changed

2 files changed

+62
-30
lines changed

‎doc/src/sgml/ref/copy.sgml

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<!--
2-
$Header: /cvsroot/pgsql/doc/src/sgml/ref/copy.sgml,v 1.52 2003/09/30 01:56:11 tgl Exp $
2+
$Header: /cvsroot/pgsql/doc/src/sgml/ref/copy.sgml,v 1.53 2003/10/06 02:38:53 tgl Exp $
33
PostgreSQL documentation
44
-->
55

@@ -257,7 +257,7 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
257257
Columns in a row are separated by the delimiter character.
258258
The column values themselves are strings generated by the
259259
output function, or acceptable to the input function, of each
260-
attribute's data type. The specified null-value string is used in
260+
attribute's data type. The specified null string is used in
261261
place of columns that are null.
262262
<command>COPY FROM</command> will raise an error if any line of the
263263
input file contains more or fewer columns than are expected.
@@ -282,6 +282,15 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
282282
newline, carriage return, and the current delimiter character.
283283
</para>
284284

285+
<para>
286+
The specified null string is sent by <command>COPY TO</command> without
287+
adding any backslashes; conversely, <command>COPY FROM</command> matches
288+
the input against the null string before removing backslashes. Therefore,
289+
a null string such as <literal>\N</literal> cannot be confused with
290+
the actual data value <literal>\N</literal> (which would be represented
291+
as <literal>\\N</literal>).
292+
</para>
293+
285294
<para>
286295
The following special backslash sequences are recognized by
287296
<command>COPY FROM</command>:
@@ -335,10 +344,12 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
335344
</para>
336345

337346
<para>
338-
Never put a backslash before a data character <literal>N</> or period
339-
(<literal>.</>). Such pairs will be mistaken for the default null string
340-
or the end-of-data marker, respectively. Any other backslashed character
341-
that is not mentioned in the above table will be taken to represent itself.
347+
Any other backslashed character that is not mentioned in the above table
348+
will be taken to represent itself. However, beware of adding backslashes
349+
unnecessarily, since that might accidentally produce a string matching the
350+
end-of-data marker (<literal>\.</>) or the null string (<literal>\N</> by
351+
default). These strings will be recognized before any other backslash
352+
processing is done.
342353
</para>
343354

344355
<para>

‎src/backend/commands/copy.c

Lines changed: 45 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/commands/copy.c,v 1.212 2003/09/29 22:06:40 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/commands/copy.c,v 1.213 2003/10/06 02:38:53 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -122,7 +122,7 @@ static StringInfoData attribute_buf;
122122
* to server encoding, and then extract individual attribute fields into
123123
* attribute_buf. (We used to have CopyReadAttribute read the input source
124124
* directly, but that caused a lot of encoding issues and unnecessary logic
125-
* complexity).
125+
* complexity.)
126126
*/
127127
staticStringInfoDataline_buf;
128128
staticboolline_buf_converted;
@@ -133,7 +133,8 @@ static void CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
133133
staticvoidCopyFrom(Relationrel,List*attnumlist,boolbinary,booloids,
134134
char*delim,char*null_print);
135135
staticboolCopyReadLine(void);
136-
staticchar*CopyReadAttribute(constchar*delim,CopyReadResult*result);
136+
staticchar*CopyReadAttribute(constchar*delim,constchar*null_print,
137+
CopyReadResult*result,bool*isnull);
137138
staticDatumCopyReadBinaryAttribute(intcolumn_no,FmgrInfo*flinfo,
138139
Oidtypelem,bool*isnull);
139140
staticvoidCopyAttributeOut(char*string,char*delim);
@@ -1014,6 +1015,17 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
10141015
tmp=0;
10151016
CopySendInt32(tmp);
10161017
}
1018+
else
1019+
{
1020+
/*
1021+
* For non-binary copy, we need to convert null_print to client
1022+
* encoding, because it will be sent directly with CopySendString.
1023+
*/
1024+
if (server_encoding!=client_encoding)
1025+
null_print= (char*)
1026+
pg_server_to_client((unsignedchar*)null_print,
1027+
strlen(null_print));
1028+
}
10171029

10181030
mySnapshot=CopyQuerySnapshot();
10191031

@@ -1441,9 +1453,10 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
14411453

14421454
if (file_has_oids)
14431455
{
1444-
string=CopyReadAttribute(delim,&result);
1456+
string=CopyReadAttribute(delim,null_print,
1457+
&result,&isnull);
14451458

1446-
if (strcmp(string,null_print)==0)
1459+
if (isnull)
14471460
ereport(ERROR,
14481461
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
14491462
errmsg("null OID in COPY data")));
@@ -1478,9 +1491,10 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
14781491
errmsg("missing data for column \"%s\"",
14791492
NameStr(attr[m]->attname))));
14801493

1481-
string=CopyReadAttribute(delim,&result);
1494+
string=CopyReadAttribute(delim,null_print,
1495+
&result,&isnull);
14821496

1483-
if (strcmp(string,null_print)==0)
1497+
if (isnull)
14841498
{
14851499
/* we read an SQL NULL, no need to do anything */
14861500
}
@@ -1880,25 +1894,33 @@ CopyReadLine(void)
18801894
returnresult;
18811895
}
18821896

1883-
/*
1897+
/*----------
18841898
* Read the value of a single attribute, performing de-escaping as needed.
18851899
*
1900+
* delim is the column delimiter string (must be just one byte for now).
1901+
* null_print is the null marker string. Note that this is compared to
1902+
* the pre-de-escaped input string.
1903+
*
18861904
* *result is set to indicate what terminated the read:
18871905
*NORMAL_ATTR:column delimiter
18881906
*END_OF_LINE:end of line
18891907
* In either case, the string read up to the terminator is returned.
18901908
*
1891-
*Note: This function does not care about SQL NULL values -- it
1892-
*isthecaller's responsibility to check if thereturned string
1893-
*matches what the user specified fortheSQL NULL value.
1894-
*
1895-
* delim is the column delimiter string.
1909+
**isnull is set true or false depending on whether the input matched
1910+
* thenull marker. Note that thecaller cannot check this since the
1911+
*returned string will bethepost-de-escaping equivalent, which may
1912+
* look the same as some valid data string.
1913+
*----------
18961914
*/
18971915
staticchar*
1898-
CopyReadAttribute(constchar*delim,CopyReadResult*result)
1916+
CopyReadAttribute(constchar*delim,constchar*null_print,
1917+
CopyReadResult*result,bool*isnull)
18991918
{
19001919
charc;
19011920
chardelimc=delim[0];
1921+
intstart_cursor=line_buf.cursor;
1922+
intend_cursor;
1923+
intinput_len;
19021924

19031925
/* reset attribute_buf to empty */
19041926
attribute_buf.len=0;
@@ -1909,6 +1931,7 @@ CopyReadAttribute(const char *delim, CopyReadResult *result)
19091931

19101932
for (;;)
19111933
{
1934+
end_cursor=line_buf.cursor;
19121935
if (line_buf.cursor >=line_buf.len)
19131936
break;
19141937
c=line_buf.data[line_buf.cursor++];
@@ -1957,16 +1980,6 @@ CopyReadAttribute(const char *delim, CopyReadResult *result)
19571980
c=val&0377;
19581981
}
19591982
break;
1960-
1961-
/*
1962-
* This is a special hack to parse `\N' as
1963-
* <backslash-N> rather then just 'N' to provide
1964-
* compatibility with the default NULL output. -- pe
1965-
*/
1966-
case'N':
1967-
appendStringInfoCharMacro(&attribute_buf,'\\');
1968-
c='N';
1969-
break;
19701983
case'b':
19711984
c='\b';
19721985
break;
@@ -1993,6 +2006,14 @@ CopyReadAttribute(const char *delim, CopyReadResult *result)
19932006
appendStringInfoCharMacro(&attribute_buf,c);
19942007
}
19952008

2009+
/* check whether raw input matched null marker */
2010+
input_len=end_cursor-start_cursor;
2011+
if (input_len==strlen(null_print)&&
2012+
strncmp(&line_buf.data[start_cursor],null_print,input_len)==0)
2013+
*isnull= true;
2014+
else
2015+
*isnull= false;
2016+
19962017
returnattribute_buf.data;
19972018
}
19982019

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp