Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit5fdd941

Browse files
committed
Handle carriage returns and line feeds in COPY CSV mode.
Andrew Dunstan
1 parent06a61d6 commit5fdd941

File tree

1 file changed

+120
-76
lines changed

1 file changed

+120
-76
lines changed

‎src/backend/commands/copy.c

Lines changed: 120 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.236 2004/12/31 21:59:41 pgsql Exp $
11+
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.237 2005/03/12 05:41:34 momjian Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -98,7 +98,6 @@ static bool fe_eof;/* true if detected end of copy data */
9898
staticEolTypeeol_type;/* EOL type of input */
9999
staticintclient_encoding;/* remote side's character encoding */
100100
staticintserver_encoding;/* local encoding */
101-
staticboolembedded_line_warning;
102101

103102
/* these are just for error messages, see copy_in_error_callback */
104103
staticboolcopy_binary;/* is it a binary copy? */
@@ -139,7 +138,7 @@ static void CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
139138
staticvoidCopyFrom(Relationrel,List*attnumlist,boolbinary,booloids,
140139
char*delim,char*null_print,boolcsv_mode,char*quote,char*escape,
141140
List*force_notnull_atts);
142-
staticboolCopyReadLine(void);
141+
staticboolCopyReadLine(char*quote,char*escape);
143142
staticchar*CopyReadAttribute(constchar*delim,constchar*null_print,
144143
CopyReadResult*result,bool*isnull);
145144
staticchar*CopyReadAttributeCSV(constchar*delim,constchar*null_print,
@@ -1191,7 +1190,6 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
11911190
attr=tupDesc->attrs;
11921191
num_phys_attrs=tupDesc->natts;
11931192
attr_count=list_length(attnumlist);
1194-
embedded_line_warning= false;
11951193

11961194
/*
11971195
* Get info about the columns we need to process.
@@ -1718,7 +1716,8 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
17181716
ListCell*cur;
17191717

17201718
/* Actually read the line into memory here */
1721-
done=CopyReadLine();
1719+
done=csv_mode ?
1720+
CopyReadLine(quote,escape) :CopyReadLine(NULL,NULL);
17221721

17231722
/*
17241723
* EOF at start of line means we're done. If we see EOF after
@@ -2006,7 +2005,7 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
20062005
* by newline.
20072006
*/
20082007
staticbool
2009-
CopyReadLine(void)
2008+
CopyReadLine(char*quote,char*escape)
20102009
{
20112010
boolresult;
20122011
boolchange_encoding= (client_encoding!=server_encoding);
@@ -2015,6 +2014,19 @@ CopyReadLine(void)
20152014
intj;
20162015
unsignedchars[2];
20172016
char*cvt;
2017+
boolin_quote= false,last_was_esc= false,csv_mode= false;
2018+
charquotec='\0',escapec='\0';
2019+
2020+
if (quote)
2021+
{
2022+
csv_mode= true;
2023+
quotec=quote[0];
2024+
escapec=escape[0];
2025+
/* ignore special escape processing if it's the same as quotec */
2026+
if (quotec==escapec)
2027+
escapec='\0';
2028+
}
2029+
20182030

20192031
s[1]=0;
20202032

@@ -2031,11 +2043,20 @@ CopyReadLine(void)
20312043

20322044
/*
20332045
* In this loop we only care for detecting newlines (\r and/or \n) and
2034-
* the end-of-copy marker (\.). For backwards compatibility we allow
2046+
* the end-of-copy marker (\.).
2047+
*
2048+
* In Text mode, for backwards compatibility we allow
20352049
* backslashes to escape newline characters. Backslashes other than
20362050
* the end marker get put into the line_buf, since CopyReadAttribute
2037-
* does its own escape processing.These four characters, and only
2038-
* these four, are assumed the same in frontend and backend encodings.
2051+
* does its own escape processing.
2052+
*
2053+
* In CSV mode, CR and NL inside a quoted field are just part of the
2054+
* data value and are put in line_buf. We keep just enough state
2055+
* to know if we are currently in a quoted field or not.
2056+
*
2057+
* These four characters, and only these four, are assumed the same in
2058+
* frontend and backend encodings.
2059+
*
20392060
* We do not assume that second and later bytes of a frontend
20402061
* multibyte character couldn't look like ASCII characters.
20412062
*/
@@ -2047,13 +2068,49 @@ CopyReadLine(void)
20472068
result= true;
20482069
break;
20492070
}
2050-
if (c=='\r')
2071+
2072+
if (csv_mode)
2073+
{
2074+
/*
2075+
* Dealing with quotes and escapes here is mildly tricky. If the
2076+
* quote char is also the escape char, there's no problem - we
2077+
* just use the char as a toggle. If they are different, we need
2078+
* to ensure that we only take account of an escape inside a quoted
2079+
* field and immediately preceding a quote char, and not the
2080+
* second in an escape-escape sequence.
2081+
*/
2082+
2083+
if (in_quote&&c==escapec)
2084+
last_was_esc= !last_was_esc;
2085+
if (c==quotec&& !last_was_esc)
2086+
in_quote= !in_quote;
2087+
if (c!=escapec)
2088+
last_was_esc= false;
2089+
2090+
/*
2091+
* updating the line count for embedded CR and/or LF chars is
2092+
* necessarily a little fragile - this test is probably about
2093+
* the best we can do.
2094+
*/
2095+
if (in_quote&&c== (eol_type==EOL_CR ?'\r' :'\n'))
2096+
copy_lineno++;
2097+
}
2098+
2099+
if (!in_quote&&c=='\r')
20512100
{
20522101
if (eol_type==EOL_NL)
2053-
ereport(ERROR,
2054-
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2055-
errmsg("literal carriage return found in data"),
2056-
errhint("Use \"\\r\" to represent carriage return.")));
2102+
{
2103+
if (!csv_mode)
2104+
ereport(ERROR,
2105+
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2106+
errmsg("literal carriage return found in data"),
2107+
errhint("Use \"\\r\" to represent carriage return.")));
2108+
else
2109+
ereport(ERROR,
2110+
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2111+
errmsg("unquoted carriage return found in CSV data"),
2112+
errhint("Use quoted CSV field to represent carriage return.")));
2113+
}
20572114
/* Check for \r\n on first line, _and_ handle \r\n. */
20582115
if (eol_type==EOL_UNKNOWN||eol_type==EOL_CRNL)
20592116
{
@@ -2068,10 +2125,19 @@ CopyReadLine(void)
20682125
{
20692126
/* found \r, but no \n */
20702127
if (eol_type==EOL_CRNL)
2071-
ereport(ERROR,
2072-
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2073-
errmsg("literal carriage return found in data"),
2074-
errhint("Use \"\\r\" to represent carriage return.")));
2128+
{
2129+
if (!csv_mode)
2130+
ereport(ERROR,
2131+
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2132+
errmsg("literal carriage return found in data"),
2133+
errhint("Use \"\\r\" to represent carriage return.")));
2134+
else
2135+
ereport(ERROR,
2136+
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2137+
errmsg("unquoted carriage return found in data"),
2138+
errhint("Use quoted CSV field to represent carriage return.")));
2139+
2140+
}
20752141

20762142
/*
20772143
* if we got here, it is the first line and we didn't
@@ -2083,26 +2149,47 @@ CopyReadLine(void)
20832149
}
20842150
break;
20852151
}
2086-
if (c=='\n')
2152+
if (!in_quote&&c=='\n')
20872153
{
20882154
if (eol_type==EOL_CR||eol_type==EOL_CRNL)
2089-
ereport(ERROR,
2090-
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2091-
errmsg("literal newline found in data"),
2092-
errhint("Use \"\\n\" to represent newline.")));
2155+
{
2156+
if (!csv_mode)
2157+
ereport(ERROR,
2158+
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2159+
errmsg("literal newline found in data"),
2160+
errhint("Use \"\\n\" to represent newline.")));
2161+
else
2162+
ereport(ERROR,
2163+
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
2164+
errmsg("unquoted newline found in data"),
2165+
errhint("Use quoted CSV field to represent newline.")));
2166+
2167+
}
20932168
eol_type=EOL_NL;
20942169
break;
20952170
}
2096-
if (c=='\\')
2171+
2172+
if ((line_buf.len==0|| !csv_mode)&&c=='\\')
20972173
{
2098-
c=CopyGetChar();
2099-
if (c==EOF)
2174+
intc2;
2175+
2176+
if (csv_mode)
2177+
c2=CopyPeekChar();
2178+
else
2179+
c2=c=CopyGetChar();
2180+
2181+
if (c2==EOF)
21002182
{
21012183
result= true;
2184+
if (csv_mode)
2185+
CopyDonePeek(c2, true);
21022186
break;
21032187
}
2104-
if (c=='.')
2188+
if (c2=='.')
21052189
{
2190+
if (csv_mode)
2191+
CopyDonePeek(c2, true);/* allow keep calling GetChar() */
2192+
21062193
if (eol_type==EOL_CRNL)
21072194
{
21082195
c=CopyGetChar();
@@ -2140,8 +2227,12 @@ CopyReadLine(void)
21402227
result= true;/* report EOF */
21412228
break;
21422229
}
2143-
/* not EOF mark, so emit \ and following char literally */
2144-
appendStringInfoCharMacro(&line_buf,'\\');
2230+
2231+
if (csv_mode)
2232+
CopyDonePeek(c2, false);/* not a dot, so put it back */
2233+
else
2234+
/* not EOF mark, so emit \ and following char literally */
2235+
appendStringInfoCharMacro(&line_buf,'\\');
21452236
}
21462237

21472238
appendStringInfoCharMacro(&line_buf,c);
@@ -2369,34 +2460,6 @@ CopyReadAttributeCSV(const char *delim, const char *null_print, char *quote,
23692460

23702461
for (;;)
23712462
{
2372-
/* handle multiline quoted fields */
2373-
if (in_quote&&line_buf.cursor >=line_buf.len)
2374-
{
2375-
booldone;
2376-
2377-
switch (eol_type)
2378-
{
2379-
caseEOL_NL:
2380-
appendStringInfoString(&attribute_buf,"\n");
2381-
break;
2382-
caseEOL_CR:
2383-
appendStringInfoString(&attribute_buf,"\r");
2384-
break;
2385-
caseEOL_CRNL:
2386-
appendStringInfoString(&attribute_buf,"\r\n");
2387-
break;
2388-
caseEOL_UNKNOWN:
2389-
/* shouldn't happen - just keep going */
2390-
break;
2391-
}
2392-
2393-
copy_lineno++;
2394-
done=CopyReadLine();
2395-
if (done&&line_buf.len==0)
2396-
break;
2397-
start_cursor=line_buf.cursor;
2398-
}
2399-
24002463
end_cursor=line_buf.cursor;
24012464
if (line_buf.cursor >=line_buf.len)
24022465
break;
@@ -2629,25 +2692,6 @@ CopyAttributeOutCSV(char *server_string, char *delim, char *quote,
26292692
!use_quote&& (c=*test_string)!='\0';
26302693
test_string+=mblen)
26312694
{
2632-
/*
2633-
* We don't know here what the surrounding line end characters
2634-
* might be. It might not even be under postgres' control. So
2635-
* we simply warn on ANY embedded line ending character.
2636-
*
2637-
* This warning will disappear when we make line parsing field-aware,
2638-
* so that we can reliably read in embedded line ending characters
2639-
* regardless of the file's line-end context.
2640-
*
2641-
*/
2642-
2643-
if (!embedded_line_warning&& (c=='\n'||c=='\r') )
2644-
{
2645-
embedded_line_warning= true;
2646-
elog(WARNING,
2647-
"CSV fields with embedded linefeed or carriage return "
2648-
"characters might not be able to be reimported");
2649-
}
2650-
26512695
if (c==delimc||c==quotec||c=='\n'||c=='\r')
26522696
use_quote= true;
26532697
if (!same_encoding)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp