Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit3cba824

Browse files
committed
Add ENCODING option to COPY TO/FROM and file_fdw.
File encodings can be specified separately from client encoding. If not specified, client encoding is used for backward compatibility. Cases when the encoding doesn't match client encoding are slower than matched cases because we don't have conversion procs for other encodings. Performance improvement would be a future work. Original patch by Hitoshi Harada, and modified by me.
1 parent48d25ba commit3cba824

File tree

9 files changed

+119
-37
lines changed

9 files changed

+119
-37
lines changed

‎contrib/file_fdw/file_fdw.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ static struct FileFdwOption valid_options[] = {
5555
{"quote",ForeignTableRelationId },
5656
{"escape",ForeignTableRelationId },
5757
{"null",ForeignTableRelationId },
58+
{"encoding",ForeignTableRelationId },
5859

5960
/*
6061
* force_quote is not supported by file_fdw because it's for COPY TO.

‎doc/src/sgml/file-fdw.sgml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,17 @@
9797
</listitem>
9898
</varlistentry>
9999

100+
<varlistentry>
101+
<term><literal>encoding</literal></term>
102+
103+
<listitem>
104+
<para>
105+
Specifies the file's encoding.
106+
This is the same as <command>COPY</>'s <literal>ENCODING</literal> option.
107+
</para>
108+
</listitem>
109+
</varlistentry>
110+
100111
</variablelist>
101112

102113
<para>

‎doc/src/sgml/ref/copy.sgml

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
4040
QUOTE '<replaceable class="parameter">quote_character</replaceable>'
4141
ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
4242
FORCE_QUOTE { ( <replaceable class="parameter">column</replaceable> [, ...] ) | * }
43-
FORCE_NOT_NULL ( <replaceable class="parameter">column</replaceable> [, ...] )
43+
FORCE_NOT_NULL ( <replaceable class="parameter">column</replaceable> [, ...] ) |
44+
ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
4445
</synopsis>
4546
</refsynopsisdiv>
4647

@@ -282,6 +283,18 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
282283
</listitem>
283284
</varlistentry>
284285

286+
<varlistentry>
287+
<term><literal>ENCODING</></term>
288+
<listitem>
289+
<para>
290+
Specifies that the file is encoded in the <replaceable
291+
class="parameter">encoding_name</replaceable>. If this option is
292+
omitted, the current client encoding is used. See the Notes below
293+
for more details.
294+
</para>
295+
</listitem>
296+
</varlistentry>
297+
285298
</variablelist>
286299
</refsect1>
287300

@@ -377,8 +390,9 @@ COPY <replaceable class="parameter">count</replaceable>
377390
</para>
378391

379392
<para>
380-
Input data is interpreted according to the current client encoding,
381-
and output data is encoded in the current client encoding, even
393+
Input data is interpreted according to the <literal>ENCODING</literal>
394+
option or the current client encoding, and output data is encoded
395+
in <literal>ENCODING</literal> or the current client encoding, even
382396
if the data does not pass through the client but is read from or
383397
written to a file directly by the server.
384398
</para>

‎src/backend/commands/copy.c

Lines changed: 40 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,8 @@ typedef struct CopyStateData
9595
* dest == COPY_NEW_FE in COPY FROM */
9696
boolfe_eof;/* true if detected end of copy data */
9797
EolTypeeol_type;/* EOL type of input */
98-
intclient_encoding;/* remote side's character encoding */
99-
boolneed_transcoding;/*client encoding diff from server? */
98+
intfile_encoding;/* file or remote side's character encoding */
99+
boolneed_transcoding;/*file encoding diff from server? */
100100
boolencoding_embeds_ascii;/* ASCII can be non-first byte? */
101101

102102
/* parameters from the COPY command */
@@ -110,7 +110,7 @@ typedef struct CopyStateData
110110
boolheader_line;/* CSV header line? */
111111
char*null_print;/* NULL marker string (server encoding!) */
112112
intnull_print_len;/* length of same */
113-
char*null_print_client;/* same converted toclient encoding */
113+
char*null_print_client;/* same converted tofile encoding */
114114
char*delim;/* column delimiter (must be 1 byte) */
115115
char*quote;/* CSV quote char (must be 1 byte) */
116116
char*escape;/* CSV escape char (must be 1 byte) */
@@ -845,6 +845,8 @@ ProcessCopyOptions(CopyState cstate,
845845
if (cstate==NULL)
846846
cstate= (CopyStateData*)palloc0(sizeof(CopyStateData));
847847

848+
cstate->file_encoding=-1;
849+
848850
/* Extract options from the statement node tree */
849851
foreach(option,options)
850852
{
@@ -948,6 +950,19 @@ ProcessCopyOptions(CopyState cstate,
948950
errmsg("argument to option \"%s\" must be a list of column names",
949951
defel->defname)));
950952
}
953+
else if (strcmp(defel->defname, "encoding") == 0)
954+
{
955+
if (cstate->file_encoding >=0)
956+
ereport(ERROR,
957+
(errcode(ERRCODE_SYNTAX_ERROR),
958+
errmsg("conflicting or redundant options")));
959+
cstate->file_encoding=pg_char_to_encoding(defGetString(defel));
960+
if (cstate->file_encoding<0)
961+
ereport(ERROR,
962+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
963+
errmsg("argument to option \"%s\" must be a valid encoding name",
964+
defel->defname)));
965+
}
951966
else
952967
ereport(ERROR,
953968
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -1278,17 +1293,20 @@ BeginCopy(bool is_from,
12781293
}
12791294
}
12801295

1296+
/* Use client encoding when ENCODING option is not specified. */
1297+
if (cstate->file_encoding<0)
1298+
cstate->file_encoding=pg_get_client_encoding();
1299+
12811300
/*
1282-
* Set up encoding conversion info. Even if theclient and server
1283-
* encodings are the same, we must applypg_client_to_server() to validate
1301+
* Set up encoding conversion info. Even if the file and server
1302+
* encodings are the same, we must apply pg_any_to_server() to validate
12841303
* data in multibyte encodings.
12851304
*/
1286-
cstate->client_encoding=pg_get_client_encoding();
12871305
cstate->need_transcoding=
1288-
(cstate->client_encoding!=GetDatabaseEncoding()||
1306+
(cstate->file_encoding!=GetDatabaseEncoding()||
12891307
pg_database_encoding_max_length()>1);
12901308
/* See Multibyte encoding comment above */
1291-
cstate->encoding_embeds_ascii=PG_ENCODING_IS_CLIENT_ONLY(cstate->client_encoding);
1309+
cstate->encoding_embeds_ascii=PG_ENCODING_IS_CLIENT_ONLY(cstate->file_encoding);
12921310

12931311
cstate->copy_dest=COPY_FILE;/* default */
12941312

@@ -1526,12 +1544,13 @@ CopyTo(CopyState cstate)
15261544
else
15271545
{
15281546
/*
1529-
* For non-binary copy, we need to convert null_print toclient
1547+
* For non-binary copy, we need to convert null_print tofile
15301548
* encoding, because it will be sent directly with CopySendString.
15311549
*/
15321550
if (cstate->need_transcoding)
1533-
cstate->null_print_client=pg_server_to_client(cstate->null_print,
1534-
cstate->null_print_len);
1551+
cstate->null_print_client=pg_server_to_any(cstate->null_print,
1552+
cstate->null_print_len,
1553+
cstate->file_encoding);
15351554

15361555
/* if a header has been requested send the line */
15371556
if (cstate->header_line)
@@ -2608,8 +2627,9 @@ CopyReadLine(CopyState cstate)
26082627
{
26092628
char*cvt;
26102629

2611-
cvt=pg_client_to_server(cstate->line_buf.data,
2612-
cstate->line_buf.len);
2630+
cvt=pg_any_to_server(cstate->line_buf.data,
2631+
cstate->line_buf.len,
2632+
cstate->file_encoding);
26132633
if (cvt!=cstate->line_buf.data)
26142634
{
26152635
/* transfer converted data back to line_buf */
@@ -2854,7 +2874,7 @@ CopyReadLineText(CopyState cstate)
28542874
/* -----
28552875
* get next character
28562876
* Note: we do not change c so if it isn't \., we can fall
2857-
* through and continue processing forclient encoding.
2877+
* through and continue processing forfile encoding.
28582878
* -----
28592879
*/
28602880
c2=copy_raw_buf[raw_buf_ptr];
@@ -2968,7 +2988,7 @@ CopyReadLineText(CopyState cstate)
29682988

29692989
mblen_str[0]=c;
29702990
/* All our encodings only read the first byte to get the length */
2971-
mblen=pg_encoding_mblen(cstate->client_encoding,mblen_str);
2991+
mblen=pg_encoding_mblen(cstate->file_encoding,mblen_str);
29722992
IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(mblen-1);
29732993
IF_NEED_REFILL_AND_EOF_BREAK(mblen-1);
29742994
raw_buf_ptr+=mblen-1;
@@ -3467,7 +3487,7 @@ CopyAttributeOutText(CopyState cstate, char *string)
34673487
chardelimc=cstate->delim[0];
34683488

34693489
if (cstate->need_transcoding)
3470-
ptr=pg_server_to_client(string,strlen(string));
3490+
ptr=pg_server_to_any(string,strlen(string),cstate->file_encoding);
34713491
else
34723492
ptr=string;
34733493

@@ -3540,7 +3560,7 @@ CopyAttributeOutText(CopyState cstate, char *string)
35403560
start=ptr++;/* we include char in next run */
35413561
}
35423562
else if (IS_HIGHBIT_SET(c))
3543-
ptr+=pg_encoding_mblen(cstate->client_encoding,ptr);
3563+
ptr+=pg_encoding_mblen(cstate->file_encoding,ptr);
35443564
else
35453565
ptr++;
35463566
}
@@ -3627,7 +3647,7 @@ CopyAttributeOutCSV(CopyState cstate, char *string,
36273647
use_quote= true;
36283648

36293649
if (cstate->need_transcoding)
3630-
ptr=pg_server_to_client(string,strlen(string));
3650+
ptr=pg_server_to_any(string,strlen(string),cstate->file_encoding);
36313651
else
36323652
ptr=string;
36333653

@@ -3654,7 +3674,7 @@ CopyAttributeOutCSV(CopyState cstate, char *string,
36543674
break;
36553675
}
36563676
if (IS_HIGHBIT_SET(c)&&cstate->encoding_embeds_ascii)
3657-
tptr+=pg_encoding_mblen(cstate->client_encoding,tptr);
3677+
tptr+=pg_encoding_mblen(cstate->file_encoding,tptr);
36583678
else
36593679
tptr++;
36603680
}
@@ -3678,7 +3698,7 @@ CopyAttributeOutCSV(CopyState cstate, char *string,
36783698
start=ptr;/* we include char in next run */
36793699
}
36803700
if (IS_HIGHBIT_SET(c)&&cstate->encoding_embeds_ascii)
3681-
ptr+=pg_encoding_mblen(cstate->client_encoding,ptr);
3701+
ptr+=pg_encoding_mblen(cstate->file_encoding,ptr);
36823702
else
36833703
ptr++;
36843704
}

‎src/backend/parser/gram.y

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2236,6 +2236,10 @@ copy_opt_item:
22362236
{
22372237
$$ = makeDefElem("force_not_null", (Node *)$4);
22382238
}
2239+
| ENCODING Sconst
2240+
{
2241+
$$ = makeDefElem("encoding", (Node *)makeString($2));
2242+
}
22392243
;
22402244

22412245
/* The following exist for backward compatibility with very old versions*/

‎src/backend/utils/mb/mbutils.c

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -496,15 +496,26 @@ pg_encoding_max_length_sql(PG_FUNCTION_ARGS)
496496
*/
497497
char*
498498
pg_client_to_server(const char *s, int len)
499+
{
500+
Assert(ClientEncoding);
501+
502+
return pg_any_to_server(s, len, ClientEncoding->encoding);
503+
}
504+
505+
/*
506+
* convert any encoding to server encoding.
507+
*/
508+
char*
509+
pg_any_to_server(const char *s, int len, int encoding)
499510
{
500511
Assert(DatabaseEncoding);
501512
Assert(ClientEncoding);
502513

503514
if (len <=0)
504515
return (char*)s;
505516

506-
if (ClientEncoding->encoding==DatabaseEncoding->encoding||
507-
ClientEncoding->encoding==PG_SQL_ASCII)
517+
if (encoding==DatabaseEncoding->encoding||
518+
encoding==PG_SQL_ASCII)
508519
{
509520
/*
510521
* No conversion is needed, but we must still validate the data.
@@ -524,8 +535,8 @@ pg_client_to_server(const char *s, int len)
524535
* to the parser but we have no way to convert it.We compromise by
525536
* rejecting the data if it contains any non-ASCII characters.
526537
*/
527-
if (PG_VALID_BE_ENCODING(ClientEncoding->encoding))
528-
(void)pg_verify_mbstr(ClientEncoding->encoding,s,len, false);
538+
if (PG_VALID_BE_ENCODING(encoding))
539+
(void)pg_verify_mbstr(encoding,s,len, false);
529540
else
530541
{
531542
inti;
@@ -543,27 +554,46 @@ pg_client_to_server(const char *s, int len)
543554
return (char*)s;
544555
}
545556

546-
returnperform_default_encoding_conversion(s,len, true);
557+
if (ClientEncoding->encoding==encoding)
558+
returnperform_default_encoding_conversion(s,len, true);
559+
else
560+
return (char*)pg_do_encoding_conversion(
561+
(unsignedchar*)s,len,encoding,DatabaseEncoding->encoding);
547562
}
548563

549564
/*
550565
* convert server encoding to client encoding.
551566
*/
552567
char*
553568
pg_server_to_client(const char *s, int len)
569+
{
570+
Assert(ClientEncoding);
571+
572+
return pg_server_to_any(s, len, ClientEncoding->encoding);
573+
}
574+
575+
/*
576+
* convert server encoding to any encoding.
577+
*/
578+
char*
579+
pg_server_to_any(const char *s, int len, int encoding)
554580
{
555581
Assert(DatabaseEncoding);
556582
Assert(ClientEncoding);
557583

558584
if (len <=0)
559585
return (char*)s;
560586

561-
if (ClientEncoding->encoding==DatabaseEncoding->encoding||
562-
ClientEncoding->encoding==PG_SQL_ASCII||
587+
if (encoding==DatabaseEncoding->encoding||
588+
encoding==PG_SQL_ASCII||
563589
DatabaseEncoding->encoding==PG_SQL_ASCII)
564590
return (char*)s;/* assume data is valid */
565591

566-
returnperform_default_encoding_conversion(s,len, false);
592+
if (ClientEncoding->encoding==encoding)
593+
returnperform_default_encoding_conversion(s,len, false);
594+
else
595+
return (char*)pg_do_encoding_conversion(
596+
(unsignedchar*)s,len,DatabaseEncoding->encoding,encoding);
567597
}
568598

569599
/*

‎src/include/mb/pg_wchar.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,8 @@ extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len,
420420

421421
externchar*pg_client_to_server(constchar*s,intlen);
422422
externchar*pg_server_to_client(constchar*s,intlen);
423+
extern char *pg_any_to_server(const char *s, int len, int encoding);
424+
extern char *pg_server_to_any(const char *s, int len, int encoding);
423425

424426
externunsigned shortBIG5toCNS(unsigned shortbig5,unsignedchar*lc);
425427
externunsigned shortCNStoBIG5(unsigned shortcns,unsignedcharlc);

‎src/test/regress/expected/copy2.out

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,10 @@ CONTEXT: COPY x, line 1: "2001231\N\N"
4646
COPY x from stdin;
4747
ERROR: extra data after last expected column
4848
CONTEXT: COPY x, line 1: "20022324050607080"
49-
-- various COPY options: delimiters, oids, NULL string
49+
-- various COPY options: delimiters, oids, NULL string, encoding
5050
COPY x (b, c, d, e) from stdin with oids delimiter ',' null 'x';
5151
COPY x from stdin WITH DELIMITER AS ';' NULL AS '';
52-
COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X';
52+
COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X' ENCODING 'sql_ascii';
5353
-- check results of copy in
5454
SELECT * FROM x;
5555
a | b | c | d | e
@@ -187,7 +187,7 @@ COPY y TO stdout WITH CSV QUOTE '''' DELIMITER '|';
187187
Jackson, Sam|\h
188188
It is "perfect".|
189189
''|
190-
COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\';
190+
COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\' ENCODING 'sql_ascii';
191191
"Jackson, Sam","\\h"
192192
"It is \"perfect\".",""
193193
"",

‎src/test/regress/sql/copy2.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ COPY x from stdin;
7272
20022324050607080
7373
\.
7474

75-
-- various COPY options: delimiters, oids, NULL string
75+
-- various COPY options: delimiters, oids, NULL string, encoding
7676
COPY x (b, c, d, e)from stdin with oids delimiter','null'x';
7777
500000,x,45,80,90
7878
500001,x,\x,\\x,\\\x
@@ -83,7 +83,7 @@ COPY x from stdin WITH DELIMITER AS ';' NULL AS '';
8383
3000;;c;;
8484
\.
8585

86-
COPY xfrom stdin WITH DELIMITERAS':'NULLAS E'\\X';
86+
COPY xfrom stdin WITH DELIMITERAS':'NULLAS E'\\X' ENCODING'sql_ascii';
8787
4000:\X:C:\X:\X
8888
4001:1:empty::
8989
4002:2:null:\X:\X
@@ -127,7 +127,7 @@ INSERT INTO y VALUES ('', NULL);
127127

128128
COPY y TO stdout WITH CSV;
129129
COPY y TO stdout WITH CSV QUOTE'''' DELIMITER'|';
130-
COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\';
130+
COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\' ENCODING'sql_ascii';
131131
COPY y TO stdout WITH CSV FORCE QUOTE*;
132132

133133
-- Repeat above tests with new 9.0 option syntax

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp