Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit072132f

Browse files
committed
Add header matching mode to COPY FROM
COPY FROM supports the HEADER option to silently discard the headerline from a CSV or text file. It is possible to load by mistake afile that matches the expected format, for example, if two textcolumns have been swapped, resulting in garbage in the database.This adds a new option value HEADER MATCH that checks the column namesin the header line against the actual column names and errors out ifthey do not match.Author: Rémi Lapeyre <remi.lapeyre@lenstra.fr>Reviewed-by: Daniel Verite <daniel@manitou-mail.org>Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com>Discussion:https://www.postgresql.org/message-id/flat/CAF1-J-0PtCWMeLtswwGV2M70U26n4g33gpe1rcKQqe6wVQDrFA@mail.gmail.com
1 parentedcedcc commit072132f

File tree

8 files changed

+210
-8
lines changed

8 files changed

+210
-8
lines changed

‎contrib/file_fdw/expected/file_fdw.out

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,21 @@ CREATE FOREIGN TABLE agg_bad (
113113
) SERVER file_server
114114
OPTIONS (format 'csv', filename :'filename', header 'true', delimiter ';', quote '@', escape '"', null '');
115115
ALTER FOREIGN TABLE agg_bad ADD CHECK (a >= 0);
116+
-- test header matching
117+
\set filename :abs_srcdir '/data/list1.csv'
118+
CREATE FOREIGN TABLE header_match ("1" int, foo text) SERVER file_server
119+
OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match');
120+
SELECT * FROM header_match;
121+
1 | foo
122+
---+-----
123+
1 | bar
124+
(1 row)
125+
126+
CREATE FOREIGN TABLE header_doesnt_match (a int, foo text) SERVER file_server
127+
OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match');
128+
SELECT * FROM header_doesnt_match; -- ERROR
129+
ERROR: column name mismatch in header line field 1: got "1", expected "a"
130+
CONTEXT: COPY header_doesnt_match, line 1: "1,foo"
116131
-- per-column options tests
117132
\set filename :abs_srcdir '/data/text.csv'
118133
CREATE FOREIGN TABLE text_csv (
@@ -464,12 +479,14 @@ SET ROLE regress_file_fdw_superuser;
464479
-- cleanup
465480
RESET ROLE;
466481
DROP EXTENSION file_fdw CASCADE;
467-
NOTICE: drop cascades to7 other objects
482+
NOTICE: drop cascades to9 other objects
468483
DETAIL: drop cascades to server file_server
469484
drop cascades to user mapping for regress_file_fdw_superuser on server file_server
470485
drop cascades to user mapping for regress_no_priv_user on server file_server
471486
drop cascades to foreign table agg_text
472487
drop cascades to foreign table agg_csv
473488
drop cascades to foreign table agg_bad
489+
drop cascades to foreign table header_match
490+
drop cascades to foreign table header_doesnt_match
474491
drop cascades to foreign table text_csv
475492
DROP ROLE regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user;

‎contrib/file_fdw/sql/file_fdw.sql

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,15 @@ CREATE FOREIGN TABLE agg_bad (
103103
OPTIONS (format 'csv', filename :'filename', header 'true', delimiter ';', quote '@', escape '"', null'');
104104
ALTER FOREIGN TABLE agg_bad ADD CHECK (a >= 0);
105105
106+
-- test header matching
107+
\set filename :abs_srcdir'/data/list1.csv'
108+
CREATE FOREIGN TABLE header_match ("1" int, foo text) SERVER file_server
109+
OPTIONS (format'csv', filename :'filename', delimiter',', header'match');
110+
SELECT * FROM header_match;
111+
CREATE FOREIGN TABLE header_doesnt_match (a int, foo text) SERVER file_server
112+
OPTIONS (format'csv', filename :'filename', delimiter',', header'match');
113+
SELECT * FROM header_doesnt_match; -- ERROR
114+
106115
-- per-column options tests
107116
\set filename :abs_srcdir'/data/text.csv'
108117
CREATE FOREIGN TABLE text_csv (

‎doc/src/sgml/ref/copy.sgml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
3636
FREEZE [ <replaceable class="parameter">boolean</replaceable> ]
3737
DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
3838
NULL '<replaceable class="parameter">null_string</replaceable>'
39-
HEADER [ <replaceable class="parameter">boolean</replaceable> ]
39+
HEADER [ <replaceable class="parameter">boolean</replaceable>| <literal>match</literal>]
4040
QUOTE '<replaceable class="parameter">quote_character</replaceable>'
4141
ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
4242
FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
@@ -276,7 +276,11 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
276276
<para>
277277
Specifies that the file contains a header line with the names of each
278278
column in the file. On output, the first line contains the column
279-
names from the table, and on input, the first line is ignored.
279+
names from the table. On input, the first line is discarded when this
280+
option is set to <literal>true</literal> (or equivalent Boolean value).
281+
If this option is set to <literal>match</literal>, the number and names
282+
of the columns in the header line must match the actual column names of
283+
the table, otherwise an error is raised.
280284
This option is not allowed when using <literal>binary</literal> format.
281285
</para>
282286
</listitem>

‎src/backend/commands/copy.c

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,64 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
313313
table_close(rel,NoLock);
314314
}
315315

316+
/*
317+
* Extract a CopyHeaderChoice value from a DefElem. This is like
318+
* defGetBoolean() but also accepts the special value "match".
319+
*/
320+
staticCopyHeaderChoice
321+
defGetCopyHeaderChoice(DefElem*def)
322+
{
323+
/*
324+
* If no parameter given, assume "true" is meant.
325+
*/
326+
if (def->arg==NULL)
327+
returnCOPY_HEADER_TRUE;
328+
329+
/*
330+
* Allow 0, 1, "true", "false", "on", "off", or "match".
331+
*/
332+
switch (nodeTag(def->arg))
333+
{
334+
caseT_Integer:
335+
switch (intVal(def->arg))
336+
{
337+
case0:
338+
returnCOPY_HEADER_FALSE;
339+
case1:
340+
returnCOPY_HEADER_TRUE;
341+
default:
342+
/* otherwise, error out below */
343+
break;
344+
}
345+
break;
346+
default:
347+
{
348+
char*sval=defGetString(def);
349+
350+
/*
351+
* The set of strings accepted here should match up with the
352+
* grammar's opt_boolean_or_string production.
353+
*/
354+
if (pg_strcasecmp(sval,"true")==0)
355+
returnCOPY_HEADER_TRUE;
356+
if (pg_strcasecmp(sval,"false")==0)
357+
returnCOPY_HEADER_FALSE;
358+
if (pg_strcasecmp(sval,"on")==0)
359+
returnCOPY_HEADER_TRUE;
360+
if (pg_strcasecmp(sval,"off")==0)
361+
returnCOPY_HEADER_FALSE;
362+
if (pg_strcasecmp(sval,"match")==0)
363+
returnCOPY_HEADER_MATCH;
364+
}
365+
break;
366+
}
367+
ereport(ERROR,
368+
(errcode(ERRCODE_SYNTAX_ERROR),
369+
errmsg("%s requires a Boolean value or \"match\"",
370+
def->defname)));
371+
returnCOPY_HEADER_FALSE;/* keep compiler quiet */
372+
}
373+
316374
/*
317375
* Process the statement option list for COPY.
318376
*
@@ -394,7 +452,7 @@ ProcessCopyOptions(ParseState *pstate,
394452
if (header_specified)
395453
errorConflictingDefElem(defel,pstate);
396454
header_specified= true;
397-
opts_out->header_line=defGetBoolean(defel);
455+
opts_out->header_line=defGetCopyHeaderChoice(defel);
398456
}
399457
elseif (strcmp(defel->defname,"quote")==0)
400458
{

‎src/backend/commands/copyfromparse.c

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
#include"miscadmin.h"
7373
#include"pgstat.h"
7474
#include"port/pg_bswap.h"
75+
#include"utils/builtins.h"
7576
#include"utils/memutils.h"
7677
#include"utils/rel.h"
7778

@@ -758,12 +759,58 @@ NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
758759
/* only available for text or csv input */
759760
Assert(!cstate->opts.binary);
760761

761-
/* on inputjust throw the header lineaway */
762+
/* on inputcheck that the header lineis correct if needed */
762763
if (cstate->cur_lineno==0&&cstate->opts.header_line)
763764
{
765+
ListCell*cur;
766+
TupleDesctupDesc;
767+
768+
tupDesc=RelationGetDescr(cstate->rel);
769+
764770
cstate->cur_lineno++;
765-
if (CopyReadLine(cstate))
766-
return false;/* done */
771+
done=CopyReadLine(cstate);
772+
773+
if (cstate->opts.header_line==COPY_HEADER_MATCH)
774+
{
775+
intfldnum;
776+
777+
if (cstate->opts.csv_mode)
778+
fldct=CopyReadAttributesCSV(cstate);
779+
else
780+
fldct=CopyReadAttributesText(cstate);
781+
782+
if (fldct!=list_length(cstate->attnumlist))
783+
ereport(ERROR,
784+
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
785+
errmsg("wrong number of fields in header line: field count is %d, expected %d",
786+
fldct,list_length(cstate->attnumlist))));
787+
788+
fldnum=0;
789+
foreach(cur,cstate->attnumlist)
790+
{
791+
intattnum=lfirst_int(cur);
792+
char*colName=cstate->raw_fields[attnum-1];
793+
Form_pg_attributeattr=TupleDescAttr(tupDesc,attnum-1);
794+
795+
fldnum++;
796+
797+
if (colName==NULL)
798+
ereport(ERROR,
799+
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
800+
errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
801+
fldnum,cstate->opts.null_print,NameStr(attr->attname))));
802+
803+
if (namestrcmp(&attr->attname,colName)!=0) {
804+
ereport(ERROR,
805+
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
806+
errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
807+
fldnum,colName,NameStr(attr->attname))));
808+
}
809+
}
810+
}
811+
812+
if (done)
813+
return false;
767814
}
768815

769816
cstate->cur_lineno++;

‎src/include/commands/copy.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,17 @@
1919
#include"parser/parse_node.h"
2020
#include"tcop/dest.h"
2121

22+
/*
23+
* Represents whether a header line should be present, and whether it must
24+
* match the actual names (which implies "true").
25+
*/
26+
typedefenumCopyHeaderChoice
27+
{
28+
COPY_HEADER_FALSE=0,
29+
COPY_HEADER_TRUE,
30+
COPY_HEADER_MATCH,
31+
}CopyHeaderChoice;
32+
2233
/*
2334
* A struct to hold COPY options, in a parsed form. All of these are related
2435
* to formatting, except for 'freeze', which doesn't really belong here, but
@@ -32,7 +43,7 @@ typedef struct CopyFormatOptions
3243
boolbinary;/* binary format? */
3344
boolfreeze;/* freeze rows on loading? */
3445
boolcsv_mode;/* Comma Separated Value format? */
35-
boolheader_line;/* header line? */
46+
CopyHeaderChoiceheader_line;/* header line? */
3647
char*null_print;/* NULL marker string (server encoding!) */
3748
intnull_print_len;/* length of same */
3849
char*null_print_client;/* same converted to file encoding */

‎src/test/regress/expected/copy.out

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,3 +176,26 @@ INFO: progress: {"type": "FILE", "command": "COPY FROM", "relname": "tab_progre
176176
drop trigger check_after_tab_progress_reporting on tab_progress_reporting;
177177
drop function notice_after_tab_progress_reporting();
178178
drop table tab_progress_reporting;
179+
-- Test header matching feature
180+
create table header_copytest (
181+
a int,
182+
b int,
183+
c text
184+
);
185+
copy header_copytest from stdin with (header wrong_choice);
186+
ERROR: header requires a Boolean value or "match"
187+
copy header_copytest from stdin with (header match);
188+
copy header_copytest from stdin with (header match);
189+
ERROR: column name mismatch in header line field 3: got null value ("\N"), expected "c"
190+
CONTEXT: COPY header_copytest, line 1: "ab\N"
191+
copy header_copytest from stdin with (header match);
192+
ERROR: wrong number of fields in header line: field count is 2, expected 3
193+
CONTEXT: COPY header_copytest, line 1: "ab"
194+
copy header_copytest from stdin with (header match);
195+
ERROR: wrong number of fields in header line: field count is 4, expected 3
196+
CONTEXT: COPY header_copytest, line 1: "abcd"
197+
copy header_copytest from stdin with (header match);
198+
ERROR: column name mismatch in header line field 3: got "d", expected "c"
199+
CONTEXT: COPY header_copytest, line 1: "abd"
200+
copy header_copytest from stdin with (header match, format csv);
201+
drop table header_copytest;

‎src/test/regress/sql/copy.sql

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,3 +197,36 @@ copy tab_progress_reporting from :'filename'
197197
droptrigger check_after_tab_progress_reportingon tab_progress_reporting;
198198
dropfunction notice_after_tab_progress_reporting();
199199
droptable tab_progress_reporting;
200+
201+
-- Test header matching feature
202+
createtableheader_copytest (
203+
aint,
204+
bint,
205+
ctext
206+
);
207+
copy header_copytestfrom stdin with (header wrong_choice);
208+
copy header_copytestfrom stdin with (header match);
209+
abc
210+
12foo
211+
\.
212+
copy header_copytestfrom stdin with (header match);
213+
ab\N
214+
12foo
215+
\.
216+
copy header_copytestfrom stdin with (header match);
217+
ab
218+
12
219+
\.
220+
copy header_copytestfrom stdin with (header match);
221+
abcd
222+
12foobar
223+
\.
224+
copy header_copytestfrom stdin with (header match);
225+
abd
226+
12foo
227+
\.
228+
copy header_copytestfrom stdin with (header match, format csv);
229+
a,b,c
230+
1,2,foo
231+
\.
232+
droptable header_copytest;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp