Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit31f3817

Browse files
committed
Allow COPY FROM to filter data using WHERE conditions
Extends the COPY FROM command with a WHERE condition, which allows doingvarious types of filtering while importing the data (random sampling,condition on a data column, etc.). Until now such filtering requiredeither preprocessing of the input data, or importing all data and thenfiltering in the database. COPY FROM ... WHERE is an easy-to-use andlow-overhead alternative for most simple cases.Author: Surafel TemesgenReviewed-by: Tomas Vondra, Masahiko Sawada, Lim MyungkyuDiscussion:https://www.postgresql.org/message-id/flat/CALAY4q_DdpWDuB5-Zyi-oTtO2uSk8pmy+dupiRe3AvAc++1imA@mail.gmail.com
1 parentd6ef7fe commit31f3817

File tree

13 files changed

+175
-4
lines changed

13 files changed

+175
-4
lines changed

‎doc/src/sgml/ref/copy.sgml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ PostgreSQL documentation
2525
COPY <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ]
2626
FROM { '<replaceable class="parameter">filename</replaceable>' | PROGRAM '<replaceable class="parameter">command</replaceable>' | STDIN }
2727
[ [ WITH ] ( <replaceable class="parameter">option</replaceable> [, ...] ) ]
28+
[ WHERE <replaceable class="parameter">condition</replaceable> ]
2829

2930
COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] | ( <replaceable class="parameter">query</replaceable> ) }
3031
TO { '<replaceable class="parameter">filename</replaceable>' | PROGRAM '<replaceable class="parameter">command</replaceable>' | STDOUT }
@@ -353,6 +354,32 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
353354
</listitem>
354355
</varlistentry>
355356

357+
<varlistentry>
358+
<term><literal>WHERE</literal></term>
359+
<listitem>
360+
<para>
361+
The optional <literal>WHERE</literal> clause has the general form
362+
<synopsis>
363+
WHERE <replaceable class="parameter">condition</replaceable>
364+
</synopsis>
365+
where <replaceable class="parameter">condition</replaceable> is
366+
any expression that evaluates to a result of type
367+
<type>boolean</type>. Any row that does not satisfy this
368+
condition will not be inserted to the table. A row satisfies the
369+
condition if it returns true when the actual row values are
370+
substituted for any variable references.
371+
</para>
372+
373+
<para>
374+
Currently, subqueries are not allowed in <literal>WHERE</literal>
375+
expressions, and the evaluation does not see any changes made by the
376+
<command>COPY</command> itself (this matters when the expression
377+
contains calls to <literal>VOLATILE</literal> functions).
378+
</para>
379+
380+
</listitem>
381+
</varlistentry>
382+
356383
</variablelist>
357384
</refsect1>
358385

‎src/backend/commands/copy.c

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,11 @@
3939
#include"miscadmin.h"
4040
#include"optimizer/clauses.h"
4141
#include"optimizer/planner.h"
42+
#include"optimizer/prep.h"
4243
#include"nodes/makefuncs.h"
44+
#include"parser/parse_coerce.h"
45+
#include"parser/parse_collate.h"
46+
#include"parser/parse_expr.h"
4347
#include"parser/parse_relation.h"
4448
#include"port/pg_bswap.h"
4549
#include"rewrite/rewriteHandler.h"
@@ -149,6 +153,7 @@ typedef struct CopyStateData
149153
boolconvert_selectively;/* do selective binary conversion? */
150154
List*convert_select;/* list of column names (can be NIL) */
151155
bool*convert_select_flags;/* per-column CSV/TEXT CS flags */
156+
Node*whereClause;/* WHERE condition (or NULL) */
152157

153158
/* these are just for error messages, see CopyFromErrorCallback */
154159
constchar*cur_relname;/* table name for error messages */
@@ -179,6 +184,7 @@ typedef struct CopyStateData
179184
ExprState**defexprs;/* array of default att expressions */
180185
boolvolatile_defexprs;/* is any of defexprs volatile? */
181186
List*range_table;
187+
ExprState*qualexpr;
182188

183189
TransitionCaptureState*transition_capture;
184190

@@ -800,6 +806,7 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
800806
Relationrel;
801807
Oidrelid;
802808
RawStmt*query=NULL;
809+
Node*whereClause=NULL;
803810

804811
/*
805812
* Disallow COPY to/from file or program except to users with the
@@ -853,6 +860,26 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
853860
NULL, false, false);
854861
rte->requiredPerms= (is_from ?ACL_INSERT :ACL_SELECT);
855862

863+
if (stmt->whereClause)
864+
{
865+
/* add rte to column namespace */
866+
addRTEtoQuery(pstate,rte, false, true, true);
867+
868+
/* Transform the raw expression tree */
869+
whereClause=transformExpr(pstate,stmt->whereClause,EXPR_KIND_COPY_WHERE);
870+
871+
/* Make sure it yields a boolean result. */
872+
whereClause=coerce_to_boolean(pstate,whereClause,"WHERE");
873+
874+
/* we have to fix its collations too */
875+
assign_expr_collations(pstate,whereClause);
876+
877+
whereClause=eval_const_expressions(NULL,whereClause);
878+
879+
whereClause= (Node*)canonicalize_qual((Expr*)whereClause, false);
880+
whereClause= (Node*)make_ands_implicit((Expr*)whereClause);
881+
}
882+
856883
tupDesc=RelationGetDescr(rel);
857884
attnums=CopyGetAttnums(tupDesc,rel,stmt->attlist);
858885
foreach(cur,attnums)
@@ -1001,6 +1028,7 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
10011028

10021029
cstate=BeginCopyFrom(pstate,rel,stmt->filename,stmt->is_program,
10031030
NULL,stmt->attlist,stmt->options);
1031+
cstate->whereClause=whereClause;
10041032
*processed=CopyFrom(cstate);/* copy from file to database */
10051033
EndCopyFrom(cstate);
10061034
}
@@ -2535,6 +2563,10 @@ CopyFrom(CopyState cstate)
25352563
if (cstate->rel->rd_rel->relkind==RELKIND_PARTITIONED_TABLE)
25362564
proute=ExecSetupPartitionTupleRouting(NULL,cstate->rel);
25372565

2566+
if (cstate->whereClause)
2567+
cstate->qualexpr=ExecInitQual(castNode(List,cstate->whereClause),
2568+
&mtstate->ps);
2569+
25382570
/*
25392571
* It's more efficient to prepare a bunch of tuples for insertion, and
25402572
* insert them in one heap_multi_insert() call, than call heap_insert()
@@ -2580,6 +2612,16 @@ CopyFrom(CopyState cstate)
25802612
*/
25812613
insertMethod=CIM_SINGLE;
25822614
}
2615+
elseif (cstate->whereClause!=NULL||
2616+
contain_volatile_functions(cstate->whereClause))
2617+
{
2618+
/*
2619+
* Can't support multi-inserts if there are any volatile funcation
2620+
* expressions in WHERE clause. Similarly to the trigger case above,
2621+
* such expressions may query the table we're inserting into.
2622+
*/
2623+
insertMethod=CIM_SINGLE;
2624+
}
25832625
else
25842626
{
25852627
/*
@@ -2683,6 +2725,13 @@ CopyFrom(CopyState cstate)
26832725
slot=myslot;
26842726
ExecStoreHeapTuple(tuple,slot, false);
26852727

2728+
if (cstate->whereClause)
2729+
{
2730+
econtext->ecxt_scantuple=myslot;
2731+
if (!ExecQual(cstate->qualexpr,econtext))
2732+
continue;
2733+
}
2734+
26862735
/* Determine the partition to heap_insert the tuple into */
26872736
if (proute)
26882737
{

‎src/backend/nodes/copyfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3309,6 +3309,7 @@ _copyCopyStmt(const CopyStmt *from)
33093309
COPY_SCALAR_FIELD(is_program);
33103310
COPY_STRING_FIELD(filename);
33113311
COPY_NODE_FIELD(options);
3312+
COPY_NODE_FIELD(whereClause);
33123313

33133314
returnnewnode;
33143315
}

‎src/backend/nodes/equalfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1222,6 +1222,7 @@ _equalCopyStmt(const CopyStmt *a, const CopyStmt *b)
12221222
COMPARE_SCALAR_FIELD(is_program);
12231223
COMPARE_STRING_FIELD(filename);
12241224
COMPARE_NODE_FIELD(options);
1225+
COMPARE_NODE_FIELD(whereClause);
12251226

12261227
return true;
12271228
}

‎src/backend/parser/gram.y

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2962,7 +2962,8 @@ ClosePortalStmt:
29622962
*****************************************************************************/
29632963

29642964
CopyStmt:COPYopt_binaryqualified_nameopt_column_list
2965-
copy_fromopt_programcopy_file_namecopy_delimiteropt_withcopy_options
2965+
copy_fromopt_programcopy_file_namecopy_delimiteropt_with
2966+
copy_optionswhere_clause
29662967
{
29672968
CopyStmt *n = makeNode(CopyStmt);
29682969
n->relation =$3;
@@ -2971,13 +2972,20 @@ CopyStmt:COPY opt_binary qualified_name opt_column_list
29712972
n->is_from =$5;
29722973
n->is_program =$6;
29732974
n->filename =$7;
2975+
n->whereClause =$11;
29742976

29752977
if (n->is_program && n->filename ==NULL)
29762978
ereport(ERROR,
29772979
(errcode(ERRCODE_SYNTAX_ERROR),
29782980
errmsg("STDIN/STDOUT not allowed with PROGRAM"),
29792981
parser_errposition(@8)));
29802982

2983+
if (!n->is_from && n->whereClause !=NULL)
2984+
ereport(ERROR,
2985+
(errcode(ERRCODE_SYNTAX_ERROR),
2986+
errmsg("WHERE clause not allowed with COPY TO"),
2987+
parser_errposition(@11)));
2988+
29812989
n->options = NIL;
29822990
/* Concatenate user-supplied flags*/
29832991
if ($2)

‎src/backend/parser/parse_agg.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,14 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr)
523523

524524
break;
525525

526+
caseEXPR_KIND_COPY_WHERE:
527+
if (isAgg)
528+
err=_("aggregate functions are not allowed in COPY FROM WHERE conditions");
529+
else
530+
err=_("grouping operations are not allowed in COPY FROM WHERE conditions");
531+
532+
break;
533+
526534
/*
527535
* There is intentionally no default: case here, so that the
528536
* compiler will warn if we add a new ParseExprKind without
@@ -902,6 +910,9 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc,
902910
caseEXPR_KIND_CALL_ARGUMENT:
903911
err=_("window functions are not allowed in CALL arguments");
904912
break;
913+
caseEXPR_KIND_COPY_WHERE:
914+
err=_("window functions are not allowed in COPY FROM WHERE conditions");
915+
break;
905916

906917
/*
907918
* There is intentionally no default: case here, so that the

‎src/backend/parser/parse_expr.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1849,6 +1849,9 @@ transformSubLink(ParseState *pstate, SubLink *sublink)
18491849
caseEXPR_KIND_CALL_ARGUMENT:
18501850
err=_("cannot use subquery in CALL argument");
18511851
break;
1852+
caseEXPR_KIND_COPY_WHERE:
1853+
err=_("cannot use subquery in COPY FROM WHERE condition");
1854+
break;
18521855

18531856
/*
18541857
* There is intentionally no default: case here, so that the
@@ -3475,6 +3478,8 @@ ParseExprKindName(ParseExprKind exprKind)
34753478
return"PARTITION BY";
34763479
caseEXPR_KIND_CALL_ARGUMENT:
34773480
return"CALL";
3481+
caseEXPR_KIND_COPY_WHERE:
3482+
return"WHERE";
34783483

34793484
/*
34803485
* There is intentionally no default: case here, so that the

‎src/backend/parser/parse_func.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2370,6 +2370,9 @@ check_srf_call_placement(ParseState *pstate, Node *last_srf, int location)
23702370
caseEXPR_KIND_CALL_ARGUMENT:
23712371
err=_("set-returning functions are not allowed in CALL arguments");
23722372
break;
2373+
caseEXPR_KIND_COPY_WHERE:
2374+
err=_("set-returning functions are not allowed in COPY FROM WHERE conditions");
2375+
break;
23732376

23742377
/*
23752378
* There is intentionally no default: case here, so that the

‎src/include/nodes/parsenodes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1969,6 +1969,7 @@ typedef struct CopyStmt
19691969
boolis_program;/* is 'filename' a program to popen? */
19701970
char*filename;/* filename, or NULL for STDIN/STDOUT */
19711971
List*options;/* List of DefElem nodes */
1972+
Node*whereClause;/* WHERE condition (or NULL) */
19721973
}CopyStmt;
19731974

19741975
/* ----------------------

‎src/include/parser/parse_node.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ typedef enum ParseExprKind
6969
EXPR_KIND_TRIGGER_WHEN,/* WHEN condition in CREATE TRIGGER */
7070
EXPR_KIND_POLICY,/* USING or WITH CHECK expr in policy */
7171
EXPR_KIND_PARTITION_EXPRESSION,/* PARTITION BY expression */
72-
EXPR_KIND_CALL_ARGUMENT/* procedure argument in CALL */
72+
EXPR_KIND_CALL_ARGUMENT,/* procedure argument in CALL */
73+
EXPR_KIND_COPY_WHERE/* WHERE condition in COPY FROM */
7374
}ParseExprKind;
7475

7576

‎src/interfaces/ecpg/preproc/ecpg.addons

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ ECPG: where_or_current_clauseWHERECURRENT_POFcursor_name block
192192
char *cursor_marker = $4[0] == ':' ? mm_strdup("$0") : $4;
193193
$$ = cat_str(2,mm_strdup("where current of"), cursor_marker);
194194
}
195-
ECPG:CopyStmtCOPYopt_binaryqualified_nameopt_column_listcopy_fromopt_programcopy_file_namecopy_delimiteropt_withcopy_options addon
195+
ECPG:CopyStmtCOPYopt_binaryqualified_nameopt_column_listcopy_fromopt_programcopy_file_namecopy_delimiteropt_withcopy_optionswhere_clause addon
196196
if (strcmp($6, "from") == 0 &&
197197
(strcmp($7, "stdin") == 0 || strcmp($7, "stdout") == 0))
198198
mmerror(PARSE_ERROR, ET_WARNING, "COPY FROM STDIN is not implemented");

‎src/test/regress/expected/copy2.out

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,32 @@ CONTEXT: COPY x, line 1: "20022324050607080"
4949
COPY x (b, c, d, e) from stdin delimiter ',' null 'x';
5050
COPY x from stdin WITH DELIMITER AS ';' NULL AS '';
5151
COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X' ENCODING 'sql_ascii';
52+
COPY x TO stdout WHERE a = 1;
53+
ERROR: WHERE clause not allowed with COPY TO
54+
LINE 1: COPY x TO stdout WHERE a = 1;
55+
^
56+
COPY x from stdin WHERE a = 50004;
57+
COPY x from stdin WHERE a > 60003;
58+
COPY x from stdin WHERE f > 60003;
59+
ERROR: column "f" does not exist
60+
LINE 1: COPY x from stdin WHERE f > 60003;
61+
^
62+
COPY x from stdin WHERE a = max(x.b);
63+
ERROR: aggregate functions are not allowed in COPY FROM WHERE conditions
64+
LINE 1: COPY x from stdin WHERE a = max(x.b);
65+
^
66+
COPY x from stdin WHERE a IN (SELECT 1 FROM x);
67+
ERROR: cannot use subquery in COPY FROM WHERE condition
68+
LINE 1: COPY x from stdin WHERE a IN (SELECT 1 FROM x);
69+
^
70+
COPY x from stdin WHERE a IN (generate_series(1,5));
71+
ERROR: set-returning functions are not allowed in COPY FROM WHERE conditions
72+
LINE 1: COPY x from stdin WHERE a IN (generate_series(1,5));
73+
^
74+
COPY x from stdin WHERE a = row_number() over(b);
75+
ERROR: window functions are not allowed in COPY FROM WHERE conditions
76+
LINE 1: COPY x from stdin WHERE a = row_number() over(b);
77+
^
5278
-- check results of copy in
5379
SELECT * FROM x;
5480
a | b | c | d | e
@@ -73,12 +99,15 @@ SELECT * FROM x;
7399
4006 | 6 | BackslashN | \N | before trigger fired
74100
4007 | 7 | XX | XX | before trigger fired
75101
4008 | 8 | Delimiter | : | before trigger fired
102+
50004 | 25 | 35 | 45 | before trigger fired
103+
60004 | 25 | 35 | 45 | before trigger fired
104+
60005 | 26 | 36 | 46 | before trigger fired
76105
1 | 1 | stuff | test_1 | after trigger fired
77106
2 | 2 | stuff | test_2 | after trigger fired
78107
3 | 3 | stuff | test_3 | after trigger fired
79108
4 | 4 | stuff | test_4 | after trigger fired
80109
5 | 5 | stuff | test_5 | after trigger fired
81-
(25 rows)
110+
(28 rows)
82111

83112
-- check copy out
84113
COPY x TO stdout;
@@ -102,6 +131,9 @@ COPY x TO stdout;
102131
40066BackslashN\\Nbefore trigger fired
103132
40077XXXXbefore trigger fired
104133
40088Delimiter:before trigger fired
134+
50004253545before trigger fired
135+
60004253545before trigger fired
136+
60005263646before trigger fired
105137
11stufftest_1after trigger fired
106138
22stufftest_2after trigger fired
107139
33stufftest_3after trigger fired
@@ -128,6 +160,9 @@ Nbefore trigger fired
128160
BackslashNbefore trigger fired
129161
XXbefore trigger fired
130162
Delimiterbefore trigger fired
163+
35before trigger fired
164+
35before trigger fired
165+
36before trigger fired
131166
stuffafter trigger fired
132167
stuffafter trigger fired
133168
stuffafter trigger fired
@@ -154,6 +189,9 @@ I'm nullbefore trigger fired
154189
6before trigger fired
155190
7before trigger fired
156191
8before trigger fired
192+
25before trigger fired
193+
25before trigger fired
194+
26before trigger fired
157195
1after trigger fired
158196
2after trigger fired
159197
3after trigger fired

‎src/test/regress/sql/copy2.sql

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,32 @@ COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X' ENCODING 'sql_ascii';
9595
4008:8:Delimiter:\::\:
9696
\.
9797

98+
COPY x TO stdoutWHERE a=1;
99+
COPY xfrom stdinWHERE a=50004;
100+
5000324344454
101+
5000425354555
102+
5000526364656
103+
\.
104+
105+
COPY xfrom stdinWHERE a>60003;
106+
6000122324252
107+
6000223334353
108+
6000324344454
109+
6000425354555
110+
6000526364656
111+
\.
112+
113+
COPY xfrom stdinWHERE f>60003;
114+
115+
COPY xfrom stdinWHERE a=max(x.b);
116+
117+
COPY xfrom stdinWHERE aIN (SELECT1FROM x);
118+
119+
COPY xfrom stdinWHERE aIN (generate_series(1,5));
120+
121+
COPY xfrom stdinWHERE a= row_number() over(b);
122+
123+
98124
-- check results of copy in
99125
SELECT*FROM x;
100126

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp