Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 3ad8b84

Browse files
committed
Add some tests for encoding conversion in COPY TO/FROM
This adds a couple of tests to trigger encoding conversion when input
and server encodings do not match in COPY FROM/TO, or need_transcoding
set to true in the COPY state data. These tests rely on UTF8 <-> LATIN1
for the valid cases as LATIN1 accepts any bytes, and UTF8 <-> EUC_JP for
some of the invalid cases where a character cannot be understood,
causing a conversion failure.

Both ENCODING and client_encoding are covered. Test suggested by Andres
Freund.

Author: Sutou Kouhei
Discussion: https://postgr.es/m/20240206222445.hzq22pb2nye7rm67@awork3.anarazel.de
1 parent bf9165b commit 3ad8b84

File tree

4 files changed

+108
-1
lines changed

4 files changed

+108
-1
lines changed
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
--
2+
-- Test cases for encoding with COPY commands
3+
--
4+
-- skip test if not UTF8 server encoding
5+
SELECT getdatabaseencoding() <> 'UTF8'
6+
AS skip_test \gset
7+
\if :skip_test
8+
\quit
9+
\endif
10+
-- directory paths are passed to us in environment variables
11+
\getenv abs_builddir PG_ABS_BUILDDIR
12+
\set utf8_csv :abs_builddir '/results/copyencoding_utf8.csv'
13+
CREATE TABLE copy_encoding_tab (t text);
14+
-- Valid cases
15+
-- Use ENCODING option
16+
-- U+3042 HIRAGANA LETTER A
17+
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv, ENCODING 'UTF8');
18+
-- Read UTF8 data as LATIN1: no error
19+
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv, ENCODING 'LATIN1');
20+
-- Use client_encoding
21+
SET client_encoding TO UTF8;
22+
-- U+3042 HIRAGANA LETTER A
23+
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv);
24+
-- Read UTF8 data as LATIN1: no error
25+
SET client_encoding TO LATIN1;
26+
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv);
27+
RESET client_encoding;
28+
-- Invalid cases
29+
-- Use ENCODING explicitly
30+
-- U+3042 HIRAGANA LETTER A
31+
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv, ENCODING 'UTF8');
32+
-- Read UTF8 data as EUC_JP: no error
33+
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv, ENCODING 'EUC_JP');
34+
ERROR: invalid byte sequence for encoding "EUC_JP": 0xe3 0x81
35+
CONTEXT: COPY copy_encoding_tab, line 1
36+
-- Use client_encoding
37+
SET client_encoding TO UTF8;
38+
-- U+3042 HIRAGANA LETTER A
39+
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv);
40+
-- Read UTF8 data as EUC_JP: no error
41+
SET client_encoding TO EUC_JP;
42+
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv);
43+
ERROR: invalid byte sequence for encoding "EUC_JP": 0xe3 0x81
44+
CONTEXT: COPY copy_encoding_tab, line 1
45+
RESET client_encoding;
46+
DROP TABLE copy_encoding_tab;
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
--
2+
-- Test cases for encoding with COPY commands
3+
--
4+
-- skip test if not UTF8 server encoding
5+
SELECT getdatabaseencoding() <> 'UTF8'
6+
AS skip_test \gset
7+
\if :skip_test
8+
\quit

‎src/test/regress/parallel_schedule

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comment
3636
# execute two copy tests in parallel, to check that copy itself
3737
# is concurrent safe.
3838
# ----------
39-
test: copy copyselect copydml insert insert_conflict
39+
test: copy copyselect copydml copyencoding insert insert_conflict
4040

4141
# ----------
4242
# More groups of parallel tests

‎src/test/regress/sql/copyencoding.sql

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
--
2+
-- Test cases for encoding with COPY commands
3+
--
4+
5+
-- skip test if not UTF8 server encoding
6+
SELECT getdatabaseencoding() <> 'UTF8'
7+
AS skip_test \gset
8+
\if :skip_test
9+
\quit
10+
\endif
11+
12+
-- directory paths are passed to us in environment variables
13+
\getenv abs_builddir PG_ABS_BUILDDIR
14+
15+
\set utf8_csv :abs_builddir '/results/copyencoding_utf8.csv'
16+
17+
CREATE TABLE copy_encoding_tab (t text);
18+
19+
-- Valid cases
20+
21+
-- Use ENCODING option
22+
-- U+3042 HIRAGANA LETTER A
23+
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv, ENCODING 'UTF8');
24+
-- Read UTF8 data as LATIN1: no error
25+
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv, ENCODING 'LATIN1');
26+
27+
-- Use client_encoding
28+
SET client_encoding TO UTF8;
29+
-- U+3042 HIRAGANA LETTER A
30+
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv);
31+
-- Read UTF8 data as LATIN1: no error
32+
SET client_encoding TO LATIN1;
33+
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv);
34+
RESET client_encoding;
35+
36+
-- Invalid cases
37+
38+
-- Use ENCODING explicitly
39+
-- U+3042 HIRAGANA LETTER A
40+
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv, ENCODING 'UTF8');
41+
-- Read UTF8 data as EUC_JP: no error
42+
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv, ENCODING 'EUC_JP');
43+
44+
-- Use client_encoding
45+
SET client_encoding TO UTF8;
46+
-- U+3042 HIRAGANA LETTER A
47+
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv);
48+
-- Read UTF8 data as EUC_JP: no error
49+
SET client_encoding TO EUC_JP;
50+
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv);
51+
RESET client_encoding;
52+
53+
DROP TABLE copy_encoding_tab;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp