Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 2918fce

Browse files
committed
Ignore XML declaration in xpath_internal(), for UTF8 databases.
When a value contained an XML declaration naming some other encoding, this function interpreted UTF8 bytes as the named encoding, yielding mojibake. xml_parse() already has similar logic. This would be necessary but not sufficient for non-UTF8 databases, so preserve behavior there until the xpath facility can support such databases comprehensively. Back-patch to 9.3 (all supported versions). Pavel Stehule and Noah Misch. Discussion: https://postgr.es/m/CAFj8pRC-dM=tT=QkGi+Achkm+gwPmjyOayGuUfXVumCxkDgYWg@mail.gmail.com
1 parent 5edc63b commit 2918fce

File tree

5 files changed

+142
-1
lines changed

5 files changed

+142
-1
lines changed

‎src/backend/utils/adt/xml.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3845,6 +3845,7 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
38453845
int32 xpath_len;
38463846
xmlChar*string;
38473847
xmlChar*xpath_expr;
3848+
size_t xmldecl_len = 0;
38483849
int i;
38493850
int ndim;
38503851
Datum*ns_names_uris;
@@ -3900,6 +3901,16 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
39003901
string=pg_xmlCharStrndup(datastr,len);
39013902
xpath_expr=pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text),xpath_len);
39023903

3904+
/*
3905+
* In a UTF8 database, skip any xml declaration, which might assert
3906+
* another encoding. Ignore parse_xml_decl() failure, letting
3907+
* xmlCtxtReadMemory() report parse errors. Documentation disclaims
3908+
* xpath() support for non-ASCII data in non-UTF8 databases, so leave
3909+
* those scenarios bug-compatible with historical behavior.
3910+
*/
3911+
if (GetDatabaseEncoding()==PG_UTF8)
3912+
parse_xml_decl(string,&xmldecl_len,NULL,NULL,NULL);
3913+
39033914
xmlerrcxt=pg_xml_init(PG_XML_STRICTNESS_ALL);
39043915

39053916
PG_TRY();
@@ -3914,7 +3925,8 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
39143925
if (ctxt==NULL||xmlerrcxt->err_occurred)
39153926
xml_ereport(xmlerrcxt,ERROR,ERRCODE_OUT_OF_MEMORY,
39163927
"could not allocate parser context");
3917-
doc=xmlCtxtReadMemory(ctxt, (char*)string,len,NULL,NULL,0);
3928+
doc=xmlCtxtReadMemory(ctxt, (char*)string+xmldecl_len,
3929+
len-xmldecl_len,NULL,NULL,0);
39183930
if (doc==NULL||xmlerrcxt->err_occurred)
39193931
xml_ereport(xmlerrcxt,ERROR,ERRCODE_INVALID_XML_DOCUMENT,
39203932
"could not parse XML document");

‎src/test/regress/expected/xml.out

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,37 @@ SELECT xpath('/nosuchtag', '<root/>');
670670
{}
671671
(1 row)
672672

673+
-- Round-trip non-ASCII data through xpath().
674+
DO $$
675+
DECLARE
676+
xml_declaration text := '<?xml version="1.0" encoding="ISO-8859-1"?>';
677+
degree_symbol text;
678+
res xml[];
679+
BEGIN
680+
-- Per the documentation, xpath() doesn't work on non-ASCII data when
681+
-- the server encoding is not UTF8. The EXCEPTION block below,
682+
-- currently dead code, will be relevant if we remove this limitation.
683+
IF current_setting('server_encoding') <> 'UTF8' THEN
684+
RAISE LOG 'skip: encoding % unsupported for xml',
685+
current_setting('server_encoding');
686+
RETURN;
687+
END IF;
688+
689+
degree_symbol := convert_from('\xc2b0', 'UTF8');
690+
res := xpath('text()', (xml_declaration ||
691+
'<x>' || degree_symbol || '</x>')::xml);
692+
IF degree_symbol <> res[1]::text THEN
693+
RAISE 'expected % (%), got % (%)',
694+
degree_symbol, convert_to(degree_symbol, 'UTF8'),
695+
res[1], convert_to(res[1]::text, 'UTF8');
696+
END IF;
697+
EXCEPTION
698+
-- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
699+
WHEN untranslatable_character THEN RAISE LOG 'skip: %', SQLERRM;
700+
-- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
701+
WHEN undefined_function THEN RAISE LOG 'skip: %', SQLERRM;
702+
END
703+
$$;
673704
-- Test xmlexists and xpath_exists
674705
SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
675706
xmlexists

‎src/test/regress/expected/xml_1.out

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,41 @@ LINE 1: SELECT xpath('/nosuchtag', '<root/>');
576576
^
577577
DETAIL: This functionality requires the server to be built with libxml support.
578578
HINT: You need to rebuild PostgreSQL using --with-libxml.
579+
-- Round-trip non-ASCII data through xpath().
580+
DO $$
581+
DECLARE
582+
xml_declaration text := '<?xml version="1.0" encoding="ISO-8859-1"?>';
583+
degree_symbol text;
584+
res xml[];
585+
BEGIN
586+
-- Per the documentation, xpath() doesn't work on non-ASCII data when
587+
-- the server encoding is not UTF8. The EXCEPTION block below,
588+
-- currently dead code, will be relevant if we remove this limitation.
589+
IF current_setting('server_encoding') <> 'UTF8' THEN
590+
RAISE LOG 'skip: encoding % unsupported for xml',
591+
current_setting('server_encoding');
592+
RETURN;
593+
END IF;
594+
595+
degree_symbol := convert_from('\xc2b0', 'UTF8');
596+
res := xpath('text()', (xml_declaration ||
597+
'<x>' || degree_symbol || '</x>')::xml);
598+
IF degree_symbol <> res[1]::text THEN
599+
RAISE 'expected % (%), got % (%)',
600+
degree_symbol, convert_to(degree_symbol, 'UTF8'),
601+
res[1], convert_to(res[1]::text, 'UTF8');
602+
END IF;
603+
EXCEPTION
604+
-- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
605+
WHEN untranslatable_character THEN RAISE LOG 'skip: %', SQLERRM;
606+
-- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
607+
WHEN undefined_function THEN RAISE LOG 'skip: %', SQLERRM;
608+
END
609+
$$;
610+
ERROR: unsupported XML feature
611+
DETAIL: This functionality requires the server to be built with libxml support.
612+
HINT: You need to rebuild PostgreSQL using --with-libxml.
613+
CONTEXT: PL/pgSQL function inline_code_block line 17 at assignment
579614
-- Test xmlexists and xpath_exists
580615
SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
581616
ERROR: unsupported XML feature

‎src/test/regress/expected/xml_2.out

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,37 @@ SELECT xpath('/nosuchtag', '<root/>');
650650
{}
651651
(1 row)
652652

653+
-- Round-trip non-ASCII data through xpath().
654+
DO $$
655+
DECLARE
656+
xml_declaration text := '<?xml version="1.0" encoding="ISO-8859-1"?>';
657+
degree_symbol text;
658+
res xml[];
659+
BEGIN
660+
-- Per the documentation, xpath() doesn't work on non-ASCII data when
661+
-- the server encoding is not UTF8. The EXCEPTION block below,
662+
-- currently dead code, will be relevant if we remove this limitation.
663+
IF current_setting('server_encoding') <> 'UTF8' THEN
664+
RAISE LOG 'skip: encoding % unsupported for xml',
665+
current_setting('server_encoding');
666+
RETURN;
667+
END IF;
668+
669+
degree_symbol := convert_from('\xc2b0', 'UTF8');
670+
res := xpath('text()', (xml_declaration ||
671+
'<x>' || degree_symbol || '</x>')::xml);
672+
IF degree_symbol <> res[1]::text THEN
673+
RAISE 'expected % (%), got % (%)',
674+
degree_symbol, convert_to(degree_symbol, 'UTF8'),
675+
res[1], convert_to(res[1]::text, 'UTF8');
676+
END IF;
677+
EXCEPTION
678+
-- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
679+
WHEN untranslatable_character THEN RAISE LOG 'skip: %', SQLERRM;
680+
-- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
681+
WHEN undefined_function THEN RAISE LOG 'skip: %', SQLERRM;
682+
END
683+
$$;
653684
-- Test xmlexists and xpath_exists
654685
SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
655686
xmlexists

‎src/test/regress/sql/xml.sql

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,38 @@ SELECT xpath('count(//*)=3', '<root><sub/><sub/></root>');
189189
SELECT xpath('name(/*)','<root><sub/><sub/></root>');
190190
SELECT xpath('/nosuchtag','<root/>');
191191

192+
-- Round-trip non-ASCII data through xpath().
193+
DO $$
194+
DECLARE
195+
xml_declaration text := '<?xml version="1.0" encoding="ISO-8859-1"?>';
196+
degree_symbol text;
197+
res xml[];
198+
BEGIN
199+
-- Per the documentation, xpath() doesn't work on non-ASCII data when
200+
-- the server encoding is not UTF8. The EXCEPTION block below,
201+
-- currently dead code, will be relevant if we remove this limitation.
202+
IF current_setting('server_encoding')<>'UTF8' THEN
203+
RAISE LOG'skip: encoding % unsupported for xml',
204+
current_setting('server_encoding');
205+
RETURN;
206+
END IF;
207+
208+
degree_symbol := convert_from('\xc2b0','UTF8');
209+
res := xpath('text()', (xml_declaration||
210+
'<x>'|| degree_symbol||'</x>')::xml);
211+
IF degree_symbol<> res[1]::text THEN
212+
RAISE'expected % (%), got % (%)',
213+
degree_symbol, convert_to(degree_symbol,'UTF8'),
214+
res[1], convert_to(res[1]::text,'UTF8');
215+
END IF;
216+
EXCEPTION
217+
-- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
218+
WHEN untranslatable_character THEN RAISE LOG'skip: %', SQLERRM;
219+
-- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
220+
WHEN undefined_function THEN RAISE LOG'skip: %', SQLERRM;
221+
END
222+
$$;
223+
192224
-- Test xmlexists and xpath_exists
193225
SELECT xmlexists('//town[text() =''Toronto'']' PASSING BY REF'<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
194226
SELECT xmlexists('//town[text() =''Cwmbran'']' PASSING BY REF'<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp