Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 37bef84

Browse files
committed
Convert xml_in to report errors softly.
The key idea here is that xml_parse must distinguish hard errors from soft errors. We want to throw a hard error for libxml initialization failures: those might be out-of-memory, or something else, but in any case they are not the fault of the input string. If we get to the point of parsing the input, and something goes wrong, we can fairly consider that to mean bad input. One thing that arguably does mean bad input, but I didn't trouble to handle softly, is encoding conversion failure while converting the server encoding to UTF8. This might be something to improve later, but it seems like a pretty low-probability scenario. Discussion: https://postgr.es/m/3564577.1671142683@sss.pgh.pa.us
1 parent e52f8b3 commit 37bef84

File tree

5 files changed

+205
-35
lines changed

5 files changed

+205
-35
lines changed

‎src/backend/utils/adt/xml.c

Lines changed: 120 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,10 @@ struct PgXmlErrorContext
119119

120120
staticxmlParserInputPtrxmlPgEntityLoader(constchar*URL,constchar*ID,
121121
xmlParserCtxtPtrctxt);
122+
staticvoidxml_errsave(Node*escontext,PgXmlErrorContext*errcxt,
123+
intsqlcode,constchar*msg);
122124
staticvoidxml_errorHandler(void*data,xmlErrorPtrerror);
123-
staticvoidxml_ereport_by_code(intlevel,intsqlcode,
124-
constchar*msg,intcode);
125+
staticinterrdetail_for_xml_code(intcode);
125126
staticvoidchopStringInfoNewlines(StringInfostr);
126127
staticvoidappendStringInfoLineSeparator(StringInfostr);
127128

@@ -143,7 +144,8 @@ static bool print_xml_decl(StringInfo buf, const xmlChar *version,
143144
pg_encencoding,intstandalone);
144145
staticboolxml_doctype_in_content(constxmlChar*str);
145146
staticxmlDocPtrxml_parse(text*data,XmlOptionTypexmloption_arg,
146-
boolpreserve_whitespace,intencoding);
147+
boolpreserve_whitespace,intencoding,
148+
Node*escontext);
147149
statictext*xml_xmlnodetoxmltype(xmlNodePtrcur,PgXmlErrorContext*xmlerrcxt);
148150
staticintxml_xpathobjtoxmlarray(xmlXPathObjectPtrxpathobj,
149151
ArrayBuildState*astate,
@@ -261,14 +263,18 @@ xml_in(PG_FUNCTION_ARGS)
261263
xmltype*vardata;
262264
xmlDocPtrdoc;
263265

266+
/* Build the result object. */
264267
vardata= (xmltype*)cstring_to_text(s);
265268

266269
/*
267-
* Parse the data to check if it is well-formed XML data. Assume that
268-
* ERROR occurred if parsing failed.
270+
* Parse the data to check if it is well-formed XML data.
271+
*
272+
* Note: we don't need to worry about whether a soft error is detected.
269273
*/
270-
doc=xml_parse(vardata,xmloption, true,GetDatabaseEncoding());
271-
xmlFreeDoc(doc);
274+
doc=xml_parse(vardata,xmloption, true,GetDatabaseEncoding(),
275+
fcinfo->context);
276+
if (doc!=NULL)
277+
xmlFreeDoc(doc);
272278

273279
PG_RETURN_XML_P(vardata);
274280
#else
@@ -323,9 +329,10 @@ xml_out_internal(xmltype *x, pg_enc target_encoding)
323329
returnbuf.data;
324330
}
325331

326-
xml_ereport_by_code(WARNING,ERRCODE_INTERNAL_ERROR,
327-
"could not parse XML declaration in stored value",
328-
res_code);
332+
ereport(WARNING,
333+
errcode(ERRCODE_INTERNAL_ERROR),
334+
errmsg_internal("could not parse XML declaration in stored value"),
335+
errdetail_for_xml_code(res_code));
329336
#endif
330337
returnstr;
331338
}
@@ -392,7 +399,7 @@ xml_recv(PG_FUNCTION_ARGS)
392399
* Parse the data to check if it is well-formed XML data. Assume that
393400
* xml_parse will throw ERROR if not.
394401
*/
395-
doc=xml_parse(result,xmloption, true,encoding);
402+
doc=xml_parse(result,xmloption, true,encoding,NULL);
396403
xmlFreeDoc(doc);
397404

398405
/* Now that we know what we're dealing with, convert to server encoding */
@@ -754,7 +761,7 @@ xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
754761
xmlDocPtrdoc;
755762

756763
doc=xml_parse(data,xmloption_arg,preserve_whitespace,
757-
GetDatabaseEncoding());
764+
GetDatabaseEncoding(),NULL);
758765
xmlFreeDoc(doc);
759766

760767
return (xmltype*)data;
@@ -895,7 +902,7 @@ xml_is_document(xmltype *arg)
895902
PG_TRY();
896903
{
897904
doc=xml_parse((text*)arg,XMLOPTION_DOCUMENT, true,
898-
GetDatabaseEncoding());
905+
GetDatabaseEncoding(),NULL);
899906
result= true;
900907
}
901908
PG_CATCH();
@@ -1500,17 +1507,26 @@ xml_doctype_in_content(const xmlChar *str)
15001507

15011508

15021509
/*
1503-
* Convert a C string to XML internal representation
1510+
* Convert a text object to XML internal representation
1511+
*
1512+
* data is the source data (must not be toasted!), encoding is its encoding,
1513+
* and xmloption_arg and preserve_whitespace are options for the
1514+
* transformation.
1515+
*
1516+
* Errors normally result in ereport(ERROR), but if escontext is an
1517+
* ErrorSaveContext, then "safe" errors are reported there instead, and the
1518+
* caller must check SOFT_ERROR_OCCURRED() to see whether that happened.
15041519
*
15051520
* Note: it is caller's responsibility to xmlFreeDoc() the result,
1506-
* else a permanent memory leak will ensue!
1521+
* else a permanent memory leak will ensue! But note the result could
1522+
* be NULL after a soft error.
15071523
*
15081524
* TODO maybe libxml2's xmlreader is better? (do not construct DOM,
15091525
* yet do not use SAX - see xmlreader.c)
15101526
*/
15111527
staticxmlDocPtr
15121528
xml_parse(text*data,XmlOptionTypexmloption_arg,boolpreserve_whitespace,
1513-
intencoding)
1529+
intencoding,Node*escontext)
15141530
{
15151531
int32len;
15161532
xmlChar*string;
@@ -1519,9 +1535,20 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
15191535
volatilexmlParserCtxtPtrctxt=NULL;
15201536
volatilexmlDocPtrdoc=NULL;
15211537

1538+
/*
1539+
* This step looks annoyingly redundant, but we must do it to have a
1540+
* null-terminated string in case encoding conversion isn't required.
1541+
*/
15221542
len=VARSIZE_ANY_EXHDR(data);/* will be useful later */
15231543
string=xml_text2xmlChar(data);
15241544

1545+
/*
1546+
* If the data isn't UTF8, we must translate before giving it to libxml.
1547+
*
1548+
* XXX ideally, we'd catch any encoding conversion failure and return a
1549+
* soft error. However, failure to convert to UTF8 should be pretty darn
1550+
* rare, so for now this is left undone.
1551+
*/
15251552
utf8string=pg_do_encoding_conversion(string,
15261553
len,
15271554
encoding,
@@ -1539,6 +1566,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
15391566
xmlChar*version=NULL;
15401567
intstandalone=0;
15411568

1569+
/* Any errors here are reported as hard ereport's */
15421570
xmlInitParser();
15431571

15441572
ctxt=xmlNewParserCtxt();
@@ -1555,9 +1583,13 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
15551583
res_code=parse_xml_decl(utf8string,
15561584
&count,&version,NULL,&standalone);
15571585
if (res_code!=0)
1558-
xml_ereport_by_code(ERROR,ERRCODE_INVALID_XML_CONTENT,
1559-
"invalid XML content: invalid XML declaration",
1560-
res_code);
1586+
{
1587+
errsave(escontext,
1588+
errcode(ERRCODE_INVALID_XML_CONTENT),
1589+
errmsg_internal("invalid XML content: invalid XML declaration"),
1590+
errdetail_for_xml_code(res_code));
1591+
gotofail;
1592+
}
15611593

15621594
/* Is there a DOCTYPE element? */
15631595
if (xml_doctype_in_content(utf8string+count))
@@ -1580,20 +1612,30 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
15801612
| (preserve_whitespace ?0 :XML_PARSE_NOBLANKS));
15811613
if (doc==NULL||xmlerrcxt->err_occurred)
15821614
{
1583-
/* Use original option to decide which error code tothrow */
1615+
/* Use original option to decide which error code toreport */
15841616
if (xmloption_arg==XMLOPTION_DOCUMENT)
1585-
xml_ereport(xmlerrcxt,ERROR,ERRCODE_INVALID_XML_DOCUMENT,
1617+
xml_errsave(escontext,xmlerrcxt,
1618+
ERRCODE_INVALID_XML_DOCUMENT,
15861619
"invalid XML document");
15871620
else
1588-
xml_ereport(xmlerrcxt,ERROR,ERRCODE_INVALID_XML_CONTENT,
1621+
xml_errsave(escontext,xmlerrcxt,
1622+
ERRCODE_INVALID_XML_CONTENT,
15891623
"invalid XML content");
1624+
gotofail;
15901625
}
15911626
}
15921627
else
15931628
{
15941629
doc=xmlNewDoc(version);
1630+
if (doc==NULL||xmlerrcxt->err_occurred)
1631+
xml_ereport(xmlerrcxt,ERROR,ERRCODE_OUT_OF_MEMORY,
1632+
"could not allocate XML document");
1633+
15951634
Assert(doc->encoding==NULL);
15961635
doc->encoding=xmlStrdup((constxmlChar*)"UTF-8");
1636+
if (doc->encoding==NULL||xmlerrcxt->err_occurred)
1637+
xml_ereport(xmlerrcxt,ERROR,ERRCODE_OUT_OF_MEMORY,
1638+
"could not allocate XML document");
15971639
doc->standalone=standalone;
15981640

15991641
/* allow empty content */
@@ -1602,10 +1644,17 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
16021644
res_code=xmlParseBalancedChunkMemory(doc,NULL,NULL,0,
16031645
utf8string+count,NULL);
16041646
if (res_code!=0||xmlerrcxt->err_occurred)
1605-
xml_ereport(xmlerrcxt,ERROR,ERRCODE_INVALID_XML_CONTENT,
1647+
{
1648+
xml_errsave(escontext,xmlerrcxt,
1649+
ERRCODE_INVALID_XML_CONTENT,
16061650
"invalid XML content");
1651+
gotofail;
1652+
}
16071653
}
16081654
}
1655+
1656+
fail:
1657+
;
16091658
}
16101659
PG_CATCH();
16111660
{
@@ -1745,6 +1794,44 @@ xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
17451794
}
17461795

17471796

1797+
/*
1798+
* xml_errsave --- save an XML-related error
1799+
*
1800+
* If escontext is an ErrorSaveContext, error details are saved into it,
1801+
* and control returns normally.
1802+
*
1803+
* Otherwise, the error is thrown, so that this is equivalent to
1804+
* xml_ereport() with level == ERROR.
1805+
*
1806+
* This should be used only for errors that we're sure we do not need
1807+
* a transaction abort to clean up after.
1808+
*/
1809+
staticvoid
1810+
xml_errsave(Node*escontext,PgXmlErrorContext*errcxt,
1811+
intsqlcode,constchar*msg)
1812+
{
1813+
char*detail;
1814+
1815+
/* Defend against someone passing us a bogus context struct */
1816+
if (errcxt->magic!=ERRCXT_MAGIC)
1817+
elog(ERROR,"xml_errsave called with invalid PgXmlErrorContext");
1818+
1819+
/* Flag that the current libxml error has been reported */
1820+
errcxt->err_occurred= false;
1821+
1822+
/* Include detail only if we have some text from libxml */
1823+
if (errcxt->err_buf.len>0)
1824+
detail=errcxt->err_buf.data;
1825+
else
1826+
detail=NULL;
1827+
1828+
errsave(escontext,
1829+
(errcode(sqlcode),
1830+
errmsg_internal("%s",msg),
1831+
detail ?errdetail_internal("%s",detail) :0));
1832+
}
1833+
1834+
17481835
/*
17491836
* Error handler for libxml errors and warnings
17501837
*/
@@ -1917,15 +2004,16 @@ xml_errorHandler(void *data, xmlErrorPtr error)
19172004

19182005

19192006
/*
1920-
* Wrapper for "ereport" function for XML-related errors. The "msg"
1921-
* is the SQL-level message; some can be adopted from the SQL/XML
1922-
* standard. This function uses "code" to create a textual detail
1923-
* message. At the moment, we only need to cover those codes that we
2007+
* Convert libxml error codes into textual errdetail messages.
2008+
*
2009+
* This should be called within an ereport or errsave invocation,
2010+
* just as errdetail would be.
2011+
*
2012+
* At the moment, we only need to cover those codes that we
19242013
* may raise in this file.
19252014
*/
1926-
staticvoid
1927-
xml_ereport_by_code(intlevel,intsqlcode,
1928-
constchar*msg,intcode)
2015+
staticint
2016+
errdetail_for_xml_code(intcode)
19292017
{
19302018
constchar*det;
19312019

@@ -1954,10 +2042,7 @@ xml_ereport_by_code(int level, int sqlcode,
19542042
break;
19552043
}
19562044

1957-
ereport(level,
1958-
(errcode(sqlcode),
1959-
errmsg_internal("%s",msg),
1960-
errdetail(det,code)));
2045+
returnerrdetail(det,code);
19612046
}
19622047

19632048

@@ -4241,7 +4326,7 @@ wellformed_xml(text *data, XmlOptionType xmloption_arg)
42414326
/* We want to catch any exceptions and return false */
42424327
PG_TRY();
42434328
{
4244-
doc=xml_parse(data,xmloption_arg, true,GetDatabaseEncoding());
4329+
doc=xml_parse(data,xmloption_arg, true,GetDatabaseEncoding(),NULL);
42454330
result= true;
42464331
}
42474332
PG_CATCH();

‎src/test/regress/expected/xml.out

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,37 @@ SELECT * FROM xmltest;
1818
2 | <value>two</value>
1919
(2 rows)
2020

21+
-- test non-throwing API, too
22+
SELECT pg_input_is_valid('<value>one</value>', 'xml');
23+
pg_input_is_valid
24+
-------------------
25+
t
26+
(1 row)
27+
28+
SELECT pg_input_is_valid('<value>one</', 'xml');
29+
pg_input_is_valid
30+
-------------------
31+
f
32+
(1 row)
33+
34+
SELECT pg_input_error_message('<value>one</', 'xml');
35+
pg_input_error_message
36+
------------------------
37+
invalid XML content
38+
(1 row)
39+
40+
SELECT pg_input_is_valid('<?xml version="1.0" standalone="y"?><foo/>', 'xml');
41+
pg_input_is_valid
42+
-------------------
43+
f
44+
(1 row)
45+
46+
SELECT pg_input_error_message('<?xml version="1.0" standalone="y"?><foo/>', 'xml');
47+
pg_input_error_message
48+
----------------------------------------------
49+
invalid XML content: invalid XML declaration
50+
(1 row)
51+
2152
SELECT xmlcomment('test');
2253
xmlcomment
2354
-------------

‎src/test/regress/expected/xml_1.out

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,22 @@ SELECT * FROM xmltest;
2222
----+------
2323
(0 rows)
2424

25+
-- test non-throwing API, too
26+
SELECT pg_input_is_valid('<value>one</value>', 'xml');
27+
ERROR: unsupported XML feature
28+
DETAIL: This functionality requires the server to be built with libxml support.
29+
SELECT pg_input_is_valid('<value>one</', 'xml');
30+
ERROR: unsupported XML feature
31+
DETAIL: This functionality requires the server to be built with libxml support.
32+
SELECT pg_input_error_message('<value>one</', 'xml');
33+
ERROR: unsupported XML feature
34+
DETAIL: This functionality requires the server to be built with libxml support.
35+
SELECT pg_input_is_valid('<?xml version="1.0" standalone="y"?><foo/>', 'xml');
36+
ERROR: unsupported XML feature
37+
DETAIL: This functionality requires the server to be built with libxml support.
38+
SELECT pg_input_error_message('<?xml version="1.0" standalone="y"?><foo/>', 'xml');
39+
ERROR: unsupported XML feature
40+
DETAIL: This functionality requires the server to be built with libxml support.
2541
SELECT xmlcomment('test');
2642
ERROR: unsupported XML feature
2743
DETAIL: This functionality requires the server to be built with libxml support.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp