77 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
88 * Portions Copyright (c) 1994, Regents of the University of California
99 *
10- * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.93 2009/08/10 05:46:50 tgl Exp $
10+ * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.94 2009/09/04 10:49:29 heikki Exp $
1111 *
1212 *-------------------------------------------------------------------------
1313 */
@@ -109,7 +109,7 @@ static int parse_xml_decl(const xmlChar *str, size_t *lenp,
109109static bool print_xml_decl (StringInfo buf ,const xmlChar * version ,
110110pg_enc encoding ,int standalone );
111111static xmlDocPtr xml_parse (text * data ,XmlOptionType xmloption_arg ,
112- bool preserve_whitespace ,xmlChar * encoding );
112+ bool preserve_whitespace ,int encoding );
113113static text * xml_xmlnodetoxmltype (xmlNodePtr cur );
114114#endif /* USE_LIBXML */
115115
@@ -183,7 +183,7 @@ xml_in(PG_FUNCTION_ARGS)
183183 * Parse the data to check if it is well-formed XML data. Assume that
184184 * ERROR occurred if parsing failed.
185185 */
186- doc = xml_parse (vardata ,xmloption , true,NULL );
186+ doc = xml_parse (vardata ,xmloption , true,GetDatabaseEncoding () );
187187xmlFreeDoc (doc );
188188
189189PG_RETURN_XML_P (vardata );
@@ -272,7 +272,8 @@ xml_recv(PG_FUNCTION_ARGS)
272272char * newstr ;
273273int nbytes ;
274274xmlDocPtr doc ;
275- xmlChar * encoding = NULL ;
275+ xmlChar * encodingStr = NULL ;
276+ int encoding ;
276277
277278/*
278279 * Read the data in raw format. We don't know yet what the encoding is, as
@@ -293,7 +294,15 @@ xml_recv(PG_FUNCTION_ARGS)
293294str = VARDATA (result );
294295str [nbytes ]= '\0' ;
295296
296- parse_xml_decl ((xmlChar * )str ,NULL ,NULL ,& encoding ,NULL );
297+ parse_xml_decl ((xmlChar * )str ,NULL ,NULL ,& encodingStr ,NULL );
298+
299+ /*
300+ * If encoding wasn't explicitly specified in the XML header, treat it as
301+ * UTF-8, as that's the default in XML. This is different from xml_in(),
302+ * where the input has to go through the normal client to server encoding
303+ * conversion.
304+ */
305+ encoding = encodingStr ?xmlChar_to_encoding (encodingStr ) :PG_UTF8 ;
297306
298307/*
299308 * Parse the data to check if it is well-formed XML data. Assume that
@@ -305,9 +314,7 @@ xml_recv(PG_FUNCTION_ARGS)
305314/* Now that we know what we're dealing with, convert to server encoding */
306315newstr = (char * )pg_do_encoding_conversion ((unsigned char * )str ,
307316nbytes ,
308- encoding ?
309- xmlChar_to_encoding (encoding ) :
310- PG_UTF8 ,
317+ encoding ,
311318GetDatabaseEncoding ());
312319
313320if (newstr != str )
@@ -659,7 +666,8 @@ xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
659666#ifdef USE_LIBXML
660667xmlDocPtr doc ;
661668
662- doc = xml_parse (data ,xmloption_arg ,preserve_whitespace ,NULL );
669+ doc = xml_parse (data ,xmloption_arg ,preserve_whitespace ,
670+ GetDatabaseEncoding ());
663671xmlFreeDoc (doc );
664672
665673return (xmltype * )data ;
@@ -799,7 +807,8 @@ xml_is_document(xmltype *arg)
799807/* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
800808PG_TRY ();
801809{
802- doc = xml_parse ((text * )arg ,XMLOPTION_DOCUMENT , true,NULL );
810+ doc = xml_parse ((text * )arg ,XMLOPTION_DOCUMENT , true,
811+ GetDatabaseEncoding ());
803812result = true;
804813}
805814PG_CATCH ();
@@ -1152,7 +1161,7 @@ print_xml_decl(StringInfo buf, const xmlChar *version,
11521161 */
11531162static xmlDocPtr
11541163xml_parse (text * data ,XmlOptionType xmloption_arg ,bool preserve_whitespace ,
1155- xmlChar * encoding )
1164+ int encoding )
11561165{
11571166int32 len ;
11581167xmlChar * string ;
@@ -1165,9 +1174,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
11651174
11661175utf8string = pg_do_encoding_conversion (string ,
11671176len ,
1168- encoding ?
1169- xmlChar_to_encoding (encoding ) :
1170- GetDatabaseEncoding (),
1177+ encoding ,
11711178PG_UTF8 );
11721179
11731180/* Start up libxml and its parser (no-ops if already done) */