77 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
88 * Portions Copyright (c) 1994, Regents of the University of California
99 *
10- * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.5 2006/12/24 18:25:58 tgl Exp $
10+ * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.6 2006/12/28 03:17:38 petere Exp $
1111 *
1212 *-------------------------------------------------------------------------
1313 */
@@ -58,7 +58,7 @@ static void xml_errorHandler(void *ctxt, const char *msg, ...);
5858static void xml_ereport_by_code (int level ,int sqlcode ,
5959const char * msg ,int errcode );
6060static xmlChar * xml_text2xmlChar (text * in );
61- static xmlDocPtr xml_parse (text * data ,int opts ,bool is_document );
61+ static xmlDocPtr xml_parse (text * data ,bool is_document ,bool preserve_whitespace );
6262
6363#endif /* USE_LIBXML */
6464
@@ -86,7 +86,7 @@ xml_in(PG_FUNCTION_ARGS)
8686 * that ERROR occurred if parsing failed. Do we need DTD
8787 * validation (if DTD exists)?
8888 */
89- xml_parse (vardata ,XML_PARSE_DTDATTR | XML_PARSE_DTDVALID , false );
89+ xml_parse (vardata ,false, true );
9090
9191PG_RETURN_XML_P (vardata );
9292#else
@@ -179,18 +179,7 @@ xmltype *
179179xmlparse (text * data ,bool is_document ,bool preserve_whitespace )
180180{
181181#ifdef USE_LIBXML
182- if (!preserve_whitespace )
183- ereport (WARNING ,
184- (errcode (ERRCODE_FEATURE_NOT_SUPPORTED ),
185- errmsg ("XMLPARSE with STRIP WHITESPACE is not implemented" )));
186-
187- /*
188- * Note, that here we try to apply DTD defaults
189- * (XML_PARSE_DTDATTR) according to SQL/XML:10.16.7.d: 'Default
190- * valies defined by internal DTD are applied'. As for external
191- * DTDs, we try to support them too, (see SQL/XML:10.16.7.e)
192- */
193- xml_parse (data ,XML_PARSE_DTDATTR ,is_document );
182+ xml_parse (data ,is_document ,preserve_whitespace );
194183
195184return (xmltype * )data ;
196185#else
@@ -421,27 +410,18 @@ xml_init(void)
421410
422411/*
423412 * Convert a C string to XML internal representation
424- * (same things as for TEXT, but with checking the data for well-formedness
425- * and, moreover, validation against DTD, if needed).
426- * NOTICE: We use TEXT type as internal storage type. In the future,
427- * we plan to create own storage type (maybe several types/strategies)
428- * TODO predefined DTDs / XSDs and validation
429- * TODO validation against XML Schema
413+ *
430414 * TODO maybe, libxml2's xmlreader is better? (do not construct DOM, yet do not use SAX - see xml_reader.c)
431415 * TODO what about internal URI for docs? (see PG_XML_DEFAULT_URI below)
432416 */
433417static xmlDocPtr
434- xml_parse (text * data ,int opts ,bool is_document )
418+ xml_parse (text * data ,bool is_document ,bool preserve_whitespace )
435419{
436- bool validationFailed = false;
437420int res_code ;
438421int32 len ;
439422xmlChar * string ;
440423xmlParserCtxtPtr ctxt = NULL ;
441424xmlDocPtr doc = NULL ;
442- #ifdef XML_DEBUG_DTD_CONST
443- xmlDtdPtr dtd = NULL ;
444- #endif
445425
446426len = VARSIZE (data )- VARHDRSZ ;/* will be useful later */
447427string = xml_text2xmlChar (data );
@@ -456,51 +436,40 @@ xml_parse(text *data, int opts, bool is_document)
456436xml_ereport (ERROR ,ERRCODE_INTERNAL_ERROR ,
457437"could not allocate parser context" ,ctxt );
458438
459- /* first, we try to parse the string as XML doc, then, as XML chunk */
460- if (len >=5 && strncmp ((char * )string ,"<?xml" ,5 )== 0 )
439+ if (is_document )
461440{
462- /* consider it as DOCUMENT */
441+ /*
442+ * Note that here we try to apply DTD defaults
443+ * (XML_PARSE_DTDATTR) according to SQL/XML:10.16.7.d:
444+ * 'Default values defined by internal DTD are applied'.
445+ * As for external DTDs, we try to support them too (see
446+ * SQL/XML:10.16.7.e).
447+ */
463448doc = xmlCtxtReadMemory (ctxt , (char * )string ,len ,
464- PG_XML_DEFAULT_URI ,NULL ,opts );
449+ PG_XML_DEFAULT_URI ,NULL ,
450+ XML_PARSE_NOENT |XML_PARSE_DTDATTR
451+ | (preserve_whitespace ?0 :XML_PARSE_NOBLANKS ));
465452if (doc == NULL )
466453xml_ereport (ERROR ,ERRCODE_INVALID_XML_DOCUMENT ,
467- "could not parse XMLdata " ,ctxt );
454+ "invalid XMLdocument " ,ctxt );
468455}
469456else
470457{
471- /* attempt to parse the string as if it is an XML fragment */
472458doc = xmlNewDoc (NULL );
459+
460+ /*
461+ * FIXME: An XMLDecl is supposed to be accepted before the
462+ * content, but libxml doesn't allow this. Parse that
463+ * ourselves?
464+ */
465+
473466/* TODO resolve: xmlParseBalancedChunkMemory assumes that string is UTF8 encoded! */
474467res_code = xmlParseBalancedChunkMemory (doc ,NULL ,NULL ,0 ,string ,NULL );
475468if (res_code != 0 )
476- xml_ereport_by_code (ERROR ,ERRCODE_INVALID_XML_DOCUMENT ,
477- "could not parse XMLdata " ,res_code );
469+ xml_ereport_by_code (ERROR ,ERRCODE_INVALID_XML_CONTENT ,
470+ "invalid XMLcontent " ,res_code );
478471}
479472
480- #ifdef XML_DEBUG_DTD_CONST
481- dtd = xmlParseDTD (NULL , (xmlChar * )XML_DEBUG_DTD_CONST );
482- if (dtd == NULL )
483- xml_ereport (ERROR ,ERRCODE_INVALID_XML_DOCUMENT ,
484- "could not parse DTD data" ,ctxt );
485- if (xmlValidateDtd (xmlNewValidCtxt (),doc ,dtd )!= 1 )
486- validationFailed = true;
487- #else
488- /* if dtd for our xml data is detected... */
489- if ((doc -> intSubset != NULL )|| (doc -> extSubset != NULL ))
490- {
491- /* assume inline DTD exists - validation should be performed */
492- if (ctxt -> valid == 0 )
493- {
494- /* DTD exists, but validator reported 'validation failed' */
495- validationFailed = true;
496- }
497- }
498- #endif
499-
500- if (validationFailed )
501- xml_ereport (WARNING ,ERRCODE_INVALID_XML_DOCUMENT ,
502- "validation against DTD failed" ,ctxt );
503-
504473/* TODO encoding issues
505474 * (thoughts:
506475 * CASE:
@@ -517,10 +486,6 @@ xml_parse(text *data, int opts, bool is_document)
517486 * ) */
518487/* ... */
519488
520- #ifdef XML_DEBUG_DTD_CONST
521- if (dtd )
522- xmlFreeDtd (dtd );
523- #endif
524489if (doc )
525490xmlFreeDoc (doc );
526491if (ctxt )
@@ -529,10 +494,6 @@ xml_parse(text *data, int opts, bool is_document)
529494}
530495PG_CATCH ();
531496{
532- #ifdef XML_DEBUG_DTD_CONST
533- if (dtd )
534- xmlFreeDtd (dtd );
535- #endif
536497if (doc )
537498xmlFreeDoc (doc );
538499if (ctxt )