77 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
88 * Portions Copyright (c) 1994, Regents of the University of California
99 *
10- * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.20 2007/01/20 09:27:19 petere Exp $
10+ * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.21 2007/01/23 23:39:16 petere Exp $
1111 *
1212 *-------------------------------------------------------------------------
1313 */
5151
5252#ifdef USE_LIBXML
5353
54- #define PG_XML_DEFAULT_URI "dummy.xml"
55-
5654static StringInfo xml_err_buf = NULL ;
5755
5856static void xml_init (void );
@@ -63,7 +61,7 @@ static void xml_pfree(void *ptr);
6361static char * xml_pstrdup (const char * string );
6462#endif
6563static void xml_ereport (int level ,int sqlcode ,
66- const char * msg , void * ctxt );
64+ const char * msg );
6765static void xml_errorHandler (void * ctxt ,const char * msg , ...);
6866static void xml_ereport_by_code (int level ,int sqlcode ,
6967const char * msg ,int errcode );
@@ -667,14 +665,14 @@ xmlvalidate(PG_FUNCTION_ARGS)
667665ctxt = xmlNewParserCtxt ();
668666if (ctxt == NULL )
669667xml_ereport (ERROR ,ERRCODE_INTERNAL_ERROR ,
670- "could not allocate parser context" , ctxt );
668+ "could not allocate parser context" );
671669
672670doc = xmlCtxtReadMemory (ctxt , (char * )VARDATA (data ),
673671VARSIZE (data )- VARHDRSZ ,
674- PG_XML_DEFAULT_URI ,NULL ,0 );
672+ NULL ,NULL ,0 );
675673if (doc == NULL )
676674xml_ereport (ERROR ,ERRCODE_INVALID_XML_DOCUMENT ,
677- "could not parse XML data" , ctxt );
675+ "could not parse XML data" );
678676
679677#if 0
680678uri = xmlCreateURI ();
@@ -683,21 +681,21 @@ xmlvalidate(PG_FUNCTION_ARGS)
683681uri = xmlParseURI (dtdOrUri );
684682if (uri == NULL )
685683xml_ereport (ERROR ,ERRCODE_INTERNAL_ERROR ,
686- "not implemented yet... (TODO)" , ctxt );
684+ "not implemented yet... (TODO)" );
687685else
688686#endif
689687dtd = xmlParseDTD (NULL ,xml_text2xmlChar (dtdOrUri ));
690688
691689if (dtd == NULL )
692690xml_ereport (ERROR ,ERRCODE_INVALID_XML_DOCUMENT ,
693- "could not load DTD" , ctxt );
691+ "could not load DTD" );
694692
695693if (xmlValidateDtd (xmlNewValidCtxt (),doc ,dtd )== 1 )
696694result = true;
697695
698696if (!result )
699697xml_ereport (NOTICE ,ERRCODE_INVALID_XML_DOCUMENT ,
700- "validation against DTD failed" , ctxt );
698+ "validation against DTD failed" );
701699
702700#if 0
703701if (uri )
@@ -977,7 +975,6 @@ parse_xml_decl(const xmlChar *str, size_t *lenp, xmlChar **version, xmlChar **en
977975 * Convert a C string to XML internal representation
978976 *
979977 * TODO maybe, libxml2's xmlreader is better? (do not construct DOM, yet do not use SAX - see xml_reader.c)
980- * TODO what about internal URI for docs? (see PG_XML_DEFAULT_URI below)
981978 */
982979static xmlDocPtr
983980xml_parse (text * data ,bool is_document ,bool preserve_whitespace ,xmlChar * encoding )
@@ -1006,7 +1003,7 @@ xml_parse(text *data, bool is_document, bool preserve_whitespace, xmlChar *encod
10061003ctxt = xmlNewParserCtxt ();
10071004if (ctxt == NULL )
10081005xml_ereport (ERROR ,ERRCODE_INTERNAL_ERROR ,
1009- "could not allocate parser context" , ctxt );
1006+ "could not allocate parser context" );
10101007
10111008if (is_document )
10121009{
@@ -1018,13 +1015,13 @@ xml_parse(text *data, bool is_document, bool preserve_whitespace, xmlChar *encod
10181015 * SQL/XML:10.16.7.e)
10191016 */
10201017doc = xmlCtxtReadDoc (ctxt ,utf8string ,
1021- PG_XML_DEFAULT_URI ,
1018+ NULL ,
10221019"UTF-8" ,
1023- XML_PARSE_NOENT |XML_PARSE_DTDATTR
1024- | (preserve_whitespace ?0 :XML_PARSE_NOBLANKS ));
1020+ XML_PARSE_NOENT |XML_PARSE_DTDATTR
1021+ | (preserve_whitespace ?0 :XML_PARSE_NOBLANKS ));
10251022if (doc == NULL )
10261023xml_ereport (ERROR ,ERRCODE_INVALID_XML_DOCUMENT ,
1027- "invalid XML document" , ctxt );
1024+ "invalid XML document" );
10281025}
10291026else
10301027{
@@ -1036,12 +1033,14 @@ xml_parse(text *data, bool is_document, bool preserve_whitespace, xmlChar *encod
10361033doc = xmlNewDoc (NULL );
10371034
10381035res_code = parse_xml_decl (utf8string ,& count ,& version ,NULL ,& standalone );
1039-
1040- if (res_code == 0 )
1041- res_code = xmlParseBalancedChunkMemory (doc ,NULL ,NULL ,0 ,utf8string + count ,NULL );
10421036if (res_code != 0 )
10431037xml_ereport_by_code (ERROR ,ERRCODE_INVALID_XML_CONTENT ,
1044- "invalid XML content" ,res_code );
1038+ "invalid XML content: invalid XML declaration" ,res_code );
1039+
1040+ res_code = xmlParseBalancedChunkMemory (doc ,NULL ,NULL ,0 ,utf8string + count ,NULL );
1041+ if (res_code != 0 )
1042+ xml_ereport (ERROR ,ERRCODE_INVALID_XML_CONTENT ,
1043+ "invalid XML content" );
10451044
10461045doc -> version = xmlStrdup (version );
10471046doc -> encoding = xmlStrdup ((xmlChar * )"UTF-8" );
@@ -1120,49 +1119,45 @@ xml_pstrdup(const char *string)
11201119
11211120
11221121/*
1123- * Wrapper for "ereport" function.
1124- * Adds detail - libxml's native error message, if any.
1122+ * Wrapper for "ereport" function for XML-related errors. The "msg"
1123+ * is the SQL-level message; some can be adopted from the SQL/XML
1124+ * standard. This function adds libxml's native error messages, if
1125+ * any, as detail.
11251126 */
11261127static void
11271128xml_ereport (int level ,int sqlcode ,
1128- const char * msg , void * ctxt )
1129+ const char * msg )
11291130{
1130- xmlErrorPtr libxmlErr = NULL ;
1131+ char * detail ;
11311132
11321133if (xml_err_buf -> len > 0 )
11331134{
1134- ereport (DEBUG1 ,
1135- (errmsg ("%s" ,xml_err_buf -> data )));
1135+ detail = pstrdup (xml_err_buf -> data );
11361136xml_err_buf -> data [0 ]= '\0' ;
11371137xml_err_buf -> len = 0 ;
11381138}
1139+ else
1140+ detail = NULL ;
11391141
1140- if (ctxt != NULL )
1141- libxmlErr = xmlCtxtGetLastError (ctxt );
1142-
1143- if (libxmlErr == NULL )
1142+ /* libxml error messages end in '\n'; get rid of it */
1143+ if (detail )
11441144{
1145+ size_t len ;
1146+
1147+ len = strlen (detail );
1148+ if (len > 0 && detail [len - 1 ]== '\n' )
1149+ detail [len - 1 ]= '\0' ;
1150+
11451151ereport (level ,
11461152(errcode (sqlcode ),
1147- errmsg ("%s" ,msg )));
1153+ errmsg ("%s" ,msg ),
1154+ errdetail ("%s" ,detail )));
11481155}
11491156else
11501157{
1151- /* as usual, libxml error message contains '\n'; get rid of it */
1152- char * xmlErrDetail ;
1153- int xmlErrLen ,i ;
1154-
1155- xmlErrDetail = pstrdup (libxmlErr -> message );
1156- xmlErrLen = strlen (xmlErrDetail );
1157- for (i = 0 ;i < xmlErrLen ;i ++ )
1158- {
1159- if (xmlErrDetail [i ]== '\n' )
1160- xmlErrDetail [i ]= '.' ;
1161- }
11621158ereport (level ,
11631159(errcode (sqlcode ),
1164- errmsg ("%s" ,msg ),
1165- errdetail ("%s" ,xmlErrDetail )));
1160+ errmsg ("%s" ,msg )));
11661161}
11671162}
11681163
@@ -1194,182 +1189,38 @@ xml_errorHandler(void *ctxt, const char *msg,...)
11941189
11951190
11961191/*
1197- * Return error message by libxml error code
1198- * TODO make them closer to recommendations from Postgres manual
1192+ * Wrapper for "ereport" function for XML-related errors. The "msg"
1193+ * is the SQL-level message; some can be adopted from the SQL/XML
1194+ * standard. This function uses "code" to create a textual detail
1195+ * message. At the moment, we only need to cover those codes that we
1196+ * may raise in this file.
11991197 */
12001198static void
12011199xml_ereport_by_code (int level ,int sqlcode ,
12021200const char * msg ,int code )
12031201{
12041202const char * det ;
12051203
1206- if (xml_err_buf -> len > 0 )
1207- {
1208- ereport (DEBUG1 ,
1209- (errmsg ("%s" ,xml_err_buf -> data )));
1210- xml_err_buf -> data [0 ]= '\0' ;
1211- xml_err_buf -> len = 0 ;
1212- }
1213-
12141204switch (code )
12151205{
1216- case XML_ERR_INTERNAL_ERROR :
1217- det = "libxml internal error" ;
1218- break ;
1219- case XML_ERR_ENTITY_LOOP :
1220- det = "Detected an entity reference loop" ;
1221- break ;
1222- case XML_ERR_ENTITY_NOT_STARTED :
1223- det = "EntityValue: \" or ' expected" ;
1224- break ;
1225- case XML_ERR_ENTITY_NOT_FINISHED :
1226- det = "EntityValue: \" or ' expected" ;
1227- break ;
1228- case XML_ERR_ATTRIBUTE_NOT_STARTED :
1229- det = "AttValue: \" or ' expected" ;
1230- break ;
1231- case XML_ERR_LT_IN_ATTRIBUTE :
1232- det = "Unescaped '<' not allowed in attributes values" ;
1233- break ;
1234- case XML_ERR_LITERAL_NOT_STARTED :
1235- det = "SystemLiteral \" or ' expected" ;
1236- break ;
1237- case XML_ERR_LITERAL_NOT_FINISHED :
1238- det = "Unfinished System or Public ID \" or ' expected" ;
1239- break ;
1240- case XML_ERR_MISPLACED_CDATA_END :
1241- det = "Sequence ']]>' not allowed in content" ;
1242- break ;
1243- case XML_ERR_URI_REQUIRED :
1244- det = "SYSTEM or PUBLIC, the URI is missing" ;
1245- break ;
1246- case XML_ERR_PUBID_REQUIRED :
1247- det = "PUBLIC, the Public Identifier is missing" ;
1248- break ;
1249- case XML_ERR_HYPHEN_IN_COMMENT :
1250- det = "Comment must not contain '--' (double-hyphen)" ;
1251- break ;
1252- case XML_ERR_PI_NOT_STARTED :
1253- det = "xmlParsePI : no target name" ;
1254- break ;
1255- case XML_ERR_RESERVED_XML_NAME :
1256- det = "Invalid PI name" ;
1257- break ;
1258- case XML_ERR_NOTATION_NOT_STARTED :
1259- det = "NOTATION: Name expected here" ;
1260- break ;
1261- case XML_ERR_NOTATION_NOT_FINISHED :
1262- det = "'>' required to close NOTATION declaration" ;
1263- break ;
1264- case XML_ERR_VALUE_REQUIRED :
1265- det = "Entity value required" ;
1266- break ;
1267- case XML_ERR_URI_FRAGMENT :
1268- det = "Fragment not allowed" ;
1269- break ;
1270- case XML_ERR_ATTLIST_NOT_STARTED :
1271- det = "'(' required to start ATTLIST enumeration" ;
1272- break ;
1273- case XML_ERR_NMTOKEN_REQUIRED :
1274- det = "NmToken expected in ATTLIST enumeration" ;
1275- break ;
1276- case XML_ERR_ATTLIST_NOT_FINISHED :
1277- det = "')' required to finish ATTLIST enumeration" ;
1278- break ;
1279- case XML_ERR_MIXED_NOT_STARTED :
1280- det = "MixedContentDecl : '|' or ')*' expected" ;
1281- break ;
1282- case XML_ERR_PCDATA_REQUIRED :
1283- det = "MixedContentDecl : '#PCDATA' expected" ;
1284- break ;
1285- case XML_ERR_ELEMCONTENT_NOT_STARTED :
1286- det = "ContentDecl : Name or '(' expected" ;
1287- break ;
1288- case XML_ERR_ELEMCONTENT_NOT_FINISHED :
1289- det = "ContentDecl : ',' '|' or ')' expected" ;
1290- break ;
1291- case XML_ERR_PEREF_IN_INT_SUBSET :
1292- det = "PEReference: forbidden within markup decl in internal subset" ;
1293- break ;
1294- case XML_ERR_GT_REQUIRED :
1295- det = "Expected '>'" ;
1296- break ;
1297- case XML_ERR_CONDSEC_INVALID :
1298- det = "XML conditional section '[' expected" ;
1299- break ;
1300- case XML_ERR_EXT_SUBSET_NOT_FINISHED :
1301- det = "Content error in the external subset" ;
1302- break ;
1303- case XML_ERR_CONDSEC_INVALID_KEYWORD :
1304- det = "conditional section INCLUDE or IGNORE keyword expected" ;
1305- break ;
1306- case XML_ERR_CONDSEC_NOT_FINISHED :
1307- det = "XML conditional section not closed" ;
1308- break ;
1309- case XML_ERR_XMLDECL_NOT_STARTED :
1310- det = "Text declaration '<?xml' required" ;
1311- break ;
1312- case XML_ERR_XMLDECL_NOT_FINISHED :
1313- det = "parsing XML declaration: '?>' expected" ;
1314- break ;
1315- case XML_ERR_EXT_ENTITY_STANDALONE :
1316- det = "external parsed entities cannot be standalone" ;
1317- break ;
1318- case XML_ERR_ENTITYREF_SEMICOL_MISSING :
1319- det = "EntityRef: expecting ';'" ;
1320- break ;
1321- case XML_ERR_DOCTYPE_NOT_FINISHED :
1322- det = "DOCTYPE improperly terminated" ;
1323- break ;
1324- case XML_ERR_LTSLASH_REQUIRED :
1325- det = "EndTag: '</' not found" ;
1326- break ;
1327- case XML_ERR_EQUAL_REQUIRED :
1328- det = "Expected '='" ;
1329- break ;
1330- case XML_ERR_STRING_NOT_CLOSED :
1331- det = "String not closed expecting \" or '" ;
1332- break ;
1333- case XML_ERR_STRING_NOT_STARTED :
1334- det = "String not started expecting ' or \"" ;
1335- break ;
1336- case XML_ERR_ENCODING_NAME :
1337- det = "Invalid XML encoding name" ;
1338- break ;
1339- case XML_ERR_STANDALONE_VALUE :
1340- det = "Standalone accepts only 'yes' or 'no'" ;
1341- break ;
1342- case XML_ERR_DOCUMENT_EMPTY :
1343- det = "Document is empty" ;
1344- break ;
1345- case XML_ERR_DOCUMENT_END :
1346- det = "Extra content at the end of the document" ;
1347- break ;
1348- case XML_ERR_NOT_WELL_BALANCED :
1349- det = "Chunk is not well balanced" ;
1350- break ;
1351- case XML_ERR_EXTRA_CONTENT :
1352- det = "Extra content at the end of well balanced chunk" ;
1353- break ;
1354- case XML_ERR_VERSION_MISSING :
1355- det = "Malformed declaration expecting version" ;
1356- break ;
1357- /* more err codes... Please, keep the order! */
1358- case XML_ERR_ATTRIBUTE_WITHOUT_VALUE :/* 41 */
1359- det = "Attribute without value" ;
1360- break ;
1361- case XML_ERR_ATTRIBUTE_REDEFINED :
1362- det = "Attribute defined more than once in the same element" ;
1363- break ;
1364- case XML_ERR_COMMENT_NOT_FINISHED :/* 45 */
1365- det = "Comment is not finished" ;
1366- break ;
1367- case XML_ERR_NAME_REQUIRED :/* 68 */
1368- det = "Element name not found" ;
1369- break ;
1370- case XML_ERR_TAG_NOT_FINISHED :/* 77 */
1371- det = "Closing tag not found" ;
1372- break ;
1206+ case XML_ERR_INVALID_CHAR :
1207+ det = "Invalid character value" ;
1208+ break ;
1209+ case XML_ERR_SPACE_REQUIRED :
1210+ det = "Space required" ;
1211+ break ;
1212+ case XML_ERR_STANDALONE_VALUE :
1213+ det = "standalone accepts only 'yes' or 'no'" ;
1214+ break ;
1215+ case XML_ERR_VERSION_MISSING :
1216+ det = "Malformed declaration expecting version" ;
1217+ break ;
1218+ case XML_ERR_MISSING_ENCODING :
1219+ det = "Missing encoding in text declaration" ;
1220+ break ;
1221+ case XML_ERR_XMLDECL_NOT_FINISHED :
1222+ det = "Parsing XML declaration: '?>' expected" ;
1223+ break ;
13731224default :
13741225det = "Unrecognized libxml error code: %d" ;
13751226break ;