|
7 | 7 | * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
8 | 8 | * Portions Copyright (c) 1994, Regents of the University of California
|
9 | 9 | *
|
10 |
| - * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.11 2007/01/06 19:18:36 petere Exp $ |
| 10 | + * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.12 2007/01/07 00:13:55 petere Exp $ |
11 | 11 | *
|
12 | 12 | *-------------------------------------------------------------------------
|
13 | 13 | */
|
@@ -489,6 +489,122 @@ xml_init(void)
|
489 | 489 | }
|
490 | 490 |
|
491 | 491 |
|
| 492 | +/* |
| 493 | + * SQL/XML allows storing "XML documents" or "XML content". "XML |
| 494 | + * documents" are specified by the XML specification and are parsed |
| 495 | + * easily by libxml. "XML content" is specified by SQL/XML as the |
| 496 | + * production "XMLDecl? content". But libxml can only parse the |
| 497 | + * "content" part, so we have to parse the XML declaration ourselves |
| 498 | + * to complete this. |
| 499 | + */ |
| 500 | + |
/*
 * Helper macros for walking over an XML declaration.
 *
 * CHECK_XML_SPACE(p): if the character at p is not XML whitespace, make the
 * *enclosing function* return XML_ERR_SPACE_REQUIRED.  Note the hidden
 * "return": only use this inside a function whose return value is a libxml
 * parser error code.
 *
 * SKIP_XML_SPACE(p): advance p past any run of XML whitespace.  p must be a
 * modifiable lvalue; it is evaluated more than once.
 *
 * Both are wrapped in do { ... } while (0) so that they behave as a single
 * statement, e.g. when used as the unbraced body of an if/else.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

#define SKIP_XML_SPACE(p) \
	do { \
		while (xmlIsBlank_ch(*(p))) \
			(p)++; \
	} while (0)
| 503 | + |
| 504 | +staticint |
| 505 | +parse_xml_decl(constxmlChar*str,size_t*len,xmlChar**encoding,int*standalone) |
| 506 | +{ |
| 507 | +constxmlChar*p; |
| 508 | +constxmlChar*save_p; |
| 509 | + |
| 510 | +p=str; |
| 511 | + |
| 512 | +if (xmlStrncmp(p, (xmlChar*)"<?xml",5)!=0) |
| 513 | +gotofinished; |
| 514 | + |
| 515 | +p+=5; |
| 516 | + |
| 517 | +/* version */ |
| 518 | +CHECK_XML_SPACE(p); |
| 519 | +SKIP_XML_SPACE(p); |
| 520 | +if (xmlStrncmp(p, (xmlChar*)"version",7)!=0) |
| 521 | +returnXML_ERR_VERSION_MISSING; |
| 522 | +p+=7; |
| 523 | +SKIP_XML_SPACE(p); |
| 524 | +if (*p!='=') |
| 525 | +returnXML_ERR_VERSION_MISSING; |
| 526 | +p+=1; |
| 527 | +SKIP_XML_SPACE(p); |
| 528 | +if (xmlStrncmp(p, (xmlChar*)"'1.0'",5)!=0&&xmlStrncmp(p, (xmlChar*)"\"1.0\"",5)!=0) |
| 529 | +returnXML_ERR_VERSION_MISSING; |
| 530 | +p+=5; |
| 531 | + |
| 532 | +/* encoding */ |
| 533 | +save_p=p; |
| 534 | +SKIP_XML_SPACE(p); |
| 535 | +if (xmlStrncmp(p, (xmlChar*)"encoding",8)==0) |
| 536 | +{ |
| 537 | +CHECK_XML_SPACE(save_p); |
| 538 | +p+=8; |
| 539 | +SKIP_XML_SPACE(p); |
| 540 | +if (*p!='=') |
| 541 | +returnXML_ERR_MISSING_ENCODING; |
| 542 | +p+=1; |
| 543 | +SKIP_XML_SPACE(p); |
| 544 | + |
| 545 | +if (*p=='\''||*p=='"') |
| 546 | +{ |
| 547 | +constxmlChar*q; |
| 548 | + |
| 549 | +q=xmlStrchr(p+1,*p); |
| 550 | +if (!q) |
| 551 | +returnXML_ERR_MISSING_ENCODING; |
| 552 | + |
| 553 | +*encoding=xmlStrndup(p+1,q-p-1); |
| 554 | +p=q+1; |
| 555 | +} |
| 556 | +else |
| 557 | +returnXML_ERR_MISSING_ENCODING; |
| 558 | +} |
| 559 | +else |
| 560 | +{ |
| 561 | +p=save_p; |
| 562 | +*encoding=NULL; |
| 563 | +} |
| 564 | + |
| 565 | +/* standalone */ |
| 566 | +save_p=p; |
| 567 | +SKIP_XML_SPACE(p); |
| 568 | +if (xmlStrncmp(p, (xmlChar*)"standalone",10)==0) |
| 569 | +{ |
| 570 | +CHECK_XML_SPACE(save_p); |
| 571 | +p+=10; |
| 572 | +SKIP_XML_SPACE(p); |
| 573 | +if (*p!='=') |
| 574 | +returnXML_ERR_STANDALONE_VALUE; |
| 575 | +p+=1; |
| 576 | +SKIP_XML_SPACE(p); |
| 577 | +if (xmlStrncmp(p, (xmlChar*)"'yes'",5)==0||xmlStrncmp(p, (xmlChar*)"\"yes\"",5)==0) |
| 578 | +{ |
| 579 | +*standalone=1; |
| 580 | +p+=5; |
| 581 | +} |
| 582 | +elseif (xmlStrncmp(p, (xmlChar*)"'no'",4)==0||xmlStrncmp(p, (xmlChar*)"\"no\"",4)==0) |
| 583 | +{ |
| 584 | +*standalone=0; |
| 585 | +p+=4; |
| 586 | +} |
| 587 | +else |
| 588 | +returnXML_ERR_STANDALONE_VALUE; |
| 589 | +} |
| 590 | +else |
| 591 | +{ |
| 592 | +p=save_p; |
| 593 | +*standalone=-1; |
| 594 | +} |
| 595 | + |
| 596 | +SKIP_XML_SPACE(p); |
| 597 | +if (xmlStrncmp(p, (xmlChar*)"?>",2)!=0) |
| 598 | +returnXML_ERR_XMLDECL_NOT_FINISHED; |
| 599 | +p+=2; |
| 600 | + |
| 601 | +finished: |
| 602 | +if (len) |
| 603 | +*len= (p-str); |
| 604 | +returnXML_ERR_OK; |
| 605 | +} |
| 606 | + |
| 607 | + |
492 | 608 | /*
|
493 | 609 | * Convert a C string to XML internal representation
|
494 | 610 | *
|
@@ -536,19 +652,23 @@ xml_parse(text *data, bool is_document, bool preserve_whitespace)
|
536 | 652 | }
|
537 | 653 | else
|
538 | 654 | {
|
| 655 | +size_tcount; |
| 656 | +xmlChar*encoding=NULL; |
| 657 | +intstandalone=-1; |
| 658 | + |
539 | 659 | doc=xmlNewDoc(NULL);
|
540 | 660 |
|
541 |
| -/* |
542 |
| - * FIXME: An XMLDecl is supposed to be accepted before the |
543 |
| - * content, but libxml doesn't allow this. Parse that |
544 |
| - * ourselves? |
545 |
| - */ |
| 661 | +res_code=parse_xml_decl(string,&count,&encoding,&standalone); |
546 | 662 |
|
547 | 663 | /* TODO resolve: xmlParseBalancedChunkMemory assumes that string is UTF8 encoded! */
|
548 |
| -res_code=xmlParseBalancedChunkMemory(doc,NULL,NULL,0,string,NULL); |
| 664 | +if (res_code==0) |
| 665 | +res_code=xmlParseBalancedChunkMemory(doc,NULL,NULL,0,string+count,NULL); |
549 | 666 | if (res_code!=0)
|
550 | 667 | xml_ereport_by_code(ERROR,ERRCODE_INVALID_XML_CONTENT,
|
551 | 668 | "invalid XML content",res_code);
|
| 669 | + |
| 670 | +doc->encoding=encoding; |
| 671 | +doc->standalone=standalone; |
552 | 672 | }
|
553 | 673 |
|
554 | 674 | /* TODO encoding issues
|
|