libxml2 |
Character encoding conversion functions.More...
Data Structures | |
| struct | _xmlCharEncodingHandler |
| A character encoding conversion handler for non UTF-8 encodings.More... | |
Typedefs | |
| typedef int(* | xmlCharEncodingInputFunc) (unsigned char *out, int *outlen, const unsigned char *in, int *inlen) |
| Convert characters to UTF-8. | |
| typedef int(* | xmlCharEncodingOutputFunc) (unsigned char *out, int *outlen, const unsigned char *in, int *inlen) |
| Convert characters from UTF-8. | |
| typedef xmlCharEncError(* | xmlCharEncConvFunc) (void *vctxt, unsigned char *out, int *outlen, const unsigned char *in, int *inlen, int flush) |
| Convert between character encodings. | |
| typedef void(* | xmlCharEncConvCtxtDtor) (void *vctxt) |
| Free a conversion context. | |
| typedef struct _xmlCharEncodingHandler | xmlCharEncodingHandler |
| Character encoding converter. | |
| typedef xmlParserErrors(* | xmlCharEncConvImpl) (void *vctxt, const char *name, xmlCharEncFlags flags, xmlCharEncodingHandler **out) |
| If this function returns XML_ERR_OK, it must fill the `out` pointer with an encoding handler. | |
Enumerations | |
| enum | xmlCharEncError |
| Encoding conversion errors.More... | |
| enum | xmlCharEncoding |
| Predefined values for some standard encodings.More... | |
| enum | xmlCharEncFlags |
| Encoding conversion flags.More... | |
Functions | |
| void | xmlInitCharEncodingHandlers (void) |
| void | xmlCleanupCharEncodingHandlers (void) |
| Cleanup the memory allocated for the char encoding support, it unregisters all the encoding handlers and the aliases. | |
| void | xmlRegisterCharEncodingHandler (xmlCharEncodingHandler *handler) |
| Register the char encoding handler. | |
| xmlParserErrors | xmlLookupCharEncodingHandler (xmlCharEncoding enc, xmlCharEncodingHandler **out) |
| Find or create a handler matching the encoding. | |
| xmlParserErrors | xmlOpenCharEncodingHandler (const char *name, int output, xmlCharEncodingHandler **out) |
| Find or create a handler matching the encoding. | |
| xmlParserErrors | xmlCreateCharEncodingHandler (const char *name, xmlCharEncFlags flags, xmlCharEncConvImpl impl, void *implCtxt, xmlCharEncodingHandler **out) |
| Find or create a handler matching the encoding. | |
| xmlCharEncodingHandler * | xmlGetCharEncodingHandler (xmlCharEncoding enc) |
| xmlCharEncodingHandler * | xmlFindCharEncodingHandler (const char *name) |
| If the encoding is UTF-8, this will return a no-op handler that shouldn't be used. | |
| xmlCharEncodingHandler * | xmlNewCharEncodingHandler (const char *name, xmlCharEncodingInputFunc input, xmlCharEncodingOutputFunc output) |
| Creates and registers an xmlCharEncodingHandler. | |
| xmlParserErrors | xmlCharEncNewCustomHandler (const char *name, xmlCharEncConvFunc input, xmlCharEncConvFunc output, xmlCharEncConvCtxtDtor ctxtDtor, void *inputCtxt, void *outputCtxt, xmlCharEncodingHandler **out) |
| Create a custom xmlCharEncodingHandler. | |
| int | xmlAddEncodingAlias (const char *name, const char *alias) |
| Registers an alias `alias` for an encoding named `name`. | |
| int | xmlDelEncodingAlias (const char *alias) |
| Unregisters an encoding alias. | |
| const char * | xmlGetEncodingAlias (const char *alias) |
| Lookup an encoding name for the given alias. | |
| void | xmlCleanupEncodingAliases (void) |
| Unregisters all aliases. | |
| xmlCharEncoding | xmlParseCharEncoding (const char *name) |
| Compare the string to the encoding schemes already known. | |
| const char * | xmlGetCharEncodingName (xmlCharEncoding enc) |
| The "canonical" name for XML encoding. | |
| xmlCharEncoding | xmlDetectCharEncoding (const unsigned char *in, int len) |
| Guess the encoding of the entity using the first bytes of the entity content according to the non-normative appendix F of the XML-1.0 recommendation. | |
| int | xmlCharEncOutFunc (xmlCharEncodingHandler *handler, struct _xmlBuffer *out, struct _xmlBuffer *in) |
| Generic front-end for output encoding conversion. | |
| int | xmlCharEncInFunc (xmlCharEncodingHandler *handler, struct _xmlBuffer *out, struct _xmlBuffer *in) |
| Generic front-end for input encoding conversion. | |
| int | xmlCharEncFirstLine (xmlCharEncodingHandler *handler, struct _xmlBuffer *out, struct _xmlBuffer *in) |
| DEPRECATED: Don't use. | |
| int | xmlCharEncCloseFunc (xmlCharEncodingHandler *handler) |
| Releases an xmlCharEncodingHandler. | |
| int | xmlUTF8ToIsolat1 (unsigned char *out, int *outlen, const unsigned char *in, int *inlen) |
| Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1 block of chars out. | |
| int | xmlIsolat1ToUTF8 (unsigned char *out, int *outlen, const unsigned char *in, int *inlen) |
| Take a block of ISO Latin 1 chars in and try to convert it to a UTF-8 block of chars out. | |
Character encoding conversion functions.
| typedef void(* xmlCharEncConvCtxtDtor) (void *vctxt) |
Free a conversion context.
| vctxt | conversion context |
| typedef xmlCharEncError(* xmlCharEncConvFunc) (void *vctxt, unsigned char *out, int *outlen, const unsigned char *in, int *inlen, int flush) |
Convert between character encodings.
The value of `inlen` after return is the number of bytes consumed and `outlen` is the number of bytes produced.
If the converter can consume partial multi-byte sequences, the `flush` flag can be used to detect truncated sequences at EOF. Otherwise, the flag can be ignored.
| vctxt | conversion context |
| out | a pointer to an array of bytes to store the result |
| outlen | the length of `out` |
| in | a pointer to an array of input bytes |
| inlen | the length of `in` |
| flush | end of input |
| typedef xmlParserErrors(* xmlCharEncConvImpl) (void *vctxt, const char *name, xmlCharEncFlags flags, xmlCharEncodingHandler **out) |
If this function returns XML_ERR_OK, it must fill the `out` pointer with an encoding handler.
The handler can be obtained from xmlCharEncNewCustomHandler.
flags can contain XML_ENC_INPUT, XML_ENC_OUTPUT or both.
| vctxt | user data |
| name | encoding name |
| flags | bit mask of flags |
| out | pointer to resulting handler |
| typedef int(* xmlCharEncodingInputFunc) (unsigned char *out, int *outlen, const unsigned char *in, int *inlen) |
Convert characters to UTF-8.
On success, the value of `inlen` after return is the number of bytes consumed and `outlen` is the number of bytes produced.
| out | a pointer to an array of bytes to store the UTF-8 result |
| outlen | the length of `out` |
| in | a pointer to an array of chars in the original encoding |
| inlen | the length of `in` |
| typedef int(* xmlCharEncodingOutputFunc) (unsigned char *out, int *outlen, const unsigned char *in, int *inlen) |
Convert characters from UTF-8.
On success, the value of `inlen` after return is the number of bytes consumed and `outlen` is the number of bytes produced.
| out | a pointer to an array of bytes to store the result |
| outlen | the length of `out` |
| in | a pointer to an array of UTF-8 chars |
| inlen | the length of `in` |
| enum xmlCharEncError |
| enum xmlCharEncFlags |
| enum xmlCharEncoding |
Predefined values for some standard encodings.
| int xmlAddEncodingAlias | ( | const char * | name, |
| const char * | alias ) |
Registers an alias `alias` for an encoding named `name`.
Existing aliases will be overwritten.
| name | the encoding name as parsed, in UTF-8 format (ASCII actually) |
| alias | the alias name as parsed, in UTF-8 format (ASCII actually) |
| int xmlCharEncCloseFunc | ( | xmlCharEncodingHandler * | handler | ) |
Releases an xmlCharEncodingHandler.
Must be called after a handler is no longer in use.
| handler | encoding handler |
| int xmlCharEncFirstLine | ( | xmlCharEncodingHandler * | handler, |
| struct _xmlBuffer * | out, | ||
| struct _xmlBuffer * | in ) |
DEPRECATED: Don't use.
| int xmlCharEncInFunc | ( | xmlCharEncodingHandler * | handler, |
| struct _xmlBuffer * | out, | ||
| struct _xmlBuffer * | in ) |
Generic front-end for input encoding conversion.
| xmlParserErrors xmlCharEncNewCustomHandler | ( | const char * | name, |
| xmlCharEncConvFunc | input, | ||
| xmlCharEncConvFunc | output, | ||
| xmlCharEncConvCtxtDtor | ctxtDtor, | ||
| void * | inputCtxt, | ||
| void * | outputCtxt, | ||
| xmlCharEncodingHandler ** | out ) |
Create a custom xmlCharEncodingHandler.
| name | the encoding name |
| input | input callback which converts to UTF-8 |
| output | output callback which converts from UTF-8 |
| ctxtDtor | context destructor |
| inputCtxt | context for input callback |
| outputCtxt | context for output callback |
| out | pointer to resulting handler |
| int xmlCharEncOutFunc | ( | xmlCharEncodingHandler * | handler, |
| struct _xmlBuffer * | out, | ||
| struct _xmlBuffer * | in ) |
Generic front-end for output encoding conversion.
A first call with `in` set to NULL has to be made to write a BOM.
When using GNU libiconv, unsupported characters in the output encoding will be automatically replaced with a numeric character reference.
| void xmlCleanupCharEncodingHandlers | ( | void | ) |
Cleanup the memory allocated for the char encoding support, it unregisters all the encoding handlers and the aliases.
| void xmlCleanupEncodingAliases | ( | void | ) |
Unregisters all aliases.
| xmlParserErrors xmlCreateCharEncodingHandler | ( | const char * | name, |
| xmlCharEncFlags | flags, | ||
| xmlCharEncConvImpl | impl, | ||
| void * | implCtxt, | ||
| xmlCharEncodingHandler ** | out ) |
Find or create a handler matching the encoding.
The following converters are looked up in order:
The handler must be closed with xmlCharEncCloseFunc.
If the encoding is UTF-8, a NULL handler and no error code will be returned.
flags can contain XML_ENC_INPUT, XML_ENC_OUTPUT or both.
| name | a string describing the char encoding. |
| flags | bit mask of flags |
| impl | a conversion implementation (optional) |
| implCtxt | user data for conversion implementation (optional) |
| out | pointer to result |
| int xmlDelEncodingAlias | ( | const char * | alias | ) |
Unregisters an encoding alias.
| alias | the alias name as parsed, in UTF-8 format (ASCII actually) |
| xmlCharEncoding xmlDetectCharEncoding | ( | const unsigned char * | in, |
| int | len ) |
Guess the encoding of the entity using the first bytes of the entity content according to the non-normative appendix F of the XML-1.0 recommendation.
| in | a pointer to the first bytes of the XML entity, must be at least 2 bytes long (at least 4 if the encoding is a UCS-4 variant). |
| len | the length of the buffer |
| xmlCharEncodingHandler * xmlFindCharEncodingHandler | ( | const char * | name | ) |
If the encoding is UTF-8, this will return a no-op handler that shouldn't be used.
| name | a string describing the char encoding. |
| xmlCharEncodingHandler * xmlGetCharEncodingHandler | ( | xmlCharEncoding | enc | ) |
| enc | an xmlCharEncoding value. |
| const char * xmlGetCharEncodingName | ( | xmlCharEncoding | enc | ) |
The "canonical" name for XML encoding.
Cf. http://www.w3.org/TR/REC-xml#charencoding Section 4.3.3 Character Encoding in Entities
| enc | the encoding |
| const char * xmlGetEncodingAlias | ( | const char * | alias | ) |
Lookup an encoding name for the given alias.
| alias | the alias name as parsed, in UTF-8 format (ASCII actually) |
| void xmlInitCharEncodingHandlers | ( | void | ) |
| int xmlIsolat1ToUTF8 | ( | unsigned char * | out, |
| int * | outlen, | ||
| const unsigned char * | in, | ||
| int * | inlen ) |
Take a block of ISO Latin 1 chars in and try to convert it to a UTF-8 block of chars out.
The value of `inlen` after return is the number of bytes consumed. The value of `outlen` after return is the number of bytes produced.
| out | a pointer to an array of bytes to store the result |
| outlen | the length of `out` |
| in | a pointer to an array of ISO Latin 1 chars |
| inlen | the length of `in` |
| xmlParserErrors xmlLookupCharEncodingHandler | ( | xmlCharEncoding | enc, |
| xmlCharEncodingHandler ** | out ) |
Find or create a handler matching the encoding.
The following converters are looked up in order:
The handler must be closed with xmlCharEncCloseFunc.
If the encoding is UTF-8, a NULL handler and no error code will be returned.
| enc | an xmlCharEncoding value. |
| out | pointer to result |
| xmlCharEncodingHandler * xmlNewCharEncodingHandler | ( | const char * | name, |
| xmlCharEncodingInputFunc | input, | ||
| xmlCharEncodingOutputFunc | output ) |
Creates and registers an xmlCharEncodingHandler.
| name | the encoding name, in UTF-8 format (ASCII actually) |
| input | the xmlCharEncodingInputFunc to read that encoding |
| output | the xmlCharEncodingOutputFunc to write that encoding |
| xmlParserErrors xmlOpenCharEncodingHandler | ( | const char * | name, |
| int | output, | ||
| xmlCharEncodingHandler ** | out ) |
Find or create a handler matching the encoding.
The following converters are looked up in order:
The handler must be closed with xmlCharEncCloseFunc.
If the encoding is UTF-8, a NULL handler and no error code will be returned.
| name | a string describing the char encoding. |
| output | boolean, use handler for output |
| out | pointer to result |
| xmlCharEncoding xmlParseCharEncoding | ( | const char * | name | ) |
Compare the string to the encoding schemes already known.
Note that the comparison is case insensitive, according to section [XML] 4.3.3 Character Encoding in Entities.
| name | the encoding name as parsed, in UTF-8 format (ASCII actually) |
| void xmlRegisterCharEncodingHandler | ( | xmlCharEncodingHandler * | handler | ) |
Register the char encoding handler.
| handler | the xmlCharEncodingHandler handler block |
| int xmlUTF8ToIsolat1 | ( | unsigned char * | out, |
| int * | outlen, | ||
| const unsigned char * | in, | ||
| int * | inlen ) |
Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1 block of chars out.
The value of `inlen` after return is the number of bytes consumed. The value of `outlen` after return is the number of bytes produced.
| out | a pointer to an array of bytes to store the result |
| outlen | the length of `out` |
| in | a pointer to an array of UTF-8 chars |
| inlen | the length of `in` |