|
4 | 4 | * (currently mule internal code (mic) is used)
|
5 | 5 | * Tatsuo Ishii
|
6 | 6 | *
|
7 |
| - * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.71 2008/05/27 12:24:42 mha Exp $ |
| 7 | + * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.72 2008/06/18 18:42:54 momjian Exp $ |
8 | 8 | */
|
9 | 9 | #include"postgres.h"
|
10 | 10 |
|
@@ -555,6 +555,134 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_
|
555 | 555 | returnresult;
|
556 | 556 | }
|
557 | 557 |
|
| 558 | + |
| 559 | + |
| 560 | +#ifdefUSE_WIDE_UPPER_LOWER |
| 561 | + |
/*
 * wchar2char --- convert wide characters to multibyte format
 *
 * Mirrors the contract of the standard wcstombs() function: "from" must be
 * a zero-terminated wide string, and at most "tolen" bytes are stored at
 * "to".  A terminating zero byte is written only if it fits.
 *
 * Returns the number of bytes stored (not counting any terminator), or
 * (size_t) -1 if the conversion failed.
 */
size_t
wchar2char(char *to, const wchar_t *from, size_t tolen)
{
	/* With no output space there is nothing to do. */
	if (tolen == 0)
		return 0;

#ifdef WIN32

	/*
	 * On Windows the "Unicode" locales are UTF16-based rather than UTF8, and
	 * wcstombs() will not produce UTF8 for us, so go through the native
	 * WideCharToMultiByte() call when the database encoding is UTF8.
	 */
	if (GetDatabaseEncoding() == PG_UTF8)
	{
		size_t		nbytes = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
												 NULL, NULL);

		/* The Win32 call reports failure by returning zero */
		if (nbytes == 0)
			return (size_t) -1;

		Assert(nbytes <= tolen);

		/* Microsoft includes the zero terminator in the count; we do not */
		return nbytes - 1;
	}
#endif   /* WIN32 */

	return wcstombs(to, from, tolen);
}
| 602 | + |
| 603 | +/* |
| 604 | + * char2wchar --- convert multibyte characters to wide characters |
| 605 | + * |
| 606 | + * This has almost the API of mbstowcs(), except that *from need not be |
| 607 | + * null-terminated; instead, the number of input bytes is specified as |
| 608 | + * fromlen. Also, we ereport() rather than returning -1 for invalid |
| 609 | + * input encoding.tolen is the maximum number of wchar_t's to store at *to. |
| 610 | + * The output will be zero-terminated iff there is room. |
| 611 | + */ |
| 612 | +size_t |
| 613 | +char2wchar(wchar_t*to,size_ttolen,constchar*from,size_tfromlen) |
| 614 | +{ |
| 615 | +size_tresult; |
| 616 | + |
| 617 | +if (tolen==0) |
| 618 | +return0; |
| 619 | + |
| 620 | +#ifdefWIN32 |
| 621 | +/* See WIN32 "Unicode" comment above */ |
| 622 | +if (GetDatabaseEncoding()==PG_UTF8) |
| 623 | +{ |
| 624 | +/* Win32 API does not work for zero-length input */ |
| 625 | +if (fromlen==0) |
| 626 | +result=0; |
| 627 | +else |
| 628 | +{ |
| 629 | +result=MultiByteToWideChar(CP_UTF8,0,from,fromlen,to,tolen-1); |
| 630 | +/* A zero return is failure */ |
| 631 | +if (result==0) |
| 632 | +result=-1; |
| 633 | +} |
| 634 | + |
| 635 | +if (result!=-1) |
| 636 | +{ |
| 637 | +Assert(result<tolen); |
| 638 | +/* Append trailing null wchar (MultiByteToWideChar() does not) */ |
| 639 | +to[result]=0; |
| 640 | +} |
| 641 | +} |
| 642 | +else |
| 643 | +#endif/* WIN32 */ |
| 644 | +{ |
| 645 | +if (lc_ctype_is_c()) |
| 646 | +{ |
| 647 | +/* |
| 648 | + * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be |
| 649 | + * allocated with sufficient space |
| 650 | + */ |
| 651 | +result=pg_mb2wchar_with_len(from, (pg_wchar*)to,fromlen); |
| 652 | +} |
| 653 | +else |
| 654 | +{ |
| 655 | +/* mbstowcs requires ending '\0' */ |
| 656 | +char*str=pnstrdup(from,fromlen); |
| 657 | + |
| 658 | +result=mbstowcs(to,str,tolen); |
| 659 | +pfree(str); |
| 660 | +} |
| 661 | +} |
| 662 | + |
| 663 | +if (result==-1) |
| 664 | +{ |
| 665 | +/* |
| 666 | + * Invalid multibyte character encountered. We try to give a useful |
| 667 | + * error message by letting pg_verifymbstr check the string. But it's |
| 668 | + * possible that the string is OK to us, and not OK to mbstowcs --- |
| 669 | + * this suggests that the LC_CTYPE locale is different from the |
| 670 | + * database encoding. Give a generic error message if verifymbstr |
| 671 | + * can't find anything wrong. |
| 672 | + */ |
| 673 | +pg_verifymbstr(from,fromlen, false);/* might not return */ |
| 674 | +/* but if it does ... */ |
| 675 | +ereport(ERROR, |
| 676 | +(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), |
| 677 | +errmsg("invalid multibyte character for locale"), |
| 678 | +errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); |
| 679 | +} |
| 680 | + |
| 681 | +returnresult; |
| 682 | +} |
| 683 | + |
| 684 | +#endif |
| 685 | + |
558 | 686 | /* convert a multibyte string to a wchar */
|
559 | 687 | int
|
560 | 688 | pg_mb2wchar(constchar*from,pg_wchar*to)
|
|