Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 9acae56

Browse files
committed
Inline basic UTF-8 functions.
Shows a measurable speedup when processing UTF-8 data, such as with the new builtin collation provider.
Discussion: https://postgr.es/m/163f4e2190cdf67f67016044e503c5004547e5a9.camel@j-davis.com
Reviewed-by: Peter Eisentraut
1 parent 2b52086 commit 9acae56

File tree

2 files changed

+61
-61
lines changed

2 files changed

+61
-61
lines changed

‎src/common/wchar.c

Lines changed: 0 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -476,39 +476,6 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
476476
}
477477

478478

479-
/*
480-
* Map a Unicode code point to UTF-8. utf8string must have at least
481-
* unicode_utf8len(c) bytes available.
482-
*/
483-
unsigned char *
484-
unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
485-
{
486-
if (c <=0x7F)
487-
{
488-
utf8string[0]=c;
489-
}
490-
else if (c <= 0x7FF)
491-
{
492-
utf8string[0]=0xC0 | ((c >>6)&0x1F);
493-
utf8string[1]=0x80 | (c&0x3F);
494-
}
495-
else if (c <= 0xFFFF)
496-
{
497-
utf8string[0]=0xE0 | ((c >>12)&0x0F);
498-
utf8string[1]=0x80 | ((c >>6)&0x3F);
499-
utf8string[2]=0x80 | (c&0x3F);
500-
}
501-
else
502-
{
503-
utf8string[0]=0xF0 | ((c >>18)&0x07);
504-
utf8string[1]=0x80 | ((c >>12)&0x3F);
505-
utf8string[2]=0x80 | ((c >>6)&0x3F);
506-
utf8string[3]=0x80 | (c&0x3F);
507-
}
508-
509-
return utf8string;
510-
}
511-
512479
/*
513480
* Trivial conversion from pg_wchar to UTF-8.
514481
* caller should allocate enough space for "to"
@@ -670,34 +637,6 @@ ucs_wcwidth(pg_wchar ucs)
670637
return 1;
671638
}
672639

673-
/*
674-
* Convert a UTF-8 character to a Unicode code point.
675-
* This is a one-character version of pg_utf2wchar_with_len.
676-
*
677-
* No error checks here, c must point to a long-enough string.
678-
*/
679-
pg_wchar
680-
utf8_to_unicode(const unsigned char *c)
681-
{
682-
if ((*c&0x80)==0)
683-
return (pg_wchar)c[0];
684-
else if ((*c & 0xe0) == 0xc0)
685-
return (pg_wchar) (((c[0]&0x1f) <<6) |
686-
(c[1]&0x3f));
687-
else if ((*c & 0xf0) == 0xe0)
688-
return (pg_wchar) (((c[0]&0x0f) <<12) |
689-
((c[1]&0x3f) <<6) |
690-
(c[2]&0x3f));
691-
else if ((*c & 0xf8) == 0xf0)
692-
return (pg_wchar) (((c[0]&0x07) <<18) |
693-
((c[1]&0x3f) <<12) |
694-
((c[2]&0x3f) <<6) |
695-
(c[3]&0x3f));
696-
else
697-
/* that is an invalid code on purpose */
698-
return 0xffffffff;
699-
}
700-
701640
static int
702641
pg_utf_dsplen(const unsigned char *s)
703642
{

‎src/include/mb/pg_wchar.h

Lines changed: 61 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -555,6 +555,67 @@ surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
555555
return ((first&0x3FF) <<10)+0x10000+ (second&0x3FF);
556556
}
557557

558+
/*
559+
* Convert a UTF-8 character to a Unicode code point.
560+
* This is a one-character version of pg_utf2wchar_with_len.
561+
*
562+
* No error checks here, c must point to a long-enough string.
563+
*/
564+
static inline pg_wchar
565+
utf8_to_unicode(const unsigned char *c)
566+
{
567+
if ((*c&0x80)==0)
568+
return (pg_wchar)c[0];
569+
else if ((*c & 0xe0) == 0xc0)
570+
return (pg_wchar) (((c[0]&0x1f) <<6) |
571+
(c[1]&0x3f));
572+
else if ((*c & 0xf0) == 0xe0)
573+
return (pg_wchar) (((c[0]&0x0f) <<12) |
574+
((c[1]&0x3f) <<6) |
575+
(c[2]&0x3f));
576+
else if ((*c & 0xf8) == 0xf0)
577+
return (pg_wchar) (((c[0]&0x07) <<18) |
578+
((c[1]&0x3f) <<12) |
579+
((c[2]&0x3f) <<6) |
580+
(c[3]&0x3f));
581+
else
582+
/* that is an invalid code on purpose */
583+
return 0xffffffff;
584+
}
585+
586+
/*
587+
* Map a Unicode code point to UTF-8. utf8string must have at least
588+
* unicode_utf8len(c) bytes available.
589+
*/
590+
static inline unsigned char *
591+
unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
592+
{
593+
if (c <=0x7F)
594+
{
595+
utf8string[0]=c;
596+
}
597+
else if (c <= 0x7FF)
598+
{
599+
utf8string[0]=0xC0 | ((c >>6)&0x1F);
600+
utf8string[1]=0x80 | (c&0x3F);
601+
}
602+
else if (c <= 0xFFFF)
603+
{
604+
utf8string[0]=0xE0 | ((c >>12)&0x0F);
605+
utf8string[1]=0x80 | ((c >>6)&0x3F);
606+
utf8string[2]=0x80 | (c&0x3F);
607+
}
608+
else
609+
{
610+
utf8string[0]=0xF0 | ((c >>18)&0x07);
611+
utf8string[1]=0x80 | ((c >>12)&0x3F);
612+
utf8string[2]=0x80 | ((c >>6)&0x3F);
613+
utf8string[3]=0x80 | (c&0x3F);
614+
}
615+
616+
return utf8string;
617+
}
618+
558619
/*
559620
* Number of bytes needed to represent the given char in UTF8.
560621
*/

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp