Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit b80e106

Browse files
committed
Add mbverifystr() functions specific to each encoding.
This makes pg_verify_mbstr() function faster, by allowing more efficient encoding-specific implementations. All the implementations included in this commit are pretty naive, they just call the same encoding-specific verifychar functions that were used previously, but that already gives a performance boost because the tight character-at-a-time loop is simpler.
Reviewed-by: John Naylor
Discussion: https://www.postgresql.org/message-id/e7861509-3960-538a-9025-b75a61188e01@iki.fi
1 parent a3367aa commit b80e106

File tree

9 files changed

+493
-101
lines changed

9 files changed

+493
-101
lines changed

‎src/backend/commands/extension.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -682,7 +682,7 @@ read_extension_script_file(const ExtensionControlFile *control,
682682
src_encoding=control->encoding;
683683

684684
/* make sure that source string is valid in the expected encoding */
685-
pg_verify_mbstr_len(src_encoding,src_str,len, false);
685+
(void)pg_verify_mbstr(src_encoding,src_str,len, false);
686686

687687
/*
688688
* Convert the encoding to the database encoding. read_whole_file

‎src/backend/utils/mb/conv.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -653,7 +653,7 @@ LocalToUtf(const unsigned char *iso, int len,
653653
continue;
654654
}
655655

656-
l=pg_encoding_verifymb(encoding, (constchar*)iso,len);
656+
l=pg_encoding_verifymbchar(encoding, (constchar*)iso,len);
657657
if (l<0)
658658
break;
659659

‎src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
8787
continue;
8888
}
8989

90-
l=pg_encoding_verifymb(PG_EUC_JIS_2004, (constchar*)euc,len);
90+
l=pg_encoding_verifymbchar(PG_EUC_JIS_2004, (constchar*)euc,len);
9191

9292
if (l<0)
9393
report_invalid_encoding(PG_EUC_JIS_2004,
@@ -238,7 +238,7 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
238238
continue;
239239
}
240240

241-
l=pg_encoding_verifymb(PG_SHIFT_JIS_2004, (constchar*)sjis,len);
241+
l=pg_encoding_verifymbchar(PG_SHIFT_JIS_2004, (constchar*)sjis,len);
242242

243243
if (l<0||l>len)
244244
report_invalid_encoding(PG_SHIFT_JIS_2004,

‎src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ mic2sjis(const unsigned char *mic, unsigned char *p, int len)
291291
len--;
292292
continue;
293293
}
294-
l=pg_encoding_verifymb(PG_MULE_INTERNAL, (constchar*)mic,len);
294+
l=pg_encoding_verifymbchar(PG_MULE_INTERNAL, (constchar*)mic,len);
295295
if (l<0)
296296
report_invalid_encoding(PG_MULE_INTERNAL,
297297
(constchar*)mic,len);
@@ -381,7 +381,7 @@ euc_jp2mic(const unsigned char *euc, unsigned char *p, int len)
381381
len--;
382382
continue;
383383
}
384-
l=pg_encoding_verifymb(PG_EUC_JP, (constchar*)euc,len);
384+
l=pg_encoding_verifymbchar(PG_EUC_JP, (constchar*)euc,len);
385385
if (l<0)
386386
report_invalid_encoding(PG_EUC_JP,
387387
(constchar*)euc,len);
@@ -431,7 +431,7 @@ mic2euc_jp(const unsigned char *mic, unsigned char *p, int len)
431431
len--;
432432
continue;
433433
}
434-
l=pg_encoding_verifymb(PG_MULE_INTERNAL, (constchar*)mic,len);
434+
l=pg_encoding_verifymbchar(PG_MULE_INTERNAL, (constchar*)mic,len);
435435
if (l<0)
436436
report_invalid_encoding(PG_MULE_INTERNAL,
437437
(constchar*)mic,len);
@@ -485,7 +485,7 @@ euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len)
485485
len--;
486486
continue;
487487
}
488-
l=pg_encoding_verifymb(PG_EUC_JP, (constchar*)euc,len);
488+
l=pg_encoding_verifymbchar(PG_EUC_JP, (constchar*)euc,len);
489489
if (l<0)
490490
report_invalid_encoding(PG_EUC_JP,
491491
(constchar*)euc,len);
@@ -580,7 +580,7 @@ sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len)
580580
len--;
581581
continue;
582582
}
583-
l=pg_encoding_verifymb(PG_SJIS, (constchar*)sjis,len);
583+
l=pg_encoding_verifymbchar(PG_SJIS, (constchar*)sjis,len);
584584
if (l<0)
585585
report_invalid_encoding(PG_SJIS,
586586
(constchar*)sjis,len);

‎src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ euc_kr2mic(const unsigned char *euc, unsigned char *p, int len)
7676
c1=*euc;
7777
if (IS_HIGHBIT_SET(c1))
7878
{
79-
l=pg_encoding_verifymb(PG_EUC_KR, (constchar*)euc,len);
79+
l=pg_encoding_verifymbchar(PG_EUC_KR, (constchar*)euc,len);
8080
if (l!=2)
8181
report_invalid_encoding(PG_EUC_KR,
8282
(constchar*)euc,len);
@@ -122,7 +122,7 @@ mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
122122
len--;
123123
continue;
124124
}
125-
l=pg_encoding_verifymb(PG_MULE_INTERNAL, (constchar*)mic,len);
125+
l=pg_encoding_verifymbchar(PG_MULE_INTERNAL, (constchar*)mic,len);
126126
if (l<0)
127127
report_invalid_encoding(PG_MULE_INTERNAL,
128128
(constchar*)mic,len);

‎src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ euc_tw2mic(const unsigned char *euc, unsigned char *p, int len)
148148
c1=*euc;
149149
if (IS_HIGHBIT_SET(c1))
150150
{
151-
l=pg_encoding_verifymb(PG_EUC_TW, (constchar*)euc,len);
151+
l=pg_encoding_verifymbchar(PG_EUC_TW, (constchar*)euc,len);
152152
if (l<0)
153153
report_invalid_encoding(PG_EUC_TW,
154154
(constchar*)euc,len);
@@ -213,7 +213,7 @@ mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
213213
len--;
214214
continue;
215215
}
216-
l=pg_encoding_verifymb(PG_MULE_INTERNAL, (constchar*)mic,len);
216+
l=pg_encoding_verifymbchar(PG_MULE_INTERNAL, (constchar*)mic,len);
217217
if (l<0)
218218
report_invalid_encoding(PG_MULE_INTERNAL,
219219
(constchar*)mic,len);
@@ -272,7 +272,7 @@ big52mic(const unsigned char *big5, unsigned char *p, int len)
272272
len--;
273273
continue;
274274
}
275-
l=pg_encoding_verifymb(PG_BIG5, (constchar*)big5,len);
275+
l=pg_encoding_verifymbchar(PG_BIG5, (constchar*)big5,len);
276276
if (l<0)
277277
report_invalid_encoding(PG_BIG5,
278278
(constchar*)big5,len);
@@ -321,7 +321,7 @@ mic2big5(const unsigned char *mic, unsigned char *p, int len)
321321
len--;
322322
continue;
323323
}
324-
l=pg_encoding_verifymb(PG_MULE_INTERNAL, (constchar*)mic,len);
324+
l=pg_encoding_verifymbchar(PG_MULE_INTERNAL, (constchar*)mic,len);
325325
if (l<0)
326326
report_invalid_encoding(PG_MULE_INTERNAL,
327327
(constchar*)mic,len);

‎src/backend/utils/mb/mbutils.c

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,7 @@ pg_convert(PG_FUNCTION_ARGS)
519519
/* make sure that source string is valid */
520520
len=VARSIZE_ANY_EXHDR(string);
521521
src_str=VARDATA_ANY(string);
522-
pg_verify_mbstr_len(src_encoding,src_str,len, false);
522+
(void)pg_verify_mbstr(src_encoding,src_str,len, false);
523523

524524
/* perform conversion */
525525
dest_str= (char*)pg_do_encoding_conversion((unsignedchar*)unconstify(char*,src_str),
@@ -1215,10 +1215,10 @@ static bool
12151215
pg_generic_charinc(unsignedchar*charptr,intlen)
12161216
{
12171217
unsignedchar*lastbyte=charptr+len-1;
1218-
mbverifiermbverify;
1218+
mbchar_verifiermbverify;
12191219

12201220
/* We can just invoke the character verifier directly. */
1221-
mbverify=pg_wchar_table[GetDatabaseEncoding()].mbverify;
1221+
mbverify=pg_wchar_table[GetDatabaseEncoding()].mbverifychar;
12221222

12231223
while (*lastbyte< (unsignedchar)255)
12241224
{
@@ -1445,8 +1445,7 @@ pg_database_encoding_max_length(void)
14451445
bool
14461446
pg_verifymbstr(constchar*mbstr,intlen,boolnoError)
14471447
{
1448-
return
1449-
pg_verify_mbstr_len(GetDatabaseEncoding(),mbstr,len,noError) >=0;
1448+
returnpg_verify_mbstr(GetDatabaseEncoding(),mbstr,len,noError);
14501449
}
14511450

14521451
/*
@@ -1456,7 +1455,18 @@ pg_verifymbstr(const char *mbstr, int len, bool noError)
14561455
bool
14571456
pg_verify_mbstr(intencoding,constchar*mbstr,intlen,boolnoError)
14581457
{
1459-
returnpg_verify_mbstr_len(encoding,mbstr,len,noError) >=0;
1458+
intoklen;
1459+
1460+
Assert(PG_VALID_ENCODING(encoding));
1461+
1462+
oklen=pg_wchar_table[encoding].mbverifystr((constunsignedchar*)mbstr,len);
1463+
if (oklen!=len)
1464+
{
1465+
if (noError)
1466+
return false;
1467+
report_invalid_encoding(encoding,mbstr+oklen,len-oklen);
1468+
}
1469+
return true;
14601470
}
14611471

14621472
/*
@@ -1469,11 +1479,14 @@ pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
14691479
* If OK, return length of string in the encoding.
14701480
* If a problem is found, return -1 when noError is
14711481
* true; when noError is false, ereport() a descriptive message.
1482+
*
1483+
* Note: We cannot use the faster encoding-specific mbverifystr() function
1484+
* here, because we need to count the number of characters in the string.
14721485
*/
14731486
int
14741487
pg_verify_mbstr_len(intencoding,constchar*mbstr,intlen,boolnoError)
14751488
{
1476-
mbverifiermbverify;
1489+
mbchar_verifiermbverifychar;
14771490
intmb_len;
14781491

14791492
Assert(PG_VALID_ENCODING(encoding));
@@ -1493,7 +1506,7 @@ pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
14931506
}
14941507

14951508
/* fetch function pointer just once */
1496-
mbverify=pg_wchar_table[encoding].mbverify;
1509+
mbverifychar=pg_wchar_table[encoding].mbverifychar;
14971510

14981511
mb_len=0;
14991512

@@ -1516,7 +1529,7 @@ pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
15161529
report_invalid_encoding(encoding,mbstr,len);
15171530
}
15181531

1519-
l= (*mbverify) ((constunsignedchar*)mbstr,len);
1532+
l= (*mbverifychar) ((constunsignedchar*)mbstr,len);
15201533

15211534
if (l<0)
15221535
{

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp