Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit d3b2e5e

Browse files
committed
Refactor convert_case() to prepare for optimizations.
Upcoming optimizations will add complexity to convert_case(). This patch reorganizes slightly so that the complexity can be contained within the logic to convert the case of a single character, rather than mixing it in with logic to iterate through the string.

Reviewed-by: Alexander Borisov <lex.borisov@gmail.com>
Discussion: https://postgr.es/m/44005c3d-88f4-4a26-981f-fd82dfa8e313@gmail.com
1 parent 3abe9dc commit d3b2e5e

File tree

1 file changed

+101
-52
lines changed

1 file changed

+101
-52
lines changed

‎src/common/unicode_case.c

Lines changed: 101 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,20 @@
2020
#include"common/unicode_category.h"
2121
#include"mb/pg_wchar.h"
2222

23+
enumCaseMapResult
24+
{
25+
CASEMAP_SELF,
26+
CASEMAP_SIMPLE,
27+
CASEMAP_SPECIAL,
28+
};
29+
2330
staticconstpg_case_map*find_case_map(pg_wcharucs);
2431
staticsize_tconvert_case(char*dst,size_tdstsize,constchar*src,ssize_tsrclen,
2532
CaseKindstr_casekind,boolfull,WordBoundaryNextwbnext,
2633
void*wbstate);
27-
staticboolcheck_special_conditions(intconditions,constchar*str,
28-
size_tlen,size_toffset);
34+
staticenumCaseMapResultcasemap(pg_wcharu1,CaseKindcasekind,boolfull,
35+
constchar*src,size_tsrclen,size_tsrcoff,
36+
pg_wchar*u2,constpg_wchar**special);
2937

3038
pg_wchar
3139
unicode_lowercase_simple(pg_wcharcode)
@@ -214,8 +222,9 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
214222
{
215223
pg_wcharu1=utf8_to_unicode((unsignedchar*)src+srcoff);
216224
intu1len=unicode_utf8len(u1);
217-
constpg_case_map*casemap=find_case_map(u1);
218-
constpg_special_case*special=NULL;
225+
pg_wcharsimple=0;
226+
constpg_wchar*special=NULL;
227+
enumCaseMapResultcasemap_result;
219228

220229
if (str_casekind==CaseTitle)
221230
{
@@ -228,56 +237,47 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
228237
chr_casekind=CaseLower;
229238
}
230239

231-
/*
232-
* Find special case that matches the conditions, if any.
233-
*
234-
* Note: only a single special mapping per codepoint is currently
235-
* supported, though Unicode allows for multiple special mappings for
236-
* a single codepoint.
237-
*/
238-
if (full&&casemap&&casemap->special_case)
239-
{
240-
int16conditions=casemap->special_case->conditions;
241-
242-
Assert(casemap->special_case->codepoint==u1);
243-
if (check_special_conditions(conditions,src,srclen,srcoff))
244-
special=casemap->special_case;
245-
}
240+
casemap_result=casemap(u1,chr_casekind,full,src,srclen,srcoff,
241+
&simple,&special);
246242

247-
/* perform mapping, update result_len, and write to dst */
248-
if (special)
243+
switch (casemap_result)
249244
{
250-
for (inti=0;i<MAX_CASE_EXPANSION;i++)
251-
{
252-
pg_wcharu2=special->map[chr_casekind][i];
253-
size_tu2len=unicode_utf8len(u2);
254-
255-
if (u2=='\0')
256-
break;
257-
258-
if (result_len+u2len <=dstsize)
259-
unicode_to_utf8(u2, (unsignedchar*)dst+result_len);
260-
261-
result_len+=u2len;
262-
}
263-
}
264-
elseif (casemap)
265-
{
266-
pg_wcharu2=casemap->simplemap[chr_casekind];
267-
pg_wcharu2len=unicode_utf8len(u2);
268-
269-
if (result_len+u2len <=dstsize)
270-
unicode_to_utf8(u2, (unsignedchar*)dst+result_len);
271-
272-
result_len+=u2len;
273-
}
274-
else
275-
{
276-
/* no mapping; copy bytes from src */
277-
if (result_len+u1len <=dstsize)
278-
memcpy(dst+result_len,src+srcoff,u1len);
279-
280-
result_len+=u1len;
245+
caseCASEMAP_SELF:
246+
/* no mapping; copy bytes from src */
247+
Assert(simple==0);
248+
Assert(special==NULL);
249+
if (result_len+u1len <=dstsize)
250+
memcpy(dst+result_len,src+srcoff,u1len);
251+
252+
result_len+=u1len;
253+
break;
254+
caseCASEMAP_SIMPLE:
255+
{
256+
/* replace with single character */
257+
pg_wcharu2=simple;
258+
pg_wcharu2len=unicode_utf8len(u2);
259+
260+
Assert(special==NULL);
261+
if (result_len+u2len <=dstsize)
262+
unicode_to_utf8(u2, (unsignedchar*)dst+result_len);
263+
264+
result_len+=u2len;
265+
}
266+
break;
267+
caseCASEMAP_SPECIAL:
268+
/* replace with up to MAX_CASE_EXPANSION characters */
269+
Assert(simple==0);
270+
for (inti=0;i<MAX_CASE_EXPANSION&&special[i];i++)
271+
{
272+
pg_wcharu2=special[i];
273+
size_tu2len=unicode_utf8len(u2);
274+
275+
if (result_len+u2len <=dstsize)
276+
unicode_to_utf8(u2, (unsignedchar*)dst+result_len);
277+
278+
result_len+=u2len;
279+
}
280+
break;
281281
}
282282

283283
srcoff+=u1len;
@@ -351,6 +351,10 @@ check_final_sigma(const unsigned char *str, size_t len, size_t offset)
351351
return true;
352352
}
353353

354+
/*
355+
* Unicode allows for special casing to be applied only under certain
356+
* circumstances. The only currently-supported condition is Final_Sigma.
357+
*/
354358
staticbool
355359
check_special_conditions(intconditions,constchar*str,size_tlen,
356360
size_toffset)
@@ -365,6 +369,51 @@ check_special_conditions(int conditions, const char *str, size_t len,
365369
return false;
366370
}
367371

372+
/*
373+
* Map the given character to the requested case.
374+
*
375+
* If full is true, and a special case mapping is found and the conditions are
376+
* met, 'special' is set to the mapping result (which is an array of up to
377+
* MAX_CASE_EXPANSION characters) and CASEMAP_SPECIAL is returned.
378+
*
379+
* Otherwise, search for a simple mapping, and if found, set 'simple' to the
380+
* result and return CASEMAP_SIMPLE.
381+
*
382+
* If no mapping is found, return CASEMAP_SELF, and the caller should copy the
383+
* character without modification.
384+
*/
385+
staticenumCaseMapResult
386+
casemap(pg_wcharu1,CaseKindcasekind,boolfull,
387+
constchar*src,size_tsrclen,size_tsrcoff,
388+
pg_wchar*simple,constpg_wchar**special)
389+
{
390+
constpg_case_map*map;
391+
392+
if (u1<0x80)
393+
{
394+
*simple=case_map[u1].simplemap[casekind];
395+
396+
returnCASEMAP_SIMPLE;
397+
}
398+
399+
map=find_case_map(u1);
400+
401+
if (map==NULL)
402+
returnCASEMAP_SELF;
403+
404+
if (full&&map->special_case!=NULL&&
405+
check_special_conditions(map->special_case->conditions,
406+
src,srclen,srcoff))
407+
{
408+
*special=map->special_case->map[casekind];
409+
returnCASEMAP_SPECIAL;
410+
}
411+
412+
*simple=map->simplemap[casekind];
413+
414+
returnCASEMAP_SIMPLE;
415+
}
416+
368417
/* find entry in simple case map, if any */
369418
staticconstpg_case_map*
370419
find_case_map(pg_wcharucs)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp