Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit ed87e19

Browse files
committed
Mop-up for commit 85feb77.
Adjust commentary in regc_pg_locale.c to remove mention of the possibility of not having <wctype.h> functions, since we no longer consider that. Eliminate duplicate code in wparser_def.c by generalizing the p_iswhat macro to take a parameter saying what to return for non-ASCII chars in C locale. (That's not really a consequence of the USE_WIDE_UPPER_LOWER-ectomy, but I noticed it while doing that.)
1 parent 85feb77 commit ed87e19

File tree

2 files changed

+40
-97
lines changed

2 files changed

+40
-97
lines changed

‎src/backend/regex/regc_pg_locale.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,20 +29,20 @@
2929
*
3030
* 2. In the "default" collation (which is supposed to obey LC_CTYPE):
3131
*
32-
* 2a. When working in UTF8 encoding, we use the <wctype.h> functions if
33-
*available.This assumes that every platform uses Unicode codepoints
34-
*directlyas the wchar_t representation of Unicode. On some platforms
32+
* 2a. When working in UTF8 encoding, we use the <wctype.h> functions.
33+
* This assumes that every platform uses Unicode codepoints directly
34+
* as the wchar_t representation of Unicode. On some platforms
3535
* wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
3636
*
37-
* 2b. In all other encodings,or on machines that lack <wctype.h>, we use
38-
*the <ctype.h> functions for pg_wcharvalues up to 255, and punt for values
39-
*above that. This isonly100% correctin single-byte encodings such as
40-
*LATINn. However, non-Unicodemultibyte encodings are mostly Far Eastern
41-
*character sets for which theproperties being tested here aren't very
42-
*relevant for higher code valuesanyway. The difficulty with using the
43-
*<wctype.h> functions withnon-Unicode multibyte encodings is that we can
44-
*have no certainty thatthe platform's wchar_t representation matches
45-
*what we do in pg_wcharconversions.
37+
* 2b. In all other encodings, we use the <ctype.h> functions for pg_wchar
38+
* values up to 255, and punt for values above that. This is 100% correct
39+
* only in single-byte encodings such as LATINn. However, non-Unicode
40+
* multibyte encodings are mostly Far Eastern character sets for which the
41+
* properties being tested here aren't very relevant for higher code values
42+
* anyway. The difficulty with using the <wctype.h> functions with
43+
* non-Unicode multibyte encodings is that we can have no certainty that
44+
* the platform's wchar_t representation matches what we do in pg_wchar
45+
* conversions.
4646
*
4747
* 3. Other collations are only supported on platforms that HAVE_LOCALE_T.
4848
* Here, we use the locale_t-extended forms of the <wctype.h> and <ctype.h>

‎src/backend/tsearch/wparser_def.c

Lines changed: 28 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -427,94 +427,45 @@ TParserCopyClose(TParser *prs)
427427
*- if locale is C then we use pgwstr instead of wstr.
428428
*/
429429

430-
#definep_iswhat(type)\
430+
#definep_iswhat(type,nonascii)\
431+
\
431432
static int\
432-
p_is##type(TParser *prs) {\
433-
Assert( prs->state );\
434-
if ( prs->usewide )\
433+
p_is##type(TParser *prs)\
434+
{\
435+
Assert(prs->state);\
436+
if (prs->usewide)\
435437
{\
436-
if (prs->pgwstr)\
438+
if (prs->pgwstr)\
437439
{\
438440
unsigned int c = *(prs->pgwstr + prs->state->poschar);\
439-
if (c > 0x7f)\
440-
return0;\
441-
return is##type( c );\
441+
if (c > 0x7f)\
442+
returnnonascii;\
443+
return is##type(c);\
442444
}\
443-
return isw##type( *(prs->wstr + prs->state->poschar ) );\
445+
return isw##type(*(prs->wstr + prs->state->poschar));\
444446
}\
445-
\
446-
return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \
447-
}\
447+
return is##type(*(unsigned char *) (prs->str + prs->state->posbyte));\
448+
}\
448449
\
449450
static int\
450-
p_isnot##type(TParser *prs) {\
451+
p_isnot##type(TParser *prs)\
452+
{\
451453
return !p_is##type(prs);\
452454
}
453455

454-
staticint
455-
p_isalnum(TParser*prs)
456-
{
457-
Assert(prs->state);
458-
459-
if (prs->usewide)
460-
{
461-
if (prs->pgwstr)
462-
{
463-
unsignedintc=*(prs->pgwstr+prs->state->poschar);
464-
465-
/*
466-
* any non-ascii symbol with multibyte encoding with C-locale is
467-
* an alpha character
468-
*/
469-
if (c>0x7f)
470-
return1;
471-
472-
returnisalnum(c);
473-
}
474-
475-
returniswalnum(*(prs->wstr+prs->state->poschar));
476-
}
477-
478-
returnisalnum(*(unsignedchar*) (prs->str+prs->state->posbyte));
479-
}
480-
staticint
481-
p_isnotalnum(TParser*prs)
482-
{
483-
return !p_isalnum(prs);
484-
}
485-
486-
staticint
487-
p_isalpha(TParser*prs)
488-
{
489-
Assert(prs->state);
490-
491-
if (prs->usewide)
492-
{
493-
if (prs->pgwstr)
494-
{
495-
unsignedintc=*(prs->pgwstr+prs->state->poschar);
496-
497-
/*
498-
* any non-ascii symbol with multibyte encoding with C-locale is
499-
* an alpha character
500-
*/
501-
if (c>0x7f)
502-
return1;
503-
504-
returnisalpha(c);
505-
}
506-
507-
returniswalpha(*(prs->wstr+prs->state->poschar));
508-
}
509-
510-
returnisalpha(*(unsignedchar*) (prs->str+prs->state->posbyte));
511-
}
512-
513-
staticint
514-
p_isnotalpha(TParser*prs)
515-
{
516-
return !p_isalpha(prs);
517-
}
456+
/*
457+
* In C locale with a multibyte encoding, any non-ASCII symbol is considered
458+
* an alpha character, but not a member of other char classes.
459+
*/
460+
p_iswhat(alnum,1)
461+
p_iswhat(alpha,1)
462+
p_iswhat(digit,0)
463+
p_iswhat(lower,0)
464+
p_iswhat(print,0)
465+
p_iswhat(punct,0)
466+
p_iswhat(space,0)
467+
p_iswhat(upper,0)
468+
p_iswhat(xdigit,0)
518469

519470
/* p_iseq should be used only for ascii symbols */
520471

@@ -525,14 +476,6 @@ p_iseq(TParser *prs, char c)
525476
return ((prs->state->charlen==1&&*(prs->str+prs->state->posbyte)==c)) ?1 :0;
526477
}
527478

528-
p_iswhat(digit)
529-
p_iswhat(lower)
530-
p_iswhat(print)
531-
p_iswhat(punct)
532-
p_iswhat(space)
533-
p_iswhat(upper)
534-
p_iswhat(xdigit)
535-
536479
staticint
537480
p_isEOF(TParser*prs)
538481
{

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp