@@ -249,11 +249,9 @@ typedef struct TParser
249249/* string and position information */
250250char * str ;/* multibyte string */
251251int lenstr ;/* length of mbstring */
252- #ifdef USE_WIDE_UPPER_LOWER
253252wchar_t * wstr ;/* wide character string */
254253pg_wchar * pgwstr ;/* wide character string for C-locale */
255254bool usewide ;
256- #endif
257255
258256/* State of parse */
259257int charmaxlen ;
@@ -302,8 +300,6 @@ TParserInit(char *str, int len)
302300prs -> str = str ;
303301prs -> lenstr = len ;
304302
305- #ifdef USE_WIDE_UPPER_LOWER
306-
307303/*
308304 * Use wide char code only when max encoding length > 1.
309305 */
@@ -331,7 +327,6 @@ TParserInit(char *str, int len)
331327}
332328else
333329prs -> usewide = false;
334- #endif
335330
336331prs -> state = newTParserPosition (NULL );
337332prs -> state -> state = TPS_Base ;
@@ -368,15 +363,12 @@ TParserCopyInit(const TParser *orig)
368363prs -> charmaxlen = orig -> charmaxlen ;
369364prs -> str = orig -> str + orig -> state -> posbyte ;
370365prs -> lenstr = orig -> lenstr - orig -> state -> posbyte ;
371-
372- #ifdef USE_WIDE_UPPER_LOWER
373366prs -> usewide = orig -> usewide ;
374367
375368if (orig -> pgwstr )
376369prs -> pgwstr = orig -> pgwstr + orig -> state -> poschar ;
377370if (orig -> wstr )
378371prs -> wstr = orig -> wstr + orig -> state -> poschar ;
379- #endif
380372
381373prs -> state = newTParserPosition (NULL );
382374prs -> state -> state = TPS_Base ;
@@ -401,12 +393,10 @@ TParserClose(TParser *prs)
401393prs -> state = ptr ;
402394}
403395
404- #ifdef USE_WIDE_UPPER_LOWER
405396if (prs -> wstr )
406397pfree (prs -> wstr );
407398if (prs -> pgwstr )
408399pfree (prs -> pgwstr );
409- #endif
410400
411401#ifdef WPARSER_TRACE
412402fprintf (stderr ,"closing parser\n" );
@@ -445,96 +435,45 @@ TParserCopyClose(TParser *prs)
445435 *- if locale is C then we use pgwstr instead of wstr.
446436 */
447437
448- #ifdef USE_WIDE_UPPER_LOWER
449-
450- #define p_iswhat (type )\
438+ #define p_iswhat (type ,nonascii )\
439+ \
451440static int\
452- p_is##type(TParser *prs) {\
453- Assert( prs->state );\
454- if ( prs->usewide )\
441+ p_is##type(TParser *prs)\
442+ {\
443+ Assert(prs->state);\
444+ if (prs->usewide)\
455445{\
456- if ( prs->pgwstr )\
446+ if (prs->pgwstr)\
457447{\
458448unsigned int c = *(prs->pgwstr + prs->state->poschar);\
459- if ( c > 0x7f )\
460- return 0; \
461- return is##type( c ); \
449+ if (c > 0x7f)\
450+ return nonascii; \
451+ return is##type(c); \
462452}\
463- return isw##type( *( prs->wstr + prs->state->poschar ) ); \
453+ return isw##type(*( prs->wstr + prs->state->poschar)); \
464454}\
465- \
466- return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \
467- }\
455+ return is##type(*(unsigned char *) (prs->str + prs->state->posbyte));\
456+ }\
468457\
469458static int\
470- p_isnot##type(TParser *prs) {\
459+ p_isnot##type(TParser *prs)\
460+ {\
471461return !p_is##type(prs);\
472462}
473463
474- static int
475- p_isalnum (TParser * prs )
476- {
477- Assert (prs -> state );
478-
479- if (prs -> usewide )
480- {
481- if (prs -> pgwstr )
482- {
483- unsigned int c = * (prs -> pgwstr + prs -> state -> poschar );
484-
485- /*
486- * any non-ascii symbol with multibyte encoding with C-locale is
487- * an alpha character
488- */
489- if (c > 0x7f )
490- return 1 ;
491-
492- return isalnum (c );
493- }
494-
495- return iswalnum (* (prs -> wstr + prs -> state -> poschar ));
496- }
497-
498- return isalnum (* (unsigned char * ) (prs -> str + prs -> state -> posbyte ));
499- }
500- static int
501- p_isnotalnum (TParser * prs )
502- {
503- return !p_isalnum (prs );
504- }
505-
506- static int
507- p_isalpha (TParser * prs )
508- {
509- Assert (prs -> state );
510-
511- if (prs -> usewide )
512- {
513- if (prs -> pgwstr )
514- {
515- unsigned int c = * (prs -> pgwstr + prs -> state -> poschar );
516-
517- /*
518- * any non-ascii symbol with multibyte encoding with C-locale is
519- * an alpha character
520- */
521- if (c > 0x7f )
522- return 1 ;
523-
524- return isalpha (c );
525- }
526-
527- return iswalpha (* (prs -> wstr + prs -> state -> poschar ));
528- }
529-
530- return isalpha (* (unsigned char * ) (prs -> str + prs -> state -> posbyte ));
531- }
532-
533- static int
534- p_isnotalpha (TParser * prs )
535- {
536- return !p_isalpha (prs );
537- }
464+ /*
465+ * In C locale with a multibyte encoding, any non-ASCII symbol is considered
466+ * an alpha character, but not a member of other char classes.
467+ */
468+ p_iswhat (alnum ,1 )
469+ p_iswhat (alpha ,1 )
470+ p_iswhat (digit ,0 )
471+ p_iswhat (lower ,0 )
472+ p_iswhat (print ,0 )
473+ p_iswhat (punct ,0 )
474+ p_iswhat (space ,0 )
475+ p_iswhat (upper ,0 )
476+ p_iswhat (xdigit ,0 )
538477
539478/* p_iseq should be used only for ascii symbols */
540479
@@ -544,39 +483,6 @@ p_iseq(TParser *prs, char c)
544483Assert (prs -> state );
545484return ((prs -> state -> charlen == 1 && * (prs -> str + prs -> state -> posbyte )== c )) ?1 :0 ;
546485}
547- #else /* USE_WIDE_UPPER_LOWER */
548-
549- #define p_iswhat (type )\
550- static int\
551- p_is##type(TParser *prs) {\
552- Assert( prs->state );\
553- return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) );\
554- }\
555- \
556- static int\
557- p_isnot##type(TParser *prs) {\
558- return !p_is##type(prs);\
559- }
560-
561-
562- static int
563- p_iseq (TParser * prs ,char c )
564- {
565- Assert (prs -> state );
566- return (* (prs -> str + prs -> state -> posbyte )== c ) ?1 :0 ;
567- }
568-
569- p_iswhat (alnum )
570- p_iswhat (alpha )
571- #endif /* USE_WIDE_UPPER_LOWER */
572-
573- p_iswhat (digit )
574- p_iswhat (lower )
575- p_iswhat (print )
576- p_iswhat (punct )
577- p_iswhat (space )
578- p_iswhat (upper )
579- p_iswhat (xdigit )
580486
581487static int
582488p_isEOF (TParser * prs )
@@ -793,8 +699,6 @@ p_isspecial(TParser *prs)
793699if (pg_dsplen (prs -> str + prs -> state -> posbyte )== 0 )
794700return 1 ;
795701
796- #ifdef USE_WIDE_UPPER_LOWER
797-
798702/*
799703 * Unicode Characters in the 'Mark, Spacing Combining' Category That
800704 * characters are not alpha although they are not breakers of word too.
@@ -1058,7 +962,6 @@ p_isspecial(TParser *prs)
1058962StopHigh = StopMiddle ;
1059963}
1060964}
1061- #endif
1062965
1063966return 0 ;
1064967}