11#include <stdio.h>
22#include <stdlib.h>
33#include <string.h>
4- #include <ctype.h>
54
65#include "regis.h"
6+ #include "ts_locale.h"
77#include "common.h"
88
/*
 * RS_isRegis
 *      Report whether every character of str is acceptable here: either an
 *      alphabetic character (as judged by t_isalpha) or one of the
 *      characters '[', ']' and '^'.
 *
 * The string is walked one (possibly multibyte) character at a time, using
 * pg_mblen() to find the start of the next character.
 *
 * Returns true when no other character is found; this includes a NULL or
 * empty str.  Returns false at the first character that is not acceptable.
 */
bool
RS_isRegis(const char *str)
{
    unsigned char *cur = (unsigned char *) str;

    /* Nothing to scan, hence nothing to reject. */
    if (cur == NULL)
        return true;

    for (; *cur; cur += pg_mblen(cur))
    {
        if (!(t_isalpha(cur) ||
              t_iseq(cur, '[') ||
              t_iseq(cur, ']') ||
              t_iseq(cur, '^')))
            return false;
    }

    return true;
}
2122
2223#define RS_IN_ONEOF 1
@@ -38,34 +39,32 @@ newRegisNode(RegisNode * prev, int len)
3839return ptr ;
3940}
4041
41- int
42- RS_compile (Regis * r ,int issuffix ,const char * str )
42+ void
43+ RS_compile (Regis * r ,bool issuffix ,char * str )
4344{
44- int i ,
45- len = strlen (str );
45+ int len = strlen (str );
4646int state = RS_IN_WAIT ;
47+ char * c = (char * )str ;
4748RegisNode * ptr = NULL ;
4849
4950memset (r ,0 ,sizeof (Regis ));
5051r -> issuffix = (issuffix ) ?1 :0 ;
5152
52- for ( i = 0 ; i < len ; i ++ )
53+ while ( * c )
5354{
54- unsignedchar c = * (((unsignedchar * )str )+ i );
55-
5655if (state == RS_IN_WAIT )
5756{
58- if (isalpha (c ))
57+ if (t_isalpha (c ))
5958{
6059if (ptr )
6160ptr = newRegisNode (ptr ,len );
6261else
6362ptr = r -> node = newRegisNode (NULL ,len );
64- ptr -> data [ 0 ] = c ;
63+ COPYCHAR ( ptr -> data , c ) ;
6564ptr -> type = RSF_ONEOF ;
66- ptr -> len = 1 ;
65+ ptr -> len = pg_mblen ( c ) ;
6766}
68- else if (c == '[' )
67+ else if (t_iseq ( c , '[' ) )
6968{
7069if (ptr )
7170ptr = newRegisNode (ptr ,len );
@@ -75,38 +74,39 @@ RS_compile(Regis * r, int issuffix, const char *str)
7574state = RS_IN_ONEOF ;
7675}
7776else
78- ts_error (ERROR ,"Error in regis: %s at pos %d\n " ,str , i + 1 );
77+ ts_error (ERROR ,"Error in regis: %s" ,str );
7978}
8079else if (state == RS_IN_ONEOF )
8180{
82- if (c == '^' )
81+ if (t_iseq ( c , '^' ) )
8382{
8483ptr -> type = RSF_NONEOF ;
8584state = RS_IN_NONEOF ;
8685}
87- else if (isalpha (c ))
86+ else if (t_isalpha (c ))
8887{
89- ptr -> data [ 0 ] = c ;
90- ptr -> len = 1 ;
88+ COPYCHAR ( ptr -> data , c ) ;
89+ ptr -> len = pg_mblen ( c ) ;
9190state = RS_IN_ONEOF_IN ;
9291}
9392else
94- ts_error (ERROR ,"Error in regis: %s at pos %d\n " ,str , i + 1 );
93+ ts_error (ERROR ,"Error in regis: %s" ,str );
9594}
9695else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF )
9796{
98- if (isalpha (c ))
97+ if (t_isalpha (c ))
9998{
100- ptr -> data [ ptr -> len ] = c ;
101- ptr -> len ++ ;
99+ COPYCHAR ( ptr -> data + ptr -> len , c ) ;
100+ ptr -> len += pg_mblen ( c ) ;
102101}
103- else if (c == ']' )
102+ else if (t_iseq ( c , ']' ) )
104103state = RS_IN_WAIT ;
105104else
106- ts_error (ERROR ,"Error in regis: %s at pos %d\n " ,str , i + 1 );
105+ ts_error (ERROR ,"Error in regis: %s" ,str );
107106}
108107else
109- ts_error (ERROR ,"Internal error in RS_compile: %d\n" ,state );
108+ ts_error (ERROR ,"Internal error in RS_compile: %d" ,state );
109+ c += pg_mblen (c );
110110}
111111
112112ptr = r -> node ;
@@ -115,8 +115,6 @@ RS_compile(Regis * r, int issuffix, const char *str)
115115r -> nchar ++ ;
116116ptr = ptr -> next ;
117117}
118-
119- return 0 ;
120118}
121119
122120void
@@ -135,51 +133,77 @@ RS_free(Regis * r)
135133r -> node = NULL ;
136134}
137135
138- int
139- RS_execute (Regis * r ,const char * str ,int len )
#ifdef TS_USE_WIDE
/*
 * mb_strchr
 *      Report whether the character starting at c occurs in the
 *      NUL-terminated string str.
 *
 * str is walked one character at a time, with pg_mblen() giving the byte
 * length of each character.  A character of str matches when it has the
 * same byte length as the character at c and all of its bytes are equal.
 *
 * Returns true on the first match, false if the end of str is reached.
 */
static bool
mb_strchr(char *str, char *c)
{
    int         clen = pg_mblen(c);     /* loop-invariant: measure once */
    char       *ptr = str;

    while (*ptr)
    {
        int         plen = pg_mblen(ptr);

        /* Lengths must agree before the bytes are worth comparing. */
        if (plen == clen && memcmp(ptr, c, clen) == 0)
            return true;

        ptr += plen;
    }

    return false;
}
#else
/*
 * Without TS_USE_WIDE only the first byte of c is looked up in s, using
 * plain strchr().
 */
#define mb_strchr(s,c)  ( (strchr((s),*(c)) == NULL) ? false : true )
#endif
164+
165+
166+ bool
167+ RS_execute (Regis * r ,char * str )
140168{
141169RegisNode * ptr = r -> node ;
142- unsignedchar * c ;
170+ char * c = str ;
171+ int len = 0 ;
143172
144- if (len < 0 )
145- len = strlen (str );
173+ while (* c ) {
174+ len ++ ;
175+ c += pg_mblen (c );
176+ }
146177
147178if (len < r -> nchar )
148179return 0 ;
149180
150- if (r -> issuffix )
151- c = ((unsignedchar * )str )+ len - r -> nchar ;
152- else
153- c = (unsignedchar * )str ;
181+ c = str ;
182+ if (r -> issuffix ) {
183+ len -= r -> nchar ;
184+ while (len -- > 0 )
185+ c += pg_mblen (c );
186+ }
187+
154188
155189while (ptr )
156190{
157191switch (ptr -> type )
158192{
159193case RSF_ONEOF :
160- if (ptr -> len == 0 )
161- {
162- if (* c != * (ptr -> data ))
163- return 0 ;
164- }
165- else if (strchr ((char * )ptr -> data ,* c )== NULL )
166- return 0 ;
194+ if (mb_strchr ((char * )ptr -> data ,c )!= true )
195+ return false;
167196break ;
168197case RSF_NONEOF :
169- if (ptr -> len == 0 )
170- {
171- if (* c == * (ptr -> data ))
172- return 0 ;
173- }
174- else if (strchr ((char * )ptr -> data ,* c )!= NULL )
175- return 0 ;
198+ if (mb_strchr ((char * )ptr -> data ,c )== true )
199+ return false;
176200break ;
177201default :
178202ts_error (ERROR ,"RS_execute: Unknown type node: %d\n" ,ptr -> type );
179203}
180204ptr = ptr -> next ;
181- c ++ ;
205+ c += pg_mblen ( c ) ;
182206}
183207
184- return 1 ;
208+ return true ;
185209}