@@ -74,42 +74,6 @@ def fetch(f):
74
74
sys .stderr .write ("cannot load %s" % f )
75
75
exit (1 )
76
76
77
- def load_gencats (f ):
78
- fetch (f )
79
- gencats = {}
80
-
81
- udict = {};
82
- range_start = - 1 ;
83
- for line in fileinput .input (f ):
84
- data = line .split (';' );
85
- if len (data )!= 15 :
86
- continue
87
- cp = int (data [0 ],16 );
88
- if is_surrogate (cp ):
89
- continue
90
- if range_start >= 0 :
91
- for i in range (range_start ,cp ):
92
- udict [i ]= data ;
93
- range_start = - 1 ;
94
- if data [1 ].endswith (", First>" ):
95
- range_start = cp ;
96
- continue ;
97
- udict [cp ]= data ;
98
-
99
- for code in udict :
100
- [code_org ,name ,gencat ,combine ,bidi ,
101
- decomp ,deci ,digit ,num ,mirror ,
102
- old ,iso ,upcase ,lowcase ,titlecase ]= udict [code ];
103
-
104
- # place letter in categories as appropriate
105
- for cat in [gencat ,"Assigned" ]+ expanded_categories .get (gencat , []):
106
- if cat not in gencats :
107
- gencats [cat ]= []
108
- gencats [cat ].append (code )
109
-
110
- gencats = group_cats (gencats )
111
- return gencats
112
-
113
77
def group_cats (cats ):
114
78
cats_out = {}
115
79
for cat in cats :
@@ -230,36 +194,6 @@ def emit_util_mod(f):
230
194
}).is_ok()
231
195
}
232
196
233
- #[inline]
234
- fn is_alphabetic(c: char) -> bool {
235
- if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
236
- c.is_alphabetic()
237
- } else {
238
- match c {
239
- 'a' ..= 'z' | 'A' ..= 'Z' => true,
240
- c if c > '\\ x7f' => super::derived_property::Alphabetic(c),
241
- _ => false,
242
- }
243
- }
244
- }
245
-
246
- #[inline]
247
- fn is_numeric(c: char) -> bool {
248
- if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
249
- c.is_numeric()
250
- } else {
251
- match c {
252
- '0' ..= '9' => true,
253
- c if c > '\\ x7f' => super::general_category::N(c),
254
- _ => false,
255
- }
256
- }
257
- }
258
-
259
- #[inline]
260
- pub fn is_alphanumeric(c: char) -> bool {
261
- is_alphabetic(c) || is_numeric(c)
262
- }
263
197
}
264
198
265
199
""" )
@@ -396,20 +330,13 @@ def emit_break_module(f, break_table, break_cats, name):
396
330
/// The version of [Unicode](http://www.unicode.org/)
397
331
/// that this version of unicode-segmentation is based on.
398
332
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
399
- """ % UNICODE_VERSION )
400
-
401
- rf .write ("""
402
- const UNICODE_VERSION_U8: (u8, u8, u8) = (%s, %s, %s);
403
333
""" % UNICODE_VERSION )
404
334
405
335
# download and parse all the data
406
- gencats = load_gencats ("UnicodeData.txt" )
407
- derived = load_properties ("DerivedCoreProperties.txt" , ["Alphabetic" , ("InCB" ,"Consonant" ), ("InCB" ,"Extend" ), ("InCB" ,"Linker" )])
336
+ derived = load_properties ("DerivedCoreProperties.txt" , [("InCB" ,"Consonant" ), ("InCB" ,"Extend" ), ("InCB" ,"Linker" )])
408
337
409
338
emit_util_mod (rf )
410
- for (name ,cat ,pfuns )in ("general_category" ,gencats , ["N" ]), \
411
- ("derived_property" ,derived , ["Alphabetic" , ("InCB" ,"Extend" )]):
412
- emit_property_module (rf ,name ,cat ,pfuns )
339
+ emit_property_module (rf ,"derived_property" ,derived , [("InCB" ,"Extend" )])
413
340
414
341
rf .write ("""pub fn is_incb_linker(c: char) -> bool {
415
342
matches!(c,""" )