3535// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly
3636
3737#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
38+
39+ use super::ScriptExtension;
3840'''
3941
4042UNICODE_VERSION = (12 ,0 ,0 )
@@ -183,44 +185,69 @@ def emit_search(f):
183185}
184186""" )
185187
186- def emit_enums (f ,script_list ,extension_list ,longforms , intersections ):
188+ def emit_enums (f ,script_list ,extension_list ,longforms ):
187189"""
188190 Emit the Script and ScriptExtension enums as well as any related utility functions
189191 """
192+
190193f .write ("""
191194#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
192195#[non_exhaustive]
193196#[allow(non_camel_case_types)]
197+ #[repr(u8)]
194198/// A value of the `Script` property
195199pub enum Script {
196200 /// Unknown script
197- Unknown,
201+ Unknown = 0xFF,
202+ /// Zyyy
203+ Common = 0xFE,
204+ /// Zinh,
205+ Inherited = 0xFD,
198206""" )
199- for script in script_list :
200- f .write (" /// %s\n %s,\n " % (script ,longforms [script ]))
201- f .write ("""}
202- #[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
203- #[non_exhaustive]
204- /// A value for the `Script_Extension` property
205- ///
206- /// [`ScriptExtension`] is one or more [`Script`]
207- ///
208- /// This is essentially an optimized version of `Vec<Script>`,
209- /// optimized by script sets and intersections actually present in Unicode.
210- pub enum ScriptExtension {
211- /// A single script
212- Single(Script),
207+ for (i ,script )in enumerate (script_list ):
208+ f .write (" /// %s\n %s = %s,\n " % (script ,longforms [script ],i ))
209+ f .write ("}\n " )
210+ f .write ("pub const NEXT_SCRIPT: u8 = %s;" % len (script_list ))
211+ f .write ("""
212+
213+ pub mod script_extensions {
214+ use crate::ScriptExtension;
215+ pub const COMMON: ScriptExtension = ScriptExtension::new_common();
216+ pub const INHERITED: ScriptExtension = ScriptExtension::new_inherited();
217+ pub const UNKNOWN: ScriptExtension = ScriptExtension::new_unknown();
213218""" )
219+ for (i ,script )in enumerate (script_list ):
220+ first = 0
221+ second = 0
222+ third = 0
223+ # Strip the trailing "L": Python 2's `hex()` appends an L suffix to long integers (e.g. 1 << 63), which would not be valid in the emitted Rust literal
224+ if i < 64 :
225+ first = hex (1 << i ).replace ("L" ,"" )
226+ elif i < 128 :
227+ second = hex (1 << (i - 64 )).replace ("L" ,"" )
228+ else :
229+ third = hex (1 << (i - 128 )).replace ("L" ,"" )
230+ f .write (" /// %s\n pub const %s: ScriptExtension = ScriptExtension::new(%s, %s, %s);\n " %
231+ (longforms [script ],longforms [script ].upper (),first ,second ,third ))
232+ if script != longforms [script ]:
233+ f .write (" /// %s\n pub const %s: ScriptExtension = %s;\n " %
234+ (longforms [script ],script .upper (),longforms [script ].upper ()))
214235for ext in extension_list :
215236longform = ", " .join ([longforms [s ]for s in ext ])
216- f .write (" /// %s\n %s,\n " % (longform ,"" .join (ext )))
237+ name = "_" .join ([s .upper ()for s in ext ])
238+ expr = ext [0 ].upper ()
239+ for e in ext [1 :]:
240+ expr = "%s.union(%s)" % (expr ,e .upper ())
241+ f .write (" /// %s\n pub const %s: ScriptExtension = %s;\n " % (longform ,name ,expr ))
217242f .write ("""}
218243
219244impl Script {
220245 #[inline]
221246 pub(crate) fn inner_full_name(self) -> &'static str {
222247 match self {
223248 Script::Unknown => "Unknown",
249+ Script::Common => "Common",
250+ Script::Inherited => "Inherited",
224251""" )
225252for script in script_list :
226253f .write (" Script::%s =>\" %s\" ,\n " % (longforms [script ],longforms [script ]))
@@ -231,119 +258,29 @@ def emit_enums(f, script_list, extension_list, longforms, intersections):
231258 pub(crate) fn inner_short_name(self) -> &'static str {
232259 match self {
233260 Script::Unknown => "",
261+ Script::Common => "Zyyy",
262+ Script::Inherited => "Zinh",
234263""" )
235264for script in script_list :
236265f .write (" Script::%s =>\" %s\" ,\n " % (longforms [script ],script ))
237266f .write (""" }
238267 }
239- }
240-
241- impl ScriptExtension {
242- #[inline]
243- #[cfg(feature = "with_std")]
244- pub(crate) fn inner_scripts(self) -> Vec<Script> {
245- match self {
246- ScriptExtension::Single(s) => vec![s],
247- """ )
248- for ext in extension_list :
249- scripts = ", " .join (["Script::%s" % longforms [s ]for s in ext ])
250- f .write (" %s => vec![%s],\n " % (extension_name (ext ),scripts ))
251- f .write (""" _ => unreachable!()
252- }
253- }
254-
255- #[inline]
256- pub(crate) fn inner_contains_script(self, other: Script) -> bool {
257- match self {
258- ScriptExtension::Single(s) => s == other,
259- """ )
260- for ext in extension_list :
261- scripts = " || " .join (["other == Script::%s" % longforms [s ]for s in ext ])
262- f .write (" %s => %s,\n " % (extension_name (ext ),scripts ))
263- f .write (""" }
264- }
265268
266269 #[inline]
267- pub(crate) fn inner_intersect(self, other: Self) -> Self {
268- match (self, other) {
269- (ScriptExtension::Single(Script::Unknown), _) |
270- (_, ScriptExtension::Single(Script::Unknown)) => ScriptExtension::Single(Script::Unknown),
271- (a, b) if a == b => a,
272- (ScriptExtension::Single(Script::Common), a) |
273- (ScriptExtension::Single(Script::Inherited), a) |
274- (a, ScriptExtension::Single(Script::Common)) |
275- (a, ScriptExtension::Single(Script::Inherited)) => a,
276- (ScriptExtension::Single(s), o) | (o, ScriptExtension::Single(s)) if o.inner_contains_script(s) => ScriptExtension::Single(s),
270+ pub(crate) fn for_integer(value: u8) -> Self {
271+ match value {
277272""" )
278- for (e1 , e2 , i )in intersections :
279- f .write ("(%s, %s) => %s,\n " % (extension_name ( e1 ), extension_name ( e2 ), extension_name ( i ,longforms ) ))
280- f .write (""" _ =>ScriptExtension::Single(Script::Unknown ),
273+ for (i , script )in enumerate ( script_list ) :
274+ f .write ("%s =>Script:: %s,\n " % (i ,longforms [ script ] ))
275+ f .write (""" _ =>unreachable!( ),
281276 }
282277 }
283278}
284279""" )
285280
286-
287- def compute_intersections_elements (extension_list ):
288- """
289- Compute all intersections between the script extensions.
290- This will add new elements to extension_list, be sure to call it first!
291- """
292-
293- # This is the only third-level intersection
294- # It's easier to hardcode things here rather than
295- # do the below calculation in a loop
296- extension_list .append (['Deva' ,'Knda' ,'Tirh' ])
297- intersections = []
298- # Some intersections will not exist in extension_list and we'll need to add them
299- new_elements = []
300- sets = [(e ,set (e ))for e in extension_list ]
301- for (e1 ,s1 )in sets :
302- for (e2 ,s2 )in sets :
303- if e1 == e2 :
304- continue
305- intersection = s1 .intersection (s2 )
306- if len (intersection )> 0 :
307- intersection = [i for i in intersection ]
308- intersection .sort ()
309- if len (intersection )> 1 and intersection not in extension_list and intersection not in new_elements :
310- new_elements .append (intersection )
311- if (e1 ,e2 ,intersection )not in intersections :
312- intersections .append ((e1 ,e2 ,intersection ))
313- extension_list .extend (new_elements )
314-
315- # We now go through the newly added second-level extension values and calculate their intersections
316- # with the original set and each other
317- new_sets = [(e ,set (e ))for e in new_elements ]
318- sets = [(e ,set (e ))for e in extension_list ]
319- for (e1 ,s1 )in new_sets :
320- for (e2 ,s2 )in sets :
321- if e1 == e2 :
322- continue
323- intersection = s1 .intersection (s2 )
324- if len (intersection )> 0 :
325- intersection = [i for i in intersection ]
326- intersection .sort ()
327- if len (intersection )> 1 and intersection not in extension_list :
328- raise "Found new third-level intersection, please hardcode it"
329- # The previous routine would automatically get both versions
330- # of an intersection because it would iterate each pair in both orders,
331- # but here we're working on an asymmetric pair, so we insert both in order to not
332- # miss anything
333- if (e1 ,e2 ,intersection )not in intersections :
334- intersections .append ((e1 ,e2 ,intersection ))
335- if (e2 ,e1 ,intersection )not in intersections :
336- intersections .append ((e2 ,e1 ,intersection ))
337-
338- intersections .sort ()
339- return intersections
340-
341- def extension_name (ext ,longforms = {}):
281+ def extension_name (ext ):
342282"""Return the Rust source expression that refers to the ScriptExtension described by `ext`, a sequence of script-name strings."""
343- if len (ext )== 1 :
344- return "ScriptExtension::Single(Script::%s)" % longforms [ext [0 ]]
345- else :
346- return "ScriptExtension::%s" % "" .join (ext )
283+ return "script_extensions::%s" % "_" .join ([e .upper ()for e in ext ])
347284
348285
349286
@@ -370,8 +307,10 @@ def extension_name(ext, longforms={}):
370307script_list = []
371308
372309for script in scripts :
373- script_list .append (shortforms [script ])
310+ if script not in ["Common" ,"Unknown" ,"Inherited" ]:
311+ script_list .append (shortforms [script ])
374312script_table .extend ([(x ,y ,shortforms [script ])for (x ,y )in scripts [script ]])
313+ script_list .sort ()
375314script_table .sort (key = lambda w :w [0 ])
376315
377316
@@ -389,14 +328,13 @@ def extension_name(ext, longforms={}):
389328extension_table .extend ([(x ,y ,output_ext )for (x ,y )in extensions [ext ]])
390329extension_table .sort (key = lambda w :w [0 ])
391330
392- intersections = compute_intersections_elements (extension_list )
393331
394- emit_enums (rf ,script_list ,extension_list ,longforms , intersections )
332+ emit_enums (rf ,script_list ,extension_list ,longforms )
395333emit_search (rf )
396334
397335emit_table (rf ,"SCRIPTS" ,script_table ,t_type = "&'static [(char, char, Script)]" ,
398336is_pub = False ,pfun = lambda x :"(%s,%s, Script::%s)" % (escape_char (x [0 ]),escape_char (x [1 ]),longforms [x [2 ]]))
399337emit_table (rf ,"SCRIPT_EXTENSIONS" ,extension_table ,t_type = "&'static [(char, char, ScriptExtension)]" ,
400- is_pub = False ,pfun = lambda x :"(%s,%s,%s)" % (escape_char (x [0 ]),escape_char (x [1 ]),extension_name (x [2 ], longforms )))
338+ is_pub = False ,pfun = lambda x :"(%s,%s,%s)" % (escape_char (x [0 ]),escape_char (x [1 ]),extension_name (x [2 ])))
401339
402340# emit_table(rf, "FOObar", properties)