3535// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly
3636
3737#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
38+
39+ use super::ScriptExtension;
3840'''
3941
4042UNICODE_VERSION = (12 ,0 ,0 )
@@ -183,182 +185,102 @@ def emit_search(f):
183185}
184186""" )
185187
186- def emit_enums (f ,script_list ,extension_list ,longforms , intersections ):
188+ def emit_enums (f ,script_list ,extension_list ,longforms ):
187189"""
188190 Emit the Script and ScriptExtension enums as well as any related utility functions
189191 """
192+
190193f .write ("""
191- use core::convert::TryFrom;
192194#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
193195#[non_exhaustive]
194196#[allow(non_camel_case_types)]
195- /// A value of the Script property
197+ #[repr(u8)]
198+ /// A value of the `Script` property
196199pub enum Script {
197200 /// Unknown script
198- Unknown,
201+ Unknown = 0xFF,
202+ /// Zyyy
203+ Common = 0xFE,
204+ /// Zinh
205+ Inherited = 0xFD,
199206""" )
200- for script in script_list :
201- f .write (" /// %s\n %s,\n " % (script ,longforms [script ]))
202- f .write ("""}
203- #[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
204- #[non_exhaustive]
205- /// A value for the Script_Extension property
206- ///
207- /// Script_Extension is one or more Script
208- ///
209- /// This is essentially an optimized version of Vec<Script>,
210- /// optimized by script sets and intersections actually present in Unicode.
211- pub enum ScriptExtension {
212- /// A single script
213- Single(Script),
207+ for (i ,script )in enumerate (script_list ):
208+ f .write (" /// %s\n %s = %s,\n " % (script ,longforms [script ],i ))
209+ f .write ("}\n " )
210+ f .write ("pub const NEXT_SCRIPT: u8 = %s;" % len (script_list ))
211+ f .write ("""
212+
213+ pub mod script_extensions {
214+ use crate::ScriptExtension;
215+ pub const COMMON: ScriptExtension = ScriptExtension::new_common();
216+ pub const INHERITED: ScriptExtension = ScriptExtension::new_inherited();
217+ pub const UNKNOWN: ScriptExtension = ScriptExtension::new_unknown();
214218""" )
219+ for (i ,script )in enumerate (script_list ):
220+ first = 0
221+ second = 0
222+ third = 0
223+ # strip the trailing "L": under Python 2, `hex()` appends an L suffix to long integers, which is not valid in a Rust literal
224+ if i < 64 :
225+ first = hex (1 << i ).replace ("L" ,"" )
226+ elif i < 128 :
227+ second = hex (1 << (i - 64 )).replace ("L" ,"" )
228+ else :
229+ third = hex (1 << (i - 128 )).replace ("L" ,"" )
230+ f .write (" /// %s\n pub const %s: ScriptExtension = ScriptExtension::new(%s, %s, %s);\n " %
231+ (longforms [script ],longforms [script ].upper (),first ,second ,third ))
232+ if script != longforms [script ]:
233+ f .write (" /// %s\n pub const %s: ScriptExtension = %s;\n " %
234+ (longforms [script ],script .upper (),longforms [script ].upper ()))
215235for ext in extension_list :
216236longform = ", " .join ([longforms [s ]for s in ext ])
217- f .write (" /// %s\n %s,\n " % (longform ,"" .join (ext )))
237+ name = "_" .join ([s .upper ()for s in ext ])
238+ expr = ext [0 ].upper ()
239+ for e in ext [1 :]:
240+ expr = "%s.union(%s)" % (expr ,e .upper ())
241+ f .write (" /// %s\n pub const %s: ScriptExtension = %s;\n " % (longform ,name ,expr ))
218242f .write ("""}
219243
220- impl From<Script> for ScriptExtension {
221- fn from(script: Script) -> Self {
222- ScriptExtension::Single(script)
223- }
224- }
225-
226- impl TryFrom<ScriptExtension> for Script {
227- type Error = ();
228- fn try_from(ext: ScriptExtension) -> Result<Self, ()> {
229- match ext {
230- ScriptExtension::Single(s) => Ok(s),
231- _ => Err(())
232- }
233- }
234- }
235-
236244impl Script {
245+ #[inline]
237246 pub(crate) fn inner_full_name(self) -> &'static str {
238247 match self {
239248 Script::Unknown => "Unknown",
249+ Script::Common => "Common",
250+ Script::Inherited => "Inherited",
240251""" )
241252for script in script_list :
242253f .write (" Script::%s =>\" %s\" ,\n " % (longforms [script ],longforms [script ]))
243254f .write (""" }
244255 }
245256
257+ #[inline]
246258 pub(crate) fn inner_short_name(self) -> &'static str {
247259 match self {
248260 Script::Unknown => "",
261+ Script::Common => "Zyyy",
262+ Script::Inherited => "Zinh",
249263""" )
250264for script in script_list :
251265f .write (" Script::%s =>\" %s\" ,\n " % (longforms [script ],script ))
252266f .write (""" }
253267 }
254- }
255268
256- impl ScriptExtension {
257269 #[inline]
258- #[cfg(feature = "with_std")]
259- pub(crate) fn inner_scripts(self) -> Vec<Script> {
260- match self {
261- ScriptExtension::Single(s) => vec![s],
270+ pub(crate) fn for_integer(value: u8) -> Self {
271+ match value {
262272""" )
263- for ext in extension_list :
264- scripts = ", " .join (["Script::%s" % longforms [s ]for s in ext ])
265- f .write (" %s => vec![%s],\n " % (extension_name (ext ),scripts ))
266- f .write (""" _ => unreachable!()
267- }
268- }
269-
270- #[inline]
271- pub(crate) fn inner_contains_script(self, other: Script) -> bool {
272- match self {
273- ScriptExtension::Single(s) => s == other,
274- """ )
275- for ext in extension_list :
276- scripts = " || " .join (["other == Script::%s" % longforms [s ]for s in ext ])
277- f .write (" %s => %s,\n " % (extension_name (ext ),scripts ))
278- f .write (""" }
279- }
280-
281- #[inline]
282- pub(crate) fn inner_intersect(self, other: Self) -> Self {
283- match (self, other) {
284- (ScriptExtension::Single(Script::Unknown), _) |
285- (_, ScriptExtension::Single(Script::Unknown)) => ScriptExtension::Single(Script::Unknown),
286- (a, b) if a == b => a,
287- (ScriptExtension::Single(Script::Common), a) |
288- (ScriptExtension::Single(Script::Inherited), a) |
289- (a, ScriptExtension::Single(Script::Common)) |
290- (a, ScriptExtension::Single(Script::Inherited)) => a,
291- (ScriptExtension::Single(s), o) | (o, ScriptExtension::Single(s)) if o.inner_contains_script(s) => ScriptExtension::Single(s),
292- """ )
293- for (e1 ,e2 ,i )in intersections :
294- f .write (" (%s, %s) => %s,\n " % (extension_name (e1 ),extension_name (e2 ),extension_name (i ,longforms )))
295- f .write (""" _ => ScriptExtension::Single(Script::Unknown),
273+ for (i ,script )in enumerate (script_list ):
274+ f .write (" %s => Script::%s,\n " % (i ,longforms [script ]))
275+ f .write (""" _ => unreachable!(),
296276 }
297277 }
298278}
299279""" )
300280
301-
302- def compute_intersections_elements (extension_list ):
303- """
304- Compute all intersections between the script extensions.
305- This will add new elements to extension_list, be sure to call it first!
306- """
307-
308- # This is the only third-level intersection
309- # It's easier to hardcode things here rather than
310- # do the below calculation in a loop
311- extension_list .append (['Deva' ,'Knda' ,'Tirh' ])
312- intersections = []
313- # Some intersections will not exist in extension_list and we'll need to add them
314- new_elements = []
315- sets = [(e ,set (e ))for e in extension_list ]
316- for (e1 ,s1 )in sets :
317- for (e2 ,s2 )in sets :
318- if e1 == e2 :
319- continue
320- intersection = s1 .intersection (s2 )
321- if len (intersection )> 0 :
322- intersection = [i for i in intersection ]
323- intersection .sort ()
324- if len (intersection )> 1 and intersection not in extension_list and intersection not in new_elements :
325- new_elements .append (intersection )
326- if (e1 ,e2 ,intersection )not in intersections :
327- intersections .append ((e1 ,e2 ,intersection ))
328- extension_list .extend (new_elements )
329-
330- # We now go through the newly added second-level extension values and calculate their intersections
331- # with the original set and each other
332- new_sets = [(e ,set (e ))for e in new_elements ]
333- sets = [(e ,set (e ))for e in extension_list ]
334- for (e1 ,s1 )in new_sets :
335- for (e2 ,s2 )in sets :
336- if e1 == e2 :
337- continue
338- intersection = s1 .intersection (s2 )
339- if len (intersection )> 0 :
340- intersection = [i for i in intersection ]
341- intersection .sort ()
342- if len (intersection )> 1 and intersection not in extension_list :
343- raise "Found new third-level intersection, please hardcode it"
344- # The previous routine would automatically get both versions
345- # of an intersection because it would iterate each pair in both orders,
346- # but here we're working on an asymmetric pair, so we insert both in order to not
347- # miss anything
348- if (e1 ,e2 ,intersection )not in intersections :
349- intersections .append ((e1 ,e2 ,intersection ))
350- if (e2 ,e1 ,intersection )not in intersections :
351- intersections .append ((e2 ,e1 ,intersection ))
352-
353- intersections .sort ()
354- return intersections
355-
356- def extension_name (ext ,longforms = {}):
281+ def extension_name (ext ):
357282"""Get the rust source for a given ScriptExtension"""
358- if len (ext )== 1 :
359- return "ScriptExtension::Single(Script::%s)" % longforms [ext [0 ]]
360- else :
361- return "ScriptExtension::%s" % "" .join (ext )
283+ return "script_extensions::%s" % "_" .join ([e .upper ()for e in ext ])
362284
363285
364286
@@ -385,8 +307,10 @@ def extension_name(ext, longforms={}):
385307script_list = []
386308
387309for script in scripts :
388- script_list .append (shortforms [script ])
310+ if script not in ["Common" ,"Unknown" ,"Inherited" ]:
311+ script_list .append (shortforms [script ])
389312script_table .extend ([(x ,y ,shortforms [script ])for (x ,y )in scripts [script ]])
313+ script_list .sort ()
390314script_table .sort (key = lambda w :w [0 ])
391315
392316
@@ -404,14 +328,13 @@ def extension_name(ext, longforms={}):
404328extension_table .extend ([(x ,y ,output_ext )for (x ,y )in extensions [ext ]])
405329extension_table .sort (key = lambda w :w [0 ])
406330
407- intersections = compute_intersections_elements (extension_list )
408331
409- emit_enums (rf ,script_list ,extension_list ,longforms , intersections )
332+ emit_enums (rf ,script_list ,extension_list ,longforms )
410333emit_search (rf )
411334
412335emit_table (rf ,"SCRIPTS" ,script_table ,t_type = "&'static [(char, char, Script)]" ,
413336is_pub = False ,pfun = lambda x :"(%s,%s, Script::%s)" % (escape_char (x [0 ]),escape_char (x [1 ]),longforms [x [2 ]]))
414337emit_table (rf ,"SCRIPT_EXTENSIONS" ,extension_table ,t_type = "&'static [(char, char, ScriptExtension)]" ,
415- is_pub = False ,pfun = lambda x :"(%s,%s,%s)" % (escape_char (x [0 ]),escape_char (x [1 ]),extension_name (x [2 ], longforms )))
338+ is_pub = False ,pfun = lambda x :"(%s,%s,%s)" % (escape_char (x [0 ]),escape_char (x [1 ]),extension_name (x [2 ])))
416339
417340# emit_table(rf, "FOObar", properties)