Movatterモバイル変換


[0]ホーム

URL:


{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE CPP, NoImplicitPrelude, StandaloneDeriving #-}
{-# OPTIONS_HADDOCK not-home #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  GHC.Unicode
-- Copyright   :  (c) The University of Glasgow, 2003
-- License     :  see libraries/base/LICENSE
--
-- Maintainer  :  cvs-ghc@haskell.org
-- Stability   :  internal
-- Portability :  non-portable (GHC extensions)
--
-- Implementations for the character predicates (isLower, isUpper, etc.)
-- and the conversions (toUpper, toLower).  Regardless of the O/S and
-- library, the implementation uses the auto-generated Unicode character
-- property tables supplied in WCsubst.c.
--
-----------------------------------------------------------------------------

module GHC.Unicode
    ( GeneralCategory (..)
    , generalCategory
    , isAscii
    , isLatin1
    , isControl
    , isAsciiUpper
    , isAsciiLower
    , isPrint
    , isSpace
    , isUpper
    , isLower
    , isAlpha
    , isDigit
    , isOctDigit
    , isHexDigit
    , isAlphaNum
    , isPunctuation
    , isSymbol
    , toUpper
    , toLower
    , toTitle
    , wgencat
    ) where

import GHC.Base
import GHC.Char (chr)
import GHC.Real
import GHC.Enum (Enum (..), Bounded (..))
import GHC.Ix (Ix (..))
import GHC.Num

-- Data.Char.chr already imports this and we need to define a Show instance
-- for GeneralCategory
import GHC.Show (Show)

#include "HsBaseConfig.h"

-- | Unicode General Categories (column 2 of the UnicodeData table) in
-- the order they are listed in the Unicode standard (the Unicode
-- Character Database, in particular).
--
-- ==== __Examples__
--
-- Basic usage:
--
-- >>> :t OtherLetter
-- OtherLetter :: GeneralCategory
--
-- 'Eq' instance:
--
-- >>> UppercaseLetter == UppercaseLetter
-- True
-- >>> UppercaseLetter == LowercaseLetter
-- False
--
-- 'Ord' instance:
--
-- >>> NonSpacingMark <= MathSymbol
-- True
--
-- 'Enum' instance:
--
-- >>> enumFromTo ModifierLetter SpacingCombiningMark
-- [ModifierLetter,OtherLetter,NonSpacingMark,SpacingCombiningMark]
--
-- 'Text.Read.Read' instance:
--
-- >>> read "DashPunctuation" :: GeneralCategory
-- DashPunctuation
-- >>> read "17" :: GeneralCategory
-- *** Exception: Prelude.read: no parse
--
-- 'Show' instance:
--
-- >>> show EnclosingMark
-- "EnclosingMark"
--
-- 'Bounded' instance:
--
-- >>> minBound :: GeneralCategory
-- UppercaseLetter
-- >>> maxBound :: GeneralCategory
-- NotAssigned
--
-- 'Ix' instance:
--
-- >>> import Data.Ix ( index )
-- >>> index (OtherLetter,Control) FinalQuote
-- 12
-- >>> index (OtherLetter,Control) Format
-- *** Exception: Error in array index
--
data GeneralCategory
    -- NB: 'generalCategory' decodes its result through the derived 'Enum'
    -- instance, so the constructor order below must not be changed.
    = UppercaseLetter       -- ^ Lu: Letter, Uppercase
    | LowercaseLetter       -- ^ Ll: Letter, Lowercase
    | TitlecaseLetter       -- ^ Lt: Letter, Titlecase
    | ModifierLetter        -- ^ Lm: Letter, Modifier
    | OtherLetter           -- ^ Lo: Letter, Other
    | NonSpacingMark        -- ^ Mn: Mark, Non-Spacing
    | SpacingCombiningMark  -- ^ Mc: Mark, Spacing Combining
    | EnclosingMark         -- ^ Me: Mark, Enclosing
    | DecimalNumber         -- ^ Nd: Number, Decimal
    | LetterNumber          -- ^ Nl: Number, Letter
    | OtherNumber           -- ^ No: Number, Other
    | ConnectorPunctuation  -- ^ Pc: Punctuation, Connector
    | DashPunctuation       -- ^ Pd: Punctuation, Dash
    | OpenPunctuation       -- ^ Ps: Punctuation, Open
    | ClosePunctuation      -- ^ Pe: Punctuation, Close
    | InitialQuote          -- ^ Pi: Punctuation, Initial quote
    | FinalQuote            -- ^ Pf: Punctuation, Final quote
    | OtherPunctuation      -- ^ Po: Punctuation, Other
    | MathSymbol            -- ^ Sm: Symbol, Math
    | CurrencySymbol        -- ^ Sc: Symbol, Currency
    | ModifierSymbol        -- ^ Sk: Symbol, Modifier
    | OtherSymbol           -- ^ So: Symbol, Other
    | Space                 -- ^ Zs: Separator, Space
    | LineSeparator         -- ^ Zl: Separator, Line
    | ParagraphSeparator    -- ^ Zp: Separator, Paragraph
    | Control               -- ^ Cc: Other, Control
    | Format                -- ^ Cf: Other, Format
    | Surrogate             -- ^ Cs: Other, Surrogate
    | PrivateUse            -- ^ Co: Other, Private Use
    | NotAssigned           -- ^ Cn: Other, Not Assigned
    deriving ( Show     -- ^ @since 2.01
             , Eq       -- ^ @since 2.01
             , Ord      -- ^ @since 2.01
             , Enum     -- ^ @since 2.01
             , Bounded  -- ^ @since 2.01
             , Ix       -- ^ @since 2.01
             )

-- | The Unicode general category of the character. This relies on the
-- 'Enum' instance of 'GeneralCategory', which must remain in the
-- same order as the categories are presented in the Unicode
-- standard.
--
-- ==== __Examples__
--
-- Basic usage:
--
-- >>> generalCategory 'a'
-- LowercaseLetter
-- >>> generalCategory 'A'
-- UppercaseLetter
-- >>> generalCategory '0'
-- DecimalNumber
-- >>> generalCategory '%'
-- OtherPunctuation
-- >>> generalCategory '♥'
-- OtherSymbol
-- >>> generalCategory '\31'
-- Control
-- >>> generalCategory ' '
-- Space
--
generalCategory :: Char -> GeneralCategory
-- 'ord' yields an Int and 'wgencat' is imported at type Int -> Int, so no
-- numeric conversion is needed on either side of the foreign call.
generalCategory c = toEnum (wgencat (ord c))

-- | Selects the first 128 characters of the Unicode character set,
-- corresponding to the ASCII character set.
isAscii :: Char -> Bool
isAscii c = c < '\x80'

-- | Selects the first 256 characters of the Unicode character set,
-- corresponding to the ISO 8859-1 (Latin-1) character set.
isLatin1 :: Char -> Bool
isLatin1 c = c <= '\xff'

-- | Selects ASCII lower-case letters,
-- i.e. characters satisfying both 'isAscii' and 'isLower'.
isAsciiLower :: Char -> Bool
isAsciiLower c = c >= 'a' && c <= 'z'

-- | Selects ASCII upper-case letters,
-- i.e. characters satisfying both 'isAscii' and 'isUpper'.
isAsciiUpper :: Char -> Bool
isAsciiUpper c = c >= 'A' && c <= 'Z'

-- | Selects control characters, which are the non-printing characters of
-- the Latin-1 subset of Unicode.
isControl :: Char -> Bool
isControl c = iswcntrl (ord c) /= 0

-- | Selects printable Unicode characters
-- (letters, numbers, marks, punctuation, symbols and spaces).
isPrint :: Char -> Bool
isPrint c = iswprint (ord c) /= 0

-- | Returns 'True' for any Unicode space character, and the control
-- characters @\\t@, @\\n@, @\\r@, @\\f@, @\\v@.
isSpace :: Char -> Bool
-- isSpace includes non-breaking space
-- The magic 0x377 isn't really that magical. As of 2014, all the codepoints
-- at or below 0x377 have been assigned, so we shouldn't have to worry about
-- any new spaces appearing below there. It would probably be best to
-- use branchless ||, but currently the eqLit transformation will undo that,
-- so we'll do it like this until there's a way around that.
isSpace c
    | uc <= 0x377 = uc == 32 || uc - 0x9 <= 4 || uc == 0xa0
    | otherwise   = iswspace (ord c) /= 0
  where
    -- Unsigned code point: lets @uc - 0x9 <= 4@ test the range 0x9..0xd
    -- with a single comparison (smaller values wrap around to huge ones).
    uc = fromIntegral (ord c) :: Word

-- | Selects upper-case or title-case alphabetic Unicode characters (letters).
-- Title case is used by a small number of letter ligatures like the
-- single-character form of /Lj/.
isUpper :: Char -> Bool
isUpper c = iswupper (ord c) /= 0

-- | Selects lower-case alphabetic Unicode characters (letters).
isLower :: Char -> Bool
isLower c = iswlower (ord c) /= 0

-- | Selects alphabetic Unicode characters (lower-case, upper-case and
-- title-case letters, plus letters of caseless scripts and modifiers letters).
-- This function is equivalent to 'Data.Char.isLetter'.
isAlpha :: Char -> Bool
isAlpha c = iswalpha (ord c) /= 0

-- | Selects alphabetic or numeric Unicode characters.
--
-- Note that numeric digits outside the ASCII range, as well as numeric
-- characters which aren't digits, are selected by this function but not by
-- 'isDigit'. Such characters may be part of identifiers but are not used by
-- the printer and reader to represent numbers.
isAlphaNum :: Char -> Bool
isAlphaNum c = iswalnum (ord c) /= 0

-- | Selects ASCII digits, i.e. @\'0\'@..@\'9\'@.
isDigit :: Char -> Bool
isDigit c = (fromIntegral (ord c - ord '0') :: Word) <= 9

-- We use a subtraction and an unsigned comparison instead of two signed
-- comparisons because it's usually faster and puts less strain on branch
-- prediction. It likely also enables some CSE when combined with functions
-- that follow up with an actual conversion.

-- | Selects ASCII octal digits, i.e. @\'0\'@..@\'7\'@.
isOctDigit :: Char -> Bool
isOctDigit c = (fromIntegral (ord c - ord '0') :: Word) <= 7

-- | Selects ASCII hexadecimal digits,
-- i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@.
isHexDigit :: Char -> Bool
isHexDigit c =
    isDigit c
        || (fromIntegral (ord c - ord 'A') :: Word) <= 5
        || (fromIntegral (ord c - ord 'a') :: Word) <= 5

-- | Selects Unicode punctuation characters, including various kinds
-- of connectors, brackets and quotes.
--
-- This function returns 'True' if its argument has one of the
-- following 'GeneralCategory's, or 'False' otherwise:
--
-- * 'ConnectorPunctuation'
-- * 'DashPunctuation'
-- * 'OpenPunctuation'
-- * 'ClosePunctuation'
-- * 'InitialQuote'
-- * 'FinalQuote'
-- * 'OtherPunctuation'
--
-- These classes are defined in the
-- <http://www.unicode.org/reports/tr44/tr44-14.html#GC_Values_Table Unicode Character Database>,
-- part of the Unicode standard. The same document defines what is
-- and is not a \"Punctuation\".
--
-- ==== __Examples__
--
-- Basic usage:
--
-- >>> isPunctuation 'a'
-- False
-- >>> isPunctuation '7'
-- False
-- >>> isPunctuation '♥'
-- False
-- >>> isPunctuation '"'
-- True
-- >>> isPunctuation '?'
-- True
-- >>> isPunctuation '—'
-- True
--
isPunctuation :: Char -> Bool
isPunctuation c = case generalCategory c of
    ConnectorPunctuation -> True
    DashPunctuation      -> True
    OpenPunctuation      -> True
    ClosePunctuation     -> True
    InitialQuote         -> True
    FinalQuote           -> True
    OtherPunctuation     -> True
    _                    -> False

-- | Selects Unicode symbol characters, including mathematical and
-- currency symbols.
--
-- This function returns 'True' if its argument has one of the
-- following 'GeneralCategory's, or 'False' otherwise:
--
-- * 'MathSymbol'
-- * 'CurrencySymbol'
-- * 'ModifierSymbol'
-- * 'OtherSymbol'
--
-- These classes are defined in the
-- <http://www.unicode.org/reports/tr44/tr44-14.html#GC_Values_Table Unicode Character Database>,
-- part of the Unicode standard. The same document defines what is
-- and is not a \"Symbol\".
--
-- ==== __Examples__
--
-- Basic usage:
--
-- >>> isSymbol 'a'
-- False
-- >>> isSymbol '6'
-- False
-- >>> isSymbol '='
-- True
--
-- The definition of \"math symbol\" may be a little
-- counter-intuitive depending on one's background:
--
-- >>> isSymbol '+'
-- True
-- >>> isSymbol '-'
-- False
--
isSymbol :: Char -> Bool
isSymbol c = case generalCategory c of
    MathSymbol     -> True
    CurrencySymbol -> True
    ModifierSymbol -> True
    OtherSymbol    -> True
    _              -> False

-- | Convert a letter to the corresponding upper-case letter, if any.
-- Any other character is returned unchanged.
toUpper :: Char -> Char
toUpper c = chr (towupper (ord c))

-- | Convert a letter to the corresponding lower-case letter, if any.
-- Any other character is returned unchanged.
toLower :: Char -> Char
toLower c = chr (towlower (ord c))

-- | Convert a letter to the corresponding title-case or upper-case
-- letter, if any.  (Title case differs from upper case only for a small
-- number of ligature letters.)
-- Any other character is returned unchanged.
toTitle :: Char -> Char
toTitle c = chr (towtitle (ord c))

-- -----------------------------------------------------------------------------
-- Implementation with the supplied auto-generated Unicode character properties
-- table

-- Regardless of the O/S and Library, use the functions contained in WCsubst.c
--
-- Each function takes a code point as an 'Int'. The predicates above treat
-- any non-zero result of an @iswXXX@ function as 'True'.

foreign import ccall unsafe "u_iswalpha"
  iswalpha :: Int -> Int

foreign import ccall unsafe "u_iswalnum"
  iswalnum :: Int -> Int

foreign import ccall unsafe "u_iswcntrl"
  iswcntrl :: Int -> Int

foreign import ccall unsafe "u_iswspace"
  iswspace :: Int -> Int

foreign import ccall unsafe "u_iswprint"
  iswprint :: Int -> Int

foreign import ccall unsafe "u_iswlower"
  iswlower :: Int -> Int

foreign import ccall unsafe "u_iswupper"
  iswupper :: Int -> Int

foreign import ccall unsafe "u_towlower"
  towlower :: Int -> Int

foreign import ccall unsafe "u_towupper"
  towupper :: Int -> Int

foreign import ccall unsafe "u_towtitle"
  towtitle :: Int -> Int

foreign import ccall unsafe "u_gencat"
  wgencat :: Int -> Int

[8]ページ先頭

©2009-2025 Movatter.jp