Movatterモバイル変換


[0]ホーム

URL:


{-# LANGUAGE Trustworthy #-}{-# LANGUAGE NoImplicitPrelude           , BangPatterns           , NondecreasingIndentation           , MagicHash  #-}{-# OPTIONS_GHC  -funbox-strict-fields #-}------------------------------------------------------------------------------- |-- Module      :  GHC.IO.Encoding.UTF16-- Copyright   :  (c) The University of Glasgow, 2009-- License     :  see libraries/base/LICENSE---- Maintainer  :  libraries@haskell.org-- Stability   :  internal-- Portability :  non-portable---- UTF-16 Codecs for the IO library---- Portions Copyright   : (c) Tom Harper 2008-2009,--                        (c) Bryan O'Sullivan 2009,--                        (c) Duncan Coutts 2009-------------------------------------------------------------------------------moduleGHC.IO.Encoding.UTF16(utf16,mkUTF16,utf16_decode,utf16_encode,utf16be,mkUTF16be,utf16be_decode,utf16be_encode,utf16le,mkUTF16le,utf16le_decode,utf16le_encode,)whereimportGHC.BaseimportGHC.RealimportGHC.Num-- import GHC.IOimportGHC.IO.BufferimportGHC.IO.Encoding.FailureimportGHC.IO.Encoding.TypesimportGHC.WordimportData.BitsimportGHC.IORef-- ------------------------------------------------------------------------------- The UTF-16 codec: either UTF16BE or UTF16LE with a BOMutf16::TextEncodingutf16=mkUTF16ErrorOnCodingFailure-- | @since 4.4.0.0mkUTF16::CodingFailureMode->TextEncodingmkUTF16cfm=TextEncoding{textEncodingName="UTF-16",mkTextDecoder=utf16_DFcfm,mkTextEncoder=utf16_EFcfm}utf16_DF::CodingFailureMode->IO(TextDecoder(MaybeDecodeBuffer))utf16_DFcfm=doseen_bom<-newIORefNothingreturn(BufferCodec{encode=utf16_decodeseen_bom,recover=recoverDecodecfm,close=return(),getState=readIORefseen_bom,setState=writeIORefseen_bom})utf16_EF::CodingFailureMode->IO(TextEncoderBool)utf16_EFcfm=dodone_bom<-newIORefFalsereturn(BufferCodec{encode=utf16_encodedone_bom,recover=recoverEncodecfm,close=return(),getState=readIORefdone_bom,setState=writeIORefdone_bom})utf16_encode::IORefBool->EncodeBufferutf16_encodedone_bominputoutput@Buffer{bufRaw=oraw,bufL=_,bufR=ow,bufSize=os}=dob<-readIORefdone_bomifbthenutf16_native_encodeinputoutputelseifos-ow<2thenreturn(OutputUnderflow,input,output)elsedowriteIORefdone_bomTruewriteWord8Buforawowbom1writeWord8Buforaw(ow+1)bom2utf16_native_encodeinputoutput{bufR=ow+2}utf16_decode::IORef(MaybeDecodeBuffer)->DecodeBufferutf16_decodeseen_bominput@Buffer{bufRaw=iraw,bufL=ir,bufR=iw,bufSize=_}output=domb<-readIORefseen_bomcasembofJustdecode->decodeinputoutputNothing->ifiw-ir<2thenreturn(InputUnderflow,input,output)elsedoc0<-readWord8Bufirawirc1<-readWord8Bufiraw(ir+1)case()of_|c0==bomB&&c1==bomL->dowriteIORefseen_bom(Justutf16be_decode)utf16be_decodeinput{bufL=ir+2}output|c0==bomL&&c1==bomB->dowriteIORefseen_bom(Justutf16le_decode)utf16le_decodeinput{bufL=ir+2}output|otherwise->dowriteIORefseen_bom(Justutf16_native_decode)utf16_native_decodeinputoutputbomB,bomL,bom1,bom2::Word8bomB=0xfebomL=0xff-- choose UTF-16BE by default for UTF-16 outpututf16_native_decode::DecodeBufferutf16_native_decode=utf16be_decodeutf16_native_encode::EncodeBufferutf16_native_encode=utf16be_encodebom1=bomBbom2=bomL-- ------------------------------------------------------------------------------- UTF16LE and UTF16BEutf16be::TextEncodingutf16be=mkUTF16beErrorOnCodingFailure-- | @since 4.4.0.0mkUTF16be::CodingFailureMode->TextEncodingmkUTF16becfm=TextEncoding{textEncodingName="UTF-16BE",mkTextDecoder=utf16be_DFcfm,mkTextEncoder=utf16be_EFcfm}utf16be_DF::CodingFailureMode->IO(TextDecoder())utf16be_DFcfm=return(BufferCodec{encode=utf16be_decode,recover=recoverDecodecfm,close=return(),getState=return(),setState=const$return()})utf16be_EF::CodingFailureMode->IO(TextEncoder())utf16be_EFcfm=return(BufferCodec{encode=utf16be_encode,recover=recoverEncodecfm,close=return(),getState=return(),setState=const$return()})utf16le::TextEncodingutf16le=mkUTF16leErrorOnCodingFailure-- | @since 4.4.0.0mkUTF16le::CodingFailureMode->TextEncodingmkUTF16lecfm=TextEncoding{textEncodingName="UTF16-LE",mkTextDecoder=utf16le_DFcfm,mkTextEncoder=utf16le_EFcfm}utf16le_DF::CodingFailureMode->IO(TextDecoder())utf16le_DFcfm=return(BufferCodec{encode=utf16le_decode,recover=recoverDecodecfm,close=return(),getState=return(),setState=const$return()})utf16le_EF::CodingFailureMode->IO(TextEncoder())utf16le_EFcfm=return(BufferCodec{encode=utf16le_encode,recover=recoverEncodecfm,close=return(),getState=return(),setState=const$return()})utf16be_decode::DecodeBufferutf16be_decodeinput@Buffer{bufRaw=iraw,bufL=ir0,bufR=iw,bufSize=_}output@Buffer{bufRaw=oraw,bufL=_,bufR=ow0,bufSize=os}=letloop!ir!ow|ow>=os=doneOutputUnderflowirow|ir>=iw=doneInputUnderflowirow|ir+1==iw=doneInputUnderflowirow|otherwise=doc0<-readWord8Bufirawirc1<-readWord8Bufiraw(ir+1)letx1=fromIntegralc0`shiftL`8+fromIntegralc1ifvalidate1x1thendoow'<-writeCharBuforawow(unsafeChr(fromIntegralx1))loop(ir+2)ow'elseifiw-ir<4thendoneInputUnderflowirowelsedoc2<-readWord8Bufiraw(ir+2)c3<-readWord8Bufiraw(ir+3)letx2=fromIntegralc2`shiftL`8+fromIntegralc3ifnot(validate2x1x2)theninvalidelsedoow'<-writeCharBuforawow(chr2x1x2)loop(ir+4)ow'whereinvalid=doneInvalidSequenceirow-- lambda-lifted, to avoid thunks being built in the inner-loop:donewhy!ir!ow=return(why,ifir==iwtheninput{bufL=0,bufR=0}elseinput{bufL=ir},output{bufR=ow})inloopir0ow0utf16le_decode::DecodeBufferutf16le_decodeinput@Buffer{bufRaw=iraw,bufL=ir0,bufR=iw,bufSize=_}output@Buffer{bufRaw=oraw,bufL=_,bufR=ow0,bufSize=os}=letloop!ir!ow|ow>=os=doneOutputUnderflowirow|ir>=iw=doneInputUnderflowirow|ir+1==iw=doneInputUnderflowirow|otherwise=doc0<-readWord8Bufirawirc1<-readWord8Bufiraw(ir+1)letx1=fromIntegralc1`shiftL`8+fromIntegralc0ifvalidate1x1thendoow'<-writeCharBuforawow(unsafeChr(fromIntegralx1))loop(ir+2)ow'elseifiw-ir<4thendoneInputUnderflowirowelsedoc2<-readWord8Bufiraw(ir+2)c3<-readWord8Bufiraw(ir+3)letx2=fromIntegralc3`shiftL`8+fromIntegralc2ifnot(validate2x1x2)theninvalidelsedoow'<-writeCharBuforawow(chr2x1x2)loop(ir+4)ow'whereinvalid=doneInvalidSequenceirow-- lambda-lifted, to avoid thunks being built in the inner-loop:donewhy!ir!ow=return(why,ifir==iwtheninput{bufL=0,bufR=0}elseinput{bufL=ir},output{bufR=ow})inloopir0ow0utf16be_encode::EncodeBufferutf16be_encodeinput@Buffer{bufRaw=iraw,bufL=ir0,bufR=iw,bufSize=_}output@Buffer{bufRaw=oraw,bufL=_,bufR=ow0,bufSize=os}=letdonewhy!ir!ow=return(why,ifir==iwtheninput{bufL=0,bufR=0}elseinput{bufL=ir},output{bufR=ow})loop!ir!ow|ir>=iw=doneInputUnderflowirow|os-ow<2=doneOutputUnderflowirow|otherwise=do(c,ir')<-readCharBufirawircaseordcofx|x<0x10000->ifisSurrogatecthendoneInvalidSequenceirowelsedowriteWord8Buforawow(fromIntegral(x`shiftR`8))writeWord8Buforaw(ow+1)(fromIntegralx)loopir'(ow+2)|otherwise->doifos-ow<4thendoneOutputUnderflowirowelsedoletn1=x-0x10000c1=fromIntegral(n1`shiftR`18+0xD8)c2=fromIntegral(n1`shiftR`10)n2=n1.&.0x3FFc3=fromIntegral(n2`shiftR`8+0xDC)c4=fromIntegraln2--writeWord8Buforawowc1writeWord8Buforaw(ow+1)c2writeWord8Buforaw(ow+2)c3writeWord8Buforaw(ow+3)c4loopir'(ow+4)inloopir0ow0utf16le_encode::EncodeBufferutf16le_encodeinput@Buffer{bufRaw=iraw,bufL=ir0,bufR=iw,bufSize=_}output@Buffer{bufRaw=oraw,bufL=_,bufR=ow0,bufSize=os}=letdonewhy!ir!ow=return(why,ifir==iwtheninput{bufL=0,bufR=0}elseinput{bufL=ir},output{bufR=ow})loop!ir!ow|ir>=iw=doneInputUnderflowirow|os-ow<2=doneOutputUnderflowirow|otherwise=do(c,ir')<-readCharBufirawircaseordcofx|x<0x10000->ifisSurrogatecthendoneInvalidSequenceirowelsedowriteWord8Buforawow(fromIntegralx)writeWord8Buforaw(ow+1)(fromIntegral(x`shiftR`8))loopir'(ow+2)|otherwise->ifos-ow<4thendoneOutputUnderflowirowelsedoletn1=x-0x10000c1=fromIntegral(n1`shiftR`18+0xD8)c2=fromIntegral(n1`shiftR`10)n2=n1.&.0x3FFc3=fromIntegral(n2`shiftR`8+0xDC)c4=fromIntegraln2--writeWord8Buforawowc2writeWord8Buforaw(ow+1)c1writeWord8Buforaw(ow+2)c4writeWord8Buforaw(ow+3)c3loopir'(ow+4)inloopir0ow0chr2::Word16->Word16->Charchr2(W16#a#)(W16#b#)=C#(chr#(upper#+#lower#+#0x10000#))where!x#=word2Int#a#!y#=word2Int#b#!upper#=uncheckedIShiftL#(x#-#0xD800#)10#!lower#=y#-#0xDC00#{-# INLINEchr2#-}validate1::Word16->Boolvalidate1x1=(x1>=0&&x1<0xD800)||x1>0xDFFF{-# INLINEvalidate1#-}validate2::Word16->Word16->Boolvalidate2x1x2=x1>=0xD800&&x1<=0xDBFF&&x2>=0xDC00&&x2<=0xDFFF{-# INLINEvalidate2#-}

[8]ページ先頭

©2009-2025 Movatter.jp