Movatterモバイル変換
[0]ホーム
{-# LANGUAGE Trustworthy #-}{-# LANGUAGE NoImplicitPrelude , BangPatterns , NondecreasingIndentation , MagicHash #-}{-# OPTIONS_GHC -funbox-strict-fields #-}------------------------------------------------------------------------------- |-- Module : GHC.IO.Encoding.UTF32-- Copyright : (c) The University of Glasgow, 2009-- License : see libraries/base/LICENSE---- Maintainer : libraries@haskell.org-- Stability : internal-- Portability : non-portable---- UTF-32 Codecs for the IO library---- Portions Copyright : (c) Tom Harper 2008-2009,-- (c) Bryan O'Sullivan 2009,-- (c) Duncan Coutts 2009-------------------------------------------------------------------------------moduleGHC.IO.Encoding.UTF32(utf32,mkUTF32,utf32_decode,utf32_encode,utf32be,mkUTF32be,utf32be_decode,utf32be_encode,utf32le,mkUTF32le,utf32le_decode,utf32le_encode,)whereimportGHC.BaseimportGHC.RealimportGHC.Num-- import GHC.IOimportGHC.IO.BufferimportGHC.IO.Encoding.FailureimportGHC.IO.Encoding.TypesimportGHC.WordimportData.BitsimportGHC.IORef-- ------------------------------------------------------------------------------- The UTF-32 codec: either UTF-32BE or UTF-32LE with a BOMutf32::TextEncodingutf32=mkUTF32ErrorOnCodingFailure-- | @since 4.4.0.0mkUTF32::CodingFailureMode->TextEncodingmkUTF32cfm=TextEncoding{textEncodingName="UTF-32",mkTextDecoder=utf32_DFcfm,mkTextEncoder=utf32_EFcfm}utf32_DF::CodingFailureMode->IO(TextDecoder(MaybeDecodeBuffer))utf32_DFcfm=doseen_bom<-newIORefNothingreturn(BufferCodec{encode=utf32_decodeseen_bom,recover=recoverDecodecfm,close=return(),getState=readIORefseen_bom,setState=writeIORefseen_bom})utf32_EF::CodingFailureMode->IO(TextEncoderBool)utf32_EFcfm=dodone_bom<-newIORefFalsereturn(BufferCodec{encode=utf32_encodedone_bom,recover=recoverEncodecfm,close=return(),getState=readIORefdone_bom,setState=writeIORefdone_bom})utf32_encode::IORefBool->EncodeBufferutf32_encodedone_bominputoutput@Buffer{bufRaw=oraw,bufL=_,bufR=ow,bufSize=os}=dob<-readIORefdone_bomifbthenutf32_native_encodeinputoutputelseifos-ow<4thenreturn(OutputUnderflow,input,output)elsedowriteIORefdone_bomTruewriteWord8Buforawowbom0writeWord8Buforaw(ow+1)bom1writeWord8Buforaw(ow+2)bom2writeWord8Buforaw(ow+3)bom3utf32_native_encodeinputoutput{bufR=ow+4}utf32_decode::IORef(MaybeDecodeBuffer)->DecodeBufferutf32_decodeseen_bominput@Buffer{bufRaw=iraw,bufL=ir,bufR=iw,bufSize=_}output=domb<-readIORefseen_bomcasembofJustdecode->decodeinputoutputNothing->ifiw-ir<4thenreturn(InputUnderflow,input,output)elsedoc0<-readWord8Bufirawirc1<-readWord8Bufiraw(ir+1)c2<-readWord8Bufiraw(ir+2)c3<-readWord8Bufiraw(ir+3)case()of_|c0==bom0&&c1==bom1&&c2==bom2&&c3==bom3->dowriteIORefseen_bom(Justutf32be_decode)utf32be_decodeinput{bufL=ir+4}output_|c0==bom3&&c1==bom2&&c2==bom1&&c3==bom0->dowriteIORefseen_bom(Justutf32le_decode)utf32le_decodeinput{bufL=ir+4}output|otherwise->dowriteIORefseen_bom(Justutf32_native_decode)utf32_native_decodeinputoutputbom0,bom1,bom2,bom3::Word8bom0=0bom1=0bom2=0xfebom3=0xff-- choose UTF-32BE by default for UTF-32 outpututf32_native_decode::DecodeBufferutf32_native_decode=utf32be_decodeutf32_native_encode::EncodeBufferutf32_native_encode=utf32be_encode-- ------------------------------------------------------------------------------- UTF32LE and UTF32BEutf32be::TextEncodingutf32be=mkUTF32beErrorOnCodingFailure-- | @since 4.4.0.0mkUTF32be::CodingFailureMode->TextEncodingmkUTF32becfm=TextEncoding{textEncodingName="UTF-32BE",mkTextDecoder=utf32be_DFcfm,mkTextEncoder=utf32be_EFcfm}utf32be_DF::CodingFailureMode->IO(TextDecoder())utf32be_DFcfm=return(BufferCodec{encode=utf32be_decode,recover=recoverDecodecfm,close=return(),getState=return(),setState=const$return()})utf32be_EF::CodingFailureMode->IO(TextEncoder())utf32be_EFcfm=return(BufferCodec{encode=utf32be_encode,recover=recoverEncodecfm,close=return(),getState=return(),setState=const$return()})utf32le::TextEncodingutf32le=mkUTF32leErrorOnCodingFailure-- | @since 4.4.0.0mkUTF32le::CodingFailureMode->TextEncodingmkUTF32lecfm=TextEncoding{textEncodingName="UTF-32LE",mkTextDecoder=utf32le_DFcfm,mkTextEncoder=utf32le_EFcfm}utf32le_DF::CodingFailureMode->IO(TextDecoder())utf32le_DFcfm=return(BufferCodec{encode=utf32le_decode,recover=recoverDecodecfm,close=return(),getState=return(),setState=const$return()})utf32le_EF::CodingFailureMode->IO(TextEncoder())utf32le_EFcfm=return(BufferCodec{encode=utf32le_encode,recover=recoverEncodecfm,close=return(),getState=return(),setState=const$return()})utf32be_decode::DecodeBufferutf32be_decodeinput@Buffer{bufRaw=iraw,bufL=ir0,bufR=iw,bufSize=_}output@Buffer{bufRaw=oraw,bufL=_,bufR=ow0,bufSize=os}=letloop!ir!ow|ow>=os=doneOutputUnderflowirow|iw-ir<4=doneInputUnderflowirow|otherwise=doc0<-readWord8Bufirawirc1<-readWord8Bufiraw(ir+1)c2<-readWord8Bufiraw(ir+2)c3<-readWord8Bufiraw(ir+3)letx1=chr4c0c1c2c3ifnot(validatex1)theninvalidelsedoow'<-writeCharBuforawowx1loop(ir+4)ow'whereinvalid=doneInvalidSequenceirow-- lambda-lifted, to avoid thunks being built in the inner-loop:donewhy!ir!ow=return(why,ifir==iwtheninput{bufL=0,bufR=0}elseinput{bufL=ir},output{bufR=ow})inloopir0ow0utf32le_decode::DecodeBufferutf32le_decodeinput@Buffer{bufRaw=iraw,bufL=ir0,bufR=iw,bufSize=_}output@Buffer{bufRaw=oraw,bufL=_,bufR=ow0,bufSize=os}=letloop!ir!ow|ow>=os=doneOutputUnderflowirow|iw-ir<4=doneInputUnderflowirow|otherwise=doc0<-readWord8Bufirawirc1<-readWord8Bufiraw(ir+1)c2<-readWord8Bufiraw(ir+2)c3<-readWord8Bufiraw(ir+3)letx1=chr4c3c2c1c0ifnot(validatex1)theninvalidelsedoow'<-writeCharBuforawowx1loop(ir+4)ow'whereinvalid=doneInvalidSequenceirow-- lambda-lifted, to avoid thunks being built in the inner-loop:donewhy!ir!ow=return(why,ifir==iwtheninput{bufL=0,bufR=0}elseinput{bufL=ir},output{bufR=ow})inloopir0ow0utf32be_encode::EncodeBufferutf32be_encodeinput@Buffer{bufRaw=iraw,bufL=ir0,bufR=iw,bufSize=_}output@Buffer{bufRaw=oraw,bufL=_,bufR=ow0,bufSize=os}=letdonewhy!ir!ow=return(why,ifir==iwtheninput{bufL=0,bufR=0}elseinput{bufL=ir},output{bufR=ow})loop!ir!ow|ir>=iw=doneInputUnderflowirow|os-ow<4=doneOutputUnderflowirow|otherwise=do(c,ir')<-readCharBufirawirifisSurrogatecthendoneInvalidSequenceirowelsedolet(c0,c1,c2,c3)=ord4cwriteWord8Buforawowc0writeWord8Buforaw(ow+1)c1writeWord8Buforaw(ow+2)c2writeWord8Buforaw(ow+3)c3loopir'(ow+4)inloopir0ow0utf32le_encode::EncodeBufferutf32le_encodeinput@Buffer{bufRaw=iraw,bufL=ir0,bufR=iw,bufSize=_}output@Buffer{bufRaw=oraw,bufL=_,bufR=ow0,bufSize=os}=letdonewhy!ir!ow=return(why,ifir==iwtheninput{bufL=0,bufR=0}elseinput{bufL=ir},output{bufR=ow})loop!ir!ow|ir>=iw=doneInputUnderflowirow|os-ow<4=doneOutputUnderflowirow|otherwise=do(c,ir')<-readCharBufirawirifisSurrogatecthendoneInvalidSequenceirowelsedolet(c0,c1,c2,c3)=ord4cwriteWord8Buforawowc3writeWord8Buforaw(ow+1)c2writeWord8Buforaw(ow+2)c1writeWord8Buforaw(ow+3)c0loopir'(ow+4)inloopir0ow0chr4::Word8->Word8->Word8->Word8->Charchr4(W8#x1#)(W8#x2#)(W8#x3#)(W8#x4#)=C#(chr#(z1#+#z2#+#z3#+#z4#))where!y1#=word2Int#x1#!y2#=word2Int#x2#!y3#=word2Int#x3#!y4#=word2Int#x4#!z1#=uncheckedIShiftL#y1#24#!z2#=uncheckedIShiftL#y2#16#!z3#=uncheckedIShiftL#y3#8#!z4#=y4#{-# INLINEchr4#-}ord4::Char->(Word8,Word8,Word8,Word8)ord4c=(fromIntegral(x`shiftR`24),fromIntegral(x`shiftR`16),fromIntegral(x`shiftR`8),fromIntegralx)wherex=ordc{-# INLINEord4#-}validate::Char->Boolvalidatec=(x1>=0x0&&x1<0xD800)||(x1>0xDFFF&&x1<=0x10FFFF)wherex1=ordc{-# INLINEvalidate#-}
[8]ページ先頭