@@ -730,6 +730,27 @@ codec_handler_write_unicode_hex(Py_UCS1 **p, Py_UCS4 ch)
730730}
731731
732732
733+ /*
734+ * Create a Unicode string containing 'count' copies of the official
735+ * Unicode REPLACEMENT CHARACTER (0xFFFD).
736+ */
737+ static PyObject *
738+ codec_handler_unicode_replacement_character (Py_ssize_t count )
739+ {
740+ PyObject * res = PyUnicode_New (count ,Py_UNICODE_REPLACEMENT_CHARACTER );
741+ if (res == NULL ) {
742+ return NULL ;
743+ }
744+ assert (count == 0 || PyUnicode_KIND (res )== PyUnicode_2BYTE_KIND );
745+ Py_UCS2 * outp = PyUnicode_2BYTE_DATA (res );
746+ for (Py_ssize_t i = 0 ;i < count ;++ i ) {
747+ outp [i ]= Py_UNICODE_REPLACEMENT_CHARACTER ;
748+ }
749+ assert (_PyUnicode_CheckConsistency (res ,1 ));
750+ return res ;
751+ }
752+
753+
733754// --- handler: 'strict' ------------------------------------------------------
734755
735756PyObject * PyCodec_StrictErrors (PyObject * exc )
@@ -774,50 +795,71 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc)
774795}
775796
776797
777- PyObject * PyCodec_ReplaceErrors (PyObject * exc )
798+ // --- handler: 'replace' -----------------------------------------------------
799+
800+ static PyObject *
801+ _PyCodec_ReplaceUnicodeEncodeError (PyObject * exc )
778802{
779803Py_ssize_t start ,end ,slen ;
804+ if (_PyUnicodeError_GetParams (exc ,NULL ,NULL ,
805+ & start ,& end ,& slen , false)< 0 )
806+ {
807+ return NULL ;
808+ }
809+ PyObject * res = PyUnicode_New (slen ,'?' );
810+ if (res == NULL ) {
811+ return NULL ;
812+ }
813+ assert (PyUnicode_KIND (res )== PyUnicode_1BYTE_KIND );
814+ Py_UCS1 * outp = PyUnicode_1BYTE_DATA (res );
815+ memset (outp ,'?' ,sizeof (Py_UCS1 )* slen );
816+ assert (_PyUnicode_CheckConsistency (res ,1 ));
817+ return Py_BuildValue ("(Nn)" ,res ,end );
818+ }
780819
781- if (PyObject_TypeCheck (exc , (PyTypeObject * )PyExc_UnicodeEncodeError )) {
782- if (_PyUnicodeError_GetParams (exc ,NULL ,NULL ,
783- & start ,& end ,& slen , false)< 0 ) {
784- return NULL ;
785- }
786- PyObject * res = PyUnicode_New (slen ,'?' );
787- if (res == NULL ) {
788- return NULL ;
789- }
790- assert (PyUnicode_KIND (res )== PyUnicode_1BYTE_KIND );
791- Py_UCS1 * outp = PyUnicode_1BYTE_DATA (res );
792- memset (outp ,'?' ,sizeof (Py_UCS1 )* slen );
793- assert (_PyUnicode_CheckConsistency (res ,1 ));
794- return Py_BuildValue ("(Nn)" ,res ,end );
820+
821+ static PyObject *
822+ _PyCodec_ReplaceUnicodeDecodeError (PyObject * exc )
823+ {
824+ Py_ssize_t end ;
825+ if (PyUnicodeDecodeError_GetEnd (exc ,& end )< 0 ) {
826+ return NULL ;
795827 }
796- else if (PyObject_TypeCheck (exc , (PyTypeObject * )PyExc_UnicodeDecodeError )) {
797- if (_PyUnicodeError_GetParams (exc ,NULL ,NULL ,
798- NULL ,& end ,NULL , true)< 0 ) {
799- return NULL ;
800- }
801- return Py_BuildValue ("(Cn)" ,
802- (int )Py_UNICODE_REPLACEMENT_CHARACTER ,
803- end );
828+ PyObject * res = codec_handler_unicode_replacement_character (1 );
829+ if (res == NULL ) {
830+ return NULL ;
804831 }
805- else if (PyObject_TypeCheck (exc , (PyTypeObject * )PyExc_UnicodeTranslateError )) {
806- if (_PyUnicodeError_GetParams (exc ,NULL ,NULL ,
807- & start ,& end ,& slen , false)< 0 ) {
808- return NULL ;
809- }
810- PyObject * res = PyUnicode_New (slen ,Py_UNICODE_REPLACEMENT_CHARACTER );
811- if (res == NULL ) {
812- return NULL ;
813- }
814- assert (slen == 0 || PyUnicode_KIND (res )== PyUnicode_2BYTE_KIND );
815- Py_UCS2 * outp = PyUnicode_2BYTE_DATA (res );
816- for (Py_ssize_t i = 0 ;i < slen ;++ i ) {
817- outp [i ]= Py_UNICODE_REPLACEMENT_CHARACTER ;
818- }
819- assert (_PyUnicode_CheckConsistency (res ,1 ));
820- return Py_BuildValue ("(Nn)" ,res ,end );
832+ return Py_BuildValue ("(Nn)" ,res ,end );
833+ }
834+
835+
836+ static PyObject *
837+ _PyCodec_ReplaceUnicodeTranslateError (PyObject * exc )
838+ {
839+ Py_ssize_t start ,end ,slen ;
840+ if (_PyUnicodeError_GetParams (exc ,NULL ,NULL ,
841+ & start ,& end ,& slen , false)< 0 )
842+ {
843+ return NULL ;
844+ }
845+ PyObject * res = codec_handler_unicode_replacement_character (slen );
846+ if (res == NULL ) {
847+ return NULL ;
848+ }
849+ return Py_BuildValue ("(Nn)" ,res ,end );
850+ }
851+
852+
853+ PyObject * PyCodec_ReplaceErrors (PyObject * exc )
854+ {
855+ if (_PyIsUnicodeEncodeError (exc )) {
856+ return _PyCodec_ReplaceUnicodeEncodeError (exc );
857+ }
858+ else if (_PyIsUnicodeDecodeError (exc )) {
859+ return _PyCodec_ReplaceUnicodeDecodeError (exc );
860+ }
861+ else if (_PyIsUnicodeTranslateError (exc )) {
862+ return _PyCodec_ReplaceUnicodeTranslateError (exc );
821863 }
822864else {
823865wrong_exception_type (exc );
@@ -1468,7 +1510,8 @@ ignore_errors(PyObject *Py_UNUSED(self), PyObject *exc)
14681510}
14691511
14701512
1471- static PyObject * replace_errors (PyObject * self ,PyObject * exc )
1513+ static inline PyObject *
1514+ replace_errors (PyObject * Py_UNUSED (self ),PyObject * exc )
14721515{
14731516return PyCodec_ReplaceErrors (exc );
14741517}