Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit863bd97

Browse files
authored
Merge branch 'main' into multi_inputs
2 parents1ce22c0 +25a614a commit863bd97

File tree

3 files changed

+69
-66
lines changed

3 files changed

+69
-66
lines changed

‎Lib/test/test_capi/test_codecs.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -849,7 +849,8 @@ def test_codec_xmlcharrefreplace_errors_handler(self):
849849

850850
deftest_codec_backslashreplace_errors_handler(self):
851851
handler=_testcapi.codec_backslashreplace_errors
852-
self.do_test_codec_errors_handler(handler,self.all_unicode_errors)
852+
self.do_test_codec_errors_handler(handler,self.all_unicode_errors,
853+
safe=True)
853854

854855
deftest_codec_namereplace_errors_handler(self):
855856
handler=_testlimitedcapi.codec_namereplace_errors
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix handling of :attr:`UnicodeError.start` and :attr:`UnicodeError.end`
2+
values in the :func:`codecs.backslashreplace_errors` error handler. Patch by
3+
Bénédikt Tran.

‎Python/codecs.c

Lines changed: 64 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -864,108 +864,107 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
864864

865865
PyObject*PyCodec_BackslashReplaceErrors(PyObject*exc)
866866
{
867-
PyObject*object;
868-
Py_ssize_ti;
869-
Py_ssize_tstart;
870-
Py_ssize_tend;
871-
PyObject*res;
872-
Py_UCS1*outp;
873-
intressize;
874-
Py_UCS4c;
875-
867+
PyObject*obj;
868+
Py_ssize_tobjlen,start,end,slen;
876869
if (PyObject_TypeCheck(exc, (PyTypeObject*)PyExc_UnicodeDecodeError)) {
877-
constunsignedchar*p;
878-
if (PyUnicodeDecodeError_GetStart(exc,&start))
879-
returnNULL;
880-
if (PyUnicodeDecodeError_GetEnd(exc,&end))
881-
returnNULL;
882-
if (!(object=PyUnicodeDecodeError_GetObject(exc)))
870+
if (_PyUnicodeError_GetParams(exc,
871+
&obj,&objlen,
872+
&start,&end,&slen, true)<0)
873+
{
883874
returnNULL;
884-
p= (constunsignedchar*)PyBytes_AS_STRING(object);
885-
res=PyUnicode_New(4*(end-start),127);
875+
}
876+
PyObject*res=PyUnicode_New(4*slen,127);
886877
if (res==NULL) {
887-
Py_DECREF(object);
878+
Py_DECREF(obj);
888879
returnNULL;
889880
}
890-
outp=PyUnicode_1BYTE_DATA(res);
891-
for (i=start;i<end;i++,outp+=4) {
892-
unsignedcharc=p[i];
881+
Py_UCS1*outp=PyUnicode_1BYTE_DATA(res);
882+
constunsignedchar*p= (constunsignedchar*)PyBytes_AS_STRING(obj);
883+
for (Py_ssize_ti=start;i<end;i++,outp+=4) {
884+
constunsignedcharch=p[i];
893885
outp[0]='\\';
894886
outp[1]='x';
895-
outp[2]=Py_hexdigits[(c>>4)&0xf];
896-
outp[3]=Py_hexdigits[c&0xf];
887+
outp[2]=Py_hexdigits[(ch >>4)&0xf];
888+
outp[3]=Py_hexdigits[ch&0xf];
897889
}
898-
899890
assert(_PyUnicode_CheckConsistency(res,1));
900-
Py_DECREF(object);
891+
Py_DECREF(obj);
901892
returnPy_BuildValue("(Nn)",res,end);
902893
}
903-
if (PyObject_TypeCheck(exc, (PyTypeObject*)PyExc_UnicodeEncodeError)) {
904-
if (PyUnicodeEncodeError_GetStart(exc,&start))
905-
returnNULL;
906-
if (PyUnicodeEncodeError_GetEnd(exc,&end))
907-
returnNULL;
908-
if (!(object=PyUnicodeEncodeError_GetObject(exc)))
909-
returnNULL;
910-
}
911-
elseif (PyObject_TypeCheck(exc, (PyTypeObject*)PyExc_UnicodeTranslateError)) {
912-
if (PyUnicodeTranslateError_GetStart(exc,&start))
913-
returnNULL;
914-
if (PyUnicodeTranslateError_GetEnd(exc,&end))
915-
returnNULL;
916-
if (!(object=PyUnicodeTranslateError_GetObject(exc)))
894+
895+
if (
896+
PyObject_TypeCheck(exc, (PyTypeObject*)PyExc_UnicodeEncodeError)
897+
||PyObject_TypeCheck(exc, (PyTypeObject*)PyExc_UnicodeTranslateError)
898+
) {
899+
if (_PyUnicodeError_GetParams(exc,
900+
&obj,&objlen,
901+
&start,&end,&slen, false)<0)
902+
{
917903
returnNULL;
904+
}
918905
}
919906
else {
920907
wrong_exception_type(exc);
921908
returnNULL;
922909
}
923910

924-
if (end-start>PY_SSIZE_T_MAX / (1+1+8))
925-
end=start+PY_SSIZE_T_MAX / (1+1+8);
926-
for (i=start,ressize=0;i<end;++i) {
911+
// The number of characters that each character 'ch' contributes
912+
// in the result is 1 + 1 + k, where k >= min{t >= 1 | 16^t > ch}
913+
// and will be formatted as "\\" + ('U'|'u'|'x') + HEXDIGITS,
914+
// where the number of hexdigits is either 2, 4, or 8 (not 6).
915+
// Since the Unicode range is below 10^7, we choose k = 8 whence
916+
// each "block" requires at most 1 + 1 + 8 characters.
917+
if (slen>PY_SSIZE_T_MAX / (1+1+8)) {
918+
end=start+PY_SSIZE_T_MAX / (1+1+8);
919+
end=Py_MIN(end,objlen);
920+
slen=Py_MAX(0,end-start);
921+
}
922+
923+
Py_ssize_tressize=0;
924+
for (Py_ssize_ti=start;i<end;++i) {
927925
/* object is guaranteed to be "ready" */
928-
c=PyUnicode_READ_CHAR(object,i);
926+
Py_UCS4c=PyUnicode_READ_CHAR(obj,i);
929927
if (c >=0x10000) {
930-
ressize+=1+1+8;
928+
ressize+=1+1+8;
931929
}
932930
elseif (c >=0x100) {
933-
ressize+=1+1+4;
931+
ressize+=1+1+4;
932+
}
933+
else {
934+
ressize+=1+1+2;
934935
}
935-
else
936-
ressize+=1+1+2;
937936
}
938-
res=PyUnicode_New(ressize,127);
937+
PyObject*res=PyUnicode_New(ressize,127);
939938
if (res==NULL) {
940-
Py_DECREF(object);
939+
Py_DECREF(obj);
941940
returnNULL;
942941
}
943-
outp=PyUnicode_1BYTE_DATA(res);
944-
for (i=start;i<end;++i) {
945-
c=PyUnicode_READ_CHAR(object,i);
942+
Py_UCS1*outp=PyUnicode_1BYTE_DATA(res);
943+
for (Py_ssize_ti=start;i<end;++i) {
944+
Py_UCS4c=PyUnicode_READ_CHAR(obj,i);
946945
*outp++='\\';
947946
if (c >=0x00010000) {
948947
*outp++='U';
949-
*outp++=Py_hexdigits[(c>>28)&0xf];
950-
*outp++=Py_hexdigits[(c>>24)&0xf];
951-
*outp++=Py_hexdigits[(c>>20)&0xf];
952-
*outp++=Py_hexdigits[(c>>16)&0xf];
953-
*outp++=Py_hexdigits[(c>>12)&0xf];
954-
*outp++=Py_hexdigits[(c>>8)&0xf];
948+
*outp++=Py_hexdigits[(c >>28)&0xf];
949+
*outp++=Py_hexdigits[(c >>24)&0xf];
950+
*outp++=Py_hexdigits[(c >>20)&0xf];
951+
*outp++=Py_hexdigits[(c >>16)&0xf];
952+
*outp++=Py_hexdigits[(c >>12)&0xf];
953+
*outp++=Py_hexdigits[(c >>8)&0xf];
955954
}
956955
elseif (c >=0x100) {
957956
*outp++='u';
958-
*outp++=Py_hexdigits[(c>>12)&0xf];
959-
*outp++=Py_hexdigits[(c>>8)&0xf];
957+
*outp++=Py_hexdigits[(c >>12)&0xf];
958+
*outp++=Py_hexdigits[(c >>8)&0xf];
960959
}
961-
else
960+
else {
962961
*outp++='x';
963-
*outp++=Py_hexdigits[(c>>4)&0xf];
964-
*outp++=Py_hexdigits[c&0xf];
962+
}
963+
*outp++=Py_hexdigits[(c >>4)&0xf];
964+
*outp++=Py_hexdigits[c&0xf];
965965
}
966-
967966
assert(_PyUnicode_CheckConsistency(res,1));
968-
Py_DECREF(object);
967+
Py_DECREF(obj);
969968
returnPy_BuildValue("(Nn)",res,end);
970969
}
971970

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp