Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 195cde6

Browse files
authored
Use non-BOM encodings (#2370)
* Use non-BOM encodings. The documentation of `PyUnicode_DecodeUTF16` states that not passing `*byteorder`, or passing a 0, results in the first two bytes, if they are the BOM (U+FEFF, zero-width no-break space), being interpreted and skipped. This is incorrect when we convert a known "non-BOM" string, which all strings from C# are.
1 parent 6a8a97d commit 195cde6

File tree

8 files changed

+44
-29
lines changed

8 files changed

+44
-29
lines changed

‎src/embed_tests/TestPyType.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ public void CanCreateHeapType()
2828
conststringname="nÁmæ";
2929
conststringdocStr="dÁcæ";
3030

31-
usingvardoc=newStrPtr(docStr,Encoding.UTF8);
31+
usingvardoc=newStrPtr(docStr,Encodings.UTF8);
3232
varspec=newTypeSpec(
3333
name:name,
3434
basicSize:Util.ReadInt32(Runtime.Runtime.PyBaseObjectType,TypeOffset.tp_basicsize),

‎src/runtime/Loader.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ public unsafe static int Initialize(IntPtr data, int size)
1212
{
1313
try
1414
{
15-
vardllPath=Encoding.UTF8.GetString((byte*)data.ToPointer(),size);
15+
vardllPath=Encodings.UTF8.GetString((byte*)data.ToPointer(),size);
1616

1717
if(!string.IsNullOrEmpty(dllPath))
1818
{
@@ -33,15 +33,15 @@ public unsafe static int Initialize(IntPtr data, int size)
3333
);
3434
return1;
3535
}
36-
36+
3737
return0;
3838
}
3939

4040
publicunsafestaticintShutdown(IntPtrdata,intsize)
4141
{
4242
try
4343
{
44-
varcommand=Encoding.UTF8.GetString((byte*)data.ToPointer(),size);
44+
varcommand=Encodings.UTF8.GetString((byte*)data.ToPointer(),size);
4545

4646
if(command=="full_shutdown")
4747
{

‎src/runtime/Native/CustomMarshaler.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ public int GetNativeDataSize()
4242
internalclassUcsMarshaler:MarshalerBase
4343
{
4444
internalstaticreadonlyint_UCS=RuntimeInformation.IsOSPlatform(OSPlatform.Windows)?2:4;
45-
internalstaticreadonlyEncodingPyEncoding=_UCS==2?Encoding.Unicode:Encoding.UTF32;
45+
internalstaticreadonlyEncodingPyEncoding=_UCS==2?Encodings.UTF16:Encodings.UTF32;
4646
privatestaticreadonlyMarshalerBaseInstance=newUcsMarshaler();
4747

4848
publicoverrideIntPtrMarshalManagedToNative(objectmanagedObj)

‎src/runtime/Native/NativeTypeSpec.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ public NativeTypeSpec(TypeSpec spec)
1717
{
1818
if(specisnull)thrownewArgumentNullException(nameof(spec));
1919

20-
this.Name=newStrPtr(spec.Name,Encoding.UTF8);
20+
this.Name=newStrPtr(spec.Name,Encodings.UTF8);
2121
this.BasicSize=spec.BasicSize;
2222
this.ItemSize=spec.ItemSize;
2323
this.Flags=(int)spec.Flags;

‎src/runtime/PythonTypes/PyType.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ public string Name
5353
{
5454
RawPointer=Util.ReadIntPtr(this,TypeOffset.tp_name),
5555
};
56-
returnnamePtr.ToString(System.Text.Encoding.UTF8)!;
56+
returnnamePtr.ToString(Encodings.UTF8)!;
5757
}
5858
}
5959

‎src/runtime/Runtime.cs

Lines changed: 24 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -795,13 +795,13 @@ public static int Py_Main(int argc, string[] argv)
795795

796796
internalstaticintPyRun_SimpleString(stringcode)
797797
{
798-
usingvarcodePtr=newStrPtr(code,Encoding.UTF8);
798+
usingvarcodePtr=newStrPtr(code,Encodings.UTF8);
799799
returnDelegates.PyRun_SimpleStringFlags(codePtr,Utf8String);
800800
}
801801

802802
internalstaticNewReferencePyRun_String(stringcode,RunFlagTypest,BorrowedReferenceglobals,BorrowedReferencelocals)
803803
{
804-
usingvarcodePtr=newStrPtr(code,Encoding.UTF8);
804+
usingvarcodePtr=newStrPtr(code,Encodings.UTF8);
805805
returnDelegates.PyRun_StringFlags(codePtr,st,globals,locals,Utf8String);
806806
}
807807

@@ -813,14 +813,14 @@ internal static NewReference PyRun_String(string code, RunFlagType st, BorrowedR
813813
/// </summary>
814814
internalstaticNewReferencePy_CompileString(stringstr,stringfile,intstart)
815815
{
816-
usingvarstrPtr=newStrPtr(str,Encoding.UTF8);
816+
usingvarstrPtr=newStrPtr(str,Encodings.UTF8);
817817
usingvarfileObj=newPyString(file);
818818
returnDelegates.Py_CompileStringObject(strPtr,fileObj,start,Utf8String,-1);
819819
}
820820

821821
internalstaticNewReferencePyImport_ExecCodeModule(stringname,BorrowedReferencecode)
822822
{
823-
usingvarnamePtr=newStrPtr(name,Encoding.UTF8);
823+
usingvarnamePtr=newStrPtr(name,Encodings.UTF8);
824824
returnDelegates.PyImport_ExecCodeModule(namePtr,code);
825825
}
826826

@@ -867,13 +867,13 @@ internal static bool PyObject_IsIterable(BorrowedReference ob)
867867

868868
internalstaticintPyObject_HasAttrString(BorrowedReferencepointer,stringname)
869869
{
870-
usingvarnamePtr=newStrPtr(name,Encoding.UTF8);
870+
usingvarnamePtr=newStrPtr(name,Encodings.UTF8);
871871
returnDelegates.PyObject_HasAttrString(pointer,namePtr);
872872
}
873873

874874
internalstaticNewReferencePyObject_GetAttrString(BorrowedReferencepointer,stringname)
875875
{
876-
usingvarnamePtr=newStrPtr(name,Encoding.UTF8);
876+
usingvarnamePtr=newStrPtr(name,Encodings.UTF8);
877877
returnDelegates.PyObject_GetAttrString(pointer,namePtr);
878878
}
879879

@@ -884,12 +884,12 @@ internal static NewReference PyObject_GetAttrString(BorrowedReference pointer, S
884884
internalstaticintPyObject_DelAttr(BorrowedReference@object,BorrowedReferencename)=>Delegates.PyObject_SetAttr(@object,name,null);
885885
internalstaticintPyObject_DelAttrString(BorrowedReference@object,stringname)
886886
{
887-
usingvarnamePtr=newStrPtr(name,Encoding.UTF8);
887+
usingvarnamePtr=newStrPtr(name,Encodings.UTF8);
888888
returnDelegates.PyObject_SetAttrString(@object,namePtr,null);
889889
}
890890
internalstaticintPyObject_SetAttrString(BorrowedReference@object,stringname,BorrowedReferencevalue)
891891
{
892-
usingvarnamePtr=newStrPtr(name,Encoding.UTF8);
892+
usingvarnamePtr=newStrPtr(name,Encodings.UTF8);
893893
returnDelegates.PyObject_SetAttrString(@object,namePtr,value);
894894
}
895895

@@ -1071,7 +1071,7 @@ internal static bool PyBool_CheckExact(BorrowedReference ob)
10711071

10721072
internalstaticNewReferencePyLong_FromString(stringvalue,intradix)
10731073
{
1074-
usingvarvalPtr=newStrPtr(value,Encoding.UTF8);
1074+
usingvarvalPtr=newStrPtr(value,Encodings.UTF8);
10751075
returnDelegates.PyLong_FromString(valPtr,IntPtr.Zero,radix);
10761076
}
10771077

@@ -1252,12 +1252,14 @@ internal static bool PyString_CheckExact(BorrowedReference ob)
12521252

12531253
internalstaticNewReferencePyString_FromString(stringvalue)
12541254
{
1255+
intbyteorder=BitConverter.IsLittleEndian?-1:1;
1256+
int*byteorderPtr=&byteorder;
12551257
fixed(char*ptr=value)
12561258
returnDelegates.PyUnicode_DecodeUTF16(
12571259
(IntPtr)ptr,
12581260
value.Length*sizeof(Char),
12591261
IntPtr.Zero,
1260-
IntPtr.Zero
1262+
(IntPtr)byteorderPtr
12611263
);
12621264
}
12631265

@@ -1272,7 +1274,7 @@ internal static NewReference EmptyPyBytes()
12721274
internalstaticNewReferencePyByteArray_FromStringAndSize(IntPtrstrPtr,nintlen)=>Delegates.PyByteArray_FromStringAndSize(strPtr,len);
12731275
internalstaticNewReferencePyByteArray_FromStringAndSize(strings)
12741276
{
1275-
usingvarptr=newStrPtr(s,Encoding.UTF8);
1277+
usingvarptr=newStrPtr(s,Encodings.UTF8);
12761278
returnPyByteArray_FromStringAndSize(ptr.RawPointer,checked((nint)ptr.ByteCount));
12771279
}
12781280

@@ -1300,7 +1302,7 @@ internal static IntPtr PyBytes_AsString(BorrowedReference ob)
13001302

13011303
internalstaticNewReferencePyUnicode_InternFromString(strings)
13021304
{
1303-
usingvarptr=newStrPtr(s,Encoding.UTF8);
1305+
usingvarptr=newStrPtr(s,Encodings.UTF8);
13041306
returnDelegates.PyUnicode_InternFromString(ptr);
13051307
}
13061308

@@ -1375,7 +1377,7 @@ internal static bool PyDict_Check(BorrowedReference ob)
13751377

13761378
internalstaticBorrowedReferencePyDict_GetItemString(BorrowedReferencepointer,stringkey)
13771379
{
1378-
usingvarkeyStr=newStrPtr(key,Encoding.UTF8);
1380+
usingvarkeyStr=newStrPtr(key,Encodings.UTF8);
13791381
returnDelegates.PyDict_GetItemString(pointer,keyStr);
13801382
}
13811383

@@ -1391,7 +1393,7 @@ internal static BorrowedReference PyDict_GetItemString(BorrowedReference pointer
13911393
/// </summary>
13921394
internalstaticintPyDict_SetItemString(BorrowedReferencedict,stringkey,BorrowedReferencevalue)
13931395
{
1394-
usingvarkeyPtr=newStrPtr(key,Encoding.UTF8);
1396+
usingvarkeyPtr=newStrPtr(key,Encodings.UTF8);
13951397
returnDelegates.PyDict_SetItemString(dict,keyPtr,value);
13961398
}
13971399

@@ -1400,7 +1402,7 @@ internal static int PyDict_SetItemString(BorrowedReference dict, string key, Bor
14001402

14011403
internalstaticintPyDict_DelItemString(BorrowedReferencepointer,stringkey)
14021404
{
1403-
usingvarkeyPtr=newStrPtr(key,Encoding.UTF8);
1405+
usingvarkeyPtr=newStrPtr(key,Encodings.UTF8);
14041406
returnDelegates.PyDict_DelItemString(pointer,keyPtr);
14051407
}
14061408

@@ -1515,7 +1517,7 @@ internal static bool PyIter_Check(BorrowedReference ob)
15151517

15161518
internalstaticNewReferencePyModule_New(stringname)
15171519
{
1518-
usingvarnamePtr=newStrPtr(name,Encoding.UTF8);
1520+
usingvarnamePtr=newStrPtr(name,Encodings.UTF8);
15191521
returnDelegates.PyModule_New(namePtr);
15201522
}
15211523

@@ -1529,7 +1531,7 @@ internal static NewReference PyModule_New(string name)
15291531
/// <returns>Return -1 on error, 0 on success.</returns>
15301532
internalstaticintPyModule_AddObject(BorrowedReferencemodule,stringname,StolenReferencevalue)
15311533
{
1532-
usingvarnamePtr=newStrPtr(name,Encoding.UTF8);
1534+
usingvarnamePtr=newStrPtr(name,Encodings.UTF8);
15331535
IntPtrvalueAddr=value.DangerousGetAddressOrNull();
15341536
intres=Delegates.PyModule_AddObject(module,namePtr,valueAddr);
15351537
// We can't just exit here because the reference is stolen only on success.
@@ -1547,7 +1549,7 @@ internal static int PyModule_AddObject(BorrowedReference module, string name, St
15471549

15481550
internalstaticNewReferencePyImport_ImportModule(stringname)
15491551
{
1550-
usingvarnamePtr=newStrPtr(name,Encoding.UTF8);
1552+
usingvarnamePtr=newStrPtr(name,Encodings.UTF8);
15511553
returnDelegates.PyImport_ImportModule(namePtr);
15521554
}
15531555

@@ -1556,7 +1558,7 @@ internal static NewReference PyImport_ImportModule(string name)
15561558

15571559
internalstaticBorrowedReferencePyImport_AddModule(stringname)
15581560
{
1559-
usingvarnamePtr=newStrPtr(name,Encoding.UTF8);
1561+
usingvarnamePtr=newStrPtr(name,Encodings.UTF8);
15601562
returnDelegates.PyImport_AddModule(namePtr);
15611563
}
15621564

@@ -1584,13 +1586,13 @@ internal static void PySys_SetArgvEx(int argc, string[] argv, int updatepath)
15841586

15851587
internalstaticBorrowedReferencePySys_GetObject(stringname)
15861588
{
1587-
usingvarnamePtr=newStrPtr(name,Encoding.UTF8);
1589+
usingvarnamePtr=newStrPtr(name,Encodings.UTF8);
15881590
returnDelegates.PySys_GetObject(namePtr);
15891591
}
15901592

15911593
internalstaticintPySys_SetObject(stringname,BorrowedReferenceob)
15921594
{
1593-
usingvarnamePtr=newStrPtr(name,Encoding.UTF8);
1595+
usingvarnamePtr=newStrPtr(name,Encodings.UTF8);
15941596
returnDelegates.PySys_SetObject(namePtr,ob);
15951597
}
15961598

@@ -1689,7 +1691,7 @@ internal static IntPtr PyMem_Malloc(long size)
16891691

16901692
internalstaticvoidPyErr_SetString(BorrowedReferenceob,stringmessage)
16911693
{
1692-
usingvarmsgPtr=newStrPtr(message,Encoding.UTF8);
1694+
usingvarmsgPtr=newStrPtr(message,Encodings.UTF8);
16931695
Delegates.PyErr_SetString(ob,msgPtr);
16941696
}
16951697

‎src/runtime/Util/Encodings.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
usingSystem;
2+
usingSystem.Text;
3+
4+
namespacePython.Runtime;
5+
6+
staticclassEncodings{
7+
publicstaticSystem.Text.EncodingUTF8=newUTF8Encoding(false,true);
8+
publicstaticSystem.Text.EncodingUTF16=newUnicodeEncoding(!BitConverter.IsLittleEndian,false,true);
9+
publicstaticSystem.Text.EncodingUTF32=newUTF32Encoding(!BitConverter.IsLittleEndian,false,true);
10+
}

‎tests/test_conversion.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,9 @@ def test_string_conversion():
510510
ob.StringField=System.String(u'\uffff\uffff')
511511
assertob.StringField==u'\uffff\uffff'
512512

513+
ob.StringField=System.String("\ufeffbom")
514+
assertob.StringField=="\ufeffbom"
515+
513516
ob.StringField=None
514517
assertob.StringFieldisNone
515518

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp