Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 12027ad

Browse files
authored
Merge pull request #1467 from pkese/master
Use exclusively PyUnicode_DecodeUTF16 for .NET->Python string conversion
2 parents 7d8f754 + 49ccc1e, commit 12027ad

File tree

10 files changed

+42
-49
lines changed

10 files changed

+42
-49
lines changed

‎AUTHORS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
- Meinrad Recheis ([@henon](https://github.com/henon))
5656
- Mohamed Koubaa ([@koubaa](https://github.com/koubaa))
5757
- Patrick Stewart ([@patstew](https://github.com/patstew))
58+
- Peter Kese ([@pkese](https://github.com/pkese))
5859
- Raphael Nestler ([@rnestler](https://github.com/rnestler))
5960
- Rickard Holmberg ([@rickardraysearch](https://github.com/rickardraysearch))
6061
- Sam Winstanley ([@swinstanley](https://github.com/swinstanley))

‎CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ One must now either use enum members (e.g. `MyEnum.Option`), or use enum constru
7171
- Exception stacktraces on `PythonException.StackTrace` are now properly formatted
7272
- Providing an invalid type parameter to a generic type or method produces a helpful Python error
7373
- Empty parameter names (as can be generated from F#) do not cause crashes
74+
- Unicode strings with surrogates were truncated when converting from Python
7475

7576
### Removed
7677

‎src/embed_tests/TestCustomMarshal.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ public static void GetManagedStringTwice()
2323
{
2424
conststringexpected="FooBar";
2525

26-
IntPtrop=Runtime.Runtime.PyUnicode_FromString(expected);
26+
IntPtrop=Runtime.Runtime.PyString_FromString(expected);
2727
strings1=Runtime.Runtime.GetManagedString(op);
2828
strings2=Runtime.Runtime.GetManagedString(op);
2929

‎src/embed_tests/TestPyString.cs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,5 +94,24 @@ public void TestUnicode()
9494
PyObjectactual=newPyString(expected);
9595
Assert.AreEqual(expected,actual.ToString());
9696
}
97+
98+
[Test]
99+
publicvoidTestUnicodeSurrogateToString()
100+
{
101+
varexpected="foo\ud83d\udc3c";
102+
varactual=PythonEngine.Eval("'foo\ud83d\udc3c'");
103+
Assert.AreEqual(4,actual.Length());
104+
Assert.AreEqual(expected,actual.ToString());
105+
}
106+
107+
[Test]
108+
publicvoidTestUnicodeSurrogate()
109+
{
110+
conststringexpected="foo\ud83d\udc3c";// "foo🐼"
111+
PyObjectactual=newPyString(expected);
112+
// python treats "foo🐼" as 4 characters, dotnet as 5
113+
Assert.AreEqual(4,actual.Length());
114+
Assert.AreEqual(expected,actual.ToString());
115+
}
97116
}
98117
}

‎src/embed_tests/TestRuntime.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ public static void Py_IsInitializedValue()
3636
publicstaticvoidRefCountTest()
3737
{
3838
Runtime.Runtime.Py_Initialize();
39-
IntPtrop=Runtime.Runtime.PyUnicode_FromString("FooBar");
39+
IntPtrop=Runtime.Runtime.PyString_FromString("FooBar");
4040

4141
// New object RefCount should be one
4242
Assert.AreEqual(1,Runtime.Runtime.Refcount(op));

‎src/runtime/converter.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ internal static IntPtr ToPython(object value, Type type)
221221
returnCLRObject.GetInstHandle(value,type);
222222

223223
caseTypeCode.String:
224-
returnRuntime.PyUnicode_FromString((string)value);
224+
returnRuntime.PyString_FromString((string)value);
225225

226226
caseTypeCode.Int32:
227227
returnRuntime.PyInt_FromInt32((int)value);

‎src/runtime/exceptions.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ internal static Exception ToException(BorrowedReference ob)
5050
{
5151
message=String.Format("{0}()",name);
5252
}
53-
returnRuntime.PyUnicode_FromString(message);
53+
returnRuntime.PyString_FromString(message);
5454
}
5555

5656
/// <summary>
@@ -75,7 +75,7 @@ internal static Exception ToException(BorrowedReference ob)
7575
{
7676
message=message.Substring(fullTypeName.Length);
7777
}
78-
returnRuntime.PyUnicode_FromString(message);
78+
returnRuntime.PyString_FromString(message);
7979
}
8080
}
8181

@@ -153,7 +153,7 @@ internal static void SetArgsAndCause(BorrowedReference ob, Exception e)
153153
if(!string.IsNullOrEmpty(e.Message))
154154
{
155155
args=Runtime.PyTuple_New(1);
156-
IntPtrmsg=Runtime.PyUnicode_FromString(e.Message);
156+
IntPtrmsg=Runtime.PyString_FromString(e.Message);
157157
Runtime.PyTuple_SetItem(args,0,msg);
158158
}
159159
else

‎src/runtime/pystring.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ public PyString(PyObject o) : base(FromObject(o))
5151

5252
privatestaticIntPtrFromString(strings)
5353
{
54-
IntPtrval=Runtime.PyUnicode_FromUnicode(s,s.Length);
54+
IntPtrval=Runtime.PyString_FromString(s);
5555
PythonException.ThrowIfIsNull(val);
5656
returnval;
5757
}

‎src/runtime/runtime.cs

Lines changed: 13 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ private static void InitPyMembers()
230230
()=>PyStringType=IntPtr.Zero);
231231
XDecref(op);
232232

233-
op=PyUnicode_FromString("unicode");
233+
op=PyString_FromString("unicode");
234234
SetPyMemberTypeOf(refPyUnicodeType,op,
235235
()=>PyUnicodeType=IntPtr.Zero);
236236
XDecref(op);
@@ -1527,7 +1527,12 @@ internal static bool PyString_Check(IntPtr ob)
15271527
internalstaticIntPtrPyString_FromString(stringvalue)
15281528
{
15291529
fixed(char*ptr=value)
1530-
returnPyUnicode_FromKindAndData(2,(IntPtr)ptr,value.Length);
1530+
returnDelegates.PyUnicode_DecodeUTF16(
1531+
(IntPtr)ptr,
1532+
value.Length*sizeof(Char),
1533+
IntPtr.Zero,
1534+
IntPtr.Zero
1535+
).DangerousMoveToPointerOrNull();
15311536
}
15321537

15331538

@@ -1553,16 +1558,6 @@ internal static long PyBytes_Size(IntPtr op)
15531558

15541559
privatestaticIntPtr_PyBytes_Size(IntPtrop)=>Delegates._PyBytes_Size(op);
15551560

1556-
1557-
internalstaticIntPtrPyUnicode_FromStringAndSize(IntPtrvalue,longsize)
1558-
{
1559-
returnPyUnicode_FromStringAndSize(value,newIntPtr(size));
1560-
}
1561-
1562-
1563-
privatestaticIntPtrPyUnicode_FromStringAndSize(IntPtrvalue,IntPtrsize)=>Delegates.PyUnicode_FromStringAndSize(value,size);
1564-
1565-
15661561
internalstaticIntPtrPyUnicode_AsUTF8(IntPtrunicode)=>Delegates.PyUnicode_AsUTF8(unicode);
15671562

15681563
internalstaticboolPyUnicode_Check(IntPtrob)
@@ -1576,22 +1571,6 @@ internal static bool PyUnicode_Check(IntPtr ob)
15761571

15771572
internalstaticIntPtrPyUnicode_FromEncodedObject(IntPtrob,IntPtrenc,IntPtrerr)=>Delegates.PyUnicode_FromEncodedObject(ob,enc,err);
15781573

1579-
internalstaticIntPtrPyUnicode_FromKindAndData(intkind,IntPtrs,longsize)
1580-
{
1581-
returnPyUnicode_FromKindAndData(kind,s,newIntPtr(size));
1582-
}
1583-
1584-
1585-
privatestaticIntPtrPyUnicode_FromKindAndData(intkind,IntPtrs,IntPtrsize)
1586-
=>Delegates.PyUnicode_FromKindAndData(kind,s,size);
1587-
1588-
internalstaticIntPtrPyUnicode_FromUnicode(strings,longsize)
1589-
{
1590-
fixed(char*ptr=s)
1591-
returnPyUnicode_FromKindAndData(2,(IntPtr)ptr,size);
1592-
}
1593-
1594-
15951574
internalstaticintPyUnicode_GetMax()=>Delegates.PyUnicode_GetMax();
15961575

15971576
internalstaticlongPyUnicode_GetSize(IntPtrob)
@@ -1610,12 +1589,6 @@ internal static long PyUnicode_GetSize(IntPtr ob)
16101589

16111590
internalstaticIntPtrPyUnicode_FromOrdinal(intc)=>Delegates.PyUnicode_FromOrdinal(c);
16121591

1613-
internalstaticIntPtrPyUnicode_FromString(strings)
1614-
{
1615-
returnPyUnicode_FromUnicode(s,s.Length);
1616-
}
1617-
1618-
16191592
internalstaticIntPtrPyUnicode_InternFromString(strings)
16201593
{
16211594
usingvarptr=newStrPtr(s,Encoding.UTF8);
@@ -1646,11 +1619,12 @@ internal static string GetManagedString(IntPtr op)
16461619
if(type==PyUnicodeType)
16471620
{
16481621
usingvarp=PyUnicode_AsUTF16String(newBorrowedReference(op));
1649-
intlength=(int)PyUnicode_GetSize(op);
1650-
char*codePoints=(char*)PyBytes_AsString(p.DangerousGetAddress());
1622+
varbytesPtr=p.DangerousGetAddress();
1623+
intbytesLength=(int)Runtime.PyBytes_Size(bytesPtr);
1624+
char*codePoints=(char*)PyBytes_AsString(bytesPtr);
16511625
returnnewstring(codePoints,
16521626
startIndex:1,// skip BOM
1653-
length:length);
1627+
length:bytesLength/2-1);// utf16 - BOM
16541628
}
16551629

16561630
returnnull;
@@ -2442,11 +2416,10 @@ static Delegates()
24422416
PyBytes_AsString=(delegate* unmanaged[Cdecl]<BorrowedReference,IntPtr>)GetFunctionByName(nameof(PyBytes_AsString),GetUnmanagedDll(_PythonDll));
24432417
PyBytes_FromString=(delegate* unmanaged[Cdecl]<IntPtr,IntPtr>)GetFunctionByName(nameof(PyBytes_FromString),GetUnmanagedDll(_PythonDll));
24442418
_PyBytes_Size=(delegate* unmanaged[Cdecl]<IntPtr,IntPtr>)GetFunctionByName("PyBytes_Size",GetUnmanagedDll(_PythonDll));
2445-
PyUnicode_FromStringAndSize=(delegate* unmanaged[Cdecl]<IntPtr,IntPtr,IntPtr>)GetFunctionByName(nameof(PyUnicode_FromStringAndSize),GetUnmanagedDll(_PythonDll));
24462419
PyUnicode_AsUTF8=(delegate* unmanaged[Cdecl]<IntPtr,IntPtr>)GetFunctionByName(nameof(PyUnicode_AsUTF8),GetUnmanagedDll(_PythonDll));
24472420
PyUnicode_FromObject=(delegate* unmanaged[Cdecl]<IntPtr,IntPtr>)GetFunctionByName(nameof(PyUnicode_FromObject),GetUnmanagedDll(_PythonDll));
2421+
PyUnicode_DecodeUTF16=(delegate* unmanaged[Cdecl]<IntPtr,nint,IntPtr,IntPtr,NewReference>)GetFunctionByName(nameof(PyUnicode_DecodeUTF16),GetUnmanagedDll(_PythonDll));
24482422
PyUnicode_FromEncodedObject=(delegate* unmanaged[Cdecl]<IntPtr,IntPtr,IntPtr,IntPtr>)GetFunctionByName(nameof(PyUnicode_FromEncodedObject),GetUnmanagedDll(_PythonDll));
2449-
PyUnicode_FromKindAndData=(delegate* unmanaged[Cdecl]<int,IntPtr,IntPtr,IntPtr>)GetFunctionByName(nameof(PyUnicode_FromKindAndData),GetUnmanagedDll(_PythonDll));
24502423
PyUnicode_GetMax=(delegate* unmanaged[Cdecl]<int>)GetFunctionByName(nameof(PyUnicode_GetMax),GetUnmanagedDll(_PythonDll));
24512424
_PyUnicode_GetSize=(delegate* unmanaged[Cdecl]<IntPtr,IntPtr>)GetFunctionByName("PyUnicode_GetSize",GetUnmanagedDll(_PythonDll));
24522425
PyUnicode_AsUnicode=(delegate* unmanaged[Cdecl]<IntPtr,IntPtr>)GetFunctionByName(nameof(PyUnicode_AsUnicode),GetUnmanagedDll(_PythonDll));
@@ -2738,11 +2711,10 @@ static Delegates()
27382711
internalstaticdelegate* unmanaged[Cdecl]<BorrowedReference,IntPtr>PyBytes_AsString{get;}
27392712
internalstaticdelegate* unmanaged[Cdecl]<IntPtr,IntPtr>PyBytes_FromString{get;}
27402713
internalstaticdelegate* unmanaged[Cdecl]<IntPtr,IntPtr>_PyBytes_Size{get;}
2741-
internalstaticdelegate* unmanaged[Cdecl]<IntPtr,IntPtr,IntPtr>PyUnicode_FromStringAndSize{get;}
27422714
internalstaticdelegate* unmanaged[Cdecl]<IntPtr,IntPtr>PyUnicode_AsUTF8{get;}
27432715
internalstaticdelegate* unmanaged[Cdecl]<IntPtr,IntPtr>PyUnicode_FromObject{get;}
27442716
internalstaticdelegate* unmanaged[Cdecl]<IntPtr,IntPtr,IntPtr,IntPtr>PyUnicode_FromEncodedObject{get;}
2745-
internalstaticdelegate* unmanaged[Cdecl]<int,IntPtr,IntPtr,IntPtr>PyUnicode_FromKindAndData{get;}
2717+
internalstaticdelegate* unmanaged[Cdecl]<IntPtr,nint,IntPtr,IntPtr,NewReference>PyUnicode_DecodeUTF16{get;}
27462718
internalstaticdelegate* unmanaged[Cdecl]<int>PyUnicode_GetMax{get;}
27472719
internalstaticdelegate* unmanaged[Cdecl]<IntPtr,IntPtr>_PyUnicode_GetSize{get;}
27482720
internalstaticdelegate* unmanaged[Cdecl]<IntPtr,IntPtr>PyUnicode_AsUnicode{get;}

‎src/runtime/typemanager.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -580,7 +580,7 @@ internal static IntPtr AllocateTypeObject(string name, IntPtr metatype)
580580
// Cheat a little: we'll set tp_name to the internal char * of
581581
// the Python version of the type name - otherwise we'd have to
582582
// allocate the tp_name and would have no way to free it.
583-
IntPtrtemp=Runtime.PyUnicode_FromString(name);
583+
IntPtrtemp=Runtime.PyString_FromString(name);
584584
IntPtrraw=Runtime.PyUnicode_AsUTF8(temp);
585585
Marshal.WriteIntPtr(type,TypeOffset.tp_name,raw);
586586
Marshal.WriteIntPtr(type,TypeOffset.name,temp);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp