- Notifications
You must be signed in to change notification settings - Fork14.1k
Description
For some reason, the__addkf3 symbol linked by rustc says that0x00000000000000000000000000000000 + 0x00000000000000000000000000000001 = 0x00000000000000000000000000000000, when the answer should be0x00000000000000000000000000000001. Other symbols likely do the wrong thing here too.
Example:
$ powerpc64-linux-gnu-gcc add_test.c -o add_test.c.ppc64$ rustc add_test.rs -o add_test.rs.ppc64 --target powerpc64-unknown-linux-gnu -Clinker=powerpc64-linux-gnu-gcc$ qemu-ppc64 -L /usr/powerpc64-linux-gnu/ add_test.c.ppc64$ qemu-ppc64 -L /usr/powerpc64-linux-gnu/ add_test.rs.ppc64$ qemu-ppc64 -L /usr/powerpc64-linux-gnu/ add_test.c.ppc640000000000000000000000000000000000 0.0000000000000000000000000000000000000001 0.0000000000000000000000000000000000000001 0.000000$ qemu-ppc64 -L /usr/powerpc64-linux-gnu/ add_test.rs.ppc64[add_test.rs:12:5] a = 0x00000000000000000000000000000000[add_test.rs:12:5] b = 0x00000000000000000000000000000001[add_test.rs:14:5] c = 0x00000000000000000000000000000000
Our Rust code emits__addkf3 which calls the hardware f128 addition routinexsaddup. The part I cannot explain is that the C version also emits__addkf3 whichalso callsxsaddup, but somehow produces a correct result. I checked with GDB to make sure they are both hittingxsaddup, so can't really explain the discrepancy.
Original notes atrust-lang/compiler-builtins#606 (comment), discussion on Zulip athttps://rust-lang.zulipchat.com/#narrow/stream/122651-general/topic/f128.20system.20libraries.20noncompliant.20platforms/near/438486364.
Full code copied fromrust-lang/compiler-builtins#606 (comment):
Rust code
#![feature(f128)]#[no_mangle]#[inline(never)]fnadd_entry(a:f128,b:f128) ->f128{ a + b}fnmain(){let a = f128::from_bits(0x0);let b = f128::from_bits(0x1);dbg!(a, b);let c =add_entry(a, b);dbg!(c);}
C code
#include<stdio.h>#include<stdlib.h>#include<inttypes.h>#define_Float128 __float128typedefstruct {#if__BYTE_ORDER==__LITTLE_ENDIANuint64_tlower,upper;#elif__BYTE_ORDER==__BIG_ENDIANuint64_tupper,lower;#else#error missing endian check#endif} __attribute__((aligned(_Alignof(_Float128))))u128;_Float128__addkf3(_Float128,_Float128);voidf128_print(_Float128val) {u128ival=*((u128*)(&val));printf("%#018"PRIx64"%016"PRIx64" %lf\n",ival.upper,ival.lower, (double)val);}_Float128new_f128(uint64_tupper,uint64_tlower) {u128val= { .lower=lower, .upper=upper };return*((_Float128*)(&val));}_Float128add_entry(_Float128a,_Float128b) {#ifdefUSE_ADDKF3return__addkf3(a,b);#elsereturna+b;#endif}intmain() {_Float128a=new_f128(0x0000000000000000,0x0000000000000000);_Float128b=new_f128(0x0000000000000000,0x0000000000000001);f128_print(a);f128_print(b);_Float128c=add_entry(a,b);f128_print(c);return0;}
Assembly generated from Rust (incorrect result)
0000000000010d00 <.add_entry>: 10d00: 7c0802 a6 mflr r0 10d04: f821 ff91 stdu r1,-112(r1) 10d08: f8010080std r0,128(r1) 10d0c: 4b ff c695bl d3a0 <00000143.plt_call.__addkf3> 10d10: e8410028 ld r2,40(r1) 10d14:38210070 addi r1,r1,112 10d18: e8010010 ld r0,16(r1) 10d1c: 7c0803 a6 mtlr r0 10d20: 4e800020 blr000000000000d3a0 <00000143.plt_call.__addkf3>: d3a0: f8410028std r2,40(r1) d3a4: 3d62 ff ff addisr11,r2,-1 d3a8: e9 8b 7f58 ldr12,32600(r11) d3ac: 7d8903 a6 mtctrr12 d3b0: e8 4b 7f60 ld r2,32608(r11) d3b4: 4e800420 bctr0000000000056790 <.__addkf3_resolve>:56790:81 2d 8f 9c lwzr9,-28772(r13)56794:75290040 andis.r9,r9,6456798:41820018 beq 567b0 <.__addkf3_resolve+0x20> 5679c: e8628010 ld r3,-32752(r2) 567a0: 4e800020 blr 567a4:60000000nop 567a8:60000000nop 567ac:60420000 ori r2,r2,0 567b0: e8628018 ld r3,-32744(r2) 567b4: 4e800020 blr ... 567c4:60000000nop 567c8:60000000nop 567cc:60420000 ori r2,r2,0000000000005e6e0 <.__addkf3_hw>: 5e6e0: fc421808 xsaddqp v2,v2,v3 5e6e4: 4e800020 blr ... 5e6f4:60000000nop 5e6f8:60000000nop 5e6fc:60000000nop0000000000057050 <.__addkf3_sw>:57050: fb41 ff d0std r26,-48(r1)57054: fb61 ff d8std r27,-40(r1)57058: fb81 ff e0std r28,-32(r1) 5705c: fb a1 ff e8std r29,-24(r1)57060: fb c1 ff f0std r30,-16(r1)57064: fb e1 ff f8std r31,-8(r1)57068: f821 ff41 stdu r1,-192(r1) 5706c:39210070 addir9,r1,11257070:39410070 addir10,r1,112 // ... full sw implementation
Assembly generated from C (correct result)
0000000010000af8 <.add_entry>: 10000af8: 7c0802 a6 mflr r0 10000afc: f8010010std r0,16(r1) 10000b00: fb e1 ff f8std r31,-8(r1) 10000b04: f821 ff81 stdu r1,-128(r1) 10000b08: 7c 3f0b78 mr r31,r1 10000b0c:39200030 lir9,48 10000b10:39 5f0080 addir10,r31,128 10000b14: 7c 4a 4f99 stxvd2x vs34,r10,r9 10000b18:39200040 lir9,64 10000b1c:39 5f0080 addir10,r31,128 10000b20: 7c 6a 4f99 stxvd2x vs35,r10,r9 10000b24:39200040 lir9,64 10000b28:39 5f0080 addir10,r31,128 10000b2c: 7c 6a 4e99 lxvd2x vs35,r10,r9 10000b30:39200030 lir9,48 10000b34:39 5f0080 addir10,r31,128 10000b38: 7c 4a 4e99 lxvd2x vs34,r10,r9 10000b3c: 4b ff fb a5bl 100006e0 <00000019.plt_call.__addkf3> 10000b40: e8410028 ld r2,40(r1) 10000b44: f0021496 xxmr vs0,vs34 10000b48: f0400491 xxmr vs34,vs0 10000b4c:38 3f0080 addi r1,r31,128 10000b50: e8010010 ld r0,16(r1) 10000b54: 7c0803 a6 mtlr r0 10000b58: eb e1 ff f8 ld r31,-8(r1) 10000b5c: 4e800020 blr 10000b60:00000000 .long0x0 10000b64:00000001 .long0x1 10000b68:80010001 lwz r0,1(r1)0000000010000c40 <.__addkf3_resolve>: 10000c40:81 2d 8f 9c lwzr9,-28772(r13) 10000c44:75290040 andis.r9,r9,64 10000c48:41820018 beq 10000c60 <.__addkf3_resolve+0x20> 10000c4c: e8628010 ld r3,-32752(r2) 10000c50: 4e800020 blr 10000c54:60000000nop 10000c58:60000000nop 10000c5c:60420000 ori r2,r2,0 10000c60: e8628018 ld r3,-32744(r2) 10000c64: 4e800020 blr ... 10000c74:60000000nop 10000c78:60000000nop 10000c7c:60420000 ori r2,r2,00000000010008b90 <.__addkf3_hw>: 10008b90: fc421808 xsaddqp v2,v2,v3 10008b94: 4e800020 blr ... 10008ba4:60000000nop 10008ba8:60000000nop 10008bac:60000000nop0000000010001500 <.__addkf3_sw>:10001500: fb41 ff d0std r26,-48(r1)10001504: fb61 ff d8std r27,-40(r1)10001508: fb81 ff e0std r28,-32(r1) 1000150c: fb a1 ff e8std r29,-24(r1)10001510: fb c1 ff f0std r30,-16(r1)10001514: fb e1 ff f8std r31,-8(r1)10001518: f821 ff41 stdu r1,-192(r1) 1000151c:39210070 addir9,r1,11210001520:39410070 addir10,r1,112 // ...