Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 965b68e

Browse files
authored
[NVPTX] Prevent fptrunc of v2f32 from being folded into store (#149571)
1 parent b02787d commit 965b68e

File tree

4 files changed

+48
-5
lines changed

4 files changed

+48
-5
lines changed

‎llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -731,6 +731,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
731731
setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
732732
setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
733733
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
734+
setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand);
735+
setTruncStoreAction(MVT::v2f32, MVT::v2bf16, Expand);
734736

735737
// PTX does not support load / store predicate registers
736738
setOperationAction(ISD::LOAD, MVT::i1, Custom);

‎llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -359,11 +359,12 @@ define <2 x bfloat> @test_select_cc_bf16_f32(<2 x bfloat> %a, <2 x bfloat> %b,
359359
define <2 x bfloat> @test_fptrunc_2xfloat(<2 x float> %a) #0 {
360360
; CHECK-LABEL: test_fptrunc_2xfloat(
361361
; CHECK: {
362-
; CHECK-NEXT: .reg .b64 %rd<2>;
362+
; CHECK-NEXT: .reg .b32 %r<4>;
363363
; CHECK-EMPTY:
364364
; CHECK-NEXT: // %bb.0:
365-
; CHECK-NEXT: ld.param.b64 %rd1, [test_fptrunc_2xfloat_param_0];
366-
; CHECK-NEXT: st.param.b32 [func_retval0], %rd1;
365+
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptrunc_2xfloat_param_0];
366+
; CHECK-NEXT: cvt.rn.bf16x2.f32 %r3, %r2, %r1;
367+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
367368
; CHECK-NEXT: ret;
368369
%r = fptrunc <2 x float> %a to <2 x bfloat>
369370
ret <2 x bfloat> %r

‎llvm/test/CodeGen/NVPTX/f16x2-instructions.ll

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1499,11 +1499,16 @@ define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 {
14991499
define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 {
15001500
; CHECK-LABEL: test_fptrunc_2xfloat(
15011501
; CHECK: {
1502+
; CHECK-NEXT: .reg .b16 %rs<3>;
1503+
; CHECK-NEXT: .reg .b32 %r<4>;
15021504
; CHECK-NEXT: .reg .b64 %rd<2>;
15031505
; CHECK-EMPTY:
15041506
; CHECK-NEXT: // %bb.0:
1505-
; CHECK-NEXT: ld.param.b64 %rd1, [test_fptrunc_2xfloat_param_0];
1506-
; CHECK-NEXT: st.param.b32 [func_retval0], %rd1;
1507+
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptrunc_2xfloat_param_0];
1508+
; CHECK-NEXT: cvt.rn.f16.f32 %rs1, %r2;
1509+
; CHECK-NEXT: cvt.rn.f16.f32 %rs2, %r1;
1510+
; CHECK-NEXT: mov.b32 %r3, {%rs2, %rs1};
1511+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
15071512
; CHECK-NEXT: ret;
15081513
%r = fptrunc <2 x float> %a to <2 x half>
15091514
ret <2 x half> %r

‎llvm/test/CodeGen/NVPTX/f32x2-instructions.ll

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2108,6 +2108,41 @@ define <2 x float> @test_uitofp_2xi32_to_2xfloat(<2 x i32> %a) #0 {
21082108
ret <2 x float> %r
21092109
}
21102110

2111+
define void @test_trunc_to_v2bf16(<2 x float> %a, ptr %p) {
2112+
; CHECK-LABEL: test_trunc_to_v2bf16(
2113+
; CHECK: {
2114+
; CHECK-NEXT: .reg .b32 %r<4>;
2115+
; CHECK-NEXT: .reg .b64 %rd<3>;
2116+
; CHECK-EMPTY:
2117+
; CHECK-NEXT: // %bb.0:
2118+
; CHECK-NEXT: ld.param.b64 %rd2, [test_trunc_to_v2bf16_param_1];
2119+
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_to_v2bf16_param_0];
2120+
; CHECK-NEXT: cvt.rn.bf16x2.f32 %r3, %r2, %r1;
2121+
; CHECK-NEXT: st.b32 [%rd2], %r3;
2122+
; CHECK-NEXT: ret;
2123+
%trunc = fptrunc <2 x float> %a to <2 x bfloat>
2124+
store <2 x bfloat> %trunc, ptr %p
2125+
ret void
2126+
}
2127+
2128+
define void @test_trunc_to_v2f16(<2 x float> %a, ptr %p) {
2129+
; CHECK-LABEL: test_trunc_to_v2f16(
2130+
; CHECK: {
2131+
; CHECK-NEXT: .reg .b32 %r<4>;
2132+
; CHECK-NEXT: .reg .b64 %rd<3>;
2133+
; CHECK-EMPTY:
2134+
; CHECK-NEXT: // %bb.0:
2135+
; CHECK-NEXT: ld.param.b64 %rd2, [test_trunc_to_v2f16_param_1];
2136+
; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_to_v2f16_param_0];
2137+
; CHECK-NEXT: cvt.rn.f16x2.f32 %r3, %r2, %r1;
2138+
; CHECK-NEXT: st.b32 [%rd2], %r3;
2139+
; CHECK-NEXT: ret;
2140+
%trunc = fptrunc <2 x float> %a to <2 x half>
2141+
store <2 x half> %trunc, ptr %p
2142+
ret void
2143+
}
2144+
2145+
21112146
attributes #0 = {nounwind }
21122147
attributes #1 = {"unsafe-fp-math" ="true" }
21132148
attributes #2 = {"denormal-fp-math"="preserve-sign" }

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp