[llvm] [NVPTX] don't erase CopyToRegs when folding movs into loads (PR #149393)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 17 12:57:27 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Princeton Ferro (Prince781)
<details>
<summary>Changes</summary>
We may still need to keep a CopyToReg node even after folding its other uses into vector loads, since the register it writes may be used in other basic blocks.
Partially reverts commit 1fdbe6984976d9e85ab3b1a93e8de434a85c5646.
---
Patch is 91.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149393.diff
6 Files Affected:
- (modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp (+3-9)
- (modified) llvm/test/CodeGen/NVPTX/f16x2-instructions.ll (+139-38)
- (modified) llvm/test/CodeGen/NVPTX/f32x2-instructions.ll (+146-29)
- (modified) llvm/test/CodeGen/NVPTX/i16x2-instructions.ll (+45-12)
- (modified) llvm/test/CodeGen/NVPTX/i8x4-instructions.ll (+4)
- (modified) llvm/test/CodeGen/NVPTX/reduction-intrinsics.ll (+48)
``````````diff
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index d017c658c53a3..967a640f2842f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -5008,11 +5008,9 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return !U.getUser()->use_empty();
}
- // Handle CopyToReg nodes that will become dead after our replacement
- if (U.getUser()->getOpcode() == ISD::CopyToReg) {
- DeadCopyToRegs.push_back(U.getUser());
+ // Peek through CopyToReg nodes
+ if (U.getUser()->getOpcode() == ISD::CopyToReg)
return true;
- }
// Otherwise, this use prevents us from splitting a value.
return false;
@@ -5080,10 +5078,6 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
for (unsigned I : seq(NewLoad->getNumValues() - NewNumOutputs))
Results.push_back(NewLoad.getValue(NewNumOutputs + I));
- // Remove dead CopyToReg nodes by folding them into the chain they reference
- for (SDNode *CTR : DeadCopyToRegs)
- DCI.CombineTo(CTR, CTR->getOperand(0));
-
return DCI.DAG.getMergeValues(Results, DL);
}
@@ -6420,4 +6414,4 @@ void NVPTXTargetLowering::computeKnownBitsForTargetNode(
default:
break;
}
-}
\ No newline at end of file
+}
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index d0e2c1817f696..66166756aecf9 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -50,6 +50,7 @@ define half @test_extract_0(<2 x half> %a) #0 {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_extract_0_param_0];
+; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
; CHECK-NEXT: ret;
%e = extractelement <2 x half> %a, i32 0
@@ -64,6 +65,7 @@ define half @test_extract_1(<2 x half> %a) #0 {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_extract_1_param_0];
+; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NEXT: st.param.b16 [func_retval0], %rs2;
; CHECK-NEXT: ret;
%e = extractelement <2 x half> %a, i32 1
@@ -79,8 +81,9 @@ define half @test_extract_i(<2 x half> %a, i64 %idx) #0 {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b64 %rd1, [test_extract_i_param_1];
; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_extract_i_param_0];
+; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
+; CHECK-NEXT: ld.param.b64 %rd1, [test_extract_i_param_1];
; CHECK-NEXT: setp.eq.b64 %p1, %rd1, 0;
; CHECK-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
; CHECK-NEXT: st.param.b16 [func_retval0], %rs3;
@@ -108,7 +111,9 @@ define <2 x half> @test_fadd(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fadd_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fadd_param_1];
+; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs3, %rs4};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2;
; CHECK-NOF16-NEXT: add.rn.f32 %r5, %r4, %r3;
@@ -144,6 +149,7 @@ define <2 x half> @test_fadd_imm_0(<2 x half> %a) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fadd_imm_0_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs2;
; CHECK-NOF16-NEXT: add.rn.f32 %r3, %r2, 0f40000000;
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %r3;
@@ -176,6 +182,7 @@ define <2 x half> @test_fadd_imm_1(<2 x half> %a) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fadd_imm_1_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs2;
; CHECK-NOF16-NEXT: add.rn.f32 %r3, %r2, 0f40000000;
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %r3;
@@ -208,7 +215,9 @@ define <2 x half> @test_fsub(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fsub_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fsub_param_1];
+; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs3, %rs4};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2;
; CHECK-NOF16-NEXT: sub.rn.f32 %r5, %r4, %r3;
@@ -243,6 +252,7 @@ define <2 x half> @test_fneg(<2 x half> %a) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fneg_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs2;
; CHECK-NOF16-NEXT: mov.b32 %r3, 0f00000000;
; CHECK-NOF16-NEXT: sub.rn.f32 %r4, %r3, %r2;
@@ -276,7 +286,9 @@ define <2 x half> @test_fmul(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fmul_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fmul_param_1];
+; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs3, %rs4};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2;
; CHECK-NOF16-NEXT: mul.rn.f32 %r5, %r4, %r3;
@@ -300,7 +312,9 @@ define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fdiv_param_0];
+; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fdiv_param_1];
+; CHECK-NEXT: mov.b32 %r2, {%rs3, %rs4};
; CHECK-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NEXT: cvt.f32.f16 %r4, %rs2;
; CHECK-NEXT: div.rn.f32 %r5, %r4, %r3;
@@ -332,7 +346,9 @@ define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_frem_param_0];
+; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_frem_param_1];
+; CHECK-NEXT: mov.b32 %r2, {%rs3, %rs4};
; CHECK-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NEXT: cvt.f32.f16 %r4, %rs2;
; CHECK-NEXT: div.rn.f32 %r5, %r4, %r3;
@@ -533,11 +549,13 @@ define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, <
; CHECK-F16-NEXT: .reg .b32 %r<5>;
; CHECK-F16-EMPTY:
; CHECK-F16-NEXT: // %bb.0:
+; CHECK-F16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_param_0];
+; CHECK-F16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-F16-NEXT: ld.param.b32 %r4, [test_select_cc_param_3];
; CHECK-F16-NEXT: ld.param.b32 %r3, [test_select_cc_param_2];
-; CHECK-F16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_param_0];
-; CHECK-F16-NEXT: setp.neu.f16x2 %p1|%p2, %r3, %r4;
; CHECK-F16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_param_1];
+; CHECK-F16-NEXT: mov.b32 %r2, {%rs3, %rs4};
+; CHECK-F16-NEXT: setp.neu.f16x2 %p1|%p2, %r3, %r4;
; CHECK-F16-NEXT: selp.b16 %rs5, %rs2, %rs4, %p2;
; CHECK-F16-NEXT: selp.b16 %rs6, %rs1, %rs3, %p1;
; CHECK-F16-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5};
@@ -551,15 +569,19 @@ define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, <
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_param_3];
-; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3;
+; CHECK-NOF16-NEXT: mov.b32 %r4, {%rs3, %rs4};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs5, %rs6}, [test_select_cc_param_2];
+; CHECK-NOF16-NEXT: mov.b32 %r3, {%rs5, %rs6};
+; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs7, %rs8}, [test_select_cc_param_1];
+; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs7, %rs8};
+; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs5;
; CHECK-NOF16-NEXT: setp.neu.f32 %p1, %r6, %r5;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs6;
; CHECK-NOF16-NEXT: setp.neu.f32 %p2, %r8, %r7;
-; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs7, %rs8}, [test_select_cc_param_1];
; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs2, %rs8, %p2;
; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs1, %rs7, %p1;
; CHECK-NOF16-NEXT: st.param.v2.b16 [func_retval0], {%rs10, %rs9};
@@ -577,11 +599,13 @@ define <2 x float> @test_select_cc_f32_f16(<2 x float> %a, <2 x float> %b,
; CHECK-F16-NEXT: .reg .b64 %rd<3>;
; CHECK-F16-EMPTY:
; CHECK-F16-NEXT: // %bb.0:
+; CHECK-F16-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_f32_f16_param_0];
+; CHECK-F16-NEXT: mov.b64 %rd1, {%r3, %r4};
; CHECK-F16-NEXT: ld.param.b32 %r2, [test_select_cc_f32_f16_param_3];
; CHECK-F16-NEXT: ld.param.b32 %r1, [test_select_cc_f32_f16_param_2];
-; CHECK-F16-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_f32_f16_param_0];
-; CHECK-F16-NEXT: setp.neu.f16x2 %p1|%p2, %r1, %r2;
; CHECK-F16-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_select_cc_f32_f16_param_1];
+; CHECK-F16-NEXT: mov.b64 %rd2, {%r5, %r6};
+; CHECK-F16-NEXT: setp.neu.f16x2 %p1|%p2, %r1, %r2;
; CHECK-F16-NEXT: selp.f32 %r7, %r4, %r6, %p2;
; CHECK-F16-NEXT: selp.f32 %r8, %r3, %r5, %p1;
; CHECK-F16-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7};
@@ -596,17 +620,21 @@ define <2 x float> @test_select_cc_f32_f16(<2 x float> %a, <2 x float> %b,
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_f32_f16_param_0];
+; CHECK-NOF16-NEXT: mov.b64 %rd1, {%r3, %r4};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_f32_f16_param_3];
-; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1;
+; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs1, %rs2};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_f32_f16_param_2];
-; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3;
-; CHECK-NOF16-NEXT: setp.neu.f32 %p1, %r6, %r5;
-; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs2;
-; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs4;
-; CHECK-NOF16-NEXT: setp.neu.f32 %p2, %r8, %r7;
-; CHECK-NOF16-NEXT: ld.param.v2.b32 {%r9, %r10}, [test_select_cc_f32_f16_param_1];
-; CHECK-NOF16-NEXT: selp.f32 %r11, %r4, %r10, %p2;
-; CHECK-NOF16-NEXT: selp.f32 %r12, %r3, %r9, %p1;
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs3, %rs4};
+; CHECK-NOF16-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_select_cc_f32_f16_param_1];
+; CHECK-NOF16-NEXT: mov.b64 %rd2, {%r5, %r6};
+; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs1;
+; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs3;
+; CHECK-NOF16-NEXT: setp.neu.f32 %p1, %r8, %r7;
+; CHECK-NOF16-NEXT: cvt.f32.f16 %r9, %rs2;
+; CHECK-NOF16-NEXT: cvt.f32.f16 %r10, %rs4;
+; CHECK-NOF16-NEXT: setp.neu.f32 %p2, %r10, %r9;
+; CHECK-NOF16-NEXT: selp.f32 %r11, %r4, %r6, %p2;
+; CHECK-NOF16-NEXT: selp.f32 %r12, %r3, %r5, %p1;
; CHECK-NOF16-NEXT: st.param.v2.b32 [func_retval0], {%r12, %r11};
; CHECK-NOF16-NEXT: ret;
<2 x half> %c, <2 x half> %d) #0 {
@@ -625,11 +653,15 @@ define <2 x half> @test_select_cc_f16_f32(<2 x half> %a, <2 x half> %b,
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_f16_f32_param_0];
-; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_f16_f32_param_2];
-; CHECK-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_select_cc_f16_f32_param_3];
-; CHECK-NEXT: setp.neu.f32 %p1, %r3, %r5;
-; CHECK-NEXT: setp.neu.f32 %p2, %r4, %r6;
+; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
+; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_f16_f32_param_3];
+; CHECK-NEXT: mov.b64 %rd2, {%r3, %r4};
+; CHECK-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_select_cc_f16_f32_param_2];
+; CHECK-NEXT: mov.b64 %rd1, {%r5, %r6};
; CHECK-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_f16_f32_param_1];
+; CHECK-NEXT: mov.b32 %r2, {%rs3, %rs4};
+; CHECK-NEXT: setp.neu.f32 %p1, %r5, %r3;
+; CHECK-NEXT: setp.neu.f32 %p2, %r6, %r4;
; CHECK-NEXT: selp.b16 %rs5, %rs2, %rs4, %p2;
; CHECK-NEXT: selp.b16 %rs6, %rs1, %rs3, %p1;
; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5};
@@ -665,7 +697,9 @@ define <2 x i1> @test_fcmp_une(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_une_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_une_param_1];
+; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs3, %rs4};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2;
; CHECK-NOF16-NEXT: setp.neu.f32 %p1, %r4, %r3;
@@ -706,7 +740,9 @@ define <2 x i1> @test_fcmp_ueq(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ueq_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ueq_param_1];
+; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs3, %rs4};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2;
; CHECK-NOF16-NEXT: setp.equ.f32 %p1, %r4, %r3;
@@ -747,7 +783,9 @@ define <2 x i1> @test_fcmp_ugt(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ugt_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ugt_param_1];
+; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs3, %rs4};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2;
; CHECK-NOF16-NEXT: setp.gtu.f32 %p1, %r4, %r3;
@@ -788,7 +826,9 @@ define <2 x i1> @test_fcmp_uge(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_uge_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_uge_param_1];
+; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs3, %rs4};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2;
; CHECK-NOF16-NEXT: setp.geu.f32 %p1, %r4, %r3;
@@ -829,7 +869,9 @@ define <2 x i1> @test_fcmp_ult(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ult_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ult_param_1];
+; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs3, %rs4};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2;
; CHECK-NOF16-NEXT: setp.ltu.f32 %p1, %r4, %r3;
@@ -870,7 +912,9 @@ define <2 x i1> @test_fcmp_ule(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ule_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ule_param_1];
+; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs3, %rs4};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2;
; CHECK-NOF16-NEXT: setp.leu.f32 %p1, %r4, %r3;
@@ -912,7 +956,9 @@ define <2 x i1> @test_fcmp_uno(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_uno_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_uno_param_1];
+; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs3, %rs4};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2;
; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r4, %r3;
@@ -953,7 +999,9 @@ define <2 x i1> @test_fcmp_one(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_one_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_one_param_1];
+; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs3, %rs4};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2;
; CHECK-NOF16-NEXT: setp.ne.f32 %p1, %r4, %r3;
@@ -994,7 +1042,9 @@ define <2 x i1> @test_fcmp_oeq(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_oeq_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_oeq_param_1];
+; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs3, %rs4};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2;
; CHECK-NOF16-NEXT: setp.eq.f32 %p1, %r4, %r3;
@@ -1035,7 +1085,9 @@ define <2 x i1> @test_fcmp_ogt(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ogt_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ogt_param_1];
+; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs3, %rs4};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2;
; CHECK-NOF16-NEXT: setp.gt.f32 %p1, %r4, %r3;
@@ -1076,7 +1128,9 @@ define <2 x i1> @test_fcmp_oge(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_oge_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_oge_param_1];
+; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs3, %rs4};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2;
; CHECK-NOF16-NEXT: setp.ge.f32 %p1, %r4, %r3;
@@ -1117,7 +1171,9 @@ define <2 x i1> @test_fcmp_olt(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_olt_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_olt_param_1];
+; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs3, %rs4};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2;
; CHECK-NOF16-NEXT: setp.lt.f32 %p1, %r4, %r3;
@@ -1158,7 +1214,9 @@ define <2 x i1> @test_fcmp_ole(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ole_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ole_param_1];
+; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs3, %rs4};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2;
; CHECK-NOF16-NEXT: setp.le.f32 %p1, %r4, %r3;
@@ -1199,7 +1257,9 @@ define <2 x i1> @test_fcmp_ord(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NOF16-EMPTY:
; CHECK-NOF16-NEXT: // %bb.0:
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ord_param_0];
+; CHECK-NOF16-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ord_param_1];
+; CHECK-NOF16-NEXT: mov.b32 %r2, {%rs3, %rs4};
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2;
; CHECK-NOF16-NEXT: setp.num.f32 %p1, %r4, %r3;
@@ -1223,6 +1283,7 @@ define <2 x i...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/149393
More information about the llvm-commits mailing list