[llvm] stop emitting direct copy from intermediate result to out reg (PR #135326)

via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 11 01:26:54 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Pankaj Dwivedi (PankajDwivedi-25)

<details>
<summary>Changes</summary>

Isel introduces direct copy from intermediate result to out reg which leads to illegal VReg to SReg copy, Introducing `readfirstlane` can avoid such cases?

---
Full diff: https://github.com/llvm/llvm-project/pull/135326.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+4-1) 
- (added) llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll (+21) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 250963b3019a0..f9384c80fa654 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3246,6 +3246,8 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   SmallVector<SDValue, 48> RetOps;
   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
 
+  SDValue ReadFirstLane =
+      DAG.getTargetConstant(Intrinsic::amdgcn_readfirstlane, DL, MVT::i32);
   // Copy the result values into the output registers.
   for (unsigned I = 0, RealRVLocIdx = 0, E = RVLocs.size(); I != E;
        ++I, ++RealRVLocIdx) {
@@ -3273,7 +3275,8 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
     default:
       llvm_unreachable("Unknown loc info!");
     }
-
+    Arg = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Arg.getValueType(),
+                      ReadFirstLane, Arg);
     Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Glue);
     Glue = Chain.getValue(1);
     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
diff --git a/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll b/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll
new file mode 100644
index 0000000000000..34b5a3f358225
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=finalize-isel | FileCheck %s -check-prefixes=GFX11
+
+define amdgpu_ps i32 @s_copysign_f32_bf16(float inreg %mag, bfloat inreg %sign.bf16) {
+  ; GFX11-LABEL: name: s_copysign_f32_bf16
+  ; GFX11: bb.0 (%ir-block.0):
+  ; GFX11-NEXT:   liveins: $sgpr0, $sgpr1
+  ; GFX11-NEXT: {{  $}}
+  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+  ; GFX11-NEXT:   [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec
+  ; GFX11-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+  ; GFX11-NEXT:   [[V_BFI_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 killed [[S_MOV_B32_]], [[COPY1]], killed [[V_LSHLREV_B32_e64_]], implicit $exec
+  ; GFX11-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[V_BFI_B32_e64_]], implicit $exec
+  ; GFX11-NEXT:   $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+  ; GFX11-NEXT:   SI_RETURN_TO_EPILOG $sgpr0
+  %sign = fpext bfloat %sign.bf16 to float
+  %op = call float @llvm.copysign.f32(float %mag, float %sign)
+  %cast = bitcast float %op to i32
+  ret i32 %cast
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/135326


More information about the llvm-commits mailing list