[llvm] stop emitting direct copy from intermediate result to out reg (PR #135326)
Pankaj Dwivedi via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 11 01:26:20 PDT 2025
https://github.com/PankajDwivedi-25 created https://github.com/llvm/llvm-project/pull/135326
ISel introduces a direct copy from the intermediate result to the out reg, which leads to an illegal VReg-to-SReg copy. Could introducing `readfirstlane` avoid such cases?
From e604cd6d71e8e2996ce704aeaaba2c0915dc7623 Mon Sep 17 00:00:00 2001
From: Pankaj kumar divedi <Pankajkumar.divedi at amd.com>
Date: Fri, 11 Apr 2025 13:48:46 +0530
Subject: [PATCH] while lowering return introduce readfirstlane to copy the
intermediate result to out reg
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 5 ++++-
llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll | 21 ++++++++++++++++++++
2 files changed, 25 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 250963b3019a0..f9384c80fa654 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3246,6 +3246,8 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SmallVector<SDValue, 48> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+ SDValue ReadFirstLane =
+ DAG.getTargetConstant(Intrinsic::amdgcn_readfirstlane, DL, MVT::i32);
// Copy the result values into the output registers.
for (unsigned I = 0, RealRVLocIdx = 0, E = RVLocs.size(); I != E;
++I, ++RealRVLocIdx) {
@@ -3273,7 +3275,8 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
default:
llvm_unreachable("Unknown loc info!");
}
-
+ Arg = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Arg.getValueType(),
+ ReadFirstLane, Arg);
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Glue);
Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
diff --git a/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll b/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll
new file mode 100644
index 0000000000000..34b5a3f358225
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fix-illegal-copy.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=finalize-isel | FileCheck %s -check-prefixes=GFX11
+
+define amdgpu_ps i32 @s_copysign_f32_bf16(float inreg %mag, bfloat inreg %sign.bf16) {
+ ; GFX11-LABEL: name: s_copysign_f32_bf16
+ ; GFX11: bb.0 (%ir-block.0):
+ ; GFX11-NEXT: liveins: $sgpr0, $sgpr1
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX11-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec
+ ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
+ ; GFX11-NEXT: [[V_BFI_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 killed [[S_MOV_B32_]], [[COPY1]], killed [[V_LSHLREV_B32_e64_]], implicit $exec
+ ; GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[V_BFI_B32_e64_]], implicit $exec
+ ; GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX11-NEXT: SI_RETURN_TO_EPILOG $sgpr0
+ %sign = fpext bfloat %sign.bf16 to float
+ %op = call float @llvm.copysign.f32(float %mag, float %sign)
+ %cast = bitcast float %op to i32
+ ret i32 %cast
+}
More information about the llvm-commits
mailing list