[llvm] [AMDGPU] Fix moveToVALU for copy to phys SGPR (PR #76715)
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 2 05:37:07 PST 2024
https://github.com/Pierre-vh created https://github.com/llvm/llvm-project/pull/76715
Fixes #76031
>From 196ddbbe09c2294c8dffd1ab6f82781c5893770f Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 2 Jan 2024 14:36:20 +0100
Subject: [PATCH] [AMDGPU] Fix moveToVALU for copy to phys SGPR
Fixes #76031
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 11 +++++
llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll | 25 ++++++-----
.../si-fix-sgpr-copies-copy-to-sgpr.mir | 44 +++++++++++++++++++
3 files changed, 68 insertions(+), 12 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-copy-to-sgpr.mir
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 2fb3957a1ca9dc..b08c579895aaae 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7198,6 +7198,17 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
Register DstReg = Inst.getOperand(0).getReg();
const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
+ // If it's a copy of a VGPR to a physical SGPR, insert a V_READFIRSTLANE_B32
+ // and hope for the best: this is only correct if the value is uniform.
+ if (Inst.isCopy() && DstReg.isPhysical() &&
+ RI.isVGPR(MRI, Inst.getOperand(1).getReg())) {
+ BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+ get(AMDGPU::V_READFIRSTLANE_B32), Inst.getOperand(0).getReg())
+ .add(Inst.getOperand(1));
+ Inst.eraseFromParent();
+ return;
+ }
+
if (Inst.isCopy() && Inst.getOperand(1).getReg().isVirtual() &&
NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
// Instead of creating a copy where src and dst are the same register
diff --git a/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll b/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll
index ac196635b363a4..5c1a7093720428 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll
@@ -4,19 +4,20 @@
define amdgpu_cs <2 x i32> @f() {
; CHECK-LABEL: f:
; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_mov_b32 s0, 0
-; CHECK-NEXT: s_mov_b32 s1, s0
-; CHECK-NEXT: s_mov_b32 s2, s0
-; CHECK-NEXT: s_mov_b32 s3, s0
-; CHECK-NEXT: s_mov_b32 s4, s0
-; CHECK-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; CHECK-NEXT: s_mov_b32 s5, s0
-; CHECK-NEXT: v_mov_b32_e32 v2, s0
+; CHECK-NEXT: s_mov_b32 s4, 0
+; CHECK-NEXT: s_mov_b32 s5, s4
+; CHECK-NEXT: s_mov_b32 s6, s4
+; CHECK-NEXT: s_mov_b32 s7, s4
+; CHECK-NEXT: s_mov_b32 s0, s4
+; CHECK-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
+; CHECK-NEXT: s_mov_b32 s1, s4
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[4:5], v[0:1]
-; CHECK-NEXT: v_mov_b32_e32 v0, 0
-; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; CHECK-NEXT: buffer_store_dwordx2 v[1:2], off, s[0:3], 0
+; CHECK-NEXT: v_cmp_ne_u64_e32 vcc_lo, s[0:1], v[0:1]
+; CHECK-NEXT: v_mov_b32_e32 v1, s4
+; CHECK-NEXT: s_mov_b32 s1, 0
+; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; CHECK-NEXT: v_readfirstlane_b32 s0, v0
+; CHECK-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CHECK-NEXT: ; return to shader part epilog
bb:
%i = call <2 x i32> @llvm.amdgcn.raw.buffer.load.v2i32(<4 x i32> zeroinitializer, i32 0, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-copy-to-sgpr.mir b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-copy-to-sgpr.mir
new file mode 100644
index 00000000000000..4292e76f370962
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-copy-to-sgpr.mir
@@ -0,0 +1,44 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -march=amdgcn -mcpu=tonga -run-pass=si-fix-sgpr-copies --verify-machineinstrs -o - %s | FileCheck %s
+
+# Regression test for #76031: the copy to $sgpr0 used to be disconnected and
+# became an IMPLICIT_DEF, and the inserted V_AND_B32 defined its vreg after use.
+
+---
+name: si_fix_sgpr_copies_breaks_function
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; CHECK-LABEL: name: si_fix_sgpr_copies_breaks_function
+ ; CHECK: liveins: $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+ ; CHECK-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], killed [[S_MOV_B32_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY killed [[S_LSHR_B32_]]
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -32768
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_1]]
+ ; CHECK-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 killed [[COPY1]], [[COPY2]], implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed [[S_MOV_B32_2]], [[V_XOR_B32_e64_]], implicit $exec
+ ; CHECK-NEXT: $sgpr0 = V_READFIRSTLANE_B32 [[V_AND_B32_e64_]], implicit $exec
+ ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0
+ %0:sgpr_32 = COPY $sgpr0
+ %2:sreg_32 = S_MOV_B32 16
+ %3:sreg_32 = S_LSHR_B32 %0, killed %2, implicit-def dead $scc
+ %4:sreg_32 = COPY killed %3
+ %5:sreg_32 = S_MOV_B32 -32768
+ %7:vgpr_32 = COPY killed %5
+ %6:vgpr_32 = V_XOR_B32_e64 killed %4, %7, implicit $exec
+ %8:sreg_32 = S_MOV_B32 65535
+ %10:sreg_32 = COPY %6
+ %9:sreg_32 = S_AND_B32 killed %8, killed %10, implicit-def dead $scc
+ $sgpr0 = COPY %9
+ SI_RETURN_TO_EPILOG $sgpr0
+
+...
More information about the llvm-commits
mailing list