[llvm] [LiveVariables] Mark use as implicit-def if defined at instr (PR #119446)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 11 02:52:39 PST 2024
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/119446
>From 6b852f9d05a1997167ed3b337559b3d43479b147 Mon Sep 17 00:00:00 2001
From: jofrn <jofernau at amd.com>
Date: Tue, 10 Dec 2024 11:53:21 -0800
Subject: [PATCH 1/3] [LiveVariables] Mark use as implicit-def if def is a
subregister
LiveVariables marks instructions with their implicit subregister
uses. However, it fails to mark such a use as an implicit-def when
the instruction itself defines a subregister of it, e.g.
`$r3 = OP val, implicit-def $r0_r1_r2_r3, ..., implicit $r2_r3`,
which defines $r3 in the same instruction that uses $r2_r3.
This change ensures such uses are marked as implicit-def, i.e.
`$r3 = OP val, implicit-def $r0_r1_r2_r3, ..., implicit-def $r2_r3`.
---
llvm/lib/CodeGen/LiveVariables.cpp | 17 +++++--
.../CodeGen/AMDGPU/implicitdef-subreg.mir | 46 +++++++++++++++++++
2 files changed, 60 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/implicitdef-subreg.mir
diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp
index f17d60dc22dda9..ec6c360561bd9c 100644
--- a/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/llvm/lib/CodeGen/LiveVariables.cpp
@@ -277,11 +277,22 @@ void LiveVariables::HandlePhysRegUse(Register Reg, MachineInstr &MI) {
continue;
if (PartDefRegs.count(SubReg))
continue;
+
+ // Check if SubReg is defined at LastPartialDef.
+ bool IsDefinedHere = false;
+ for (int I = 0; I < LastPartialDef->getNumOperands(); ++I) {
+ const auto MO = LastPartialDef->getOperand(I);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ if (TRI->isSubRegister(SubReg, MO.getReg())) {
+ IsDefinedHere = true;
+ break;
+ }
+ }
// This part of Reg was defined before the last partial def. It's killed
// here.
- LastPartialDef->addOperand(MachineOperand::CreateReg(SubReg,
- false/*IsDef*/,
- true/*IsImp*/));
+ LastPartialDef->addOperand(
+ MachineOperand::CreateReg(SubReg, IsDefinedHere, true /*IsImp*/));
PhysRegDef[SubReg] = LastPartialDef;
for (MCPhysReg SS : TRI->subregs(SubReg))
Processed.insert(SS);
diff --git a/llvm/test/CodeGen/AMDGPU/implicitdef-subreg.mir b/llvm/test/CodeGen/AMDGPU/implicitdef-subreg.mir
new file mode 100644
index 00000000000000..4f5bc49dabfdae
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/implicitdef-subreg.mir
@@ -0,0 +1,46 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn --run-pass=livevars -o - %s | FileCheck %s
+---
+name: sgpr_copy
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: sgpr_copy
+ ; CHECK: %sval:sreg_32 = S_MOV_B32 0
+ ; CHECK-NEXT: $sgpr0 = COPY %sval
+ ; CHECK-NEXT: $sgpr1 = COPY %sval
+ ; CHECK-NEXT: $sgpr2 = COPY %sval
+ ; CHECK-NEXT: $sgpr3 = COPY killed %sval, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2, implicit-def $sgpr2_sgpr3
+ ; CHECK-NEXT: dead $sgpr30_sgpr31 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3
+
+ %sval:sreg_32 = S_MOV_B32 0
+
+ $sgpr0 = COPY %sval
+ $sgpr1 = COPY %sval
+ $sgpr2 = COPY %sval
+ $sgpr3 = COPY %sval
+ $sgpr30_sgpr31 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+
+...
+---
+name: vgpr_copy
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: vgpr_copy
+ ; CHECK: %vval:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = COPY %vval
+ ; CHECK-NEXT: $vgpr1 = COPY %vval
+ ; CHECK-NEXT: $vgpr2 = COPY %vval
+ ; CHECK-NEXT: $vgpr3 = COPY killed %vval, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr0_vgpr1, implicit $vgpr0_vgpr1_vgpr2, implicit-def $vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0_vgpr1_vgpr2_vgpr3
+
+ %vval:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+
+ $vgpr0 = COPY %vval
+ $vgpr1 = COPY %vval
+ $vgpr2 = COPY %vval
+ $vgpr3 = COPY %vval
+ %0:vgpr_32 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+
+...
>From 0208eb91b91c5b211d661853cd3a12b6b88e0701 Mon Sep 17 00:00:00 2001
From: jofrn <jofernau at amd.com>
Date: Wed, 11 Dec 2024 01:24:12 -0800
Subject: [PATCH 2/3] Rewrite loop to be modifiesRegister
This also causes a use to be marked as implicit-def when a register
overlapping it (e.g. one of its superregisters) is defined by the instruction.
---
llvm/lib/CodeGen/LiveVariables.cpp | 11 +----------
llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir | 2 +-
llvm/test/CodeGen/AMDGPU/implicitdef-subreg.mir | 8 ++++----
3 files changed, 6 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp
index ec6c360561bd9c..39d26580308983 100644
--- a/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/llvm/lib/CodeGen/LiveVariables.cpp
@@ -279,16 +279,7 @@ void LiveVariables::HandlePhysRegUse(Register Reg, MachineInstr &MI) {
continue;
// Check if SubReg is defined at LastPartialDef.
- bool IsDefinedHere = false;
- for (int I = 0; I < LastPartialDef->getNumOperands(); ++I) {
- const auto MO = LastPartialDef->getOperand(I);
- if (!MO.isReg() || !MO.isDef())
- continue;
- if (TRI->isSubRegister(SubReg, MO.getReg())) {
- IsDefinedHere = true;
- break;
- }
- }
+ bool IsDefinedHere = LastPartialDef->modifiesRegister(SubReg, TRI);
// This part of Reg was defined before the last partial def. It's killed
// here.
LastPartialDef->addOperand(
diff --git a/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir b/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir
index a10d7588cb4429..c2fba541bdd1e4 100644
--- a/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir
+++ b/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir
@@ -756,7 +756,7 @@ body: |
; CHECK: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 40, implicit $w1, implicit $w1_hi :: (load (s32))
- ; CHECK-NEXT: renamable $w2 = LDRWui renamable $x1, 1, implicit-def $x2, implicit $w2_hi :: (load (s32))
+ ; CHECK-NEXT: renamable $w2 = LDRWui renamable $x1, 1, implicit-def $x2, implicit-def $w2_hi :: (load (s32))
; CHECK-NEXT: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64))
; CHECK-NEXT: RET undef $lr
early-clobber renamable $x1, renamable $x0 = LDRSWpre killed renamable $x1, 40 :: (load (s32))
diff --git a/llvm/test/CodeGen/AMDGPU/implicitdef-subreg.mir b/llvm/test/CodeGen/AMDGPU/implicitdef-subreg.mir
index 4f5bc49dabfdae..dd6352586f2e57 100644
--- a/llvm/test/CodeGen/AMDGPU/implicitdef-subreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/implicitdef-subreg.mir
@@ -5,14 +5,14 @@ name: sgpr_copy
tracksRegLiveness: true
body: |
bb.0:
+
; CHECK-LABEL: name: sgpr_copy
; CHECK: %sval:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: $sgpr0 = COPY %sval
; CHECK-NEXT: $sgpr1 = COPY %sval
; CHECK-NEXT: $sgpr2 = COPY %sval
- ; CHECK-NEXT: $sgpr3 = COPY killed %sval, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2, implicit-def $sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr3 = COPY killed %sval, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $sgpr0, implicit-def $sgpr1, implicit-def $sgpr2, implicit-def $sgpr0_sgpr1, implicit-def $sgpr0_sgpr1_sgpr2, implicit-def $sgpr2_sgpr3
; CHECK-NEXT: dead $sgpr30_sgpr31 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3
-
%sval:sreg_32 = S_MOV_B32 0
$sgpr0 = COPY %sval
@@ -27,14 +27,14 @@ name: vgpr_copy
tracksRegLiveness: true
body: |
bb.0:
+
; CHECK-LABEL: name: vgpr_copy
; CHECK: %vval:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: $vgpr0 = COPY %vval
; CHECK-NEXT: $vgpr1 = COPY %vval
; CHECK-NEXT: $vgpr2 = COPY %vval
- ; CHECK-NEXT: $vgpr3 = COPY killed %vval, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr0_vgpr1, implicit $vgpr0_vgpr1_vgpr2, implicit-def $vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: $vgpr3 = COPY killed %vval, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr0_vgpr1, implicit-def $vgpr0_vgpr1_vgpr2, implicit-def $vgpr1_vgpr2_vgpr3
; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0_vgpr1_vgpr2_vgpr3
-
%vval:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
$vgpr0 = COPY %vval
>From e0cd95a70d8d348e2364fee4479f953dcb21f775 Mon Sep 17 00:00:00 2001
From: jofernau <Joe.Fernau at amd.com>
Date: Wed, 11 Dec 2024 02:52:19 -0800
Subject: [PATCH 3/3] Add ll test
---
.../test/CodeGen/AMDGPU/fncall-implicitdef.ll | 28 +++++++++++++++++++
1 file changed, 28 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/fncall-implicitdef.ll
diff --git a/llvm/test/CodeGen/AMDGPU/fncall-implicitdef.ll b/llvm/test/CodeGen/AMDGPU/fncall-implicitdef.ll
new file mode 100644
index 00000000000000..9c053219c0b316
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fncall-implicitdef.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -O1 %s -o - | FileCheck %s
+
+define amdgpu_ps <4 x float> @caller(ptr %1) {
+; CHECK-LABEL: caller:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: flat_load_dword v1, v[0:1]
+; CHECK-NEXT: s_getpc_b64 s[0:1]
+; CHECK-NEXT: s_add_u32 s0, s0, fn@gotpcrel32@lo+4
+; CHECK-NEXT: s_addc_u32 s1, s1, fn@gotpcrel32@hi+12
+; CHECK-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
+; CHECK-NEXT: s_mov_b32 s0, 0
+; CHECK-NEXT: s_mov_b32 s1, 0
+; CHECK-NEXT: s_mov_b32 s2, 0
+; CHECK-NEXT: s_mov_b64 s[8:9], 36
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_mov_b32 s3, 0
+; CHECK-NEXT: v_mov_b32_e32 v2, 0
+; CHECK-NEXT: s_mov_b32 s32, 0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; CHECK-NEXT: ; return to shader part epilog
+ %L = load i32, ptr %1, align 4
+ %R = call <4 x float> @fn(<4 x i32> zeroinitializer, i32 0, i32 %L, i32 0)
+ ret <4 x float> %R
+}
+
+declare <4 x float> @fn(<4 x i32> inreg, i32, i32, i32)
More information about the llvm-commits
mailing list