[llvm] [LiveVariables] Mark use as implicit-def if defined at instr (PR #119446)

via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 11 02:52:39 PST 2024


https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/119446

>From 6b852f9d05a1997167ed3b337559b3d43479b147 Mon Sep 17 00:00:00 2001
From: jofrn <jofernau at amd.com>
Date: Tue, 10 Dec 2024 11:53:21 -0800
Subject: [PATCH 1/3] [LiveVariables] Mark use as implicit-def if def is a
 subregister

LiveVariables marks instructions with their implicit subregister
uses. However, it fails to mark such a subregister as an implicit-def
when the instruction itself defines part of that subregister, e.g.
`$r3 = OP val, implicit-def $r0_r1_r2_r3, ..., implicit $r2_r3`,
where $r2_r3 is marked as used by the same instruction that defines $r3.

This change ensures such uses are marked as implicit-def instead, e.g.
`$r3 = OP val, implicit-def $r0_r1_r2_r3, ..., implicit-def $r2_r3`.
---
 llvm/lib/CodeGen/LiveVariables.cpp            | 17 +++++--
 .../CodeGen/AMDGPU/implicitdef-subreg.mir     | 46 +++++++++++++++++++
 2 files changed, 60 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/implicitdef-subreg.mir

diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp
index f17d60dc22dda9..ec6c360561bd9c 100644
--- a/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/llvm/lib/CodeGen/LiveVariables.cpp
@@ -277,11 +277,22 @@ void LiveVariables::HandlePhysRegUse(Register Reg, MachineInstr &MI) {
           continue;
         if (PartDefRegs.count(SubReg))
           continue;
+
+        // Check if SubReg is defined at LastPartialDef.
+        bool IsDefinedHere = false;
+        for (int I = 0; I < LastPartialDef->getNumOperands(); ++I) {
+          const auto MO = LastPartialDef->getOperand(I);
+          if (!MO.isReg() || !MO.isDef())
+            continue;
+          if (TRI->isSubRegister(SubReg, MO.getReg())) {
+            IsDefinedHere = true;
+            break;
+          }
+        }
         // This part of Reg was defined before the last partial def. It's killed
         // here.
-        LastPartialDef->addOperand(MachineOperand::CreateReg(SubReg,
-                                                             false/*IsDef*/,
-                                                             true/*IsImp*/));
+        LastPartialDef->addOperand(
+            MachineOperand::CreateReg(SubReg, IsDefinedHere, true /*IsImp*/));
         PhysRegDef[SubReg] = LastPartialDef;
         for (MCPhysReg SS : TRI->subregs(SubReg))
           Processed.insert(SS);
diff --git a/llvm/test/CodeGen/AMDGPU/implicitdef-subreg.mir b/llvm/test/CodeGen/AMDGPU/implicitdef-subreg.mir
new file mode 100644
index 00000000000000..4f5bc49dabfdae
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/implicitdef-subreg.mir
@@ -0,0 +1,46 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn --run-pass=livevars -o - %s | FileCheck %s
+---
+name:            sgpr_copy
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: sgpr_copy
+    ; CHECK: %sval:sreg_32 = S_MOV_B32 0
+    ; CHECK-NEXT: $sgpr0 = COPY %sval
+    ; CHECK-NEXT: $sgpr1 = COPY %sval
+    ; CHECK-NEXT: $sgpr2 = COPY %sval
+    ; CHECK-NEXT: $sgpr3 = COPY killed %sval, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2, implicit-def $sgpr2_sgpr3
+    ; CHECK-NEXT: dead $sgpr30_sgpr31 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3
+
+    %sval:sreg_32 = S_MOV_B32 0
+
+    $sgpr0 = COPY %sval
+    $sgpr1 = COPY %sval
+    $sgpr2 = COPY %sval
+    $sgpr3 = COPY %sval
+    $sgpr30_sgpr31 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+
+...
+---
+name:            vgpr_copy
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: vgpr_copy
+    ; CHECK: %vval:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY %vval
+    ; CHECK-NEXT: $vgpr1 = COPY %vval
+    ; CHECK-NEXT: $vgpr2 = COPY %vval
+    ; CHECK-NEXT: $vgpr3 = COPY killed %vval, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr0_vgpr1, implicit $vgpr0_vgpr1_vgpr2, implicit-def $vgpr1_vgpr2_vgpr3
+    ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0_vgpr1_vgpr2_vgpr3
+
+    %vval:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+
+    $vgpr0 = COPY %vval
+    $vgpr1 = COPY %vval
+    $vgpr2 = COPY %vval
+    $vgpr3 = COPY %vval
+    %0:vgpr_32 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+
+...

>From 0208eb91b91c5b211d661853cd3a12b6b88e0701 Mon Sep 17 00:00:00 2001
From: jofrn <jofernau at amd.com>
Date: Wed, 11 Dec 2024 01:24:12 -0800
Subject: [PATCH 2/3] Rewrite loop to be modifiesRegister

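Because modifiesRegister also matches overlapping definitions, this also
causes subregisters of a superregister defined at the instruction to be
marked as implicit-def.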
---
 llvm/lib/CodeGen/LiveVariables.cpp              | 11 +----------
 llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir  |  2 +-
 llvm/test/CodeGen/AMDGPU/implicitdef-subreg.mir |  8 ++++----
 3 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp
index ec6c360561bd9c..39d26580308983 100644
--- a/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/llvm/lib/CodeGen/LiveVariables.cpp
@@ -279,16 +279,7 @@ void LiveVariables::HandlePhysRegUse(Register Reg, MachineInstr &MI) {
           continue;
 
         // Check if SubReg is defined at LastPartialDef.
-        bool IsDefinedHere = false;
-        for (int I = 0; I < LastPartialDef->getNumOperands(); ++I) {
-          const auto MO = LastPartialDef->getOperand(I);
-          if (!MO.isReg() || !MO.isDef())
-            continue;
-          if (TRI->isSubRegister(SubReg, MO.getReg())) {
-            IsDefinedHere = true;
-            break;
-          }
-        }
+        bool IsDefinedHere = LastPartialDef->modifiesRegister(SubReg, TRI);
         // This part of Reg was defined before the last partial def. It's killed
         // here.
         LastPartialDef->addOperand(
diff --git a/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir b/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir
index a10d7588cb4429..c2fba541bdd1e4 100644
--- a/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir
+++ b/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir
@@ -756,7 +756,7 @@ body:             |
     ; CHECK: liveins: $x0, $x1, $x2
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 40, implicit $w1, implicit $w1_hi :: (load (s32))
-    ; CHECK-NEXT: renamable $w2 = LDRWui renamable $x1, 1, implicit-def $x2, implicit $w2_hi :: (load (s32))
+    ; CHECK-NEXT: renamable $w2 = LDRWui renamable $x1, 1, implicit-def $x2, implicit-def $w2_hi :: (load (s32))
     ; CHECK-NEXT: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64))
     ; CHECK-NEXT: RET undef $lr
     early-clobber renamable $x1, renamable $x0 = LDRSWpre killed renamable $x1, 40 :: (load (s32))
diff --git a/llvm/test/CodeGen/AMDGPU/implicitdef-subreg.mir b/llvm/test/CodeGen/AMDGPU/implicitdef-subreg.mir
index 4f5bc49dabfdae..dd6352586f2e57 100644
--- a/llvm/test/CodeGen/AMDGPU/implicitdef-subreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/implicitdef-subreg.mir
@@ -5,14 +5,14 @@ name:            sgpr_copy
 tracksRegLiveness: true
 body:             |
   bb.0:
+
     ; CHECK-LABEL: name: sgpr_copy
     ; CHECK: %sval:sreg_32 = S_MOV_B32 0
     ; CHECK-NEXT: $sgpr0 = COPY %sval
     ; CHECK-NEXT: $sgpr1 = COPY %sval
     ; CHECK-NEXT: $sgpr2 = COPY %sval
-    ; CHECK-NEXT: $sgpr3 = COPY killed %sval, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2, implicit-def $sgpr2_sgpr3
+    ; CHECK-NEXT: $sgpr3 = COPY killed %sval, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $sgpr0, implicit-def $sgpr1, implicit-def $sgpr2, implicit-def $sgpr0_sgpr1, implicit-def $sgpr0_sgpr1_sgpr2, implicit-def $sgpr2_sgpr3
     ; CHECK-NEXT: dead $sgpr30_sgpr31 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3
-
     %sval:sreg_32 = S_MOV_B32 0
 
     $sgpr0 = COPY %sval
@@ -27,14 +27,14 @@ name:            vgpr_copy
 tracksRegLiveness: true
 body:             |
   bb.0:
+
     ; CHECK-LABEL: name: vgpr_copy
     ; CHECK: %vval:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
     ; CHECK-NEXT: $vgpr0 = COPY %vval
     ; CHECK-NEXT: $vgpr1 = COPY %vval
     ; CHECK-NEXT: $vgpr2 = COPY %vval
-    ; CHECK-NEXT: $vgpr3 = COPY killed %vval, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr0_vgpr1, implicit $vgpr0_vgpr1_vgpr2, implicit-def $vgpr1_vgpr2_vgpr3
+    ; CHECK-NEXT: $vgpr3 = COPY killed %vval, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr0_vgpr1, implicit-def $vgpr0_vgpr1_vgpr2, implicit-def $vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0_vgpr1_vgpr2_vgpr3
-
     %vval:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 
     $vgpr0 = COPY %vval

>From e0cd95a70d8d348e2364fee4479f953dcb21f775 Mon Sep 17 00:00:00 2001
From: jofernau <Joe.Fernau at amd.com>
Date: Wed, 11 Dec 2024 02:52:19 -0800
Subject: [PATCH 3/3] Add ll test

---
 .../test/CodeGen/AMDGPU/fncall-implicitdef.ll | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/fncall-implicitdef.ll

diff --git a/llvm/test/CodeGen/AMDGPU/fncall-implicitdef.ll b/llvm/test/CodeGen/AMDGPU/fncall-implicitdef.ll
new file mode 100644
index 00000000000000..9c053219c0b316
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fncall-implicitdef.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -O1 %s -o - | FileCheck %s
+
+define amdgpu_ps <4 x float> @caller(ptr %1) {
+; CHECK-LABEL: caller:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    flat_load_dword v1, v[0:1]
+; CHECK-NEXT:    s_getpc_b64 s[0:1]
+; CHECK-NEXT:    s_add_u32 s0, s0, fn@gotpcrel32@lo+4
+; CHECK-NEXT:    s_addc_u32 s1, s1, fn@gotpcrel32@hi+12
+; CHECK-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    s_mov_b32 s1, 0
+; CHECK-NEXT:    s_mov_b32 s2, 0
+; CHECK-NEXT:    s_mov_b64 s[8:9], 36
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    s_mov_b32 s3, 0
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    s_mov_b32 s32, 0
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; CHECK-NEXT:    ; return to shader part epilog
+  %L = load i32, ptr %1, align 4
+  %R = call <4 x float> @fn(<4 x i32> zeroinitializer, i32 0, i32 %L, i32 0)
+  ret <4 x float> %R
+}
+
+declare <4 x float> @fn(<4 x i32> inreg, i32, i32, i32)



More information about the llvm-commits mailing list