[llvm] [AMDGPU][True16] fix a bug in codeGen causing e64 with wrong vgpr type to shrink (PR #102942)

Mon Aug 12 12:47:30 PDT 2024

https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/102942

>From b1ee138e7511dac2a9352f5441fef71ae9260d9a Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Tue, 6 Aug 2024 14:43:46 -0400
Subject: [PATCH 1/4] [AMDGPU][CodeGen] support v_mov_b16 and v_swap_b16 in
 true16 format

---
 llvm/lib/Target/AMDGPU/SIInstructions.td | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index c41850ab55f75c..2fcdcbd6b5ba1b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2192,20 +2192,6 @@ foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in {
   }
 }
 
-let True16Predicate = UseRealTrue16Insts in {
-  def : GCNPat <
-    (VGPRImm<(i16 imm)>:$imm),
-    (V_MOV_B16_t16_e64 0, imm:$imm, 0)
-  >;
-
-  foreach vt = [f16, bf16] in {
-    def : GCNPat <
-      (VGPRImm<(vt fpimm)>:$imm),
-      (V_MOV_B16_t16_e64 0, $imm, 0)
-    >;
-  }
-}
-
 // V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit
 // immediate and wil be expanded as needed, but we will only use these patterns
 // for values which can be encoded.

>From b01863a05ea636a553fda11fb107b03e6d206151 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Thu, 8 Aug 2024 11:36:10 -0400
Subject: [PATCH 2/4] added back the missing imm pattern for mov_b16

---
 llvm/lib/Target/AMDGPU/SIInstructions.td | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 2fcdcbd6b5ba1b..c41850ab55f75c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2192,6 +2192,20 @@ foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in {
   }
 }
 
+let True16Predicate = UseRealTrue16Insts in {
+  def : GCNPat <
+    (VGPRImm<(i16 imm)>:$imm),
+    (V_MOV_B16_t16_e64 0, imm:$imm, 0)
+  >;
+
+  foreach vt = [f16, bf16] in {
+    def : GCNPat <
+      (VGPRImm<(vt fpimm)>:$imm),
+      (V_MOV_B16_t16_e64 0, $imm, 0)
+    >;
+  }
+}
+
 // V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit
 // immediate and wil be expanded as needed, but we will only use these patterns
 // for values which can be encoded.

>From acfb65ae6dee17685b87c4b70582bcd2ace8da85 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Mon, 12 Aug 2024 13:42:25 -0400
Subject: [PATCH 3/4] [AMDGPU][True16] fix a bug in codeGen causing e64 with
 wrong vgpr type to shrink

---
 llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 155747551471e3..5d38cafd73dd95 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -1048,7 +1048,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
               MachineFunctionProperties::Property::NoVRegs))
         continue;
 
-      if (ST->useRealTrue16Insts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
+      if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
           !shouldShrinkTrue16(MI))
         continue;
 

>From 6fb5015b4b4128cc135b6f998fd4384b376015fe Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Mon, 12 Aug 2024 15:42:48 -0400
Subject: [PATCH 4/4] added a mir test for shrinking Lo128 register type

---
 llvm/test/CodeGen/AMDGPU/shrink-true16.mir | 28 ++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/shrink-true16.mir

diff --git a/llvm/test/CodeGen/AMDGPU/shrink-true16.mir b/llvm/test/CodeGen/AMDGPU/shrink-true16.mir
new file mode 100644
index 00000000000000..4b2e3951ce4c13
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/shrink-true16.mir
@@ -0,0 +1,28 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=si-shrink-instructions -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX1100 %s
+
+---
+name: 16bit_lo128_shrink
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr127
+    ; GFX1100-LABEL: name: 16bit_lo128_shrink
+    ; GFX1100: liveins: $vgpr127
+    ; GFX1100-NEXT: {{  $}}
+    ; GFX1100-NEXT: V_CMP_EQ_U16_t16_e32 0, $vgpr127, implicit-def $vcc, implicit $exec, implicit $exec
+    $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr127, implicit-def $vcc, implicit $exec
+...
+
+---
+name: 16bit_lo128_no_shrink
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr128
+    ; GFX1100-LABEL: name: 16bit_lo128_no_shrink
+    ; GFX1100: liveins: $vgpr128
+    ; GFX1100-NEXT: {{  $}}
+    ; GFX1100-NEXT: $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr128, implicit-def $vcc_lo, implicit $exec
+    $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr128, implicit-def $vcc, implicit $exec
+...