[llvm] [AMDGPU][True16] fix a bug in codeGen causing e64 with wrong vgpr type to shrink (PR #102942)

Mon Aug 12 10:43:29 PDT 2024

https://github.com/broxigarchen created https://github.com/llvm/llvm-project/pull/102942

This bug was introduced in https://github.com/llvm/llvm-project/pull/102198

>From b1ee138e7511dac2a9352f5441fef71ae9260d9a Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Tue, 6 Aug 2024 14:43:46 -0400
Subject: [PATCH 1/3] [AMDGPU][CodeGen] support v_mov_b16 and v_swap_b16 in
 true16 format

---
 llvm/lib/Target/AMDGPU/SIInstructions.td | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index c41850ab55f75c..2fcdcbd6b5ba1b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2192,20 +2192,6 @@ foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in {
   }
 }
 
-let True16Predicate = UseRealTrue16Insts in {
-  def : GCNPat <
-    (VGPRImm<(i16 imm)>:$imm),
-    (V_MOV_B16_t16_e64 0, imm:$imm, 0)
-  >;
-
-  foreach vt = [f16, bf16] in {
-    def : GCNPat <
-      (VGPRImm<(vt fpimm)>:$imm),
-      (V_MOV_B16_t16_e64 0, $imm, 0)
-    >;
-  }
-}
-
 // V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit
 // immediate and wil be expanded as needed, but we will only use these patterns
 // for values which can be encoded.

>From b01863a05ea636a553fda11fb107b03e6d206151 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Thu, 8 Aug 2024 11:36:10 -0400
Subject: [PATCH 2/3] added back the missing imm pattern for mov_b16

---
 llvm/lib/Target/AMDGPU/SIInstructions.td | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 2fcdcbd6b5ba1b..c41850ab55f75c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2192,6 +2192,20 @@ foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in {
   }
 }
 
+let True16Predicate = UseRealTrue16Insts in {
+  def : GCNPat <
+    (VGPRImm<(i16 imm)>:$imm),
+    (V_MOV_B16_t16_e64 0, imm:$imm, 0)
+  >;
+
+  foreach vt = [f16, bf16] in {
+    def : GCNPat <
+      (VGPRImm<(vt fpimm)>:$imm),
+      (V_MOV_B16_t16_e64 0, $imm, 0)
+    >;
+  }
+}
+
 // V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit
 // immediate and wil be expanded as needed, but we will only use these patterns
 // for values which can be encoded.

>From acfb65ae6dee17685b87c4b70582bcd2ace8da85 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Mon, 12 Aug 2024 13:42:25 -0400
Subject: [PATCH 3/3] [AMDGPU][True16] fix a bug in codeGen causing e64 with
 wrong vgpr type     to shrink

---
 llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 155747551471e3..5d38cafd73dd95 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -1048,7 +1048,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
               MachineFunctionProperties::Property::NoVRegs))
         continue;
 
-      if (ST->useRealTrue16Insts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
+      if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
           !shouldShrinkTrue16(MI))
         continue;