[llvm] [AMDGPU][True16] fix a bug in codeGen causing e64 with wrong vgpr type to shrink (PR #102942)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 12 12:47:30 PDT 2024
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/102942
>From b1ee138e7511dac2a9352f5441fef71ae9260d9a Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Tue, 6 Aug 2024 14:43:46 -0400
Subject: [PATCH 1/4] [AMDGPU][CodeGen] support v_mov_b16 and v_swap_b16 in
true16 format
---
llvm/lib/Target/AMDGPU/SIInstructions.td | 14 --------------
1 file changed, 14 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index c41850ab55f75c..2fcdcbd6b5ba1b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2192,20 +2192,6 @@ foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in {
}
}
-let True16Predicate = UseRealTrue16Insts in {
- def : GCNPat <
- (VGPRImm<(i16 imm)>:$imm),
- (V_MOV_B16_t16_e64 0, imm:$imm, 0)
- >;
-
- foreach vt = [f16, bf16] in {
- def : GCNPat <
- (VGPRImm<(vt fpimm)>:$imm),
- (V_MOV_B16_t16_e64 0, $imm, 0)
- >;
- }
-}
-
// V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit
// immediate and wil be expanded as needed, but we will only use these patterns
// for values which can be encoded.
>From b01863a05ea636a553fda11fb107b03e6d206151 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Thu, 8 Aug 2024 11:36:10 -0400
Subject: [PATCH 2/4] added back the missing imm pattern for mov_b16
---
llvm/lib/Target/AMDGPU/SIInstructions.td | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 2fcdcbd6b5ba1b..c41850ab55f75c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2192,6 +2192,20 @@ foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in {
}
}
+let True16Predicate = UseRealTrue16Insts in {
+ def : GCNPat <
+ (VGPRImm<(i16 imm)>:$imm),
+ (V_MOV_B16_t16_e64 0, imm:$imm, 0)
+ >;
+
+ foreach vt = [f16, bf16] in {
+ def : GCNPat <
+ (VGPRImm<(vt fpimm)>:$imm),
+ (V_MOV_B16_t16_e64 0, $imm, 0)
+ >;
+ }
+}
+
// V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit
// immediate and wil be expanded as needed, but we will only use these patterns
// for values which can be encoded.
>From acfb65ae6dee17685b87c4b70582bcd2ace8da85 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Mon, 12 Aug 2024 13:42:25 -0400
Subject: [PATCH 3/4] [AMDGPU][True16] fix a bug in codeGen causing e64 with
wrong vgpr type to shrink
---
llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 155747551471e3..5d38cafd73dd95 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -1048,7 +1048,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
MachineFunctionProperties::Property::NoVRegs))
continue;
- if (ST->useRealTrue16Insts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
+ if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
!shouldShrinkTrue16(MI))
continue;
>From 6fb5015b4b4128cc135b6f998fd4384b376015fe Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Mon, 12 Aug 2024 15:42:48 -0400
Subject: [PATCH 4/4] added a mir test for shrinking Lo128 register type
---
llvm/test/CodeGen/AMDGPU/shrink-true16.mir | 28 ++++++++++++++++++++++
1 file changed, 28 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/shrink-true16.mir
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-true16.mir b/llvm/test/CodeGen/AMDGPU/shrink-true16.mir
new file mode 100644
index 00000000000000..4b2e3951ce4c13
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/shrink-true16.mir
@@ -0,0 +1,28 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=si-shrink-instructions -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX1100 %s
+
+---
+name: 16bit_lo128_shrink
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr127
+ ; GFX1100-LABEL: name: 16bit_lo128_shrink
+ ; GFX1100: liveins: $vgpr127
+ ; GFX1100-NEXT: {{ $}}
+ ; GFX1100-NEXT: V_CMP_EQ_U16_t16_e32 0, $vgpr127, implicit-def $vcc, implicit $exec, implicit $exec
+ $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr127, implicit-def $vcc, implicit $exec
+...
+
+---
+name: 16bit_lo128_no_shrink
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr128
+ ; GFX1100-LABEL: name: 16bit_lo128_no_shrink
+ ; GFX1100: liveins: $vgpr128
+ ; GFX1100-NEXT: {{ $}}
+ ; GFX1100-NEXT: $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr128, implicit-def $vcc_lo, implicit $exec
+ $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr128, implicit-def $vcc, implicit $exec
+...
More information about the llvm-commits mailing list