[llvm] [AMDGPU] Combine build_vector patterns for i16 and f16. NFCI. (PR #91806)
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri May 10 14:41:23 PDT 2024
https://github.com/rampitec updated https://github.com/llvm/llvm-project/pull/91806
From 8bb76838520d4da2d178de489e78f17c8bd30b02 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Fri, 10 May 2024 13:43:03 -0700
Subject: [PATCH 1/2] [AMDGPU] Combine build_vector patterns for i16 and f16. NFCI.
---
llvm/lib/Target/AMDGPU/SIInstructions.td | 48 ++++++++----------------
1 file changed, 16 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index cca8d96f29c0f..97ff61bde45ec 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3162,49 +3162,34 @@ def : GCNPat <
(v2f16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), VGPR_32:$src1))
>;
-def : GCNPat <
- (v2i16 (UniformBinFrag<build_vector> (i16 SReg_32:$src0), (i16 undef))),
- (COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
->;
-
-def : GCNPat <
- (v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src0), (i16 undef))),
- (COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32)
->;
+foreach Ty = [i16, f16] in {
-def : GCNPat <
- (v2f16 (build_vector f16:$src0, (f16 undef))),
- (COPY $src0)
->;
+defvar vecTy = !cond(!eq(Ty, i16) : v2i16,
+ !eq(Ty, f16) : v2f16);
def : GCNPat <
- (v2i16 (UniformBinFrag<build_vector> (i16 undef), (i16 SReg_32:$src1))),
- (S_LSHL_B32 SReg_32:$src1, (i32 16))
+ (vecTy (UniformBinFrag<build_vector> (Ty SReg_32:$src0), (Ty undef))),
+ (COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
>;
def : GCNPat <
- (v2i16 (DivergentBinFrag<build_vector> (i16 undef), (i16 VGPR_32:$src1))),
- (v2i16 (V_LSHLREV_B32_e64 (i32 16), VGPR_32:$src1))
+ (vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$src0), (Ty undef))),
+ (COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32)
>;
-
def : GCNPat <
- (v2f16 (UniformBinFrag<build_vector> (f16 undef), (f16 SReg_32:$src1))),
+ (vecTy (UniformBinFrag<build_vector> (Ty undef), (Ty SReg_32:$src1))),
(S_LSHL_B32 SReg_32:$src1, (i32 16))
>;
def : GCNPat <
- (v2f16 (DivergentBinFrag<build_vector> (f16 undef), (f16 VGPR_32:$src1))),
- (v2f16 (V_LSHLREV_B32_e64 (i32 16), VGPR_32:$src1))
+ (vecTy (DivergentBinFrag<build_vector> (Ty undef), (Ty VGPR_32:$src1))),
+ (vecTy (V_LSHLREV_B32_e64 (i32 16), VGPR_32:$src1))
>;
+} // End foreach Ty = ...
}
let SubtargetPredicate = HasVOP3PInsts in {
-def : GCNPat <
- (v2i16 (UniformBinFrag<build_vector> (i16 SReg_32:$src0), (i16 SReg_32:$src1))),
- (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
->;
-
def : GCNPat <
(v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src0), (i16 VGPR_32:$src1))),
(v2i16 (V_LSHL_OR_B32_e64 $src1, (i32 16), (i32 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), $src0))))
@@ -3223,18 +3208,17 @@ def : GCNPat <
(S_PACK_HH_B32_B16 SReg_32:$src0, SReg_32:$src1)
>;
-def : GCNPat <
- (v2f16 (UniformBinFrag<build_vector> (f16 SReg_32:$src0), (f16 SReg_32:$src1))),
- (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
->;
-
-
foreach Ty = [i16, f16] in {
defvar vecTy = !if(!eq(Ty, i16), v2i16, v2f16);
defvar immzeroTy = !if(!eq(Ty, i16), immzero, fpimmzero);
+def : GCNPat <
+ (vecTy (UniformBinFrag<build_vector> (Ty SReg_32:$src0), (Ty SReg_32:$src1))),
+ (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
+>;
+
// Take the lower 16 bits from each VGPR_32 and concat them
def : GCNPat <
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$a), (Ty VGPR_32:$b))),
From f5e8e2e5e9dc8cc5f0cdd2d1c59460fb7cc51a6b Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Fri, 10 May 2024 14:37:29 -0700
Subject: [PATCH 2/2] Use vecTy.ElementType instead of switch
---
llvm/lib/Target/AMDGPU/SIInstructions.td | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 97ff61bde45ec..8be3818d7d249 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3162,10 +3162,9 @@ def : GCNPat <
(v2f16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), VGPR_32:$src1))
>;
-foreach Ty = [i16, f16] in {
+foreach vecTy = [v2i16, v2f16] in {
-defvar vecTy = !cond(!eq(Ty, i16) : v2i16,
- !eq(Ty, f16) : v2f16);
+defvar Ty = vecTy.ElementType;
def : GCNPat <
(vecTy (UniformBinFrag<build_vector> (Ty SReg_32:$src0), (Ty undef))),
@@ -3209,9 +3208,9 @@ def : GCNPat <
>;
-foreach Ty = [i16, f16] in {
+foreach vecTy = [v2i16, v2f16] in {
-defvar vecTy = !if(!eq(Ty, i16), v2i16, v2f16);
+defvar Ty = vecTy.ElementType;
defvar immzeroTy = !if(!eq(Ty, i16), immzero, fpimmzero);
def : GCNPat <
More information about the llvm-commits mailing list