[llvm] [AMDGPU][True16][CodeGen] Update and/or/xor codegen pattern for i16 (PR #121835)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 6 13:00:07 PST 2025
https://github.com/broxigarchen created https://github.com/llvm/llvm-project/pull/121835
In the true16 flow, remove the and/or/xor patterns that match i16 operands to 32-bit instructions.
>From 452e86b066b1e2b233be418bdaa57f920d1430d1 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Mon, 6 Jan 2025 14:05:03 -0500
Subject: [PATCH] Update and/or/xor codegen pattern for i16
---
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 24 ++++++++++++++++++----
llvm/test/CodeGen/AMDGPU/uaddsat.ll | 8 +++-----
llvm/test/CodeGen/AMDGPU/usubsat.ll | 5 ++---
3 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index ca4a0fa706c301..6bbf19179b7f6c 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1261,23 +1261,39 @@ class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
$src)
>;
-foreach vt = [i16, v2i16] in {
+foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
+let True16Predicate = p in {
def : GCNPat <
- (and vt:$src0, vt:$src1),
+ (and i16:$src0, i16:$src1),
(V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;
def : GCNPat <
- (or vt:$src0, vt:$src1),
+ (or i16:$src0, i16:$src1),
(V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;
def : GCNPat <
- (xor vt:$src0, vt:$src1),
+ (xor i16:$src0, i16:$src1),
(V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;
}
+def : GCNPat <
+ (and v2i16:$src0, v2i16:$src1),
+ (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
+>;
+
+def : GCNPat <
+ (or v2i16:$src0, v2i16:$src1),
+ (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
+>;
+
+def : GCNPat <
+ (xor v2i16:$src0, v2i16:$src1),
+ (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
+>;
+
let Predicates = [Has16BitInsts, isGFX8GFX9] in {
// Undo sub x, c -> add x, -c canonicalization since c is more likely
diff --git a/llvm/test/CodeGen/AMDGPU/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
index 2775de29368fbb..572793e1c5d711 100644
--- a/llvm/test/CodeGen/AMDGPU/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
@@ -42,12 +42,10 @@ define i8 @v_uaddsat_i8(i8 %lhs, i8 %rhs) {
; GFX11-TRUE16-LABEL: v_uaddsat_i8:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
+; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_min_u16 v0.l, 0xff, v0.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/usubsat.ll b/llvm/test/CodeGen/AMDGPU/usubsat.ll
index 775602ab80cde0..75866e33da23a8 100644
--- a/llvm/test/CodeGen/AMDGPU/usubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/usubsat.ll
@@ -39,9 +39,8 @@ define i8 @v_usubsat_i8(i8 %lhs, i8 %rhs) {
; GFX11-TRUE16-LABEL: v_usubsat_i8:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
+; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
; GFX11-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, v0.h clamp
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
More information about the llvm-commits
mailing list