[llvm] [AMDGPU][True16][CodeGen] Update and/or/xor codegen pattern for i16 (PR #121835)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 7 09:43:34 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Brox Chen (broxigarchen)
<details>
<summary>Changes</summary>
In the true16 flow, remove the and/or/xor patterns that match i16 operations to 32-bit instructions.
---
Full diff: https://github.com/llvm/llvm-project/pull/121835.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/VOP2Instructions.td (+20-4)
- (modified) llvm/test/CodeGen/AMDGPU/uaddsat.ll (+3-5)
- (modified) llvm/test/CodeGen/AMDGPU/usubsat.ll (+2-3)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index ca4a0fa706c301..6bbf19179b7f6c 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1261,23 +1261,39 @@ class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
$src)
>;
-foreach vt = [i16, v2i16] in {
+foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
+let True16Predicate = p in {
def : GCNPat <
- (and vt:$src0, vt:$src1),
+ (and i16:$src0, i16:$src1),
(V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;
def : GCNPat <
- (or vt:$src0, vt:$src1),
+ (or i16:$src0, i16:$src1),
(V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;
def : GCNPat <
- (xor vt:$src0, vt:$src1),
+ (xor i16:$src0, i16:$src1),
(V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;
}
+def : GCNPat <
+ (and v2i16:$src0, v2i16:$src1),
+ (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
+>;
+
+def : GCNPat <
+ (or v2i16:$src0, v2i16:$src1),
+ (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
+>;
+
+def : GCNPat <
+ (xor v2i16:$src0, v2i16:$src1),
+ (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
+>;
+
let Predicates = [Has16BitInsts, isGFX8GFX9] in {
// Undo sub x, c -> add x, -c canonicalization since c is more likely
diff --git a/llvm/test/CodeGen/AMDGPU/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
index 2775de29368fbb..572793e1c5d711 100644
--- a/llvm/test/CodeGen/AMDGPU/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
@@ -42,12 +42,10 @@ define i8 @v_uaddsat_i8(i8 %lhs, i8 %rhs) {
; GFX11-TRUE16-LABEL: v_uaddsat_i8:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
+; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_min_u16 v0.l, 0xff, v0.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/usubsat.ll b/llvm/test/CodeGen/AMDGPU/usubsat.ll
index 775602ab80cde0..75866e33da23a8 100644
--- a/llvm/test/CodeGen/AMDGPU/usubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/usubsat.ll
@@ -39,9 +39,8 @@ define i8 @v_usubsat_i8(i8 %lhs, i8 %rhs) {
; GFX11-TRUE16-LABEL: v_usubsat_i8:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
+; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
; GFX11-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, v0.h clamp
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
``````````
</details>
https://github.com/llvm/llvm-project/pull/121835
More information about the llvm-commits mailing list