[llvm] dc22595 - [AMDGPU] Add selection pattern for v_xnor_b32
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 26 08:42:02 PST 2021
Author: Jay Foad
Date: 2021-02-26T16:41:47Z
New Revision: dc2259537a30127808efc4ee68ec5b0cae5c983a
URL: https://github.com/llvm/llvm-project/commit/dc2259537a30127808efc4ee68ec5b0cae5c983a
DIFF: https://github.com/llvm/llvm-project/commit/dc2259537a30127808efc4ee68ec5b0cae5c983a.diff
LOG: [AMDGPU] Add selection pattern for v_xnor_b32
This allows GlobalISel to select v_xnor_b32 on subtargets where it is
available (those satisfying the HasDLInsts predicate). I assume
SelectionDAG always selects s_xnor_b32, so it isn't affected by this
change.
Differential Revision: https://reviews.llvm.org/D97560
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/VOP2Instructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index b616228095f7e..ec844fcddcb38 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -636,6 +636,11 @@ def add_ctpop : PatFrag <
(add (ctpop $src0), $src1)
>;
+def xnor : PatFrag <
+ (ops node:$src0, node:$src1),
+ (not (xor $src0, $src1))
+>;
+
foreach I = 1-4 in {
def shl#I#_add : PatFrag <
(ops node:$src0, node:$src1),
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index edcd8595f9e1b..2b8d5d9a52094 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -678,7 +678,7 @@ defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>;
let SubtargetPredicate = HasDLInsts in {
-defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32>;
+defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32, xnor>;
let Constraints = "$vdst = $src2",
DisableEncoding = "$src2",
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
index 17d199e319630..b201066cb7a48 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
@@ -166,12 +166,32 @@ define amdgpu_ps <2 x i64> @scalar_xnor_i64_mul_use(i64 inreg %a, i64 inreg %b)
}
define i32 @vector_xnor_i32_one_use(i32 %a, i32 %b) {
-; GCN-LABEL: vector_xnor_i32_one_use:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
-; GCN-NEXT: v_xor_b32_e32 v0, -1, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX7-LABEL: vector_xnor_i32_one_use:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
+; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: vector_xnor_i32_one_use:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
+; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-LABEL: vector_xnor_i32_one_use:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_xor_b32_e32 v0, v0, v1
+; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-LABEL: vector_xnor_i32_one_use:
+; GFX906: ; %bb.0: ; %entry
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v1
+; GFX906-NEXT: s_setpc_b64 s[30:31]
entry:
%xor = xor i32 %a, %b
%r = xor i32 %xor, -1
@@ -194,11 +214,28 @@ entry:
}
define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) {
-; GCN-LABEL: xnor_s_v_i32_one_use:
-; GCN: ; %bb.0:
-; GCN-NEXT: v_xor_b32_e32 v0, s0, v0
-; GCN-NEXT: v_xor_b32_e32 v0, -1, v0
-; GCN-NEXT: ; return to shader part epilog
+; GFX7-LABEL: xnor_s_v_i32_one_use:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: xnor_s_v_i32_one_use:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX900-LABEL: xnor_s_v_i32_one_use:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX900-NEXT: ; return to shader part epilog
+;
+; GFX906-LABEL: xnor_s_v_i32_one_use:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: v_xnor_b32_e32 v0, s0, v0
+; GFX906-NEXT: ; return to shader part epilog
%xor = xor i32 %s, %v
%d = xor i32 %xor, -1
%cast = bitcast i32 %d to float
@@ -206,11 +243,28 @@ define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) {
}
define amdgpu_ps float @xnor_v_s_i32_one_use(i32 inreg %s, i32 %v) {
-; GCN-LABEL: xnor_v_s_i32_one_use:
-; GCN: ; %bb.0:
-; GCN-NEXT: v_xor_b32_e32 v0, s0, v0
-; GCN-NEXT: v_xor_b32_e32 v0, -1, v0
-; GCN-NEXT: ; return to shader part epilog
+; GFX7-LABEL: xnor_v_s_i32_one_use:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: xnor_v_s_i32_one_use:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX900-LABEL: xnor_v_s_i32_one_use:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX900-NEXT: ; return to shader part epilog
+;
+; GFX906-LABEL: xnor_v_s_i32_one_use:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: v_xnor_b32_e64 v0, v0, s0
+; GFX906-NEXT: ; return to shader part epilog
%xor = xor i32 %v, %s
%d = xor i32 %xor, -1
%cast = bitcast i32 %d to float
@@ -305,12 +359,32 @@ define amdgpu_ps <2 x float> @xnor_i64_v_s_one_use(i64 inreg %a, i64 %b64) {
}
define i32 @vector_xor_na_b_i32_one_use(i32 %a, i32 %b) {
-; GCN-LABEL: vector_xor_na_b_i32_one_use:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_xor_b32_e32 v0, -1, v0
-; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX7-LABEL: vector_xor_na_b_i32_one_use:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: vector_xor_na_b_i32_one_use:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-LABEL: vector_xor_na_b_i32_one_use:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX900-NEXT: v_xor_b32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-LABEL: vector_xor_na_b_i32_one_use:
+; GFX906: ; %bb.0: ; %entry
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v1
+; GFX906-NEXT: s_setpc_b64 s[30:31]
entry:
%na = xor i32 %a, -1
%r = xor i32 %na, %b
@@ -318,12 +392,32 @@ entry:
}
define i32 @vector_xor_a_nb_i32_one_use(i32 %a, i32 %b) {
-; GCN-LABEL: vector_xor_a_nb_i32_one_use:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_xor_b32_e32 v1, -1, v1
-; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX7-LABEL: vector_xor_a_nb_i32_one_use:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: vector_xor_a_nb_i32_one_use:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-LABEL: vector_xor_a_nb_i32_one_use:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX900-NEXT: v_xor_b32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-LABEL: vector_xor_a_nb_i32_one_use:
+; GFX906: ; %bb.0: ; %entry
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_xnor_b32_e32 v0, v1, v0
+; GFX906-NEXT: s_setpc_b64 s[30:31]
entry:
%nb = xor i32 %b, -1
%r = xor i32 %a, %nb
More information about the llvm-commits mailing list