[llvm] 417a75c - [AArch64][SVE] Avoid using ptrue for ptest in VECREDUCE_OR.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 27 03:46:05 PST 2022
Author: Sander de Smalen
Date: 2022-01-27T11:44:49Z
New Revision: 417a75c6d094379914ccd1249488b5a331492985
URL: https://github.com/llvm/llvm-project/commit/417a75c6d094379914ccd1249488b5a331492985
DIFF: https://github.com/llvm/llvm-project/commit/417a75c6d094379914ccd1249488b5a331492985.diff
LOG: [AArch64][SVE] Avoid using ptrue for ptest in VECREDUCE_OR.
Reviewed By: david-arm
Differential Revision: https://reviews.llvm.org/D118145
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll
llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 676ee1b18914..b395231c69a7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19364,7 +19364,12 @@ SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
default:
return SDValue();
case ISD::VECREDUCE_OR:
- return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
+ if (isAllActivePredicate(Pg))
+ // The predicate can be 'Op' because
+ // vecreduce_or(Op & <all true>) <=> vecreduce_or(Op).
+ return getPTest(DAG, VT, Op, Op, AArch64CC::ANY_ACTIVE);
+ else
+ return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
case ISD::VECREDUCE_AND: {
Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
diff --git a/llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll b/llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll
index 9900e5d54558..56265fa3e619 100644
--- a/llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll
@@ -56,8 +56,7 @@ define i1 @reduce_and_nxv2i1(<vscale x 2 x i1> %vec) {
define i1 @reduce_or_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_or_nxv16i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.or.i1.nxv16i1(<vscale x 16 x i1> %vec)
@@ -67,8 +66,7 @@ define i1 @reduce_or_nxv16i1(<vscale x 16 x i1> %vec) {
define i1 @reduce_or_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_or_nxv8i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.or.i1.nxv8i1(<vscale x 8 x i1> %vec)
@@ -78,8 +76,7 @@ define i1 @reduce_or_nxv8i1(<vscale x 8 x i1> %vec) {
define i1 @reduce_or_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_or_nxv4i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.or.i1.nxv4i1(<vscale x 4 x i1> %vec)
@@ -89,8 +86,7 @@ define i1 @reduce_or_nxv4i1(<vscale x 4 x i1> %vec) {
define i1 @reduce_or_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_or_nxv2i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.or.i1.nxv2i1(<vscale x 2 x i1> %vec)
@@ -198,8 +194,7 @@ define i1 @reduce_smax_nxv2i1(<vscale x 2 x i1> %vec) {
define i1 @reduce_smin_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_smin_nxv16i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smin.i1.nxv16i1(<vscale x 16 x i1> %vec)
@@ -209,8 +204,7 @@ define i1 @reduce_smin_nxv16i1(<vscale x 16 x i1> %vec) {
define i1 @reduce_smin_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_smin_nxv8i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smin.i1.nxv8i1(<vscale x 8 x i1> %vec)
@@ -220,8 +214,7 @@ define i1 @reduce_smin_nxv8i1(<vscale x 8 x i1> %vec) {
define i1 @reduce_smin_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_smin_nxv4i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smin.i1.nxv4i1(<vscale x 4 x i1> %vec)
@@ -231,8 +224,7 @@ define i1 @reduce_smin_nxv4i1(<vscale x 4 x i1> %vec) {
define i1 @reduce_smin_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_smin_nxv2i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smin.i1.nxv2i1(<vscale x 2 x i1> %vec)
@@ -244,8 +236,7 @@ define i1 @reduce_smin_nxv2i1(<vscale x 2 x i1> %vec) {
define i1 @reduce_umax_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_umax_nxv16i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umax.i1.nxv16i1(<vscale x 16 x i1> %vec)
@@ -255,8 +246,7 @@ define i1 @reduce_umax_nxv16i1(<vscale x 16 x i1> %vec) {
define i1 @reduce_umax_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_umax_nxv8i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umax.i1.nxv8i1(<vscale x 8 x i1> %vec)
@@ -266,8 +256,7 @@ define i1 @reduce_umax_nxv8i1(<vscale x 8 x i1> %vec) {
define i1 @reduce_umax_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_umax_nxv4i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umax.i1.nxv4i1(<vscale x 4 x i1> %vec)
@@ -277,8 +266,7 @@ define i1 @reduce_umax_nxv4i1(<vscale x 4 x i1> %vec) {
define i1 @reduce_umax_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_umax_nxv2i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: ptest p1, p0.b
+; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umax.i1.nxv2i1(<vscale x 2 x i1> %vec)
diff --git a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
index 9d6a3a4b3d96..2afcaf7cedd8 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
@@ -46,7 +46,7 @@ define i1 @orv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: orr p0.b, p2/z, p0.b, p1.b
-; CHECK-NEXT: ptest p2, p0.b
+; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.or.nxv32i1(<vscale x 32 x i1> %a)
@@ -89,7 +89,7 @@ define i1 @sminv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: orr p0.b, p2/z, p0.b, p1.b
-; CHECK-NEXT: ptest p2, p0.b
+; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smin.nxv32i1(<vscale x 32 x i1> %a)
@@ -103,7 +103,7 @@ define i1 @umaxv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: orr p0.b, p2/z, p0.b, p1.b
-; CHECK-NEXT: ptest p2, p0.b
+; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umax.nxv32i1(<vscale x 32 x i1> %a)
More information about the llvm-commits mailing list