[llvm] daf1a1f - [AArch64][SVE] Add instcombine to convert ptest.last/first to ptest.any
Bradley Smith via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 15 07:59:44 PST 2022
Author: Bradley Smith
Date: 2022-11-15T15:59:21Z
New Revision: daf1a1f690c3b259376747f1d7d992f5b50c0bcb
URL: https://github.com/llvm/llvm-project/commit/daf1a1f690c3b259376747f1d7d992f5b50c0bcb
DIFF: https://github.com/llvm/llvm-project/commit/daf1a1f690c3b259376747f1d7d992f5b50c0bcb.diff
LOG: [AArch64][SVE] Add instcombine to convert ptest.last/first to ptest.any
This allows for better optimization later in the backend.
This fixes the remaining missed optimizations in D137717.
Depends on D137930
Differential Revision: https://reviews.llvm.org/D137947
Added:
Modified:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 3f98d1f00532a..7c37464a2b91e 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -975,17 +975,35 @@ instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts) {
static Optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
IntrinsicInst &II) {
- IntrinsicInst *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
- IntrinsicInst *Op = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
+ Value *PgVal = II.getArgOperand(0);
+ Value *OpVal = II.getArgOperand(1);
+
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+
+ // PTEST_<FIRST|LAST>(X, X) is equivalent to PTEST_ANY(X, X).
+ // Later optimizations prefer this form.
+ if (PgVal == OpVal &&
+ (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
+ II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
+ Value *Ops[] = {PgVal, OpVal};
+ Type *Tys[] = {PgVal->getType()};
+
+ auto *PTest =
+ Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptest_any, Tys, Ops);
+ PTest->takeName(&II);
+
+ return IC.replaceInstUsesWith(II, PTest);
+ }
+
+ IntrinsicInst *Pg = dyn_cast<IntrinsicInst>(PgVal);
+ IntrinsicInst *Op = dyn_cast<IntrinsicInst>(OpVal);
if (!Pg || !Op)
return None;
Intrinsic::ID OpIID = Op->getIntrinsicID();
- IRBuilder<> Builder(II.getContext());
- Builder.SetInsertPoint(&II);
-
if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
Pg->getArgOperand(0)->getType() == Op->getArgOperand(0)->getType()) {
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
index 66abcef720c9c..7f664b878998d 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s
+; RUN: opt -instcombine -S %s | llc -mtriple=aarch64--linux-gnu -mattr=+sve -o - | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
;
; Immediate Compares
@@ -189,8 +191,7 @@ define i1 @cmp8_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
; CHECK-LABEL: cmp8_ptest_first_xx:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ptest p0, p0.b
-; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
%2 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
@@ -205,8 +206,7 @@ define i1 @cmp8_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vs
; CHECK-LABEL: cmp8_ptest_last_xx:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ptest p0, p0.b
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
%2 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
@@ -235,8 +235,7 @@ define i1 @cmp32_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <
; CHECK-LABEL: cmp32_ptest_first_xx:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p0, p0.b
-; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -252,8 +251,7 @@ define i1 @cmp32_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <v
; CHECK-LABEL: cmp32_ptest_last_xx:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p0, p0.b
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll
index c6ef477a4341d..a22454b586c25 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll
@@ -3,6 +3,26 @@
target triple = "aarch64-unknown-linux-gnu"
+; PTEST first can be changed to any if the mask and operand are the same
+define i1 @ptest_first_to_any(<vscale x 16 x i1> %a) #0 {
+; CHECK-LABEL: @ptest_first_to_any(
+; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[A]])
+; CHECK-NEXT: ret i1 [[OUT]]
+;
+ %out = call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %a)
+ ret i1 %out
+}
+
+; PTEST last can be changed to any if the mask and operand are the same
+define i1 @ptest_last_to_any(<vscale x 16 x i1> %a) #0 {
+; CHECK-LABEL: @ptest_last_to_any(
+; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[A]])
+; CHECK-NEXT: ret i1 [[OUT]]
+;
+ %out = call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %a)
+ ret i1 %out
+}
+
define i1 @ptest_any1(<vscale x 2 x i1> %a) #0 {
; CHECK-LABEL: @ptest_any1(
; CHECK-NEXT: [[MASK:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 0)
@@ -47,7 +67,7 @@ define i1 @ptest_first(<vscale x 4 x i1> %a) #0 {
define i1 @ptest_first_same_ops(<vscale x 2 x i1> %a) #0 {
; CHECK-LABEL: @ptest_first_same_ops(
-; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.aarch64.sve.ptest.first.nxv2i1(<vscale x 2 x i1> [[A:%.*]], <vscale x 2 x i1> [[A]])
+; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv2i1(<vscale x 2 x i1> [[A:%.*]], <vscale x 2 x i1> [[A]])
; CHECK-NEXT: ret i1 [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
More information about the llvm-commits
mailing list