[llvm] daf1a1f - [AArch64][SVE] Add instcombine to convert ptest.last/first to ptest.any

Bradley Smith via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 15 07:59:44 PST 2022


Author: Bradley Smith
Date: 2022-11-15T15:59:21Z
New Revision: daf1a1f690c3b259376747f1d7d992f5b50c0bcb

URL: https://github.com/llvm/llvm-project/commit/daf1a1f690c3b259376747f1d7d992f5b50c0bcb
DIFF: https://github.com/llvm/llvm-project/commit/daf1a1f690c3b259376747f1d7d992f5b50c0bcb.diff

LOG: [AArch64][SVE] Add instcombine to convert ptest.last/first to ptest.any

This allows for better optimization later in the backend.

This fixes the remaining missed optimizations in D137717.

Depends on D137930

Differential Revision: https://reviews.llvm.org/D137947

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
    llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 3f98d1f00532a..7c37464a2b91e 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -975,17 +975,35 @@ instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts) {
 
 static Optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
                                                    IntrinsicInst &II) {
-  IntrinsicInst *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
-  IntrinsicInst *Op = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
+  Value *PgVal = II.getArgOperand(0);
+  Value *OpVal = II.getArgOperand(1);
+
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+
+  // PTEST_<FIRST|LAST>(X, X) is equivalent to PTEST_ANY(X, X).
+  // Later optimizations prefer this form.
+  if (PgVal == OpVal &&
+      (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
+       II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
+    Value *Ops[] = {PgVal, OpVal};
+    Type *Tys[] = {PgVal->getType()};
+
+    auto *PTest =
+        Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptest_any, Tys, Ops);
+    PTest->takeName(&II);
+
+    return IC.replaceInstUsesWith(II, PTest);
+  }
+
+  IntrinsicInst *Pg = dyn_cast<IntrinsicInst>(PgVal);
+  IntrinsicInst *Op = dyn_cast<IntrinsicInst>(OpVal);
 
   if (!Pg || !Op)
     return None;
 
   Intrinsic::ID OpIID = Op->getIntrinsicID();
 
-  IRBuilder<> Builder(II.getContext());
-  Builder.SetInsertPoint(&II);
-
   if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
       OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
       Pg->getArgOperand(0)->getType() == Op->getArgOperand(0)->getType()) {

diff  --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
index 66abcef720c9c..7f664b878998d 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
@@ -1,5 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s
+; RUN: opt -instcombine -S %s | llc -mtriple=aarch64--linux-gnu -mattr=+sve -o - | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
 
 ;
 ; Immediate Compares
@@ -189,8 +191,7 @@ define i1 @cmp8_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
 ; CHECK-LABEL: cmp8_ptest_first_xx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT:    ptest p0, p0.b
-; CHECK-NEXT:    cset w0, mi
+; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
   %2 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
@@ -205,8 +206,7 @@ define i1 @cmp8_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vs
 ; CHECK-LABEL: cmp8_ptest_last_xx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT:    ptest p0, p0.b
-; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
   %2 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
@@ -235,8 +235,7 @@ define i1 @cmp32_ptest_first_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <
 ; CHECK-LABEL: cmp32_ptest_first_xx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p0, p0.b
-; CHECK-NEXT:    cset w0, mi
+; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
   %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -252,8 +251,7 @@ define i1 @cmp32_ptest_last_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <v
 ; CHECK-LABEL: cmp32_ptest_last_xx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT:    ptest p0, p0.b
-; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
   %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)

diff  --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll
index c6ef477a4341d..a22454b586c25 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll
@@ -3,6 +3,26 @@
 
 target triple = "aarch64-unknown-linux-gnu"
 
+; PTEST first can be changed to any if the mask and operand are the same
+define i1 @ptest_first_to_any(<vscale x 16 x i1> %a) #0 {
+; CHECK-LABEL: @ptest_first_to_any(
+; CHECK-NEXT:    [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[A]])
+; CHECK-NEXT:    ret i1 [[OUT]]
+;
+  %out = call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %a)
+  ret i1 %out
+}
+
+; PTEST last can be changed to any if the mask and operand are the same
+define i1 @ptest_last_to_any(<vscale x 16 x i1> %a) #0 {
+; CHECK-LABEL: @ptest_last_to_any(
+; CHECK-NEXT:    [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[A]])
+; CHECK-NEXT:    ret i1 [[OUT]]
+;
+  %out = call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %a)
+  ret i1 %out
+}
+
 define i1 @ptest_any1(<vscale x 2 x i1> %a) #0 {
 ; CHECK-LABEL: @ptest_any1(
 ; CHECK-NEXT:    [[MASK:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 0)
@@ -47,7 +67,7 @@ define i1 @ptest_first(<vscale x 4 x i1> %a) #0 {
 
 define i1 @ptest_first_same_ops(<vscale x 2 x i1> %a) #0 {
 ; CHECK-LABEL: @ptest_first_same_ops(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.aarch64.sve.ptest.first.nxv2i1(<vscale x 2 x i1> [[A:%.*]], <vscale x 2 x i1> [[A]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv2i1(<vscale x 2 x i1> [[A:%.*]], <vscale x 2 x i1> [[A]])
 ; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)


        


More information about the llvm-commits mailing list