[llvm] a41ddf1 - [AArch64][SVE] Sink ptrue into loop if it is used by PTEST.

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 26 07:09:01 PDT 2022


Author: Sander de Smalen
Date: 2022-07-26T15:07:41+01:00
New Revision: a41ddf178eac21ab259b48acb6d41e852ee4abaa

URL: https://github.com/llvm/llvm-project/commit/a41ddf178eac21ab259b48acb6d41e852ee4abaa
DIFF: https://github.com/llvm/llvm-project/commit/a41ddf178eac21ab259b48acb6d41e852ee4abaa.diff

LOG: [AArch64][SVE] Sink ptrue into loop if it is used by PTEST.

This helps fold away the ptest instructions, because that fold needs to know
whether the general predicate is known to zero the inactive lanes.

This fixes some PTEST regressions introduced by D129282.

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D129852

Added: 
    llvm/test/CodeGen/AArch64/sve-ptest-removal-sink.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7df43c3f4ff5..ea755bc07d54 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12787,6 +12787,12 @@ bool AArch64TargetLowering::shouldSinkOperands(
       if (isSplatShuffle(II->getOperand(1)))
         Ops.push_back(&II->getOperandUse(1));
       return !Ops.empty();
+    case Intrinsic::aarch64_sve_ptest_first:
+    case Intrinsic::aarch64_sve_ptest_last:
+      if (auto *IIOp = dyn_cast<IntrinsicInst>(II->getOperand(0)))
+        if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
+          Ops.push_back(&II->getOperandUse(0));
+      return !Ops.empty();
     case Intrinsic::aarch64_sme_write_horiz:
     case Intrinsic::aarch64_sme_write_vert:
     case Intrinsic::aarch64_sme_writeq_horiz:

diff  --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-sink.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-sink.ll
new file mode 100644
index 000000000000..dbd6207df026
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-sink.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s
+
+;
+; Ensure that %ptrue.ph, which is defined outside the loop, is sunk into the loop so that the ptest is removed.
+;
+
+define void @test_sink_ptrue_into_ptest(i32 %n) {
+; CHECK-LABEL: test_sink_ptrue_into_ptest:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    whilelt p0.s, wzr, w0
+; CHECK-NEXT:    b.pl .LBB0_3
+; CHECK-NEXT:  // %bb.1: // %for.body.preheader
+; CHECK-NEXT:    mov w9, wzr
+; CHECK-NEXT:    cntw x8
+; CHECK-NEXT:  .LBB0_2: // %for.body
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    add w10, w9, w8
+; CHECK-NEXT:    whilelt p0.s, w9, w0
+; CHECK-NEXT:    mov w9, w10
+; CHECK-NEXT:    b.mi .LBB0_2
+; CHECK-NEXT:  .LBB0_3: // %exit
+; CHECK-NEXT:    ret
+entry:
+  %vscale = tail call i32 @llvm.vscale.i32()
+  %step = shl nuw nsw i32 %vscale, 2
+  %ptrue.ph = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %while.ph = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i32(i32 0, i32 %n)
+  %ptest.ph = tail call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %ptrue.ph, <vscale x 4 x i1> %while.ph)
+  br i1 %ptest.ph, label %for.body, label %exit
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %for.body ]
+  %i.next = add i32 %i, %step
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i32(i32 %i, i32 %n)
+  %ptest = call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %ptrue.ph, <vscale x 4 x i1> %while)
+  br i1 %ptest, label %for.body, label %exit
+
+exit:
+  ret void
+}
+
+declare i32 @llvm.vscale.i32()
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 immarg)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i32(i32, i32)
+declare i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)


        


More information about the llvm-commits mailing list