[PATCH] D141397: [AArch64][SVE]: Optimize using ptest with whilelo
hassnaaHamdi via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 10 09:08:01 PST 2023
hassnaa-arm created this revision.
hassnaa-arm added reviewers: david-arm, sdesmalen.
Herald added subscribers: ctetreau, psnobl, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: efriedma.
Herald added a project: All.
hassnaa-arm requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.
Specify sve_whilelo as an intrinsic that has zeroing inactive lanes, to avoid generating pointless instructions.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D141397
Files:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve2-ptest_whilelt.ll
Index: llvm/test/CodeGen/AArch64/sve2-ptest_whilelt.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-ptest_whilelt.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @test_svwhilelt_with_ptest(i32 noundef %width) #0 {
+; CHECK-LABEL: test_svwhilelt_with_ptest:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: whilelo p0.s, wzr, w0
+; CHECK-NEXT: b.eq .LBB0_3
+; CHECK-NEXT: // %bb.1: // %for.body.lr.ph
+; CHECK-NEXT: cntw x8
+; CHECK-NEXT: mov w9, w8
+; CHECK-NEXT: .LBB0_2: // %for.body
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: whilelo p0.s, w9, w0
+; CHECK-NEXT: add w9, w9, w8
+; CHECK-NEXT: b.ne .LBB0_2
+; CHECK-NEXT: .LBB0_3: // %for.cond.cleanup
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32 0, i32 %width)
+ %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1)
+ br i1 %2, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph: ; preds = %entry
+ %3 = tail call i32 @llvm.vscale.i32()
+ %4 = shl nuw nsw i32 %3, 2
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %5 = phi <vscale x 4 x i1> [ %1, %for.body.lr.ph ], [ %6, %for.body ]
+ %x.07 = phi i32 [ 0, %for.body.lr.ph ], [ %conv3, %for.body ]
+ %conv3 = add i32 %4, %x.07
+ %6 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32 %conv3, i32 %width)
+ %7 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %6)
+ br i1 %7, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+}
+
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32, i32)
+declare i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
+declare i32 @llvm.vscale.i32()
+
+attributes #0 = { "target-features"="+sve2" }
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -263,6 +263,7 @@
switch (Op.getConstantOperandVal(0)) {
default:
return false;
+ case Intrinsic::aarch64_sve_whilelo:
case Intrinsic::aarch64_sve_ptrue:
case Intrinsic::aarch64_sve_pnext:
case Intrinsic::aarch64_sve_cmpeq:
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D141397.487831.patch
Type: text/x-patch
Size: 2843 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230110/8b8853f2/attachment.bin>
More information about the llvm-commits
mailing list