[PATCH] D141397: [AArch64][SVE]: Optimize using ptest with whilelo

Tue Jan 10 09:08:01 PST 2023

hassnaa-arm created this revision.
hassnaa-arm added reviewers: david-arm, sdesmalen.
Herald added subscribers: ctetreau, psnobl, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: efriedma.
Herald added a project: All.
hassnaa-arm requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Specify sve_whilelo as an intrinsic that has zeroing-inactive lanes, to avoid generating pointless instructions.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D141397

Files:
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/test/CodeGen/AArch64/sve2-ptest_whilelt.ll


Index: llvm/test/CodeGen/AArch64/sve2-ptest_whilelt.ll
===================================================================

--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve2-ptest_whilelt.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc  < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @test_svwhilelt_with_ptest(i32 noundef %width)  #0 {
+; CHECK-LABEL: test_svwhilelt_with_ptest:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    whilelo p0.s, wzr, w0
+; CHECK-NEXT:    b.eq .LBB0_3
+; CHECK-NEXT:  // %bb.1: // %for.body.lr.ph
+; CHECK-NEXT:    cntw x8
+; CHECK-NEXT:    mov w9, w8
+; CHECK-NEXT:  .LBB0_2: // %for.body
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    whilelo p0.s, w9, w0
+; CHECK-NEXT:    add w9, w9, w8
+; CHECK-NEXT:    b.ne .LBB0_2
+; CHECK-NEXT:  .LBB0_3: // %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32 0, i32 %width)
+  %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1)
+  br i1 %2, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph:                                   ; preds = %entry
+  %3 = tail call i32 @llvm.vscale.i32()
+  %4 = shl nuw nsw i32 %3, 2
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %5 = phi <vscale x 4 x i1> [ %1, %for.body.lr.ph ], [ %6, %for.body ]
+  %x.07 = phi i32 [ 0, %for.body.lr.ph ], [ %conv3, %for.body ]
+  %conv3 = add i32 %4, %x.07
+  %6 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32 %conv3, i32 %width)
+  %7 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %6)
+  br i1 %7, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+}
+
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32, i32)
+declare i1 @llvm.aarch64.sve.ptest.any.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
+declare i32 @llvm.vscale.i32()
+
+attributes #0 = { "target-features"="+sve2" }
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -263,6 +263,7 @@
     switch (Op.getConstantOperandVal(0)) {
     default:
       return false;
+    case Intrinsic::aarch64_sve_whilelo:
     case Intrinsic::aarch64_sve_ptrue:
     case Intrinsic::aarch64_sve_pnext:
     case Intrinsic::aarch64_sve_cmpeq:


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D141397.487831.patch
Type: text/x-patch
Size: 2843 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230110/8b8853f2/attachment.bin>