[llvm-branch-commits] [llvm] e2e2057 - [ARM] Ensure loop invariant active.lane.mask operands
Tom Stellard via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jun 25 20:21:01 PDT 2021
Author: David Green
Date: 2021-06-25T20:20:29-07:00
New Revision: e2e2057132c1360d014235a087d4f678efc56420
URL: https://github.com/llvm/llvm-project/commit/e2e2057132c1360d014235a087d4f678efc56420
DIFF: https://github.com/llvm/llvm-project/commit/e2e2057132c1360d014235a087d4f678efc56420.diff
LOG: [ARM] Ensure loop invariant active.lane.mask operands
CGP can move instructions like a ptrtoint into a loop, but
MVETailPredication currently assumes invariant trip counts when
converting active.lane.mask intrinsics. This tries to ensure the
operands are loop invariant, and bails if they are not.
Differential Revision: https://reviews.llvm.org/D100550
Added:
llvm/test/CodeGen/Thumb2/mve-tailpred-loopinvariant.ll
Modified:
llvm/lib/Target/ARM/MVETailPredication.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp
index b705208660df..cccac5595288 100644
--- a/llvm/lib/Target/ARM/MVETailPredication.cpp
+++ b/llvm/lib/Target/ARM/MVETailPredication.cpp
@@ -205,6 +205,10 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
EnableTailPredication == TailPredication::ForceEnabled;
Value *ElemCount = ActiveLaneMask->getOperand(1);
+ bool Changed = false;
+ if (!L->makeLoopInvariant(ElemCount, Changed))
+ return false;
+
auto *EC= SE->getSCEV(ElemCount);
auto *TC = SE->getSCEV(TripCount);
int VectorWidth =
diff --git a/llvm/test/CodeGen/Thumb2/mve-tailpred-loopinvariant.ll b/llvm/test/CodeGen/Thumb2/mve-tailpred-loopinvariant.ll
new file mode 100644
index 000000000000..2bd3e51772bf
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-tailpred-loopinvariant.ll
@@ -0,0 +1,145 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+
+; This test has an instruction that gets sunk into the loop and that is an
+; active.lane.mask operand (%exitcount.ptrcnt.to.int = ptrtoint). We
+; need to make sure it is loop invariant.
+
+define i32 @a(i32* readnone %b, i8* %c) {
+; CHECK-LABEL: a:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: cmp r0, r1
+; CHECK-NEXT: it ls
+; CHECK-NEXT: popls {r4, pc}
+; CHECK-NEXT: .LBB0_1: @ %while.body.preheader
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movs r3, #1
+; CHECK-NEXT: add.w r2, r0, #15
+; CHECK-NEXT: mov r12, r1
+; CHECK-NEXT: bic r2, r2, #15
+; CHECK-NEXT: subs r2, #16
+; CHECK-NEXT: add.w lr, r3, r2, lsr #4
+; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: dls lr, lr
+; CHECK-NEXT: .LBB0_2: @ %vector.body
+; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: adds r3, r1, r2
+; CHECK-NEXT: vctp.8 r0
+; CHECK-NEXT: vmov.8 q0[0], r3
+; CHECK-NEXT: adds r4, r3, #1
+; CHECK-NEXT: vmov.8 q0[1], r4
+; CHECK-NEXT: adds r4, r3, #2
+; CHECK-NEXT: vmov.8 q0[2], r4
+; CHECK-NEXT: adds r4, r3, #3
+; CHECK-NEXT: vmov.8 q0[3], r4
+; CHECK-NEXT: adds r4, r3, #4
+; CHECK-NEXT: vmov.8 q0[4], r4
+; CHECK-NEXT: adds r4, r3, #5
+; CHECK-NEXT: vmov.8 q0[5], r4
+; CHECK-NEXT: adds r4, r3, #6
+; CHECK-NEXT: vmov.8 q0[6], r4
+; CHECK-NEXT: adds r4, r3, #7
+; CHECK-NEXT: vmov.8 q0[7], r4
+; CHECK-NEXT: add.w r4, r3, #8
+; CHECK-NEXT: vmov.8 q0[8], r4
+; CHECK-NEXT: add.w r4, r3, #9
+; CHECK-NEXT: vmov.8 q0[9], r4
+; CHECK-NEXT: add.w r4, r3, #10
+; CHECK-NEXT: vmov.8 q0[10], r4
+; CHECK-NEXT: add.w r4, r3, #11
+; CHECK-NEXT: vmov.8 q0[11], r4
+; CHECK-NEXT: add.w r4, r3, #12
+; CHECK-NEXT: vmov.8 q0[12], r4
+; CHECK-NEXT: add.w r4, r3, #13
+; CHECK-NEXT: vmov.8 q0[13], r4
+; CHECK-NEXT: add.w r4, r3, #14
+; CHECK-NEXT: adds r2, #16
+; CHECK-NEXT: subs r0, #16
+; CHECK-NEXT: vmov.8 q0[14], r4
+; CHECK-NEXT: adds r3, #15
+; CHECK-NEXT: vmov.8 q0[15], r3
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vstrbt.8 q0, [r12], #16
+; CHECK-NEXT: le lr, .LBB0_2
+; CHECK-NEXT: @ %bb.3: @ %while.end
+; CHECK-NEXT: pop {r4, pc}
+entry:
+ %0 = bitcast i32* %b to i8*
+ %cmp3 = icmp ugt i8* %0, %c
+ br i1 %cmp3, label %while.body.preheader, label %while.end
+
+while.body.preheader: ; preds = %entry
+ %c5 = ptrtoint i8* %c to i32
+ %1 = sub i32 0, %c5
+ %uglygep = getelementptr i8, i8* %0, i32 %1
+ %exitcount.ptrcnt.to.int = ptrtoint i8* %uglygep to i32
+ %n.rnd.up = add i32 %exitcount.ptrcnt.to.int, 15
+ %n.vec = and i32 %n.rnd.up, -16
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %while.body.preheader
+ %index = phi i32 [ 0, %while.body.preheader ], [ %index.next, %vector.body ]
+ %next.gep = getelementptr i8, i8* %c, i32 %index
+ %2 = or i32 %index, 1
+ %next.gep7 = getelementptr i8, i8* %c, i32 %2
+ %3 = or i32 %index, 2
+ %next.gep8 = getelementptr i8, i8* %c, i32 %3
+ %4 = or i32 %index, 3
+ %next.gep9 = getelementptr i8, i8* %c, i32 %4
+ %5 = or i32 %index, 4
+ %next.gep10 = getelementptr i8, i8* %c, i32 %5
+ %6 = or i32 %index, 5
+ %next.gep11 = getelementptr i8, i8* %c, i32 %6
+ %7 = or i32 %index, 6
+ %next.gep12 = getelementptr i8, i8* %c, i32 %7
+ %8 = or i32 %index, 7
+ %next.gep13 = getelementptr i8, i8* %c, i32 %8
+ %9 = or i32 %index, 8
+ %next.gep14 = getelementptr i8, i8* %c, i32 %9
+ %10 = or i32 %index, 9
+ %next.gep15 = getelementptr i8, i8* %c, i32 %10
+ %11 = or i32 %index, 10
+ %next.gep16 = getelementptr i8, i8* %c, i32 %11
+ %12 = or i32 %index, 11
+ %next.gep17 = getelementptr i8, i8* %c, i32 %12
+ %13 = or i32 %index, 12
+ %next.gep18 = getelementptr i8, i8* %c, i32 %13
+ %14 = or i32 %index, 13
+ %next.gep19 = getelementptr i8, i8* %c, i32 %14
+ %15 = or i32 %index, 14
+ %next.gep20 = getelementptr i8, i8* %c, i32 %15
+ %16 = or i32 %index, 15
+ %next.gep21 = getelementptr i8, i8* %c, i32 %16
+ %17 = insertelement <16 x i8*> poison, i8* %next.gep, i32 0
+ %18 = insertelement <16 x i8*> %17, i8* %next.gep7, i32 1
+ %19 = insertelement <16 x i8*> %18, i8* %next.gep8, i32 2
+ %20 = insertelement <16 x i8*> %19, i8* %next.gep9, i32 3
+ %21 = insertelement <16 x i8*> %20, i8* %next.gep10, i32 4
+ %22 = insertelement <16 x i8*> %21, i8* %next.gep11, i32 5
+ %23 = insertelement <16 x i8*> %22, i8* %next.gep12, i32 6
+ %24 = insertelement <16 x i8*> %23, i8* %next.gep13, i32 7
+ %25 = insertelement <16 x i8*> %24, i8* %next.gep14, i32 8
+ %26 = insertelement <16 x i8*> %25, i8* %next.gep15, i32 9
+ %27 = insertelement <16 x i8*> %26, i8* %next.gep16, i32 10
+ %28 = insertelement <16 x i8*> %27, i8* %next.gep17, i32 11
+ %29 = insertelement <16 x i8*> %28, i8* %next.gep18, i32 12
+ %30 = insertelement <16 x i8*> %29, i8* %next.gep19, i32 13
+ %31 = insertelement <16 x i8*> %30, i8* %next.gep20, i32 14
+ %32 = insertelement <16 x i8*> %31, i8* %next.gep21, i32 15
+ %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %exitcount.ptrcnt.to.int)
+ %33 = ptrtoint <16 x i8*> %32 to <16 x i32>
+ %34 = trunc <16 x i32> %33 to <16 x i8>
+ %35 = bitcast i8* %next.gep to <16 x i8>*
+ call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %34, <16 x i8>* %35, i32 1, <16 x i1> %active.lane.mask)
+ %index.next = add i32 %index, 16
+ %36 = icmp eq i32 %index.next, %n.vec
+ br i1 %36, label %while.end, label %vector.body
+
+while.end: ; preds = %vector.body, %entry
+ ret i32 undef
+}
+
+declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
+declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>)
More information about the llvm-branch-commits
mailing list