[PATCH] D97947: [AArch64] Force runtime unrolling for in-order scheduling models
Nicholas Guy via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 19 08:01:14 PDT 2021
NickGuy updated this revision to Diff 338524.
NickGuy edited the summary of this revision.
NickGuy added a comment.
> Can you remind me about the impact of this? I.e., if -mcpu is omitted, we default to generic which is classified, or is using, an in-order schedmodel description on Android?
Yep, that's right, though I don't think it is specific to Android; it applies to AArch64 in general.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D97947/new/
https://reviews.llvm.org/D97947
Files:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll
Index: llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll
+++ llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll
@@ -104,7 +104,7 @@
for.cond: ; preds = %for.inc, %entry
%1 = load i32, i32* %i, align 4
%cmp = icmp ult i32 %1, 20000
- br i1 %cmp, label %for.body, label %for.cond.cleanup
+ br i1 %cmp, label %for.body, label %for.cond.cleanup, !llvm.loop !0
for.cond.cleanup: ; preds = %for.cond
%2 = bitcast i32* %i to i8*
@@ -138,3 +138,6 @@
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.unroll.disable", i32 1}
\ No newline at end of file
Index: llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
===================================================================
--- llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
+++ llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
@@ -1,5 +1,7 @@
; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-r82 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-r82 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
; Tests for unrolling loops with run-time trip counts
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -1105,6 +1106,36 @@
if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
EnableFalkorHWPFUnrollFix)
getFalkorUnrollingPreferences(L, SE, UP);
+
+ // Scan the loop: don't unroll loops with calls as this could prevent
+ // inlining.
+ for (auto *BB : L->getBlocks()) {
+ for (auto &I : *BB) {
+ // Don't unroll vectorised loops.
+ if (I.getType()->isVectorTy())
+ return;
+
+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
+ if (!isLoweredToCall(F))
+ continue;
+ }
+ return;
+ }
+ }
+ }
+
+ // Force runtime unrolling for in-order models
+ // If mcpu is omitted, then an in-order model will be used, meaning this
+ // affects the "default" compilation configuration for AArch64.
+ // TODO This might be beneficial for out-of-order models too
+ if (!ST->getSchedModel().isOutOfOrder()) {
+ UP.Runtime = true;
+ UP.Partial = true;
+ UP.UpperBound = true;
+ UP.UnrollRemainder = true;
+ UP.DefaultUnrollRuntimeCount = 4;
+ }
}
void AArch64TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D97947.338524.patch
Type: text/x-patch
Size: 3519 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210419/377d8031/attachment.bin>
More information about the llvm-commits
mailing list