[PATCH] D97947: [AArch64] Force runtime unrolling for in-order scheduling models

Mon Apr 19 08:01:14 PDT 2021

NickGuy updated this revision to Diff 338524.
NickGuy edited the summary of this revision.
NickGuy added a comment.

> Can you remind me about the impact of this? I.e., if -mcpu is omitted, we default to generic which is classified, or is using, an in-order schedmodel description on Android?

Yep, that's right, though I don't think this is specific to Android; it applies to AArch64 in general.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97947/new/

https://reviews.llvm.org/D97947

Files:
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
  llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll


Index: llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll
===================================================================

--- llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll
+++ llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-required-for-vectorization.ll
@@ -104,7 +104,7 @@
 for.cond:                                         ; preds = %for.inc, %entry
   %1 = load i32, i32* %i, align 4
   %cmp = icmp ult i32 %1, 20000
-  br i1 %cmp, label %for.body, label %for.cond.cleanup
+  br i1 %cmp, label %for.body, label %for.cond.cleanup, !llvm.loop !0
 
 for.cond.cleanup:                                 ; preds = %for.cond
   %2 = bitcast i32* %i to i8*
@@ -138,3 +138,6 @@
 declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
 
 declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.unroll.disable", i32 1}
\ No newline at end of file
Index: llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
===================================================================
--- llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
+++ llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
@@ -1,5 +1,7 @@
 ; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=true  | FileCheck %s -check-prefix=EPILOG
 ; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-r82 -unroll-runtime-epilog=true  | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-r82 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
 
 ; Tests for unrolling loops with run-time trip counts
 
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -18,6 +18,7 @@
 #include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
 #include <algorithm>
 using namespace llvm;
 using namespace llvm::PatternMatch;
@@ -1105,6 +1106,36 @@
   if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
       EnableFalkorHWPFUnrollFix)
     getFalkorUnrollingPreferences(L, SE, UP);
+
+  // Scan the loop: don't unroll loops with calls as this could prevent
+  // inlining.
+  for (auto *BB : L->getBlocks()) {
+    for (auto &I : *BB) {
+      // Don't unroll vectorised loops.
+      if (I.getType()->isVectorTy())
+        return;
+
+      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+        if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
+          if (!isLoweredToCall(F))
+            continue;
+        }
+        return;
+      }
+    }
+  }
+
+  // Force runtime unrolling for in-order models
+  // If -mcpu is omitted, then an in-order model will be used, meaning this
+  // affects the "default" compilation configuration for AArch64.
+  // TODO: This might be beneficial for out-of-order models too
+  if (!ST->getSchedModel().isOutOfOrder()) {
+    UP.Runtime = true;
+    UP.Partial = true;
+    UP.UpperBound = true;
+    UP.UnrollRemainder = true;
+    UP.DefaultUnrollRuntimeCount = 4;
+  }
 }
 
 void AArch64TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D97947.338524.patch
Type: text/x-patch
Size: 3519 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210419/377d8031/attachment.bin>