[llvm] r231632 - [AArch64] Enable partial & runtime unrolling on cortex-a57

Kevin Qin Kevin.Qin at arm.com
Sun Mar 8 23:14:28 PDT 2015


Author: kevinqin
Date: Mon Mar  9 01:14:28 2015
New Revision: 231632

URL: http://llvm.org/viewvc/llvm-project?rev=231632&view=rev
Log:
[AArch64] Enable partial & runtime unrolling on cortex-a57

For inner one of nested loops, it is more likely to be a hot loop,
and the runtime check can be promoted out from patch 0001, so the
overhead is less, we can try a doubled threshold to unroll more loops.

Added:
    llvm/trunk/test/Transforms/LoopUnroll/AArch64/
    llvm/trunk/test/Transforms/LoopUnroll/AArch64/lit.local.cfg
    llvm/trunk/test/Transforms/LoopUnroll/AArch64/partial.ll
    llvm/trunk/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp?rev=231632&r1=231631&r2=231632&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp Mon Mar  9 01:14:28 2015
@@ -10,6 +10,7 @@
 #include "AArch64TargetTransformInfo.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/CostTable.h"
@@ -426,6 +427,15 @@ unsigned AArch64TTIImpl::getMaxInterleav
 
 void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
                                              TTI::UnrollingPreferences &UP) {
+  // Enable partial unrolling and runtime unrolling.
+  BaseT::getUnrollingPreferences(L, UP);
+
+  // For inner loop, it is more likely to be a hot one, and the runtime check
+  // can be promoted out from LICM pass, so the overhead is less, let's try
+  // a larger threshold to unroll more loops.
+  if (L->getLoopDepth() > 1)
+    UP.PartialThreshold *= 2;
+
   // Disable partial & runtime unrolling on -Os.
   UP.PartialOptSizeThreshold = 0;
 }

Added: llvm/trunk/test/Transforms/LoopUnroll/AArch64/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/AArch64/lit.local.cfg?rev=231632&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/AArch64/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/AArch64/lit.local.cfg Mon Mar  9 01:14:28 2015
@@ -0,0 +1,3 @@
+if not 'AArch64' in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/LoopUnroll/AArch64/partial.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/AArch64/partial.ll?rev=231632&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/AArch64/partial.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/AArch64/partial.ll Mon Mar  9 01:14:28 2015
@@ -0,0 +1,76 @@
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s
+
+; Partial unroll 8 times for this loop.
+define void @unroll1() nounwind {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %inc = add i32 %iv, 1
+  %exitcnd = icmp uge i32 %inc, 1024
+  br i1 %exitcnd, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; CHECK:      add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp
+
+; Partial unroll 16 times for this loop.
+define void @unroll2() nounwind {
+entry:
+  br label %loop1
+
+loop1:
+  %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ]
+  br label %loop2.header
+
+loop2.header:
+  br label %loop2
+
+loop2:
+  %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
+  %inc2 = add i32 %iv2, 1
+  %exitcnd2 = icmp uge i32 %inc2, 1024
+  br i1 %exitcnd2, label %exit2, label %loop2
+
+exit2:
+  br label %loop1.latch
+
+loop1.latch:
+  %inc1 = add i32 %iv1, 1
+  %exitcnd1 = icmp uge i32 %inc1, 1024
+  br i1 %exitcnd2, label %exit, label %loop1
+
+exit:
+  ret void
+}
+
+
+
+; CHECK:      add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp

Added: llvm/trunk/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll?rev=231632&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll Mon Mar  9 01:14:28 2015
@@ -0,0 +1,33 @@
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s
+
+; Tests for unrolling loops with run-time trip counts
+
+; CHECK:  %xtraiter = and i32 %n
+; CHECK:  %lcmp.mod = icmp ne i32 %xtraiter, 0
+; CHECK:  br i1 %lcmp.mod, label %for.body.prol, label %for.body.preheader.split
+
+; CHECK:  for.body.prol:
+; CHECK:  for.body:
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+entry:
+  %cmp1 = icmp eq i32 %n, 0
+  br i1 %cmp1, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}
+
+





More information about the llvm-commits mailing list