[PATCH] [AArch64] Implement getMaximumUnrollFactor()

Wed Apr 16 18:58:09 PDT 2014

  >
  >   This looks fine to me.
  >
  >   Tim.
  >
  > http://reviews.llvm.org/D3320
  >
  >
  >

  See below for rebased patch.

  Thanks,
  Zhaoshi

  Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by
  The Linux Foundation

  From 8faf5715b31c9f19ea0986c08ec4911df1231e15 Mon Sep 17 00:00:00 2001
  From: Zhaoshi Zheng <zhaoshiz at codeaurora.org>
  Date: Mon, 31 Mar 2014 17:10:57 -0700
  Subject: [PATCH 2/3] [AArch64] Implement getMaximumUnrollFactor()

  ---
   lib/Target/AArch64/AArch64TargetTransformInfo.cpp  |  1 +
   .../LoopVectorize/AArch64/aarch64-unroll.ll        | 42 ++++++++++++++++++++++
   2 files changed, 43 insertions(+)
   create mode 100644 test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll

  diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  index e2a1647..b2fb41a 100644
  --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  @@ -92,6 +92,7 @@ public:
       return 64;
     }

  +  unsigned getMaximumUnrollFactor() const override { return 2; }
     /// @}
   };

  diff --git a/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll b/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll
  new file mode 100644
  index 0000000..9962c3d
  --- /dev/null
  +++ b/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll
  @@ -0,0 +1,42 @@
  +; RUN: opt < %s -loop-vectorize -mtriple=aarch64-none-linux-gnu -mattr=+neon -S | FileCheck %s
  +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
  +
  +; Function Attrs: nounwind
  +define i32* @array_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* %c, i32 %size) {
  +;CHECK-LABEL: array_add
  +;CHECK: load <4 x i32>
  +;CHECK: load <4 x i32>
  +;CHECK: load <4 x i32>
  +;CHECK: load <4 x i32>
  +;CHECK: add nsw <4 x i32>
  +;CHECK: add nsw <4 x i32>
  +;CHECK: store <4 x i32>
  +;CHECK: store <4 x i32>
  +;CHECK: ret
  +entry:
  +  %cmp10 = icmp sgt i32 %size, 0
  +  br i1 %cmp10, label %for.body.preheader, label %for.end
  +
  +for.body.preheader:                               ; preds = %entry
  +  br label %for.body
  +
  +for.body:                                         ; preds = %for.body.preheader, %for.body
  +  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  +  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
  +  %0 = load i32* %arrayidx, align 4
  +  %arrayidx2 = getelementptr inbounds i32* %b, i64 %indvars.iv
  +  %1 = load i32* %arrayidx2, align 4
  +  %add = add nsw i32 %1, %0
  +  %arrayidx4 = getelementptr inbounds i32* %c, i64 %indvars.iv
  +  store i32 %add, i32* %arrayidx4, align 4
  +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  +  %exitcond = icmp eq i32 %lftr.wideiv, %size
  +  br i1 %exitcond, label %for.end.loopexit, label %for.body
  +
  +for.end.loopexit:                                 ; preds = %for.body
  +  br label %for.end
  +
  +for.end:                                          ; preds = %for.end.loopexit, %entry
  +  ret i32* %c
  +}

  - {F54322, layout=link}

http://reviews.llvm.org/D3320