[PATCH] D23686: Improve the LoopAccessAnalysis to handle the different types in the same size

Jin Lin via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 18 11:46:37 PDT 2016


jinlin created this revision.
jinlin added reviewers: DavidKreitzer, hfinkel.
jinlin added a subscriber: llvm-commits.
jinlin set the repository for this revision to rL LLVM.
Herald added a subscriber: mzolotukhin.

The following loop fails to be vectorized because the load of c[i] is cast to i64 while the store to c[i] is a double. The loop access analysis gives up because the two accesses have different types.
 
Since these two memory operations have the same size, I believe the loop access analysis should return a forward dependence, and thus the loop can be vectorized.
 
/* Minimal reproducer: each iteration reads c[i], copies it into b[i], and
 * then overwrites the same c[i] element with 0.0. */
#define N 1000
/* Note: a is declared here but is not referenced by foo(). */
double a[N], b[N],c[N];
void foo() {
for (int i=0;i<N;i++) {
/* Read of c[i] ... */
b[i] =c[i];
/* ... followed by a write to the same element in the same iteration. */
c[i]=0.0;
}
}
 
for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds [1000 x double], [1000 x double]* @c, i64 0, i64 %indvars.iv
  %0 = bitcast double* %arrayidx to i64*
  %1 = load i64, i64* %0, align 8, !tbaa !1
  %arrayidx2 = getelementptr inbounds [1000 x double], [1000 x double]* @b, i64 0, i64 %indvars.iv
  %2 = bitcast double* %arrayidx2 to i64*
  store i64 %1, i64* %2, align 8, !tbaa !1
  store double 0.000000e+00, double* %arrayidx, align 8, !tbaa !1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
 
LAA: Found a loop in foo: loop.17
LAA: Processing memory accesses...
  AST: Alias Set Tracker: 2 alias sets for 3 pointer values.
  AliasSet[0x9508b80, 1] must alias, No access Pointers: (<4 x i64>* %1, 18446744073709551615)
  AliasSet[0x95f8a70, 2] must alias, No access Pointers: (<4 x double>* %2, 18446744073709551615), (<4 x i64>* %0, 18446744073709551615)
 
LAA:   Accesses(3):
          %1 = bitcast double* %arrayIdx11 to <4 x i64>* (write)
          %2 = bitcast double* %arrayIdx to <4 x double>* (write)
          %0 = bitcast double* %arrayIdx to <4 x i64>* (read-only)
Underlying objects for pointer   %1 = bitcast double* %arrayIdx11 to <4 x i64>*
  @b = common local_unnamed_addr global [1000 x double] zeroinitializer, align 16
Underlying objects for pointer   %2 = bitcast double* %arrayIdx to <4 x double>*
  @c = common local_unnamed_addr global [1000 x double] zeroinitializer, align 16
Underlying objects for pointer   %0 = bitcast double* %arrayIdx to <4 x i64>*
  @c = common local_unnamed_addr global [1000 x double] zeroinitializer, align 16
LAA: Found a runtime check ptr:  %1 = bitcast double* %arrayIdx11 to <4 x i64>*
LAA: Found a runtime check ptr:  %2 = bitcast double* %arrayIdx to <4 x double>*
LAA: Found a runtime check ptr:  %0 = bitcast double* %arrayIdx to <4 x i64>*
LAA: We need to do 0 pointer comparisons.
LAA: We can perform a memory runtime check if needed.
LAA: Checking memory dependencies
LAA: Src Scev: {@c,+,32}<nsw><%loop.17>Sink Scev: {@c,+,32}<nsw><%loop.17>(Induction step: 1)
LAA: Distance for   %gepload = load <4 x i64>, <4 x i64>* %0, align 16, !tbaa !1 to   store <4 x double> zeroinitializer, <4 x double>* %2, align 16, !tbaa !1: 0
LAA: Zero dependence difference but different types
Total Dependences: 1
LAA: unsafe dependent memory operations in loop


Repository:
  rL LLVM

https://reviews.llvm.org/D23686

Files:
  lib/Analysis/LoopAccessAnalysis.cpp
  test/Analysis/LoopAccessAnalysis/same_size_different_type.ll

Index: lib/Analysis/LoopAccessAnalysis.cpp
===================================================================
--- lib/Analysis/LoopAccessAnalysis.cpp
+++ lib/Analysis/LoopAccessAnalysis.cpp
@@ -1268,7 +1268,10 @@
   // Write to the same location with the same size.
   // Could be improved to assert type sizes are the same (i32 == float, etc).
   if (Val == 0) {
-    if (ATy == BTy)
+    if (ATy == BTy ||
+       // Handle the case of different types of the same size
+       (ATy->getPrimitiveSizeInBits() == BTy->getPrimitiveSizeInBits() &&
+        ATy->getPrimitiveSizeInBits() > 0))
       return Dependence::Forward;
     DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
     return Dependence::Unknown;
Index: test/Analysis/LoopAccessAnalysis/same_size_different_type.ll
===================================================================
--- test/Analysis/LoopAccessAnalysis/same_size_different_type.ll
+++ test/Analysis/LoopAccessAnalysis/same_size_different_type.ll
@@ -0,0 +1,41 @@
+; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
+;
+; We expect there to be only one anti-dep in this loop.
+;
+;  for (int i=0;i<N;i++) {
+;    b[i] = c[i];
+;    c[i] = 0.0;
+;  }
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@c = common local_unnamed_addr global [1000 x double] zeroinitializer, align 16
+@b = common local_unnamed_addr global [1000 x double] zeroinitializer, align 16
+@a = common local_unnamed_addr global [1000 x double] zeroinitializer, align 16
+
+; Function Attrs: norecurse nounwind uwtable
+define void @foo() local_unnamed_addr {
+; CHECK: Dependences:
+; CHECK-NEXT:   Forward:
+; CHECK-NEXT:      %1 = load i64, i64* %0, align 8 ->
+; CHECK-NEXT:      store double 0.000000e+00, double* %arrayidx, align 8
+; CHECK-NOT:    Forward:
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds [1000 x double], [1000 x double]* @c, i64 0, i64 %indvars.iv
+  %0 = bitcast double* %arrayidx to i64*
+  %1 = load i64, i64* %0, align 8
+  %arrayidx2 = getelementptr inbounds [1000 x double], [1000 x double]* @b, i64 0, i64 %indvars.iv
+  %2 = bitcast double* %arrayidx2 to i64*
+  store i64 %1, i64* %2, align 8
+  store double 0.000000e+00, double* %arrayidx, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D23686.68582.patch
Type: text/x-patch
Size: 2658 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160818/a1248368/attachment.bin>


More information about the llvm-commits mailing list