[PATCH] D32674: [LoopIdiom] PR32811 check for safety while expanding

Aditya Kumar via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 29 17:39:57 PDT 2017


hiraditya created this revision.
Herald added a subscriber: mzolotukhin.

Loop Idiom recognition was generating a memset in a case that would result in a division operation being generated at an unsafe location.


https://reviews.llvm.org/D32674

Files:
  lib/Transforms/Scalar/LoopIdiomRecognize.cpp
  test/Transforms/LoopIdiom/unsafe.ll


Index: test/Transforms/LoopIdiom/unsafe.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopIdiom/unsafe.ll
@@ -0,0 +1,70 @@
+; RUN: opt -S < %s -loop-idiom | FileCheck %s
+; CHECK-NOT: memset
+; check that memset is not generated (for assignment in for.body5) because that will result
+; in udiv hoisted out of the loop by the SCEV Expander
+; TODO: ideally we should be able to generate memset
+; if SCEV expander is taught to generate the dependencies
+; at the right point.
+
+@e = common local_unnamed_addr global i32 0, align 4
+@d = common local_unnamed_addr global i32 0, align 4
+@a = common local_unnamed_addr global i32 0, align 4
+@b = common local_unnamed_addr global i32 0, align 4
+@f = common local_unnamed_addr global i32 0, align 4
+@c = common local_unnamed_addr global [1 x i8] zeroinitializer, align 1
+define i32 @main() local_unnamed_addr #0 {
+entry:
+  %.pr = load i32, i32* @e, align 4
+  %cmp16 = icmp slt i32 %.pr, 1
+  br i1 %cmp16, label %for.cond1thread-pre-split.lr.ph, label %for.end11
+for.cond1thread-pre-split.lr.ph:                  ; preds = %entry
+  %0 = load i32, i32* @a, align 4
+  %1 = load i32, i32* @b, align 4
+  %e.promoted = load i32, i32* @e, align 4
+  br label %for.cond1thread-pre-split
+for.cond1thread-pre-split:                        ; preds = %for.cond1thread-pre-split.lr.ph, %for.inc9
+  %inc1017 = phi i32 [ %e.promoted, %for.cond1thread-pre-split.lr.ph ], [ %inc10, %for.inc9 ]
+  %.pr12 = load i32, i32* @d, align 4
+  %tobool15 = icmp eq i32 %.pr12, 0
+  br i1 %tobool15, label %for.inc9, label %for.body2.lr.ph
+for.body2.lr.ph:                                  ; preds = %for.cond1thread-pre-split
+  %div = udiv i32 %0, %1
+  %2 = sext i32 %div to i64
+  br label %for.body2
+for.body2:                                        ; preds = %for.body2.lr.ph, %for.inc6
+  %3 = phi i32 [ %.pr12, %for.body2.lr.ph ], [ %inc7, %for.inc6 ]
+  %.pr13 = load i32, i32* @f, align 4
+  %tobool414 = icmp eq i32 %.pr13, 0
+  br i1 %tobool414, label %for.inc6, label %for.body5.preheader
+for.body5.preheader:                              ; preds = %for.body2
+  %4 = sext i32 %.pr13 to i64
+  %5 = sub i32 -1, %.pr13
+  %6 = zext i32 %5 to i64
+  br label %for.body5
+for.body5:                                        ; preds = %for.body5.preheader, %for.body5
+  %indvars.iv = phi i64 [ %4, %for.body5.preheader ], [ %indvars.iv.next, %for.body5 ]
+  %7 = add nsw i64 %2, %indvars.iv
+  %arrayidx = getelementptr inbounds [1 x i8], [1 x i8]* @c, i64 0, i64 %7
+  store i8 0, i8* %arrayidx, align 1
+  %indvars.iv.next = add nsw i64 %indvars.iv, 1
+  %8 = trunc i64 %indvars.iv.next to i32
+  %tobool4 = icmp eq i32 %8, 0
+  br i1 %tobool4, label %for.cond3.for.inc6_crit_edge, label %for.body5
+for.cond3.for.inc6_crit_edge:                     ; preds = %for.body5
+  br label %for.inc6
+for.inc6:                                         ; preds = %for.cond3.for.inc6_crit_edge, %for.body2
+  %inc7 = add nsw i32 %3, 1
+  %tobool = icmp eq i32 %inc7, 0
+  br i1 %tobool, label %for.cond1.for.inc9_crit_edge, label %for.body2
+for.cond1.for.inc9_crit_edge:                     ; preds = %for.inc6
+  br label %for.inc9
+for.inc9:                                         ; preds = %for.cond1.for.inc9_crit_edge, %for.cond1thread-pre-split
+  %inc10 = add nsw i32 %inc1017, 1
+  %cmp = icmp slt i32 %inc1017, 0
+  br i1 %cmp, label %for.cond1thread-pre-split, label %for.cond.for.end11_crit_edge
+for.cond.for.end11_crit_edge:                     ; preds = %for.inc9
+  br label %for.end11
+for.end11:                                        ; preds = %for.cond.for.end11_crit_edge, %entry
+  ret i32 0
+}
+
Index: lib/Transforms/Scalar/LoopIdiomRecognize.cpp
===================================================================
--- lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -783,6 +783,11 @@
   if (NegStride)
     Start = getStartForNegStride(Start, BECount, IntPtr, StoreSize, SE);
 
+  // TODO: ideally we should still be able to generate memset if SCEV expander
+  // is taught to generate the dependencies at the latest point.
+  if (!isSafeToExpand(Start, *SE))
+    return false;
+
   // Okay, we have a strided store "p[i]" of a splattable value.  We can turn
   // this into a memset in the loop preheader now if we want.  However, this
   // would be unsafe to do if there is anything else in the loop that may read


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D32674.97200.patch
Type: text/x-patch
Size: 4489 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170430/535afb0a/attachment.bin>


More information about the llvm-commits mailing list