[PATCH] D20638: [LIR] Fix mis-compilation with unwinding

Wed May 25 14:13:09 PDT 2016

majnemer requested changes to this revision.
majnemer added a reviewer: majnemer.
majnemer added a comment.
This revision now requires changes to proceed.

I'm pretty sure this is wrong.  It only considers calls in the same basic block as the store.  However, the store could post-dominate a call to a function which never returns.

Instead, I suggest using `llvm::isGuaranteedToTransferExecutionToSuccessor` instead of the current, wrong, heuristic of "does this BB dominate all the exit blocks".

Due to the nature of `llvm::isGuaranteedToTransferExecutionToSuccessor`, I suggest that it's logic be factored out so that you can cache some of the intermediate results.

================
Comment at: test/Transforms/LoopIdiom/unwind.ll:4-50
@@ +3,49 @@
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at _ZTIi = external constant i8*
+ at ff = global void ()* @f, align 8
+ at gg = global void (i8*, i32)* @g, align 8
+
+define void @f() {
+entry:
+  %exception = tail call i8* @__cxa_allocate_exception(i64 4) #5
+  %0 = bitcast i8* %exception to i32*
+  store i32 1, i32* %0, align 16
+  tail call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null)
+  unreachable
+}
+
+declare i8* @__cxa_allocate_exception(i64)
+
+declare void @__cxa_throw(i8*, i8*, i8*)
+
+define void @g(i8* noalias nocapture %base, i32 %size) {
+entry:
+  %cmp4 = icmp eq i32 %size, 0
+  br i1 %cmp4, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %0 = load void ()*, void ()** @ff, align 8
+  tail call void %0()
+  %arrayidx = getelementptr inbounds i8, i8* %base, i64 %indvars.iv
+  store i8 0, i8* %arrayidx, align 1
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %size
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+; CHECK-LABEL: @g(
+; CHECK-NOT: llvm.memset
+}
+
----------------
There is no need to provide the definition of `f`. You could use my reduced testcase, I'm pretty sure it is minimal.

Repository:
  rL LLVM

http://reviews.llvm.org/D20638