[llvm] f24175f - Autogen some tests for ease of update
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 22 11:06:39 PDT 2021
Author: Philip Reames
Date: 2021-03-22T11:06:29-07:00
New Revision: f24175fcb9cebf03166658841a88d1a919274623
URL: https://github.com/llvm/llvm-project/commit/f24175fcb9cebf03166658841a88d1a919274623
DIFF: https://github.com/llvm/llvm-project/commit/f24175fcb9cebf03166658841a88d1a919274623.diff
LOG: Autogen some tests for ease of update
Added:
Modified:
llvm/test/Analysis/ValueTracking/deref-bitcast-of-gep.ll
llvm/test/CodeGen/X86/licm-dominance.ll
llvm/test/Transforms/GVN/PRE/load-pre-licm.ll
llvm/test/Transforms/GVN/PRE/pre-load.ll
llvm/test/Transforms/InstCombine/call-guard.ll
llvm/test/Transforms/JumpThreading/guards.ll
llvm/test/Transforms/LICM/hoist-deref-load.ll
llvm/test/Transforms/TailCallElim/reorder_load.ll
Removed:
################################################################################
diff --git a/llvm/test/Analysis/ValueTracking/deref-bitcast-of-gep.ll b/llvm/test/Analysis/ValueTracking/deref-bitcast-of-gep.ll
index 18ee280dbe3d..23b4614fca81 100644
--- a/llvm/test/Analysis/ValueTracking/deref-bitcast-of-gep.ll
+++ b/llvm/test/Analysis/ValueTracking/deref-bitcast-of-gep.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -licm < %s | FileCheck %s
; Note: the !invariant.load is there solely to let us call @use()
@@ -10,12 +11,16 @@ declare void @use(i32)
define void @f_0(i8* align 4 dereferenceable(1024) %ptr) {
; CHECK-LABEL: @f_0(
-; CHECK: entry:
-; CHECK: %val = load i32, i32* %ptr.i32
-; CHECK: br label %loop
-; CHECK: loop:
-; CHECK: call void @use(i32 0)
-; CHECK-NEXT: call void @use(i32 %val)
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[PTR_GEP:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i32 32
+; CHECK-NEXT: [[PTR_I32:%.*]] = bitcast i8* [[PTR_GEP]] to i32*
+; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[PTR_I32]], align 4
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: call void @use(i32 0)
+; CHECK-NEXT: call void @use(i32 [[VAL]])
+; CHECK-NEXT: br label [[LOOP]]
+;
entry:
@@ -32,18 +37,27 @@ loop:
define void @f_1(i8* align 4 dereferenceable_or_null(1024) %ptr) {
; CHECK-LABEL: @f_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[PTR_GEP:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i32 32
+; CHECK-NEXT: [[PTR_I32:%.*]] = bitcast i8* [[PTR_GEP]] to i32*
+; CHECK-NEXT: [[PTR_IS_NULL:%.*]] = icmp eq i8* [[PTR]], null
+; CHECK-NEXT: br i1 [[PTR_IS_NULL]], label [[LEAVE:%.*]], label [[LOOP_PREHEADER:%.*]]
+; CHECK: loop.preheader:
+; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[PTR_I32]], align 4
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: call void @use(i32 0)
+; CHECK-NEXT: call void @use(i32 [[VAL]])
+; CHECK-NEXT: br label [[LOOP]]
+; CHECK: leave:
+; CHECK-NEXT: ret void
+;
entry:
%ptr.gep = getelementptr i8, i8* %ptr, i32 32
%ptr.i32 = bitcast i8* %ptr.gep to i32*
%ptr_is_null = icmp eq i8* %ptr, null
br i1 %ptr_is_null, label %leave, label %loop
-; CHECK: loop.preheader:
-; CHECK: %val = load i32, i32* %ptr.i32
-; CHECK: br label %loop
-; CHECK: loop:
-; CHECK: call void @use(i32 0)
-; CHECK-NEXT: call void @use(i32 %val)
loop:
call void @use(i32 0)
@@ -57,10 +71,21 @@ leave:
define void @f_2(i8* align 4 dereferenceable_or_null(1024) %ptr) {
; CHECK-LABEL: @f_2(
-; CHECK-NOT: load
-; CHECK: call void @use(i32 0)
-; CHECK-NEXT: %val = load i32, i32* %ptr.i32, align 4, !invariant.load !0
-; CHECK-NEXT: call void @use(i32 %val)
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[PTR_GEP:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i32 30
+; CHECK-NEXT: [[PTR_I32:%.*]] = bitcast i8* [[PTR_GEP]] to i32*
+; CHECK-NEXT: [[PTR_IS_NULL:%.*]] = icmp eq i8* [[PTR]], null
+; CHECK-NEXT: br i1 [[PTR_IS_NULL]], label [[LEAVE:%.*]], label [[LOOP_PREHEADER:%.*]]
+; CHECK: loop.preheader:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: call void @use(i32 0)
+; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[PTR_I32]], align 4, !invariant.load !0
+; CHECK-NEXT: call void @use(i32 [[VAL]])
+; CHECK-NEXT: br label [[LOOP]]
+; CHECK: leave:
+; CHECK-NEXT: ret void
+;
entry:
;; Can't hoist, since the alignment does not work out -- (<4 byte
@@ -83,13 +108,15 @@ leave:
define void @checkLaunder(i8* align 4 dereferenceable(1024) %p) {
; CHECK-LABEL: @checkLaunder(
-; CHECK: entry:
-; CHECK: %l = call i8* @llvm.launder.invariant.group.p0i8(i8* %p)
-; CHECK: %val = load i8, i8* %l
-; CHECK: br label %loop
-; CHECK: loop:
-; CHECK: call void @use(i32 0)
-; CHECK-NEXT: call void @use8(i8 %val)
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* [[P:%.*]])
+; CHECK-NEXT: [[VAL:%.*]] = load i8, i8* [[L]], align 1
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: call void @use(i32 0)
+; CHECK-NEXT: call void @use8(i8 [[VAL]])
+; CHECK-NEXT: br label [[LOOP]]
+;
entry:
%l = call i8* @llvm.launder.invariant.group.p0i8(i8* %p)
diff --git a/llvm/test/CodeGen/X86/licm-dominance.ll b/llvm/test/CodeGen/X86/licm-dominance.ll
index f6f563c9bcb6..0bcf413cdf16 100644
--- a/llvm/test/CodeGen/X86/licm-dominance.ll
+++ b/llvm/test/CodeGen/X86/licm-dominance.ll
@@ -1,24 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update
; RUN: llc -asm-verbose=true < %s | FileCheck %s
; MachineLICM should check dominance before hoisting instructions.
; only the load of a0 is guaranteed to execute, so only it can be hoisted.
-; CHECK: movb (%rdi), [[a0reg:%[a-z0-9]+]]
-; CHECK: ## %for.body.i
-; CHECK: testb [[a0reg]], [[a0reg]]
-; CHECK: ## in Loop:
-; CHECK: cmpb $1, ({{%[a-z0-9]+}})
-; CHECK: cmpb $2, ({{%[a-z0-9]+}})
-; CHECK: cmpb $3, ({{%[a-z0-9]+}})
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.2"
define void @CMSColorWorldCreateParametricData(
- i8* dereferenceable(1) %a0,
- i8* dereferenceable(1) %a1,
- i8* dereferenceable(1) %a2,
- i8* dereferenceable(1) %a3,
- i64 %count) nounwind uwtable optsize ssp readonly {
+; CHECK-LABEL: CMSColorWorldCreateParametricData:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movb (%rdi), %dil
+; CHECK-NEXT: LBB0_1: ## %for.body.i
+; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: testb %dil, %dil
+; CHECK-NEXT: je LBB0_5
+; CHECK-NEXT: ## %bb.2: ## %if.then26.i
+; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: cmpb $1, (%rsi)
+; CHECK-NEXT: jne LBB0_4
+; CHECK-NEXT: ## %bb.3: ## %if.else.i.i
+; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: cmpb $2, (%rdx)
+; CHECK-NEXT: jne LBB0_5
+; CHECK-NEXT: LBB0_4: ## %lor.lhs.false.i.i
+; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: cmpb $3, (%rcx)
+; CHECK-NEXT: jne LBB0_6
+; CHECK-NEXT: LBB0_5: ## %for.inc.i
+; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: incq %rax
+; CHECK-NEXT: cmpq %r8, %rax
+; CHECK-NEXT: jb LBB0_1
+; CHECK-NEXT: LBB0_6: ## %if.end28.i
+; CHECK-NEXT: retq
+ i8* dereferenceable(1) %a0,
+ i8* dereferenceable(1) %a1,
+ i8* dereferenceable(1) %a2,
+ i8* dereferenceable(1) %a3,
+ i64 %count) nounwind uwtable optsize ssp readonly {
entry:
br label %for.body.i
diff --git a/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll b/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll
index 6d16587ed152..d4cbda17b51d 100644
--- a/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll
+++ b/llvm/test/Transforms/GVN/PRE/load-pre-licm.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -basic-aa -gvn < %s | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
target triple = "i386-apple-darwin11.0.0"
@@ -5,14 +6,34 @@ target triple = "i386-apple-darwin11.0.0"
@sortlist = external global [5001 x i32], align 4
define void @Bubble() nounwind noinline {
-; CHECK: entry:
-; CHECK-NEXT: %tmp7.pre = load i32
+; CHECK-LABEL: @Bubble(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP7_PRE:%.*]] = load i32, i32* getelementptr inbounds ([5001 x i32], [5001 x i32]* @sortlist, i32 0, i32 1), align 4
+; CHECK-NEXT: br label [[WHILE_BODY5:%.*]]
+; CHECK: while.body5:
+; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP7_PRE]], [[ENTRY:%.*]] ], [ [[TMP71:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP6:%.*]], [[IF_END]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDVAR]], 2
+; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr [5001 x i32], [5001 x i32]* @sortlist, i32 0, i32 [[TMP5]]
+; CHECK-NEXT: [[TMP6]] = add i32 [[INDVAR]], 1
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr [5001 x i32], [5001 x i32]* @sortlist, i32 0, i32 [[TMP6]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP7]], [[TMP10]]
+; CHECK-NEXT: br i1 [[CMP11]], label [[IF_THEN:%.*]], label [[IF_END]]
+; CHECK: if.then:
+; CHECK-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: store i32 [[TMP7]], i32* [[ARRAYIDX9]], align 4
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[TMP71]] = phi i32 [ [[TMP7]], [[IF_THEN]] ], [ [[TMP10]], [[WHILE_BODY5]] ]
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP6]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY5]]
+; CHECK: while.end.loopexit:
+; CHECK-NEXT: ret void
+;
entry:
br label %while.body5
-; CHECK: while.body5:
-; CHECK: %tmp7 = phi i32
-; CHECK-NOT: %tmp7 = load i32
while.body5:
%indvar = phi i32 [ 0, %entry ], [ %tmp6, %if.end ]
%tmp5 = add i32 %indvar, 2
@@ -24,7 +45,6 @@ while.body5:
%cmp11 = icmp sgt i32 %tmp7, %tmp10
br i1 %cmp11, label %if.then, label %if.end
-; CHECK: if.then:
if.then:
store i32 %tmp10, i32* %arrayidx, align 4
store i32 %tmp7, i32* %arrayidx9, align 4
@@ -43,14 +63,18 @@ declare void @clobber()
; This is a classic LICM case
define i32 @test1(i1 %cnd, i32* %p) {
-; CHECK-LABEL: @test1
-entry:
-; CHECK-LABEL: entry
-; CHECK-NEXT: %v1.pre = load i32, i32* %p
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V1_PRE:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT: br label [[HEADER:%.*]]
+; CHECK: header:
+; CHECK-NEXT: call void @hold(i32 [[V1_PRE]])
+; CHECK-NEXT: br label [[HEADER]]
+;
+entry:
br label %header
header:
-; CHECK-LABEL: header
%v1 = load i32, i32* %p
call void @hold(i32 %v1)
br label %header
@@ -62,14 +86,24 @@ header:
; the value is fully available across the backedge, we only need to establish
; anticipation for the preheader block (which is trivial in this case.)
define i32 @test2(i1 %cnd, i32* %p) {
-; CHECK-LABEL: @test2
-entry:
-; CHECK-LABEL: entry
-; CHECK-NEXT: %v1.pre = load i32, i32* %p
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V1_PRE:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT: br label [[HEADER:%.*]]
+; CHECK: header:
+; CHECK-NEXT: call void @hold(i32 [[V1_PRE]])
+; CHECK-NEXT: br i1 [[CND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[MERGE:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br label [[MERGE]]
+; CHECK: merge:
+; CHECK-NEXT: br label [[HEADER]]
+;
+entry:
br label %header
header:
-; CHECK-LABEL: header
%v1 = load i32, i32* %p
call void @hold(i32 %v1)
br i1 %cnd, label %bb1, label %bb2
@@ -90,9 +124,21 @@ merge:
; case could be solved through either a backwards anticipation walk or
; use of the "safe to speculate" status (if we annotate the param)
define i32 @test3(i1 %cnd, i32* %p) {
-entry:
-; CHECK-LABEL: @test3
-; CHECK-LABEL: entry
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[HEADER:%.*]]
+; CHECK: header:
+; CHECK-NEXT: br i1 [[CND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[MERGE:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br label [[MERGE]]
+; CHECK: merge:
+; CHECK-NEXT: [[V1:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT: call void @hold(i32 [[V1]])
+; CHECK-NEXT: br label [[HEADER]]
+;
+entry:
br label %header
header:
@@ -105,8 +151,6 @@ bb2:
br label %merge
merge:
-; CHECK-LABEL: merge
-; CHECK: load i32, i32* %p
%v1 = load i32, i32* %p
call void @hold(i32 %v1)
br label %header
@@ -115,15 +159,28 @@ merge:
; Highlight that we can PRE into a latch block when there are multiple
; latches only one of which clobbers an otherwise invariant value.
define i32 @test4(i1 %cnd, i32* %p) {
-; CHECK-LABEL: @test4
-entry:
-; CHECK-LABEL: entry
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V1:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT: call void @hold(i32 [[V1]])
+; CHECK-NEXT: br label [[HEADER:%.*]]
+; CHECK: header:
+; CHECK-NEXT: [[V2:%.*]] = phi i32 [ [[V2_PRE:%.*]], [[BB2:%.*]] ], [ [[V2]], [[BB1:%.*]] ], [ [[V1]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: call void @hold(i32 [[V2]])
+; CHECK-NEXT: br i1 [[CND:%.*]], label [[BB1]], label [[BB2]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[HEADER]]
+; CHECK: bb2:
+; CHECK-NEXT: call void @clobber()
+; CHECK-NEXT: [[V2_PRE]] = load i32, i32* [[P]], align 4
+; CHECK-NEXT: br label [[HEADER]]
+;
+entry:
%v1 = load i32, i32* %p
call void @hold(i32 %v1)
br label %header
header:
-; CHECK-LABEL: header
%v2 = load i32, i32* %p
call void @hold(i32 %v2)
br i1 %cnd, label %bb1, label %bb2
@@ -132,10 +189,6 @@ bb1:
br label %header
bb2:
-; CHECK-LABEL: bb2
-; CHECK: call void @clobber()
-; CHECK-NEXT: %v2.pre = load i32, i32* %p
-; CHECK-NEXT: br label %header
call void @clobber()
br label %header
@@ -145,15 +198,31 @@ bb2:
; even in loop simplify form (through multiple applications of the same
; transformation).
define i32 @test5(i1 %cnd, i32* %p) {
-; CHECK-LABEL: @test5
-entry:
-; CHECK-LABEL: entry
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V1:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT: call void @hold(i32 [[V1]])
+; CHECK-NEXT: br label [[HEADER:%.*]]
+; CHECK: header:
+; CHECK-NEXT: [[V2_PRE2:%.*]] = phi i32 [ [[V2_PRE:%.*]], [[MERGE:%.*]] ], [ [[V1]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: call void @hold(i32 [[V2_PRE2]])
+; CHECK-NEXT: br i1 [[CND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[MERGE]]
+; CHECK: bb2:
+; CHECK-NEXT: call void @clobber()
+; CHECK-NEXT: [[V2_PRE_PRE:%.*]] = load i32, i32* [[P]], align 4
+; CHECK-NEXT: br label [[MERGE]]
+; CHECK: merge:
+; CHECK-NEXT: [[V2_PRE]] = phi i32 [ [[V2_PRE_PRE]], [[BB2]] ], [ [[V2_PRE2]], [[BB1]] ]
+; CHECK-NEXT: br label [[HEADER]]
+;
+entry:
%v1 = load i32, i32* %p
call void @hold(i32 %v1)
br label %header
header:
-; CHECK-LABEL: header
%v2 = load i32, i32* %p
call void @hold(i32 %v2)
br i1 %cnd, label %bb1, label %bb2
@@ -162,10 +231,6 @@ bb1:
br label %merge
bb2:
-; CHECK-LABEL: bb2
-; CHECK: call void @clobber()
-; CHECK-NEXT: %v2.pre.pre = load i32, i32* %p
-; CHECK-NEXT: br label %merge
call void @clobber()
br label %merge
@@ -179,13 +244,19 @@ declare void @llvm.experimental.guard(i1 %cnd, ...)
; These two tests highlight speculation safety when we cannot establish
; anticipation (since the original load might not actually execute)
define i32 @test6a(i1 %cnd, i32* %p) {
-entry:
-; CHECK-LABEL: @test6a
+; CHECK-LABEL: @test6a(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[HEADER:%.*]]
+; CHECK: header:
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[CND:%.*]]) [ "deopt"() ]
+; CHECK-NEXT: [[V1:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT: call void @hold(i32 [[V1]])
+; CHECK-NEXT: br label [[HEADER]]
+;
+entry:
br label %header
header:
-; CHECK-LABEL: header
-; CHECK: load i32, i32* %p
call void (i1, ...) @llvm.experimental.guard(i1 %cnd) ["deopt"()]
%v1 = load i32, i32* %p
call void @hold(i32 %v1)
@@ -193,13 +264,19 @@ header:
}
define i32 @test6b(i1 %cnd, i32* dereferenceable(8) align 4 %p) {
-entry:
-; CHECK-LABEL: @test6b
-; CHECK: load i32, i32* %p
+; CHECK-LABEL: @test6b(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V1_PRE:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT: br label [[HEADER:%.*]]
+; CHECK: header:
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[CND:%.*]]) [ "deopt"() ]
+; CHECK-NEXT: call void @hold(i32 [[V1_PRE]])
+; CHECK-NEXT: br label [[HEADER]]
+;
+entry:
br label %header
header:
-; CHECK-LABEL: header
call void (i1, ...) @llvm.experimental.guard(i1 %cnd) ["deopt"()]
%v1 = load i32, i32* %p
call void @hold(i32 %v1)
diff --git a/llvm/test/Transforms/GVN/PRE/pre-load.ll b/llvm/test/Transforms/GVN/PRE/pre-load.ll
index 831086d0f68a..161053d4fd85 100644
--- a/llvm/test/Transforms/GVN/PRE/pre-load.ll
+++ b/llvm/test/Transforms/GVN/PRE/pre-load.ll
@@ -1,16 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -gvn -enable-load-pre -S | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes="gvn<load-pre>" -enable-load-pre=false -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
define i32 @test1(i32* %p, i1 %C) {
; CHECK-LABEL: @test1(
+; CHECK-NEXT: block1:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]]
+; CHECK: block2:
+; CHECK-NEXT: [[PRE_PRE:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT: br label [[BLOCK4:%.*]]
+; CHECK: block3:
+; CHECK-NEXT: store i32 0, i32* [[P]], align 4
+; CHECK-NEXT: br label [[BLOCK4]]
+; CHECK: block4:
+; CHECK-NEXT: [[PRE:%.*]] = phi i32 [ 0, [[BLOCK3]] ], [ [[PRE_PRE]], [[BLOCK2]] ]
+; CHECK-NEXT: ret i32 [[PRE]]
+;
block1:
- br i1 %C, label %block2, label %block3
+ br i1 %C, label %block2, label %block3
block2:
- br label %block4
-; CHECK: block2:
-; CHECK-NEXT: load i32, i32* %p
+ br label %block4
block3:
store i32 0, i32* %p
@@ -19,21 +30,29 @@ block3:
block4:
%PRE = load i32, i32* %p
ret i32 %PRE
-; CHECK: block4:
-; CHECK-NEXT: phi i32
-; CHECK-NEXT: ret i32
}
; This is a simple phi translation case.
define i32 @test2(i32* %p, i32* %q, i1 %C) {
; CHECK-LABEL: @test2(
+; CHECK-NEXT: block1:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]]
+; CHECK: block2:
+; CHECK-NEXT: [[PRE_PRE:%.*]] = load i32, i32* [[Q:%.*]], align 4
+; CHECK-NEXT: br label [[BLOCK4:%.*]]
+; CHECK: block3:
+; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4
+; CHECK-NEXT: br label [[BLOCK4]]
+; CHECK: block4:
+; CHECK-NEXT: [[PRE:%.*]] = phi i32 [ 0, [[BLOCK3]] ], [ [[PRE_PRE]], [[BLOCK2]] ]
+; CHECK-NEXT: [[P2:%.*]] = phi i32* [ [[P]], [[BLOCK3]] ], [ [[Q]], [[BLOCK2]] ]
+; CHECK-NEXT: ret i32 [[PRE]]
+;
block1:
- br i1 %C, label %block2, label %block3
+ br i1 %C, label %block2, label %block3
block2:
- br label %block4
-; CHECK: block2:
-; CHECK-NEXT: load i32, i32* %q
+ br label %block4
block3:
store i32 0, i32* %p
@@ -43,24 +62,35 @@ block4:
%P2 = phi i32* [%p, %block3], [%q, %block2]
%PRE = load i32, i32* %P2
ret i32 %PRE
-; CHECK: block4:
-; CHECK-NEXT: phi i32 [
-; CHECK-NOT: load
-; CHECK: ret i32
}
; This is a PRE case that requires phi translation through a GEP.
define i32 @test3(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK-LABEL: @test3(
+; CHECK-NEXT: block1:
+; CHECK-NEXT: [[B:%.*]] = getelementptr i32, i32* [[Q:%.*]], i32 1
+; CHECK-NEXT: store i32* [[B]], i32** [[HACK:%.*]], align 8
+; CHECK-NEXT: br i1 [[C:%.*]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]]
+; CHECK: block2:
+; CHECK-NEXT: [[PRE_PRE:%.*]] = load i32, i32* [[B]], align 4
+; CHECK-NEXT: br label [[BLOCK4:%.*]]
+; CHECK: block3:
+; CHECK-NEXT: [[A:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 1
+; CHECK-NEXT: store i32 0, i32* [[A]], align 4
+; CHECK-NEXT: br label [[BLOCK4]]
+; CHECK: block4:
+; CHECK-NEXT: [[PRE:%.*]] = phi i32 [ 0, [[BLOCK3]] ], [ [[PRE_PRE]], [[BLOCK2]] ]
+; CHECK-NEXT: [[P2:%.*]] = phi i32* [ [[P]], [[BLOCK3]] ], [ [[Q]], [[BLOCK2]] ]
+; CHECK-NEXT: [[P3:%.*]] = getelementptr i32, i32* [[P2]], i32 1
+; CHECK-NEXT: ret i32 [[PRE]]
+;
block1:
%B = getelementptr i32, i32* %q, i32 1
store i32* %B, i32** %Hack
- br i1 %C, label %block2, label %block3
+ br i1 %C, label %block2, label %block3
block2:
- br label %block4
-; CHECK: block2:
-; CHECK-NEXT: load i32, i32* %B
+ br label %block4
block3:
%A = getelementptr i32, i32* %p, i32 1
@@ -72,24 +102,35 @@ block4:
%P3 = getelementptr i32, i32* %P2, i32 1
%PRE = load i32, i32* %P3
ret i32 %PRE
-; CHECK: block4:
-; CHECK: phi i32 [
-; CHECK-NOT: load
-; CHECK: ret i32
}
;; Here the loaded address is available, but the computation is in 'block3'
;; which does not dominate 'block2'.
define i32 @test4(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK-LABEL: @test4(
+; CHECK-NEXT: block1:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]]
+; CHECK: block2:
+; CHECK-NEXT: [[P3_PHI_TRANS_INSERT:%.*]] = getelementptr i32, i32* [[Q:%.*]], i32 1
+; CHECK-NEXT: [[PRE_PRE:%.*]] = load i32, i32* [[P3_PHI_TRANS_INSERT]], align 4
+; CHECK-NEXT: br label [[BLOCK4:%.*]]
+; CHECK: block3:
+; CHECK-NEXT: [[B:%.*]] = getelementptr i32, i32* [[Q]], i32 1
+; CHECK-NEXT: store i32* [[B]], i32** [[HACK:%.*]], align 8
+; CHECK-NEXT: [[A:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 1
+; CHECK-NEXT: store i32 0, i32* [[A]], align 4
+; CHECK-NEXT: br label [[BLOCK4]]
+; CHECK: block4:
+; CHECK-NEXT: [[PRE:%.*]] = phi i32 [ 0, [[BLOCK3]] ], [ [[PRE_PRE]], [[BLOCK2]] ]
+; CHECK-NEXT: [[P2:%.*]] = phi i32* [ [[P]], [[BLOCK3]] ], [ [[Q]], [[BLOCK2]] ]
+; CHECK-NEXT: [[P3:%.*]] = getelementptr i32, i32* [[P2]], i32 1
+; CHECK-NEXT: ret i32 [[PRE]]
+;
block1:
- br i1 %C, label %block2, label %block3
+ br i1 %C, label %block2, label %block3
block2:
- br label %block4
-; CHECK: block2:
-; CHECK: load i32, i32*
-; CHECK: br label %block4
+ br label %block4
block3:
%B = getelementptr i32, i32* %q, i32 1
@@ -104,10 +145,6 @@ block4:
%P3 = getelementptr i32, i32* %P2, i32 1
%PRE = load i32, i32* %P3
ret i32 %PRE
-; CHECK: block4:
-; CHECK: phi i32 [
-; CHECK-NOT: load
-; CHECK: ret i32
}
;void test5(int N, double *G) {
@@ -118,38 +155,53 @@ block4:
define void @test5(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK-LABEL: @test5(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[TMP1]], label [[BB_NPH:%.*]], label [[RETURN:%.*]]
+; CHECK: bb.nph:
+; CHECK-NEXT: [[TMP:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT: [[DOTPRE:%.*]] = load double, double* [[G:%.*]], align 8
+; CHECK-NEXT: br label [[BB:%.*]]
+; CHECK: bb:
+; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[DOTPRE]], [[BB_NPH]] ], [ [[TMP3:%.*]], [[BB]] ]
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH]] ], [ [[TMP6:%.*]], [[BB]] ]
+; CHECK-NEXT: [[TMP6]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[G]], i64 [[TMP6]]
+; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr double, double* [[G]], i64 [[INDVAR]]
+; CHECK-NEXT: [[TMP3]] = load double, double* [[SCEVGEP]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = fadd double [[TMP2]], [[TMP3]]
+; CHECK-NEXT: store double [[TMP4]], double* [[SCEVGEP7]], align 8
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[TMP6]], [[TMP]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN]], label [[BB]]
+; CHECK: return:
+; CHECK-NEXT: ret void
+;
entry:
- %0 = add i32 %N, -1
- %1 = icmp sgt i32 %0, 0
+ %0 = add i32 %N, -1
+ %1 = icmp sgt i32 %0, 0
br i1 %1, label %bb.nph, label %return
-bb.nph:
- %tmp = zext i32 %0 to i64
+bb.nph:
+ %tmp = zext i32 %0 to i64
br label %bb
-; CHECK: bb.nph:
-; CHECK: load double, double*
-; CHECK: br label %bb
-bb:
+bb:
%indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
- %tmp6 = add i64 %indvar, 1
+ %tmp6 = add i64 %indvar, 1
%scevgep = getelementptr double, double* %G, i64 %tmp6
%scevgep7 = getelementptr double, double* %G, i64 %indvar
%2 = load double, double* %scevgep7, align 8
- %3 = load double, double* %scevgep, align 8
- %4 = fadd double %2, %3
+ %3 = load double, double* %scevgep, align 8
+ %4 = fadd double %2, %3
store double %4, double* %scevgep7, align 8
- %exitcond = icmp eq i64 %tmp6, %tmp
+ %exitcond = icmp eq i64 %tmp6, %tmp
br i1 %exitcond, label %return, label %bb
; Should only be one load in the loop.
-; CHECK: bb:
-; CHECK: load double, double*
-; CHECK-NOT: load double, double*
-; CHECK: br i1 %exitcond
-return:
+return:
ret void
}
@@ -161,38 +213,53 @@ return:
define void @test6(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK-LABEL: @test6(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[TMP1]], label [[BB_NPH:%.*]], label [[RETURN:%.*]]
+; CHECK: bb.nph:
+; CHECK-NEXT: [[TMP:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT: [[DOTPRE:%.*]] = load double, double* [[G:%.*]], align 8
+; CHECK-NEXT: br label [[BB:%.*]]
+; CHECK: bb:
+; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[DOTPRE]], [[BB_NPH]] ], [ [[TMP4:%.*]], [[BB]] ]
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH]] ], [ [[TMP6:%.*]], [[BB]] ]
+; CHECK-NEXT: [[TMP6]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[G]], i64 [[TMP6]]
+; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr double, double* [[G]], i64 [[INDVAR]]
+; CHECK-NEXT: [[TMP3:%.*]] = load double, double* [[SCEVGEP]], align 8
+; CHECK-NEXT: [[TMP4]] = fadd double [[TMP2]], [[TMP3]]
+; CHECK-NEXT: store double [[TMP4]], double* [[SCEVGEP]], align 8
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[TMP6]], [[TMP]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN]], label [[BB]]
+; CHECK: return:
+; CHECK-NEXT: ret void
+;
entry:
- %0 = add i32 %N, -1
- %1 = icmp sgt i32 %0, 0
+ %0 = add i32 %N, -1
+ %1 = icmp sgt i32 %0, 0
br i1 %1, label %bb.nph, label %return
-bb.nph:
- %tmp = zext i32 %0 to i64
+bb.nph:
+ %tmp = zext i32 %0 to i64
br label %bb
-; CHECK: bb.nph:
-; CHECK: load double, double*
-; CHECK: br label %bb
-bb:
+bb:
%indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
- %tmp6 = add i64 %indvar, 1
+ %tmp6 = add i64 %indvar, 1
%scevgep = getelementptr double, double* %G, i64 %tmp6
%scevgep7 = getelementptr double, double* %G, i64 %indvar
%2 = load double, double* %scevgep7, align 8
- %3 = load double, double* %scevgep, align 8
- %4 = fadd double %2, %3
+ %3 = load double, double* %scevgep, align 8
+ %4 = fadd double %2, %3
store double %4, double* %scevgep, align 8
- %exitcond = icmp eq i64 %tmp6, %tmp
+ %exitcond = icmp eq i64 %tmp6, %tmp
br i1 %exitcond, label %return, label %bb
; Should only be one load in the loop.
-; CHECK: bb:
-; CHECK: load double, double*
-; CHECK-NOT: load double, double*
-; CHECK: br i1 %exitcond
-return:
+return:
ret void
}
@@ -205,38 +272,60 @@ return:
; This requires phi translation of the adds.
define void @test7(i32 %N, double* nocapture %G) nounwind ssp {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, double* [[G:%.*]], i64 1
+; CHECK-NEXT: store double 1.000000e+00, double* [[TMP0]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[N:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], 1
+; CHECK-NEXT: br i1 [[TMP2]], label [[BB_NPH:%.*]], label [[RETURN:%.*]]
+; CHECK: bb.nph:
+; CHECK-NEXT: [[TMP:%.*]] = sext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP]], -1
+; CHECK-NEXT: br label [[BB:%.*]]
+; CHECK: bb:
+; CHECK-NEXT: [[TMP3:%.*]] = phi double [ 1.000000e+00, [[BB_NPH]] ], [ [[TMP5:%.*]], [[BB]] ]
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH]] ], [ [[TMP9:%.*]], [[BB]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDVAR]], 2
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[G]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP9]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT: [[SCEVGEP10:%.*]] = getelementptr double, double* [[G]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[SCEVGEP]], align 8
+; CHECK-NEXT: [[TMP5]] = fadd double [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store double [[TMP5]], double* [[SCEVGEP]], align 8
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[TMP9]], [[TMP7]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN]], label [[BB]]
+; CHECK: return:
+; CHECK-NEXT: ret void
+;
entry:
- %0 = getelementptr inbounds double, double* %G, i64 1
+ %0 = getelementptr inbounds double, double* %G, i64 1
store double 1.000000e+00, double* %0, align 8
- %1 = add i32 %N, -1
- %2 = icmp sgt i32 %1, 1
+ %1 = add i32 %N, -1
+ %2 = icmp sgt i32 %1, 1
br i1 %2, label %bb.nph, label %return
-bb.nph:
- %tmp = sext i32 %1 to i64
- %tmp7 = add i64 %tmp, -1
+bb.nph:
+ %tmp = sext i32 %1 to i64
+ %tmp7 = add i64 %tmp, -1
br label %bb
-bb:
- %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
- %tmp8 = add i64 %indvar, 2
- %scevgep = getelementptr double, double* %G, i64 %tmp8
- %tmp9 = add i64 %indvar, 1
- %scevgep10 = getelementptr double, double* %G, i64 %tmp9
- %3 = load double, double* %scevgep10, align 8
- %4 = load double, double* %scevgep, align 8
- %5 = fadd double %3, %4
+bb:
+ %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
+ %tmp8 = add i64 %indvar, 2
+ %scevgep = getelementptr double, double* %G, i64 %tmp8
+ %tmp9 = add i64 %indvar, 1
+ %scevgep10 = getelementptr double, double* %G, i64 %tmp9
+ %3 = load double, double* %scevgep10, align 8
+ %4 = load double, double* %scevgep, align 8
+ %5 = fadd double %3, %4
store double %5, double* %scevgep, align 8
- %exitcond = icmp eq i64 %tmp9, %tmp7
+ %exitcond = icmp eq i64 %tmp9, %tmp7
br i1 %exitcond, label %return, label %bb
; Should only be one load in the loop.
-; CHECK: bb:
-; CHECK: load double, double*
-; CHECK-NOT: load double, double*
-; CHECK: br i1 %exitcond
-return:
+return:
ret void
}
@@ -244,14 +333,27 @@ return:
;; GEP to be inserted into it.
define i32 @test8(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK-LABEL: @test8(
+; CHECK-NEXT: block1:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]]
+; CHECK: block2:
+; CHECK-NEXT: [[P3_PHI_TRANS_INSERT:%.*]] = getelementptr i32, i32* [[Q:%.*]], i32 1
+; CHECK-NEXT: [[PRE_PRE:%.*]] = load i32, i32* [[P3_PHI_TRANS_INSERT]], align 4
+; CHECK-NEXT: br label [[BLOCK4:%.*]]
+; CHECK: block3:
+; CHECK-NEXT: [[A:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 1
+; CHECK-NEXT: store i32 0, i32* [[A]], align 4
+; CHECK-NEXT: br label [[BLOCK4]]
+; CHECK: block4:
+; CHECK-NEXT: [[PRE:%.*]] = phi i32 [ 0, [[BLOCK3]] ], [ [[PRE_PRE]], [[BLOCK2]] ]
+; CHECK-NEXT: [[P2:%.*]] = phi i32* [ [[P]], [[BLOCK3]] ], [ [[Q]], [[BLOCK2]] ]
+; CHECK-NEXT: [[P3:%.*]] = getelementptr i32, i32* [[P2]], i32 1
+; CHECK-NEXT: ret i32 [[PRE]]
+;
block1:
- br i1 %C, label %block2, label %block3
+ br i1 %C, label %block2, label %block3
block2:
- br label %block4
-; CHECK: block2:
-; CHECK: load i32, i32*
-; CHECK: br label %block4
+ br label %block4
block3:
%A = getelementptr i32, i32* %p, i32 1
@@ -263,10 +365,6 @@ block4:
%P3 = getelementptr i32, i32* %P2, i32 1
%PRE = load i32, i32* %P3
ret i32 %PRE
-; CHECK: block4:
-; CHECK: phi i32 [
-; CHECK-NOT: load
-; CHECK: ret i32
}
;void test9(int N, double* G) {
@@ -277,41 +375,60 @@ block4:
; This requires phi translation of the adds.
define void @test9(i32 %N, double* nocapture %G) nounwind ssp {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 1
+; CHECK-NEXT: br i1 [[TMP1]], label [[BB_NPH:%.*]], label [[RETURN:%.*]]
+; CHECK: bb.nph:
+; CHECK-NEXT: [[TMP:%.*]] = sext i32 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP]], -1
+; CHECK-NEXT: [[SCEVGEP10_PHI_TRANS_INSERT:%.*]] = getelementptr double, double* [[G:%.*]], i64 1
+; CHECK-NEXT: [[DOTPRE:%.*]] = load double, double* [[SCEVGEP10_PHI_TRANS_INSERT]], align 8
+; CHECK-NEXT: br label [[BB:%.*]]
+; CHECK: bb:
+; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[DOTPRE]], [[BB_NPH]] ], [ [[TMP4:%.*]], [[BB]] ]
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH]] ], [ [[TMP9:%.*]], [[BB]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDVAR]], 2
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[G]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP9]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT: [[SCEVGEP10:%.*]] = getelementptr double, double* [[G]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP3:%.*]] = load double, double* [[SCEVGEP]], align 8
+; CHECK-NEXT: [[TMP4]] = fadd double [[TMP2]], [[TMP3]]
+; CHECK-NEXT: store double [[TMP4]], double* [[SCEVGEP]], align 8
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[TMP9]], [[TMP7]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN]], label [[BB]]
+; CHECK: return:
+; CHECK-NEXT: ret void
+;
entry:
add i32 0, 0
- %1 = add i32 %N, -1
- %2 = icmp sgt i32 %1, 1
+ %1 = add i32 %N, -1
+ %2 = icmp sgt i32 %1, 1
br i1 %2, label %bb.nph, label %return
-bb.nph:
- %tmp = sext i32 %1 to i64
- %tmp7 = add i64 %tmp, -1
+bb.nph:
+ %tmp = sext i32 %1 to i64
+ %tmp7 = add i64 %tmp, -1
br label %bb
-; CHECK: bb.nph:
-; CHECK: load double, double*
-; CHECK: br label %bb
-
-bb:
- %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
- %tmp8 = add i64 %indvar, 2
- %scevgep = getelementptr double, double* %G, i64 %tmp8
- %tmp9 = add i64 %indvar, 1
- %scevgep10 = getelementptr double, double* %G, i64 %tmp9
- %3 = load double, double* %scevgep10, align 8
- %4 = load double, double* %scevgep, align 8
- %5 = fadd double %3, %4
+
+bb:
+ %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
+ %tmp8 = add i64 %indvar, 2
+ %scevgep = getelementptr double, double* %G, i64 %tmp8
+ %tmp9 = add i64 %indvar, 1
+ %scevgep10 = getelementptr double, double* %G, i64 %tmp9
+ %3 = load double, double* %scevgep10, align 8
+ %4 = load double, double* %scevgep, align 8
+ %5 = fadd double %3, %4
store double %5, double* %scevgep, align 8
- %exitcond = icmp eq i64 %tmp9, %tmp7
+ %exitcond = icmp eq i64 %tmp9, %tmp7
br i1 %exitcond, label %return, label %bb
; Should only be one load in the loop.
-; CHECK: bb:
-; CHECK: load double, double*
-; CHECK-NOT: load double, double*
-; CHECK: br i1 %exitcond
-return:
+return:
ret void
}
@@ -323,6 +440,36 @@ return:
; PR5501
define void @test10(i32 %N, double* nocapture %G) nounwind ssp {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 1
+; CHECK-NEXT: br i1 [[TMP1]], label [[BB_NPH:%.*]], label [[RETURN:%.*]]
+; CHECK: bb.nph:
+; CHECK-NEXT: [[TMP:%.*]] = sext i32 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP]], -1
+; CHECK-NEXT: [[SCEVGEP12_PHI_TRANS_INSERT:%.*]] = getelementptr double, double* [[G:%.*]], i64 1
+; CHECK-NEXT: [[DOTPRE:%.*]] = load double, double* [[SCEVGEP12_PHI_TRANS_INSERT]], align 8
+; CHECK-NEXT: [[DOTPRE1:%.*]] = load double, double* [[G]], align 8
+; CHECK-NEXT: br label [[BB:%.*]]
+; CHECK: bb:
+; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[DOTPRE1]], [[BB_NPH]] ], [ [[TMP6:%.*]], [[BB]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[DOTPRE]], [[BB_NPH]] ], [ [[TMP4:%.*]], [[BB]] ]
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH]] ], [ [[TMP11:%.*]], [[BB]] ]
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[G]], i64 [[INDVAR]]
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDVAR]], 2
+; CHECK-NEXT: [[SCEVGEP10:%.*]] = getelementptr double, double* [[G]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP11]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT: [[SCEVGEP12:%.*]] = getelementptr double, double* [[G]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP4]] = load double, double* [[SCEVGEP10]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = fadd double [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP6]] = fadd double [[TMP5]], [[TMP2]]
+; CHECK-NEXT: store double [[TMP6]], double* [[SCEVGEP12]], align 8
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[TMP11]], [[TMP8]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN]], label [[BB]]
+; CHECK: return:
+; CHECK-NEXT: ret void
+;
entry:
%0 = add i32 %N, -1
%1 = icmp sgt i32 %0, 1
@@ -332,10 +479,6 @@ bb.nph:
%tmp = sext i32 %0 to i64
%tmp8 = add i64 %tmp, -1
br label %bb
-; CHECK: bb.nph:
-; CHECK: load double, double*
-; CHECK: load double, double*
-; CHECK: br label %bb
bb:
@@ -355,10 +498,6 @@ bb:
br i1 %exitcond, label %return, label %bb
; Should only be one load in the loop.
-; CHECK: bb:
-; CHECK: load double, double*
-; CHECK-NOT: load double, double*
-; CHECK: br i1 %exitcond
return:
ret void
@@ -367,14 +506,30 @@ return:
; Test critical edge splitting.
define i32 @test11(i32* %p, i1 %C, i32 %N) {
; CHECK-LABEL: @test11(
+; CHECK-NEXT: block1:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]]
+; CHECK: block2:
+; CHECK-NEXT: [[COND:%.*]] = icmp sgt i32 [[N:%.*]], 1
+; CHECK-NEXT: br i1 [[COND]], label [[BLOCK2_BLOCK4_CRIT_EDGE:%.*]], label [[BLOCK5:%.*]]
+; CHECK: block2.block4_crit_edge:
+; CHECK-NEXT: [[PRE_PRE:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT: br label [[BLOCK4:%.*]]
+; CHECK: block3:
+; CHECK-NEXT: store i32 0, i32* [[P]], align 4
+; CHECK-NEXT: br label [[BLOCK4]]
+; CHECK: block4:
+; CHECK-NEXT: [[PRE:%.*]] = phi i32 [ [[PRE_PRE]], [[BLOCK2_BLOCK4_CRIT_EDGE]] ], [ 0, [[BLOCK3]] ]
+; CHECK-NEXT: br label [[BLOCK5]]
+; CHECK: block5:
+; CHECK-NEXT: [[RET:%.*]] = phi i32 [ 0, [[BLOCK2]] ], [ [[PRE]], [[BLOCK4]] ]
+; CHECK-NEXT: ret i32 [[RET]]
+;
block1:
- br i1 %C, label %block2, label %block3
+ br i1 %C, label %block2, label %block3
block2:
- %cond = icmp sgt i32 %N, 1
- br i1 %cond, label %block4, label %block5
-; CHECK: load i32, i32* %p
-; CHECK-NEXT: br label %block4
+ %cond = icmp sgt i32 %N, 1
+ br i1 %cond, label %block4, label %block5
block3:
store i32 0, i32* %p
@@ -387,8 +542,6 @@ block4:
block5:
%ret = phi i32 [ 0, %block2 ], [ %PRE, %block4 ]
ret i32 %ret
-; CHECK: block4:
-; CHECK-NEXT: phi i32
}
declare void @f()
@@ -398,13 +551,36 @@ declare i32 @__CxxFrameHandler3(...)
; Test that loads aren't PRE'd into EH pads.
define void @test12(i32* %p) personality i32 (...)* @__CxxFrameHandler3 {
; CHECK-LABEL: @test12(
+; CHECK-NEXT: block1:
+; CHECK-NEXT: invoke void @f()
+; CHECK-NEXT: to label [[BLOCK2:%.*]] unwind label [[CATCH_DISPATCH:%.*]]
+; CHECK: block2:
+; CHECK-NEXT: invoke void @f()
+; CHECK-NEXT: to label [[BLOCK3:%.*]] unwind label [[CLEANUP:%.*]]
+; CHECK: block3:
+; CHECK-NEXT: ret void
+; CHECK: catch.dispatch:
+; CHECK-NEXT: [[CS1:%.*]] = catchswitch within none [label %catch] unwind label [[CLEANUP2:%.*]]
+; CHECK: catch:
+; CHECK-NEXT: [[C:%.*]] = catchpad within [[CS1]] []
+; CHECK-NEXT: catchret from [[C]] to label [[BLOCK2]]
+; CHECK: cleanup:
+; CHECK-NEXT: [[C1:%.*]] = cleanuppad within none []
+; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4
+; CHECK-NEXT: cleanupret from [[C1]] unwind label [[CLEANUP2]]
+; CHECK: cleanup2:
+; CHECK-NEXT: [[C2:%.*]] = cleanuppad within none []
+; CHECK-NEXT: [[NOTPRE:%.*]] = load i32, i32* [[P]], align 4
+; CHECK-NEXT: call void @g(i32 [[NOTPRE]])
+; CHECK-NEXT: cleanupret from [[C2]] unwind to caller
+;
block1:
invoke void @f()
- to label %block2 unwind label %catch.dispatch
+ to label %block2 unwind label %catch.dispatch
block2:
invoke void @f()
- to label %block3 unwind label %cleanup
+ to label %block3 unwind label %cleanup
block3:
ret void
@@ -421,10 +597,6 @@ cleanup:
store i32 0, i32* %p
cleanupret from %c1 unwind label %cleanup2
-; CHECK: cleanup2:
-; CHECK-NOT: phi
-; CHECK-NEXT: %c2 = cleanuppad within none []
-; CHECK-NEXT: %NOTPRE = load i32, i32* %p
cleanup2:
%c2 = cleanuppad within none []
%NOTPRE = load i32, i32* %p
@@ -435,28 +607,30 @@ cleanup2:
; Don't PRE load across potentially throwing calls.
define i32 @test13(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) {
-
; CHECK-LABEL: @test13(
-; CHECK: entry:
-; CHECK-NEXT: icmp eq
-; CHECK-NEXT: br i1
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[UU:%.*]] = load i32, i32* [[X:%.*]], align 4
+; CHECK-NEXT: store i32 [[UU]], i32* [[R:%.*]], align 4
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: [[VV:%.*]] = load i32, i32* [[X]], align 4
+; CHECK-NEXT: ret i32 [[VV]]
+;
entry:
%tobool = icmp eq i32 %a, 0
br i1 %tobool, label %if.end, label %if.then
-; CHECK: if.then:
-; CHECK-NEXT: load i32
-; CHECK-NEXT: store i32
if.then:
%uu = load i32, i32* %x, align 4
store i32 %uu, i32* %r, align 4
br label %if.end
-; CHECK: if.end:
-; CHECK-NEXT: call void @f()
-; CHECK-NEXT: load i32
if.end:
call void @f()
@@ -468,28 +642,30 @@ if.end:
; block.
define i32 @test14(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) {
-
; CHECK-LABEL: @test14(
-; CHECK: entry:
-; CHECK-NEXT: icmp eq
-; CHECK-NEXT: br i1
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[UU:%.*]] = load i32, i32* [[X:%.*]], align 4
+; CHECK-NEXT: store i32 [[UU]], i32* [[R:%.*]], align 4
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: [[VV:%.*]] = load i32, i32* [[X]], align 4
+; CHECK-NEXT: ret i32 [[VV]]
+;
entry:
%tobool = icmp eq i32 %a, 0
br i1 %tobool, label %if.end, label %if.then
-; CHECK: if.then:
-; CHECK-NEXT: load i32
-; CHECK-NEXT: store i32
if.then:
%uu = load i32, i32* %x, align 4
store i32 %uu, i32* %r, align 4
br label %if.end
-; CHECK: if.end:
-; CHECK-NEXT: call void @f()
-; CHECK-NEXT: load i32
if.end:
call void @f()
@@ -508,39 +684,39 @@ follow_2:
; Since it is OK to speculate, PRE is allowed.
define i32 @test15(i32* noalias nocapture readonly dereferenceable(8) align 4 %x, i32* noalias nocapture %r, i32 %a) {
-
-; CHECK-LABEL: @test15
-; CHECK: entry:
-; CHECK-NEXT: icmp eq
-; CHECK-NEXT: br i1
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[ENTRY_IF_END_CRIT_EDGE:%.*]], label [[IF_THEN:%.*]]
+; CHECK: entry.if.end_crit_edge:
+; CHECK-NEXT: [[VV_PRE:%.*]] = load i32, i32* [[X:%.*]], align 4
+; CHECK-NEXT: br label [[IF_END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[UU:%.*]] = load i32, i32* [[X]], align 4
+; CHECK-NEXT: store i32 [[UU]], i32* [[R:%.*]], align 4
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[VV:%.*]] = phi i32 [ [[VV_PRE]], [[ENTRY_IF_END_CRIT_EDGE]] ], [ [[UU]], [[IF_THEN]] ]
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: ret i32 [[VV]]
+;
entry:
%tobool = icmp eq i32 %a, 0
br i1 %tobool, label %if.end, label %if.then
-; CHECK: entry.if.end_crit_edge:
-; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4
-; CHECK-NEXT: br label %if.end
if.then:
%uu = load i32, i32* %x, align 4
store i32 %uu, i32* %r, align 4
br label %if.end
-; CHECK: if.then:
-; CHECK-NEXT: %uu = load i32, i32* %x, align 4
-; CHECK-NEXT: store i32 %uu, i32* %r, align 4
-; CHECK-NEXT: br label %if.end
if.end:
call void @f()
%vv = load i32, i32* %x, align 4
ret i32 %vv
-; CHECK: if.end:
-; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ]
-; CHECK-NEXT: call void @f()
-; CHECK-NEXT: ret i32 %vv
}
@@ -549,38 +725,38 @@ if.end:
; Since it is OK to speculate, PRE is allowed.
define i32 @test16(i32* noalias nocapture readonly dereferenceable(8) align 4 %x, i32* noalias nocapture %r, i32 %a) {
-
; CHECK-LABEL: @test16(
-; CHECK: entry:
-; CHECK-NEXT: icmp eq
-; CHECK-NEXT: br i1
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[ENTRY_IF_END_CRIT_EDGE:%.*]], label [[IF_THEN:%.*]]
+; CHECK: entry.if.end_crit_edge:
+; CHECK-NEXT: [[VV_PRE:%.*]] = load i32, i32* [[X:%.*]], align 4
+; CHECK-NEXT: br label [[IF_END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[UU:%.*]] = load i32, i32* [[X]], align 4
+; CHECK-NEXT: store i32 [[UU]], i32* [[R:%.*]], align 4
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[VV:%.*]] = phi i32 [ [[VV_PRE]], [[ENTRY_IF_END_CRIT_EDGE]] ], [ [[UU]], [[IF_THEN]] ]
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: ret i32 [[VV]]
+;
entry:
%tobool = icmp eq i32 %a, 0
br i1 %tobool, label %if.end, label %if.then
-; CHECK: entry.if.end_crit_edge:
-; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4
-; CHECK-NEXT: br label %if.end
if.then:
%uu = load i32, i32* %x, align 4
store i32 %uu, i32* %r, align 4
br label %if.end
-; CHECK: if.then:
-; CHECK-NEXT: %uu = load i32, i32* %x, align 4
-; CHECK-NEXT: store i32 %uu, i32* %r, align 4
-; CHECK-NEXT: br label %if.end
if.end:
call void @f()
br label %follow_1
-; CHECK: if.end:
-; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ]
-; CHECK-NEXT: call void @f()
-; CHECK-NEXT: ret i32 %vv
follow_1:
br label %follow_2
diff --git a/llvm/test/Transforms/InstCombine/call-guard.ll b/llvm/test/Transforms/InstCombine/call-guard.ll
index e9ff9fa68155..c219300fdaf1 100644
--- a/llvm/test/Transforms/InstCombine/call-guard.ll
+++ b/llvm/test/Transforms/InstCombine/call-guard.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -instcombine-infinite-loop-threshold=2 -S | FileCheck %s
; RUN: opt < %s -instcombine -S -debugify-each | FileCheck %s
; RUN: opt < %s -passes=instcombine -S -debugify-each | FileCheck %s
@@ -6,8 +7,9 @@ declare void @llvm.experimental.guard(i1, ...)
define void @test_guard_adjacent_same_cond(i1 %A) {
; CHECK-LABEL: @test_guard_adjacent_same_cond(
-; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %A) [ "deopt"() ]
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[A:%.*]]) [ "deopt"() ]
; CHECK-NEXT: ret void
+;
call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
@@ -23,10 +25,11 @@ define void @test_guard_adjacent_same_cond(i1 %A) {
define void @test_guard_adjacent_diff_cond(i1 %A, i1 %B, i1 %C) {
; CHECK-LABEL: @test_guard_adjacent_diff_cond(
-; CHECK-NEXT: %1 = and i1 %A, %B
-; CHECK-NEXT: %2 = and i1 %1, %C
-; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %2, i32 123) [ "deopt"() ]
+; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[C:%.*]]
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 123) [ "deopt"() ]
; CHECK-NEXT: ret void
+;
call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ]
call void(i1, ...) @llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ]
call void(i1, ...) @llvm.experimental.guard( i1 %C, i32 789 )[ "deopt"() ]
@@ -37,13 +40,14 @@ define void @test_guard_adjacent_diff_cond(i1 %A, i1 %B, i1 %C) {
; between the guards
define void @test_guard_adjacent_diff_cond2(i32 %V1, i32 %V2) {
; CHECK-LABEL: @test_guard_adjacent_diff_cond2(
-; CHECK-NEXT: %1 = and i32 %V1, %V2
-; CHECK-NEXT: %2 = icmp slt i32 %1, 0
-; CHECK-NEXT: %and = and i32 %V1, 255
-; CHECK-NEXT: %C = icmp ult i32 %and, 129
-; CHECK-NEXT: %3 = and i1 %2, %C
-; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %3, i32 123) [ "deopt"() ]
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[V1:%.*]], [[V2:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[V1]], 255
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[AND]], 129
+; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[C]]
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP3]], i32 123) [ "deopt"() ]
; CHECK-NEXT: ret void
+;
%A = icmp slt i32 %V1, 0
call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ]
%B = icmp slt i32 %V2, 0
@@ -57,9 +61,14 @@ define void @test_guard_adjacent_diff_cond2(i32 %V1, i32 %V2) {
; Might not be legal to hoist the load above the first guard since the
; guard might control dereferenceability
define void @negative_load(i32 %V1, i32* %P) {
-; CHECK-LABEL: @negative_load
-; CHECK: @llvm.experimental.guard
-; CHECK: @llvm.experimental.guard
+; CHECK-LABEL: @negative_load(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i32 [[V1:%.*]], 0
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[A]], i32 123) [ "deopt"() ]
+; CHECK-NEXT: [[V2:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[B:%.*]] = icmp slt i32 [[V2]], 0
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[B]], i32 456) [ "deopt"() ]
+; CHECK-NEXT: ret void
+;
%A = icmp slt i32 %V1, 0
call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ]
%V2 = load i32, i32* %P
@@ -69,11 +78,13 @@ define void @negative_load(i32 %V1, i32* %P) {
}
define void @deref_load(i32 %V1, i32* dereferenceable(4) align 4 %P) {
-; CHECK-LABEL: @deref_load
-; CHECK-NEXT: %V2 = load i32, i32* %P, align 4
-; CHECK-NEXT: %1 = and i32 %V2, %V1
-; CHECK-NEXT: %2 = icmp slt i32 %1, 0
-; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %2, i32 123) [ "deopt"() ]
+; CHECK-LABEL: @deref_load(
+; CHECK-NEXT: [[V2:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[V2]], [[V1:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 123) [ "deopt"() ]
+; CHECK-NEXT: ret void
+;
%A = icmp slt i32 %V1, 0
call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ]
%V2 = load i32, i32* %P
@@ -84,12 +95,17 @@ define void @deref_load(i32 %V1, i32* dereferenceable(4) align 4 %P) {
; The divide might fault above the guard
define void @negative_div(i32 %V1, i32 %D) {
-; CHECK-LABEL: @negative_div
-; CHECK: @llvm.experimental.guard
-; CHECK: @llvm.experimental.guard
+; CHECK-LABEL: @negative_div(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i32 [[V1:%.*]], 0
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[A]], i32 123) [ "deopt"() ]
+; CHECK-NEXT: [[V2:%.*]] = udiv i32 [[V1]], [[D:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp slt i32 [[V2]], 0
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[B]], i32 456) [ "deopt"() ]
+; CHECK-NEXT: ret void
+;
%A = icmp slt i32 %V1, 0
call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ]
- %V2 = udiv i32 %V1, %D
+ %V2 = udiv i32 %V1, %D
%B = icmp slt i32 %V2, 0
call void(i1, ...) @llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ]
ret void
@@ -97,9 +113,16 @@ define void @negative_div(i32 %V1, i32 %D) {
; Highlight the limit of the window in a case which would otherwise be mergable
define void @negative_window(i32 %V1, i32 %a, i32 %b, i32 %c, i32 %d) {
-; CHECK-LABEL: @negative_window
-; CHECK: @llvm.experimental.guard
-; CHECK: @llvm.experimental.guard
+; CHECK-LABEL: @negative_window(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i32 [[V1:%.*]], 0
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[A]], i32 123) [ "deopt"() ]
+; CHECK-NEXT: [[V2:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[V3:%.*]] = add i32 [[V2]], [[C:%.*]]
+; CHECK-NEXT: [[V4:%.*]] = add i32 [[V3]], [[D:%.*]]
+; CHECK-NEXT: [[B:%.*]] = icmp slt i32 [[V4]], 0
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[B]], i32 456) [ "deopt"() ]
+; CHECK-NEXT: ret void
+;
%A = icmp slt i32 %V1, 0
call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ]
%V2 = add i32 %a, %b
diff --git a/llvm/test/Transforms/JumpThreading/guards.ll b/llvm/test/Transforms/JumpThreading/guards.ll
index 911eec8dad27..f487efa045b0 100644
--- a/llvm/test/Transforms/JumpThreading/guards.ll
+++ b/llvm/test/Transforms/JumpThreading/guards.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -jump-threading -dce -S | FileCheck %s
declare void @llvm.experimental.guard(i1, ...)
@@ -7,30 +8,34 @@ declare i32 @f2()
define i32 @branch_implies_guard(i32 %a) {
; CHECK-LABEL: @branch_implies_guard(
+; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[A:%.*]], 10
+; CHECK-NEXT: br i1 [[COND]], label [[T1_SPLIT:%.*]], label [[F1_SPLIT:%.*]]
+; CHECK: T1.split:
+; CHECK-NEXT: [[V1:%.*]] = call i32 @f1()
+; CHECK-NEXT: [[RETVAL3:%.*]] = add i32 [[V1]], 10
+; CHECK-NEXT: br label [[MERGE:%.*]]
+; CHECK: F1.split:
+; CHECK-NEXT: [[V2:%.*]] = call i32 @f2()
+; CHECK-NEXT: [[RETVAL1:%.*]] = add i32 [[V2]], 10
+; CHECK-NEXT: [[CONDGUARD2:%.*]] = icmp slt i32 [[A]], 20
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[CONDGUARD2]]) [ "deopt"() ]
+; CHECK-NEXT: br label [[MERGE]]
+; CHECK: Merge:
+; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[RETVAL3]], [[T1_SPLIT]] ], [ [[RETVAL1]], [[F1_SPLIT]] ]
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
%cond = icmp slt i32 %a, 10
br i1 %cond, label %T1, label %F1
T1:
-; CHECK: T1.split
-; CHECK: %v1 = call i32 @f1()
-; CHECK-NEXT: %retVal
-; CHECK-NEXT: br label %Merge
%v1 = call i32 @f1()
br label %Merge
F1:
-; CHECK: F1.split
-; CHECK: %v2 = call i32 @f2()
-; CHECK-NEXT: %retVal
-; CHECK-NEXT: %condGuard
-; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %condGuard
-; CHECK-NEXT: br label %Merge
%v2 = call i32 @f2()
br label %Merge
Merge:
-; CHECK: Merge
-; CHECK-NOT: call void(i1, ...) @llvm.experimental.guard(
%retPhi = phi i32 [ %v1, %T1 ], [ %v2, %F1 ]
%retVal = add i32 %retPhi, 10
%condGuard = icmp slt i32 %a, 20
@@ -40,30 +45,34 @@ Merge:
define i32 @not_branch_implies_guard(i32 %a) {
; CHECK-LABEL: @not_branch_implies_guard(
+; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[A:%.*]], 20
+; CHECK-NEXT: br i1 [[COND]], label [[T1_SPLIT:%.*]], label [[F1_SPLIT:%.*]]
+; CHECK: T1.split:
+; CHECK-NEXT: [[V1:%.*]] = call i32 @f1()
+; CHECK-NEXT: [[RETVAL1:%.*]] = add i32 [[V1]], 10
+; CHECK-NEXT: [[CONDGUARD2:%.*]] = icmp sgt i32 [[A]], 10
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[CONDGUARD2]]) [ "deopt"() ]
+; CHECK-NEXT: br label [[MERGE:%.*]]
+; CHECK: F1.split:
+; CHECK-NEXT: [[V2:%.*]] = call i32 @f2()
+; CHECK-NEXT: [[RETVAL3:%.*]] = add i32 [[V2]], 10
+; CHECK-NEXT: br label [[MERGE]]
+; CHECK: Merge:
+; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[RETVAL3]], [[F1_SPLIT]] ], [ [[RETVAL1]], [[T1_SPLIT]] ]
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
%cond = icmp slt i32 %a, 20
br i1 %cond, label %T1, label %F1
T1:
-; CHECK: T1.split:
-; CHECK-NEXT: %v1 = call i32 @f1()
-; CHECK-NEXT: %retVal
-; CHECK-NEXT: %condGuard
-; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %condGuard
-; CHECK-NEXT: br label %Merge
%v1 = call i32 @f1()
br label %Merge
F1:
-; CHECK: F1.split:
-; CHECK-NEXT: %v2 = call i32 @f2()
-; CHECK-NEXT: %retVal
-; CHECK-NEXT: br label %Merge
%v2 = call i32 @f2()
br label %Merge
Merge:
-; CHECK: Merge
-; CHECK-NOT: call void(i1, ...) @llvm.experimental.guard(
%retPhi = phi i32 [ %v1, %T1 ], [ %v2, %F1 ]
%retVal = add i32 %retPhi, 10
%condGuard = icmp sgt i32 %a, 10
@@ -73,27 +82,33 @@ Merge:
define i32 @branch_overlaps_guard(i32 %a) {
; CHECK-LABEL: @branch_overlaps_guard(
+; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[A:%.*]], 20
+; CHECK-NEXT: br i1 [[COND]], label [[T1:%.*]], label [[F1:%.*]]
+; CHECK: T1:
+; CHECK-NEXT: [[V1:%.*]] = call i32 @f1()
+; CHECK-NEXT: br label [[MERGE:%.*]]
+; CHECK: F1:
+; CHECK-NEXT: [[V2:%.*]] = call i32 @f2()
+; CHECK-NEXT: br label [[MERGE]]
+; CHECK: Merge:
+; CHECK-NEXT: [[RETPHI:%.*]] = phi i32 [ [[V1]], [[T1]] ], [ [[V2]], [[F1]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[RETPHI]], 10
+; CHECK-NEXT: [[CONDGUARD:%.*]] = icmp slt i32 [[A]], 10
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[CONDGUARD]]) [ "deopt"() ]
+; CHECK-NEXT: ret i32 [[RETVAL]]
+;
%cond = icmp slt i32 %a, 20
br i1 %cond, label %T1, label %F1
T1:
-; CHECK: T1:
-; CHECK-NEXT: %v1 = call i32 @f1()
-; CHECK-NEXT: br label %Merge
%v1 = call i32 @f1()
br label %Merge
F1:
-; CHECK: F1:
-; CHECK-NEXT: %v2 = call i32 @f2()
-; CHECK-NEXT: br label %Merge
%v2 = call i32 @f2()
br label %Merge
Merge:
-; CHECK: Merge
-; CHECK: %condGuard = icmp slt i32 %a, 10
-; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %condGuard) [ "deopt"() ]
%retPhi = phi i32 [ %v1, %T1 ], [ %v2, %F1 ]
%retVal = add i32 %retPhi, 10
%condGuard = icmp slt i32 %a, 10
@@ -103,27 +118,33 @@ Merge:
define i32 @branch_doesnt_overlap_guard(i32 %a) {
; CHECK-LABEL: @branch_doesnt_overlap_guard(
+; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[A:%.*]], 10
+; CHECK-NEXT: br i1 [[COND]], label [[T1:%.*]], label [[F1:%.*]]
+; CHECK: T1:
+; CHECK-NEXT: [[V1:%.*]] = call i32 @f1()
+; CHECK-NEXT: br label [[MERGE:%.*]]
+; CHECK: F1:
+; CHECK-NEXT: [[V2:%.*]] = call i32 @f2()
+; CHECK-NEXT: br label [[MERGE]]
+; CHECK: Merge:
+; CHECK-NEXT: [[RETPHI:%.*]] = phi i32 [ [[V1]], [[T1]] ], [ [[V2]], [[F1]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[RETPHI]], 10
+; CHECK-NEXT: [[CONDGUARD:%.*]] = icmp sgt i32 [[A]], 20
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[CONDGUARD]]) [ "deopt"() ]
+; CHECK-NEXT: ret i32 [[RETVAL]]
+;
%cond = icmp slt i32 %a, 10
br i1 %cond, label %T1, label %F1
T1:
-; CHECK: T1:
-; CHECK-NEXT: %v1 = call i32 @f1()
-; CHECK-NEXT: br label %Merge
%v1 = call i32 @f1()
br label %Merge
F1:
-; CHECK: F1:
-; CHECK-NEXT: %v2 = call i32 @f2()
-; CHECK-NEXT: br label %Merge
%v2 = call i32 @f2()
br label %Merge
Merge:
-; CHECK: Merge
-; CHECK: %condGuard = icmp sgt i32 %a, 20
-; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %condGuard) [ "deopt"() ]
%retPhi = phi i32 [ %v1, %T1 ], [ %v2, %F1 ]
%retVal = add i32 %retPhi, 10
%condGuard = icmp sgt i32 %a, 20
@@ -133,31 +154,47 @@ Merge:
define i32 @not_a_diamond1(i32 %a, i1 %cond1) {
; CHECK-LABEL: @not_a_diamond1(
+; CHECK-NEXT: br i1 [[COND1:%.*]], label [[PRED:%.*]], label [[EXIT:%.*]]
+; CHECK: Pred:
+; CHECK-NEXT: switch i32 [[A:%.*]], label [[EXIT]] [
+; CHECK-NEXT: i32 10, label [[MERGE:%.*]]
+; CHECK-NEXT: i32 20, label [[MERGE]]
+; CHECK-NEXT: ]
+; CHECK: Merge:
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[COND1]]) [ "deopt"() ]
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: Exit:
+; CHECK-NEXT: ret i32 [[A]]
+;
br i1 %cond1, label %Pred, label %Exit
Pred:
-; CHECK: Pred:
-; CHECK-NEXT: switch i32 %a, label %Exit
switch i32 %a, label %Exit [
- i32 10, label %Merge
- i32 20, label %Merge
+ i32 10, label %Merge
+ i32 20, label %Merge
]
Merge:
-; CHECK: Merge:
-; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %cond1) [ "deopt"() ]
-; CHECK-NEXT: br label %Exit
call void(i1, ...) @llvm.experimental.guard(i1 %cond1) [ "deopt"() ]
br label %Exit
Exit:
-; CHECK: Exit:
-; CHECK-NEXT: ret i32 %a
ret i32 %a
}
define void @not_a_diamond2(i32 %a, i1 %cond1) {
; CHECK-LABEL: @not_a_diamond2(
+; CHECK-NEXT: Pred:
+; CHECK-NEXT: switch i32 [[A:%.*]], label [[EXIT:%.*]] [
+; CHECK-NEXT: i32 10, label [[MERGE:%.*]]
+; CHECK-NEXT: i32 20, label [[MERGE]]
+; CHECK-NEXT: ]
+; CHECK: Merge:
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[COND1:%.*]]) [ "deopt"() ]
+; CHECK-NEXT: ret void
+; CHECK: Exit:
+; CHECK-NEXT: ret void
+;
br label %Parent
Merge:
@@ -165,20 +202,15 @@ Merge:
ret void
Pred:
-; CHECK-NEXT: Pred:
-; CHECK-NEXT: switch i32 %a, label %Exit
switch i32 %a, label %Exit [
- i32 10, label %Merge
- i32 20, label %Merge
+ i32 10, label %Merge
+ i32 20, label %Merge
]
Parent:
br label %Pred
Exit:
-; CHECK: Merge:
-; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %cond1) [ "deopt"() ]
-; CHECK-NEXT: ret void
ret void
}
@@ -187,11 +219,15 @@ declare void @never_called(i1)
; LVI uses guard to identify value of %c2 in branch as true, we cannot replace that
; guard with guard(true & c1).
define void @dont_fold_guard(i8* %addr, i32 %i, i32 %length) {
-; CHECK-LABEL: dont_fold_guard
-; CHECK: %wide.chk = and i1 %c1, %c2
-; CHECK-NEXT: experimental.guard(i1 %wide.chk)
-; CHECK-NEXT: call void @never_called(i1 true)
-; CHECK-NEXT: ret void
+; CHECK-LABEL: @dont_fold_guard(
+; CHECK-NEXT: BB1:
+; CHECK-NEXT: [[C1:%.*]] = icmp ult i32 [[I:%.*]], [[LENGTH:%.*]]
+; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[I]], 0
+; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[C1]], [[C2]]
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ]
+; CHECK-NEXT: call void @never_called(i1 true)
+; CHECK-NEXT: ret void
+;
%c1 = icmp ult i32 %i, %length
%c2 = icmp eq i32 %i, 0
%wide.chk = and i1 %c1, %c2
@@ -210,12 +246,16 @@ declare void @dummy(i1) nounwind willreturn
; same as dont_fold_guard1 but there's a use immediately after guard and before
; branch. We can fold that use.
define void @dont_fold_guard2(i8* %addr, i32 %i, i32 %length) {
-; CHECK-LABEL: dont_fold_guard2
-; CHECK: %wide.chk = and i1 %c1, %c2
-; CHECK-NEXT: experimental.guard(i1 %wide.chk)
-; CHECK-NEXT: dummy(i1 true)
-; CHECK-NEXT: call void @never_called(i1 true)
-; CHECK-NEXT: ret void
+; CHECK-LABEL: @dont_fold_guard2(
+; CHECK-NEXT: BB1:
+; CHECK-NEXT: [[C1:%.*]] = icmp ult i32 [[I:%.*]], [[LENGTH:%.*]]
+; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[I]], 0
+; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[C1]], [[C2]]
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ]
+; CHECK-NEXT: call void @dummy(i1 true)
+; CHECK-NEXT: call void @never_called(i1 true)
+; CHECK-NEXT: ret void
+;
%c1 = icmp ult i32 %i, %length
%c2 = icmp eq i32 %i, 0
%wide.chk = and i1 %c1, %c2
@@ -235,8 +275,15 @@ BB2:
; We cannot fold the guard under any circumstance.
; FIXME: We can merge unreachableBB2 into not_zero.
define void @dont_fold_guard3(i8* %addr, i1 %cmp, i32 %i, i32 %length) {
-; CHECK-LABEL: dont_fold_guard3
-; CHECK: guard(i1 %cmp)
+; CHECK-LABEL: @dont_fold_guard3(
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[CMP:%.*]]) [ "deopt"() ]
+; CHECK-NEXT: br i1 [[CMP]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: BB1:
+; CHECK-NEXT: call void @never_called(i1 [[CMP]])
+; CHECK-NEXT: ret void
+; CHECK: BB2:
+; CHECK-NEXT: ret void
+;
call void(i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
br i1 %cmp, label %BB1, label %BB2
@@ -252,23 +299,28 @@ declare void @f(i1)
; Same as dont_fold_guard1 but use switch instead of branch.
; triggers source code `ProcessThreadableEdges`.
define void @dont_fold_guard4(i1 %cmp1, i32 %i) nounwind {
-; CHECK-LABEL: dont_fold_guard4
-; CHECK-LABEL: L2:
-; CHECK-NEXT: %cmp = icmp eq i32 %i, 0
-; CHECK-NEXT: guard(i1 %cmp)
-; CHECK-NEXT: dummy(i1 true)
-; CHECK-NEXT: @f(i1 true)
-; CHECK-NEXT: ret void
+; CHECK-LABEL: @dont_fold_guard4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[L2:%.*]], label [[L3:%.*]]
+; CHECK: L2:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], 0
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[CMP]]) [ "deopt"() ]
+; CHECK-NEXT: call void @dummy(i1 true)
+; CHECK-NEXT: call void @f(i1 true)
+; CHECK-NEXT: ret void
+; CHECK: L3:
+; CHECK-NEXT: ret void
+;
entry:
- br i1 %cmp1, label %L0, label %L3
+ br i1 %cmp1, label %L0, label %L3
L0:
%cmp = icmp eq i32 %i, 0
call void(i1, ...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
call void @dummy(i1 %cmp)
switch i1 %cmp, label %L3 [
- i1 false, label %L1
- i1 true, label %L2
- ]
+ i1 false, label %L1
+ i1 true, label %L2
+ ]
L1:
ret void
@@ -281,16 +333,17 @@ L3:
; Make sure that we don't PRE a non-speculable load across a guard.
define void @unsafe_pre_across_guard(i8* %p, i1 %load.is.valid) {
-
; CHECK-LABEL: @unsafe_pre_across_guard(
-; CHECK-NOT: loaded.pr
-; CHECK: entry:
-; CHECK-NEXT: br label %loop
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %load.is.valid) [ "deopt"() ]
-; CHECK-NEXT: %loaded = load i8, i8* %p
-; CHECK-NEXT: %continue = icmp eq i8 %loaded, 0
-; CHECK-NEXT: br i1 %continue, label %exit, label %loop
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[LOAD_IS_VALID:%.*]]) [ "deopt"() ]
+; CHECK-NEXT: [[LOADED:%.*]] = load i8, i8* [[P:%.*]], align 1
+; CHECK-NEXT: [[CONTINUE:%.*]] = icmp eq i8 [[LOADED]], 0
+; CHECK-NEXT: br i1 [[CONTINUE]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
entry:
br label %loop
@@ -306,16 +359,18 @@ exit: ; preds = %loop
; Make sure that we can safely PRE a speculable load across a guard.
define void @safe_pre_across_guard(i8* noalias nocapture readonly dereferenceable(8) %p, i1 %load.is.valid) {
-
; CHECK-LABEL: @safe_pre_across_guard(
-; CHECK: entry:
-; CHECK-NEXT: %loaded.pr = load i8, i8* %p
-; CHECK-NEXT: br label %loop
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LOADED_PR:%.*]] = load i8, i8* [[P:%.*]], align 1
+; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: %loaded = phi i8 [ %loaded, %loop ], [ %loaded.pr, %entry ]
-; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %load.is.valid) [ "deopt"() ]
-; CHECK-NEXT: %continue = icmp eq i8 %loaded, 0
-; CHECK-NEXT: br i1 %continue, label %exit, label %loop
+; CHECK-NEXT: [[LOADED:%.*]] = phi i8 [ [[LOADED]], [[LOOP]] ], [ [[LOADED_PR]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[LOAD_IS_VALID:%.*]]) [ "deopt"() ]
+; CHECK-NEXT: [[CONTINUE:%.*]] = icmp eq i8 [[LOADED]], 0
+; CHECK-NEXT: br i1 [[CONTINUE]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
entry:
br label %loop
@@ -333,16 +388,17 @@ exit: ; preds = %loop
; Make sure that we don't PRE a non-speculable load across a call which may
; alias with the load.
define void @unsafe_pre_across_call(i8* %p) {
-
; CHECK-LABEL: @unsafe_pre_across_call(
-; CHECK-NOT: loaded.pr
-; CHECK: entry:
-; CHECK-NEXT: br label %loop
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: call i32 @f1()
-; CHECK-NEXT: %loaded = load i8, i8* %p
-; CHECK-NEXT: %continue = icmp eq i8 %loaded, 0
-; CHECK-NEXT: br i1 %continue, label %exit, label %loop
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @f1()
+; CHECK-NEXT: [[LOADED:%.*]] = load i8, i8* [[P:%.*]], align 1
+; CHECK-NEXT: [[CONTINUE:%.*]] = icmp eq i8 [[LOADED]], 0
+; CHECK-NEXT: br i1 [[CONTINUE]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
entry:
br label %loop
@@ -358,16 +414,18 @@ exit: ; preds = %loop
; Make sure that we can safely PRE a speculable load across a call.
define void @safe_pre_across_call(i8* noalias nocapture readonly dereferenceable(8) %p) {
-
; CHECK-LABEL: @safe_pre_across_call(
-; CHECK: entry:
-; CHECK-NEXT: %loaded.pr = load i8, i8* %p
-; CHECK-NEXT: br label %loop
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LOADED_PR:%.*]] = load i8, i8* [[P:%.*]], align 1
+; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: %loaded = phi i8 [ %loaded, %loop ], [ %loaded.pr, %entry ]
-; CHECK-NEXT: call i32 @f1()
-; CHECK-NEXT: %continue = icmp eq i8 %loaded, 0
-; CHECK-NEXT: br i1 %continue, label %exit, label %loop
+; CHECK-NEXT: [[LOADED:%.*]] = phi i8 [ [[LOADED]], [[LOOP]] ], [ [[LOADED_PR]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @f1()
+; CHECK-NEXT: [[CONTINUE:%.*]] = icmp eq i8 [[LOADED]], 0
+; CHECK-NEXT: br i1 [[CONTINUE]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
entry:
br label %loop
diff --git a/llvm/test/Transforms/LICM/hoist-deref-load.ll b/llvm/test/Transforms/LICM/hoist-deref-load.ll
index a5b4aed68124..e198394bc92a 100644
--- a/llvm/test/Transforms/LICM/hoist-deref-load.ll
+++ b/llvm/test/Transforms/LICM/hoist-deref-load.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -basic-aa -licm < %s | FileCheck %s
; RUN: opt -aa-pipeline=basic-aa -passes='require<opt-remark-emit>,loop(loop-simplifycfg,licm)' -S < %s | FileCheck %s
; RUN: opt -S -basic-aa -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
@@ -15,11 +16,36 @@ target triple = "x86_64-unknown-linux-gnu"
; and we want to hoist the load of %c out of the loop. This can be done only
; because the dereferenceable attribute is on %c.
-; CHECK-LABEL: @test1
-; CHECK: load i32, i32* %c, align 4
-; CHECK: for.body:
-
define void @test1(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly nonnull dereferenceable(4) align 4 %c, i32 %n) #0 {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[C:%.*]], align 4
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK: if.then:
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP0]]
+; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%cmp11 = icmp sgt i32 %n, 0
br i1 %cmp11, label %for.body, label %for.end
@@ -51,12 +77,36 @@ for.end: ; preds = %for.inc, %entry
; This is the same as @test1, but without the dereferenceable attribute on %c.
; Without this attribute, we should not hoist the load of %c.
-
-; CHECK-LABEL: @test2
-; CHECK: if.then:
-; CHECK: load i32, i32* %c, align 4
-
define void @test2(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly nonnull %c, i32 %n) #0 {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[C:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%cmp11 = icmp sgt i32 %n, 0
br i1 %cmp11, label %for.body, label %for.end
@@ -94,12 +144,37 @@ for.end: ; preds = %for.inc, %entry
; }
; and we want to hoist the load of c[2] out of the loop. This can be done only
; because the dereferenceable attribute is on %c.
-
-; CHECK-LABEL: @test3
-; CHECK: load i32, i32* %c2, align 4
-; CHECK: for.body:
-
define void @test3(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly dereferenceable(12) align 4 %c, i32 %n) #0 {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[C2:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 2
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[C2]], align 4
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK: if.then:
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP0]]
+; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%cmp11 = icmp sgt i32 %n, 0
br i1 %cmp11, label %for.body, label %for.end
@@ -132,12 +207,37 @@ for.end: ; preds = %for.inc, %entry
; This is the same as @test3, but with a dereferenceable attribute on %c with a
; size too small to cover c[2] (and so we should not hoist it).
-
-; CHECK-LABEL: @test4
-; CHECK: if.then:
-; CHECK: load i32, i32* %c2, align 4
-
define void @test4(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly dereferenceable(11) %c, i32 %n) #0 {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[C2:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 2
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[C2]], align 4
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%cmp11 = icmp sgt i32 %n, 0
br i1 %cmp11, label %for.body, label %for.end
@@ -178,12 +278,39 @@ for.end: ; preds = %for.inc, %entry
; and we want to hoist the load of %c out of the loop. This can be done only
; because the dereferenceable_or_null attribute is on %c and there is a null
; check on %c.
-
-; CHECK-LABEL: @test5
-; CHECK: load i32, i32* %c, align 4
-; CHECK: for.body:
-
define void @test5(i32* noalias %a, i32* %b, i32* dereferenceable_or_null(4) align 4 %c, i32 %n) #0 {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i32* [[C:%.*]], null
+; CHECK-NEXT: br i1 [[NOT_NULL]], label [[NOT_NULL:%.*]], label [[FOR_END:%.*]]
+; CHECK: not.null:
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK: if.then:
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP0]]
+; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%not_null = icmp ne i32* %c, null
br i1 %not_null, label %not.null, label %for.end
@@ -221,13 +348,38 @@ for.end: ; preds = %for.inc, %entry, %n
; Without this check, we should not hoist the load of %c.
; This test case has an icmp on c but the use of this comparison is
-; not a branch.
-
-; CHECK-LABEL: @test6
-; CHECK: if.then:
-; CHECK: load i32, i32* %c, align 4
-
+; not a branch.
define i1 @test6(i32* noalias %a, i32* %b, i32* dereferenceable_or_null(4) %c, i32 %n) #0 {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i32* [[C:%.*]], null
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret i1 [[NOT_NULL]]
+;
entry:
%not_null = icmp ne i32* %c, null
%cmp11 = icmp sgt i32 %n, 0
@@ -267,12 +419,37 @@ for.end: ; preds = %for.inc, %entry
; }
; and we want to hoist the load of %c out of the loop. This can be done only
; because of the dereferenceable metadata on the c = *cptr load.
-
-; CHECK-LABEL: @test7
-; CHECK: load i32, i32* %c, align 4
-; CHECK: for.body:
-
define void @test7(i32* noalias %a, i32* %b, i32** %cptr, i32 %n) #0 {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C:%.*]] = load i32*, i32** [[CPTR:%.*]], align 8, !dereferenceable !0, !align !0
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK: if.then:
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP0]]
+; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%c = load i32*, i32** %cptr, !dereferenceable !0, !align !{i64 4}
%cmp11 = icmp sgt i32 %n, 0
@@ -312,14 +489,42 @@ for.end: ; preds = %for.inc, %entry
; a[i] = (*c)*b[i];
; }
; and we want to hoist the load of %c out of the loop. This can be done only
-; because the dereferenceable_or_null meatdata on the c = *cptr load and there
+; because the dereferenceable_or_null metadata on the c = *cptr load and there
; is a null check on %c.
-
-; CHECK-LABEL: @test8
-; CHECK: load i32, i32* %c, align 4
-; CHECK: for.body:
-
define void @test8(i32* noalias %a, i32* %b, i32** %cptr, i32 %n) #0 {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C:%.*]] = load i32*, i32** [[CPTR:%.*]], align 8, !dereferenceable_or_null !0, !align !0
+; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i32* [[C]], null
+; CHECK-NEXT: br i1 [[NOT_NULL]], label [[NOT_NULL:%.*]], label [[FOR_END:%.*]]
+; CHECK: not.null:
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK: if.then:
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP0]]
+; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%c = load i32*, i32** %cptr, !dereferenceable_or_null !0, !align !{i64 4}
%not_null = icmp ne i32* %c, null
@@ -356,12 +561,37 @@ for.end: ; preds = %for.inc, %entry, %n
; This is the same as @test8, but without the null check on %c.
; Without this check, we should not hoist the load of %c.
-
-; CHECK-LABEL: @test9
-; CHECK: if.then:
-; CHECK: load i32, i32* %c, align 4
-
define void @test9(i32* noalias %a, i32* %b, i32** %cptr, i32 %n) #0 {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C:%.*]] = load i32*, i32** [[CPTR:%.*]], align 8, !dereferenceable_or_null !0
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%c = load i32*, i32** %cptr, !dereferenceable_or_null !0
%cmp11 = icmp sgt i32 %n, 0
@@ -398,14 +628,37 @@ for.end: ; preds = %for.inc, %entry
; on the dereferenceability anymore.
; In other words this test checks that we strip dereferenceability metadata
; after hoisting an instruction.
-
-; CHECK-LABEL: @test10
-; CHECK: %c = load i32*, i32** %cptr
-; CHECK-NOT: dereferenceable
-; CHECK: if.then:
-; CHECK: load i32, i32* %c, align 4
-
define void @test10(i32* noalias %a, i32* %b, i32** dereferenceable(8) align 8 %cptr, i32 %n) #0 {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[C:%.*]] = load i32*, i32** [[CPTR:%.*]], align 8
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%cmp11 = icmp sgt i32 %n, 0
br i1 %cmp11, label %for.body, label %for.end
@@ -438,13 +691,39 @@ for.end: ; preds = %for.inc, %entry
define void @test11(i32* noalias %a, i32* %b, i32** dereferenceable(8) %cptr, i32 %n) #0 {
; CHECK-LABEL: @test11(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[C:%.*]] = load i32*, i32** [[CPTR:%.*]], align 8, !dereferenceable !0
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK: if.then:
+; CHECK-NEXT: [[D:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[E:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[E]], [[D]]
+; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%cmp11 = icmp sgt i32 %n, 0
br i1 %cmp11, label %for.body, label %for.end
-; CHECK: for.body.preheader:
-; CHECK: %c = load i32*, i32** %cptr, align 8, !dereferenceable !0
-; CHECK: %d = load i32, i32* %c, align 4
for.body: ; preds = %entry, %for.inc
@@ -475,19 +754,45 @@ for.end: ; preds = %for.inc, %entry
declare void @llvm.experimental.guard(i1, ...)
-define void @test12(i32* noalias %a, i32* %b, i32* dereferenceable_or_null(4) align 4 %c, i32 %n) #0 {
; Prove non-nullness of %c via a guard, not a branch.
-
+define void @test12(i32* noalias %a, i32* %b, i32* dereferenceable_or_null(4) align 4 %c, i32 %n) #0 {
; CHECK-LABEL: @test12(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i32* [[C:%.*]], null
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[NOT_NULL]]) [ "deopt"() ]
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK: if.then:
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP0]]
+; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%not_null = icmp ne i32* %c, null
call void(i1, ...) @llvm.experimental.guard(i1 %not_null) [ "deopt"() ]
%cmp11 = icmp sgt i32 %n, 0
br i1 %cmp11, label %for.body, label %for.end
-; CHECK: for.body.preheader:
-; CHECK-NEXT: [[VAL:%[^ ]]] = load i32, i32* %c, align 4
-; CHECK-NEXT: br label %for.body
for.body: ; preds = %entry, %for.inc
@@ -515,19 +820,45 @@ for.end: ; preds = %for.inc, %entry, %e
ret void
}
-define void @test13(i32* noalias %a, i32* %b, i32* dereferenceable_or_null(4) %c, i32 %n) #0 {
; Like @test12, but has a post-dominating guard, which cannot be used
; to prove %c is nonnull at the point of the load.
-
+define void @test13(i32* noalias %a, i32* %b, i32* dereferenceable_or_null(4) %c, i32 %n) #0 {
; CHECK-LABEL: @test13(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i32* [[C:%.*]], null
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[NOT_NULL]]) [ "deopt"() ]
+; CHECK-NEXT: ret void
+;
entry:
%not_null = icmp ne i32* %c, null
%cmp11 = icmp sgt i32 %n, 0
br i1 %cmp11, label %for.body, label %for.end
-; CHECK: for.body.preheader:
-; CHECK-NOT: load i32, i32* %c
-; CHECK: br label %for.body
for.body: ; preds = %entry, %for.inc
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
@@ -537,9 +868,6 @@ for.body: ; preds = %entry, %for.inc
br i1 %cmp1, label %if.then, label %for.inc
if.then: ; preds = %for.body
-; CHECK: if.then:
-; CHECK: load i32, i32* %c
-; CHECK: br label %for.inc
%1 = load i32, i32* %c, align 4
%arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
%2 = load i32, i32* %arrayidx3, align 4
@@ -561,10 +889,39 @@ for.end: ; preds = %for.inc, %entry, %e
; Check that branch by condition "null check AND something" allows to hoist the
; load.
define void @test14(i32* noalias %a, i32* %b, i32* dereferenceable_or_null(4) align 4 %c, i32 %n, i1 %dummy_cond) #0 {
-
-; CHECK-LABEL: @test14
-; CHECK: load i32, i32* %c, align 4
-; CHECK: for.body:
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i32* [[C:%.*]], null
+; CHECK-NEXT: [[DUMMY_AND:%.*]] = and i1 [[NOT_NULL]], [[DUMMY_COND:%.*]]
+; CHECK-NEXT: br i1 [[DUMMY_AND]], label [[NOT_NULL:%.*]], label [[FOR_END:%.*]]
+; CHECK: not.null:
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK: if.then:
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP0]]
+; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%not_null = icmp ne i32* %c, null
@@ -603,10 +960,38 @@ for.end: ; preds = %for.inc, %entry, %n
; Check that guard by condition "null check AND something" allows to hoist the
; load.
define void @test15(i32* noalias %a, i32* %b, i32* dereferenceable_or_null(4) align 4 %c, i32 %n, i1 %dummy_cond) #0 {
-
-; CHECK-LABEL: @test15
-; CHECK: load i32, i32* %c, align 4
-; CHECK: for.body:
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne i32* [[C:%.*]], null
+; CHECK-NEXT: [[DUMMY_AND:%.*]] = and i1 [[NOT_NULL]], [[DUMMY_COND:%.*]]
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[DUMMY_AND]]) [ "deopt"() ]
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK: if.then:
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP0]]
+; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%not_null = icmp ne i32* %c, null
@@ -644,10 +1029,39 @@ for.end: ; preds = %for.inc, %entry
; non-null in false branch. So the condition ((c == null && other_cond) == false)
; is not sufficient to conclude that c != null.
define void @test16(i32* noalias %a, i32* %b, i32* dereferenceable_or_null(4) %c, i32 %n, i1 %dummy_cond) #0 {
-
-; CHECK-LABEL: @test16
-; CHECK: for.body:
-; CHECK: load i32, i32* %c, align 4
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp eq i32* [[C:%.*]], null
+; CHECK-NEXT: [[DUMMY_AND:%.*]] = and i1 [[NOT_NULL]], [[DUMMY_COND:%.*]]
+; CHECK-NEXT: br i1 [[DUMMY_AND]], label [[FOR_END:%.*]], label [[NOT_NULL:%.*]]
+; CHECK: not.null:
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%not_null = icmp eq i32* %c, null
@@ -687,10 +1101,38 @@ for.end: ; preds = %for.inc, %entry, %n
; non-null in false branch. So the condition ((c == null && other_cond) == false)
; is not sufficient to conclude that c != null.
define void @test17(i32* noalias %a, i32* %b, i32* dereferenceable_or_null(4) %c, i32 %n, i1 %dummy_cond) #0 {
-
-; CHECK-LABEL: @test17
-; CHECK: for.body:
-; CHECK: load i32, i32* %c, align 4
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp eq i32* [[C:%.*]], null
+; CHECK-NEXT: [[DUMMY_AND:%.*]] = and i1 [[NOT_NULL]], [[DUMMY_COND:%.*]]
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[DUMMY_AND]]) [ "deopt"() ]
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
%not_null = icmp eq i32* %c, null
diff --git a/llvm/test/Transforms/TailCallElim/reorder_load.ll b/llvm/test/Transforms/TailCallElim/reorder_load.ll
index 027cfe78bb4b..59d9703a80ac 100644
--- a/llvm/test/Transforms/TailCallElim/reorder_load.ll
+++ b/llvm/test/Transforms/TailCallElim/reorder_load.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
; PR4323
@@ -16,23 +17,35 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; and the call has no side effects.
define fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly {
; CHECK-LABEL: @raise_load_1(
-; CHECK-NOT: call
-; CHECK: load i32, i32*
-; CHECK-NOT: call
-; CHECK: }
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[TAILRECURSE:%.*]]
+; CHECK: tailrecurse:
+; CHECK-NEXT: [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[ELSE:%.*]] ]
+; CHECK-NEXT: [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]]
+; CHECK-NEXT: br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE]]
+; CHECK: if:
+; CHECK-NEXT: [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]]
+; CHECK-NEXT: ret i32 [[ACCUMULATOR_RET_TR]]
+; CHECK: else:
+; CHECK-NEXT: [[TMP7]] = add i32 [[START_ARG_TR]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ARG:%.*]], align 4
+; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]]
+; CHECK-NEXT: br label [[TAILRECURSE]]
+;
entry:
- %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
- br i1 %tmp2, label %if, label %else
+ %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
+ br i1 %tmp2, label %if, label %else
if: ; preds = %entry
- ret i32 0
+ ret i32 0
else: ; preds = %entry
- %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
- %tmp8 = call fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
- %tmp9 = load i32, i32* %a_arg ; <i32> [#uses=1]
- %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
- ret i32 %tmp10
+ %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
+ %tmp8 = call fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
+ %tmp9 = load i32, i32* %a_arg ; <i32> [#uses=1]
+ %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
+ ret i32 %tmp10
}
@@ -40,30 +53,47 @@ else: ; preds = %entry
; and the load provably can't trap.
define fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
; CHECK-LABEL: @raise_load_2(
-; CHECK-NOT: call
-; CHECK: load i32, i32*
-; CHECK-NOT: call
-; CHECK: }
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[TAILRECURSE:%.*]]
+; CHECK: tailrecurse:
+; CHECK-NEXT: [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[RECURSE:%.*]] ]
+; CHECK-NEXT: [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[RECURSE]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]]
+; CHECK-NEXT: br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK: if:
+; CHECK-NEXT: [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]]
+; CHECK-NEXT: ret i32 [[ACCUMULATOR_RET_TR]]
+; CHECK: else:
+; CHECK-NEXT: [[NULLCHECK:%.*]] = icmp eq i32* [[A_ARG:%.*]], null
+; CHECK-NEXT: br i1 [[NULLCHECK]], label [[UNWIND:%.*]], label [[RECURSE]]
+; CHECK: unwind:
+; CHECK-NEXT: unreachable
+; CHECK: recurse:
+; CHECK-NEXT: [[TMP7]] = add i32 [[START_ARG_TR]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* @global, align 4
+; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]]
+; CHECK-NEXT: br label [[TAILRECURSE]]
+;
entry:
- %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
- br i1 %tmp2, label %if, label %else
+ %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
+ br i1 %tmp2, label %if, label %else
if: ; preds = %entry
- ret i32 0
+ ret i32 0
else: ; preds = %entry
- %nullcheck = icmp eq i32* %a_arg, null ; <i1> [#uses=1]
- br i1 %nullcheck, label %unwind, label %recurse
+ %nullcheck = icmp eq i32* %a_arg, null ; <i1> [#uses=1]
+ br i1 %nullcheck, label %unwind, label %recurse
unwind: ; preds = %else
- unreachable
+ unreachable
recurse: ; preds = %else
- %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
- %tmp8 = call fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
- %tmp9 = load i32, i32* @global ; <i32> [#uses=1]
- %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
- ret i32 %tmp10
+ %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
+ %tmp8 = call fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
+ %tmp9 = load i32, i32* @global ; <i32> [#uses=1]
+ %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
+ ret i32 %tmp10
}
@@ -71,23 +101,35 @@ recurse: ; preds = %else
; extern_weak global) because the call has no side effects.
define fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly {
; CHECK-LABEL: @raise_load_3(
-; CHECK-NOT: call
-; CHECK: load i32, i32*
-; CHECK-NOT: call
-; CHECK: }
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[TAILRECURSE:%.*]]
+; CHECK: tailrecurse:
+; CHECK-NEXT: [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[ELSE:%.*]] ]
+; CHECK-NEXT: [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]]
+; CHECK-NEXT: br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE]]
+; CHECK: if:
+; CHECK-NEXT: [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]]
+; CHECK-NEXT: ret i32 [[ACCUMULATOR_RET_TR]]
+; CHECK: else:
+; CHECK-NEXT: [[TMP7]] = add i32 [[START_ARG_TR]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* @extern_weak_global, align 4
+; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]]
+; CHECK-NEXT: br label [[TAILRECURSE]]
+;
entry:
- %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
- br i1 %tmp2, label %if, label %else
+ %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
+ br i1 %tmp2, label %if, label %else
if: ; preds = %entry
- ret i32 0
+ ret i32 0
else: ; preds = %entry
- %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
- %tmp8 = call fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
- %tmp9 = load i32, i32* @extern_weak_global ; <i32> [#uses=1]
- %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
- ret i32 %tmp10
+ %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
+ %tmp8 = call fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
+ %tmp9 = load i32, i32* @extern_weak_global ; <i32> [#uses=1]
+ %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
+ ret i32 %tmp10
}
@@ -96,79 +138,124 @@ else: ; preds = %entry
; proves it doesn't trap.
define fastcc i32 @raise_load_4(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
; CHECK-LABEL: @raise_load_4(
-; CHECK-NOT: call
-; CHECK: load i32, i32*
-; CHECK-NEXT: load i32, i32*
-; CHECK-NOT: call
-; CHECK: }
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[TAILRECURSE:%.*]]
+; CHECK: tailrecurse:
+; CHECK-NEXT: [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[RECURSE:%.*]] ]
+; CHECK-NEXT: [[A_LEN_ARG_TR:%.*]] = phi i32 [ [[A_LEN_ARG:%.*]], [[ENTRY]] ], [ [[FIRST:%.*]], [[RECURSE]] ]
+; CHECK-NEXT: [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[RECURSE]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG_TR]]
+; CHECK-NEXT: br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK: if:
+; CHECK-NEXT: [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]]
+; CHECK-NEXT: ret i32 [[ACCUMULATOR_RET_TR]]
+; CHECK: else:
+; CHECK-NEXT: [[NULLCHECK:%.*]] = icmp eq i32* [[A_ARG:%.*]], null
+; CHECK-NEXT: br i1 [[NULLCHECK]], label [[UNWIND:%.*]], label [[RECURSE]]
+; CHECK: unwind:
+; CHECK-NEXT: unreachable
+; CHECK: recurse:
+; CHECK-NEXT: [[TMP7]] = add i32 [[START_ARG_TR]], 1
+; CHECK-NEXT: [[FIRST]] = load i32, i32* [[A_ARG]], align 4
+; CHECK-NEXT: [[SECOND:%.*]] = load i32, i32* [[A_ARG]], align 4
+; CHECK-NEXT: [[TMP10]] = add i32 [[SECOND]], [[ACCUMULATOR_TR]]
+; CHECK-NEXT: br label [[TAILRECURSE]]
+;
entry:
- %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
- br i1 %tmp2, label %if, label %else
+ %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
+ br i1 %tmp2, label %if, label %else
if: ; preds = %entry
- ret i32 0
+ ret i32 0
else: ; preds = %entry
- %nullcheck = icmp eq i32* %a_arg, null ; <i1> [#uses=1]
- br i1 %nullcheck, label %unwind, label %recurse
+ %nullcheck = icmp eq i32* %a_arg, null ; <i1> [#uses=1]
+ br i1 %nullcheck, label %unwind, label %recurse
unwind: ; preds = %else
- unreachable
+ unreachable
recurse: ; preds = %else
- %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
- %first = load i32, i32* %a_arg ; <i32> [#uses=1]
- %tmp8 = call fastcc i32 @raise_load_4(i32* %a_arg, i32 %first, i32 %tmp7) ; <i32> [#uses=1]
- %second = load i32, i32* %a_arg ; <i32> [#uses=1]
- %tmp10 = add i32 %second, %tmp8 ; <i32> [#uses=1]
- ret i32 %tmp10
+ %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
+ %first = load i32, i32* %a_arg ; <i32> [#uses=1]
+ %tmp8 = call fastcc i32 @raise_load_4(i32* %a_arg, i32 %first, i32 %tmp7) ; <i32> [#uses=1]
+ %second = load i32, i32* %a_arg ; <i32> [#uses=1]
+ %tmp10 = add i32 %second, %tmp8 ; <i32> [#uses=1]
+ ret i32 %tmp10
}
; This load can be moved above the call because the function won't write to it
; and the a_arg is dereferenceable.
define fastcc i32 @raise_load_5(i32* dereferenceable(4) align 4 %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
; CHECK-LABEL: @raise_load_5(
-; CHECK-NOT: call
-; CHECK: load i32, i32*
-; CHECK-NOT: call
-; CHECK: }
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[TAILRECURSE:%.*]]
+; CHECK: tailrecurse:
+; CHECK-NEXT: [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[ELSE:%.*]] ]
+; CHECK-NEXT: [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[ELSE]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]]
+; CHECK-NEXT: br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE]]
+; CHECK: if:
+; CHECK-NEXT: [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]]
+; CHECK-NEXT: ret i32 [[ACCUMULATOR_RET_TR]]
+; CHECK: else:
+; CHECK-NEXT: [[TMP7]] = add i32 [[START_ARG_TR]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ARG:%.*]], align 4
+; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]]
+; CHECK-NEXT: br label [[TAILRECURSE]]
+;
entry:
- %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
- br i1 %tmp2, label %if, label %else
+ %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
+ br i1 %tmp2, label %if, label %else
if: ; preds = %entry
- ret i32 0
+ ret i32 0
else: ; preds = %entry
- %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
- %tmp8 = call fastcc i32 @raise_load_5(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
- %tmp9 = load i32, i32* %a_arg ; <i32> [#uses=1]
- %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
- ret i32 %tmp10
+ %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
+ %tmp8 = call fastcc i32 @raise_load_5(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
+ %tmp9 = load i32, i32* %a_arg ; <i32> [#uses=1]
+ %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
+ ret i32 %tmp10
}
; This load can be moved above the call because the function call does not write to the memory the load
; is accessing and the load is safe to speculate.
define fastcc i32 @raise_load_6(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind {
; CHECK-LABEL: @raise_load_6(
-; CHECK-NOT: call
-; CHECK: load i32, i32*
-; CHECK-NOT: call
-; CHECK: }
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[S:%.*]] = alloca i32, align 4
+; CHECK-NEXT: br label [[TAILRECURSE:%.*]]
+; CHECK: tailrecurse:
+; CHECK-NEXT: [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[ELSE:%.*]] ]
+; CHECK-NEXT: [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[ELSE]] ]
+; CHECK-NEXT: store i32 4, i32* [[S]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]]
+; CHECK-NEXT: br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE]]
+; CHECK: if:
+; CHECK-NEXT: store i32 1, i32* [[A_ARG:%.*]], align 4
+; CHECK-NEXT: [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]]
+; CHECK-NEXT: ret i32 [[ACCUMULATOR_RET_TR]]
+; CHECK: else:
+; CHECK-NEXT: [[TMP7]] = add i32 [[START_ARG_TR]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[S]], align 4
+; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]]
+; CHECK-NEXT: br label [[TAILRECURSE]]
+;
entry:
%s = alloca i32
store i32 4, i32* %s
- %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
- br i1 %tmp2, label %if, label %else
+ %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
+ br i1 %tmp2, label %if, label %else
if: ; preds = %entry
store i32 1, i32* %a_arg
- ret i32 0
+ ret i32 0
else: ; preds = %entry
- %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
- %tmp8 = call fastcc i32 @raise_load_6(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
- %tmp9 = load i32, i32* %s ; <i32> [#uses=1]
- %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
- ret i32 %tmp10
+ %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
+ %tmp8 = call fastcc i32 @raise_load_6(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
+ %tmp9 = load i32, i32* %s ; <i32> [#uses=1]
+ %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
+ ret i32 %tmp10
}
More information about the llvm-commits mailing list