[llvm] 7a78756 - [LSR] Regenerate test checks (NFC)
Nikita Popov via llvm-commits
llvm-commits@lists.llvm.org
Wed Jul 12 00:40:18 PDT 2023
Author: Nikita Popov
Date: 2023-07-12T09:40:10+02:00
New Revision: 7a7875611862627c811be8d0bbbc2a5a861862e4
URL: https://github.com/llvm/llvm-project/commit/7a7875611862627c811be8d0bbbc2a5a861862e4
DIFF: https://github.com/llvm/llvm-project/commit/7a7875611862627c811be8d0bbbc2a5a861862e4.diff
LOG: [LSR] Regenerate test checks (NFC)
Added:
Modified:
llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
llvm/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll
llvm/test/Transforms/LoopStrengthReduce/addrec-gep.ll
llvm/test/Transforms/LoopStrengthReduce/illegal-addr-modes.ll
llvm/test/Transforms/LoopStrengthReduce/missing-phi-operand-update.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
index 6c90697920870b..70d6cffdbd0042 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -1,17 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -opaque-pointers=0 -O3 -mtriple=thumb-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s -check-prefix=A9
; @simple is the most basic chain of address induction variables. Chaining
; saves at least one register and avoids complex addressing and setup
; code.
;
-; A9: @simple
; no expensive address computation in the preheader
-; A9: lsl
-; A9-NOT: lsl
-; A9: %loop
; no complex address modes
-; A9-NOT: lsl
define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
+; A9-LABEL: simple:
+; A9: @ %bb.0: @ %entry
+; A9-NEXT: .save {r4, r5, r6, lr}
+; A9-NEXT: push {r4, r5, r6, lr}
+; A9-NEXT: mov r3, r0
+; A9-NEXT: lsls r2, r2, #2
+; A9-NEXT: movs r0, #0
+; A9-NEXT: .LBB0_1: @ %loop
+; A9-NEXT: @ =>This Inner Loop Header: Depth=1
+; A9-NEXT: add.w lr, r3, r2
+; A9-NEXT: ldr.w r12, [r3, r2]
+; A9-NEXT: ldr r3, [r3]
+; A9-NEXT: add.w r4, lr, r2
+; A9-NEXT: ldr.w r6, [lr, r2]
+; A9-NEXT: add r0, r3
+; A9-NEXT: adds r3, r4, r2
+; A9-NEXT: add r0, r12
+; A9-NEXT: ldr r5, [r4, r2]
+; A9-NEXT: add r0, r6
+; A9-NEXT: add r3, r2
+; A9-NEXT: add r0, r5
+; A9-NEXT: cmp r3, r1
+; A9-NEXT: bne .LBB0_1
+; A9-NEXT: @ %bb.2: @ %exit
+; A9-NEXT: pop {r4, r5, r6, pc}
entry:
br label %loop
loop:
@@ -37,15 +58,34 @@ exit:
; @user is not currently chained because the IV is live across memory ops.
;
-; A9: @user
; stride multiples computed in the preheader
-; A9: lsl
-; A9: lsl
-; A9: %loop
; complex address modes
-; A9: lsl
-; A9: lsl
define i32 @user(i32* %a, i32* %b, i32 %x) nounwind {
+; A9-LABEL: user:
+; A9: @ %bb.0: @ %entry
+; A9-NEXT: .save {r4, r5, r6, r7, lr}
+; A9-NEXT: push {r4, r5, r6, r7, lr}
+; A9-NEXT: add.w r3, r2, r2, lsl #1
+; A9-NEXT: lsl.w r12, r2, #4
+; A9-NEXT: lsl.w lr, r3, #2
+; A9-NEXT: movs r3, #0
+; A9-NEXT: .LBB1_1: @ %loop
+; A9-NEXT: @ =>This Inner Loop Header: Depth=1
+; A9-NEXT: ldr r4, [r0]
+; A9-NEXT: ldr.w r5, [r0, r2, lsl #3]
+; A9-NEXT: ldr.w r6, [r0, r2, lsl #2]
+; A9-NEXT: add r3, r4
+; A9-NEXT: ldr.w r7, [r0, lr]
+; A9-NEXT: add r3, r6
+; A9-NEXT: add r3, r5
+; A9-NEXT: add r3, r7
+; A9-NEXT: str r3, [r0]
+; A9-NEXT: add r0, r12
+; A9-NEXT: cmp r0, r1
+; A9-NEXT: bne .LBB1_1
+; A9-NEXT: @ %bb.2: @ %exit
+; A9-NEXT: mov r0, r3
+; A9-NEXT: pop {r4, r5, r6, r7, pc}
entry:
br label %loop
loop:
@@ -75,16 +115,43 @@ exit:
; used to do, and exactly what we don't want to do. LSR's new IV
; chaining feature should now undo the damage.
;
-; A9: extrastride:
; no spills
-; A9-NOT: str
; only one stride multiple in the preheader
-; A9: lsl
-; A9-NOT: {{str r|lsl}}
-; A9: %for.body{{$}}
; no complex address modes or reloads
-; A9-NOT: {{ldr .*[sp]|lsl}}
define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
+; A9-LABEL: extrastride:
+; A9: @ %bb.0: @ %entry
+; A9-NEXT: .save {r4, r5, r6, r7, lr}
+; A9-NEXT: push {r4, r5, r6, r7, lr}
+; A9-NEXT: ldr.w r12, [sp, #24]
+; A9-NEXT: cmp.w r12, #0
+; A9-NEXT: beq .LBB2_3
+; A9-NEXT: @ %bb.1: @ %for.body.lr.ph
+; A9-NEXT: ldr r4, [sp, #20]
+; A9-NEXT: add.w lr, r3, r1
+; A9-NEXT: lsls r3, r4, #2
+; A9-NEXT: .LBB2_2: @ %for.body
+; A9-NEXT: @ =>This Inner Loop Header: Depth=1
+; A9-NEXT: adds r5, r0, r1
+; A9-NEXT: ldr r4, [r0, r1]
+; A9-NEXT: ldr r0, [r0]
+; A9-NEXT: subs.w r12, r12, #1
+; A9-NEXT: ldr r6, [r5, r1]
+; A9-NEXT: add r5, r1
+; A9-NEXT: add r0, r4
+; A9-NEXT: ldr r7, [r5, r1]
+; A9-NEXT: add r5, r1
+; A9-NEXT: add r0, r6
+; A9-NEXT: ldr r4, [r5, r1]
+; A9-NEXT: add r0, r7
+; A9-NEXT: add r0, r4
+; A9-NEXT: str r0, [r2]
+; A9-NEXT: add.w r0, r5, r1
+; A9-NEXT: add r2, r3
+; A9-NEXT: add r0, lr
+; A9-NEXT: bne .LBB2_2
+; A9-NEXT: .LBB2_3: @ %for.end
+; A9-NEXT: pop {r4, r5, r6, r7, pc}
entry:
%cmp8 = icmp eq i32 %z, 0
br i1 %cmp8, label %for.end, label %for.body.lr.ph
@@ -136,10 +203,38 @@ for.end: ; preds = %for.body, %entry
; }
; where 's' can be folded into the addressing mode.
; Consequently, we should *not* form any chains.
-;
-; A9: foldedidx:
-; A9: ldrb{{(.w)?}} {{r[0-9]|lr}}, [{{r[0-9]|lr}}, #3]
define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
+; A9-LABEL: foldedidx:
+; A9: @ %bb.0: @ %entry
+; A9-NEXT: .save {r4, r5, r6, lr}
+; A9-NEXT: push {r4, r5, r6, lr}
+; A9-NEXT: mov.w lr, #0
+; A9-NEXT: .LBB3_1: @ %for.body
+; A9-NEXT: @ =>This Inner Loop Header: Depth=1
+; A9-NEXT: ldrb.w r12, [r0, lr]
+; A9-NEXT: add.w r4, r1, lr
+; A9-NEXT: ldrb.w r3, [r1, lr]
+; A9-NEXT: add r3, r12
+; A9-NEXT: strb.w r3, [r2, lr]
+; A9-NEXT: add.w r3, r0, lr
+; A9-NEXT: ldrb.w r12, [r3, #1]
+; A9-NEXT: ldrb r5, [r4, #1]
+; A9-NEXT: add r12, r5
+; A9-NEXT: add.w r5, r2, lr
+; A9-NEXT: strb.w r12, [r5, #1]
+; A9-NEXT: add.w lr, lr, #4
+; A9-NEXT: cmp.w lr, #400
+; A9-NEXT: ldrb.w r12, [r3, #2]
+; A9-NEXT: ldrb r6, [r4, #2]
+; A9-NEXT: add r6, r12
+; A9-NEXT: strb r6, [r5, #2]
+; A9-NEXT: ldrb r3, [r3, #3]
+; A9-NEXT: ldrb r6, [r4, #3]
+; A9-NEXT: add r3, r6
+; A9-NEXT: strb r3, [r5, #3]
+; A9-NEXT: bne .LBB3_1
+; A9-NEXT: @ %bb.2: @ %for.end
+; A9-NEXT: pop {r4, r5, r6, pc}
entry:
br label %for.body
@@ -200,14 +295,45 @@ for.end: ; preds = %for.body
;
; Loads and stores should use post-increment addressing, no add's or add.w's.
; Most importantly, there should be no spills or reloads!
-;
-; A9: testNeon:
-; A9: %.lr.ph
-; A9-NOT: lsl.w
-; A9-NOT: {{ldr|str|adds|add r}}
-; A9-NOT: add.w r
-; A9: bne
define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i8>* nocapture %data) nounwind optsize {
+; A9-LABEL: testNeon:
+; A9: @ %bb.0:
+; A9-NEXT: .save {r4, r5, r7, lr}
+; A9-NEXT: push {r4, r5, r7, lr}
+; A9-NEXT: vmov.i32 q8, #0x0
+; A9-NEXT: cmp r2, #1
+; A9-NEXT: blt .LBB4_4
+; A9-NEXT: @ %bb.1: @ %.lr.ph
+; A9-NEXT: movs r5, #0
+; A9-NEXT: movw r4, #64464
+; A9-NEXT: sub.w r12, r5, r2, lsl #6
+; A9-NEXT: sub.w lr, r1, r1, lsl #4
+; A9-NEXT: movt r4, #65535
+; A9-NEXT: mov r5, r3
+; A9-NEXT: .LBB4_2: @ =>This Inner Loop Header: Depth=1
+; A9-NEXT: vld1.64 {d18}, [r0], r1
+; A9-NEXT: subs r2, #1
+; A9-NEXT: vld1.64 {d19}, [r0], r1
+; A9-NEXT: vst1.8 {d18, d19}, [r5]!
+; A9-NEXT: vld1.64 {d20}, [r0], r1
+; A9-NEXT: vld1.64 {d21}, [r0], r1
+; A9-NEXT: vst1.8 {d20, d21}, [r5]!
+; A9-NEXT: vld1.64 {d22}, [r0], r1
+; A9-NEXT: vadd.i8 q9, q9, q10
+; A9-NEXT: vld1.64 {d23}, [r0], r1
+; A9-NEXT: vst1.8 {d22, d23}, [r5]!
+; A9-NEXT: vld1.64 {d20}, [r0], r1
+; A9-NEXT: vadd.i8 q9, q9, q11
+; A9-NEXT: vld1.64 {d21}, [r0], lr
+; A9-NEXT: vadd.i8 q9, q9, q10
+; A9-NEXT: vadd.i8 q8, q8, q9
+; A9-NEXT: vst1.8 {d20, d21}, [r5], r4
+; A9-NEXT: bne .LBB4_2
+; A9-NEXT: @ %bb.3: @ %._crit_edge
+; A9-NEXT: add.w r3, r3, r12, lsl #4
+; A9-NEXT: .LBB4_4:
+; A9-NEXT: vst1.32 {d16, d17}, [r3]
+; A9-NEXT: pop {r4, r5, r7, pc}
%1 = icmp sgt i32 %limit, 0
br i1 %1, label %.lr.ph, label %45
@@ -284,24 +410,41 @@ declare <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8*, i32) nounwind readonly
; Handle chains in which the same offset is used for both loads and
; stores to the same array.
; rdar://11410078.
-;
-; A9: @testReuse
-; A9: %for.body
-; A9: vld1.8 {d{{[0-9]+}}}, [[BASE:[r[0-9]+]]], [[INC:r[0-9]]]
-; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
-; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]]
-; A9: bne
define void @testReuse(i8* %src, i32 %stride) nounwind ssp {
+; A9-LABEL: testReuse:
+; A9: @ %bb.0: @ %entry
+; A9-NEXT: sub.w r12, r0, r1, lsl #2
+; A9-NEXT: sub.w r0, r1, r1, lsl #2
+; A9-NEXT: lsls r2, r0, #1
+; A9-NEXT: movs r3, #0
+; A9-NEXT: .LBB5_1: @ %for.body
+; A9-NEXT: @ =>This Inner Loop Header: Depth=1
+; A9-NEXT: add.w r0, r12, r3
+; A9-NEXT: adds r3, #8
+; A9-NEXT: vld1.8 {d16}, [r0], r1
+; A9-NEXT: cmp r3, #32
+; A9-NEXT: vld1.8 {d17}, [r0], r1
+; A9-NEXT: vhadd.u8 d16, d16, d17
+; A9-NEXT: vld1.8 {d18}, [r0], r1
+; A9-NEXT: vhadd.u8 d17, d17, d18
+; A9-NEXT: vld1.8 {d19}, [r0], r1
+; A9-NEXT: vhadd.u8 d18, d18, d19
+; A9-NEXT: vld1.8 {d20}, [r0], r1
+; A9-NEXT: vhadd.u8 d19, d19, d20
+; A9-NEXT: vld1.8 {d21}, [r0], r1
+; A9-NEXT: vhadd.u8 d20, d20, d21
+; A9-NEXT: vld1.8 {d22}, [r0], r1
+; A9-NEXT: vhadd.u8 d21, d21, d22
+; A9-NEXT: vld1.8 {d23}, [r0], r2
+; A9-NEXT: vst1.8 {d16}, [r0], r1
+; A9-NEXT: vst1.8 {d17}, [r0], r1
+; A9-NEXT: vst1.8 {d18}, [r0], r1
+; A9-NEXT: vst1.8 {d19}, [r0], r1
+; A9-NEXT: vst1.8 {d20}, [r0], r1
+; A9-NEXT: vst1.8 {d21}, [r0]
+; A9-NEXT: bne .LBB5_1
+; A9-NEXT: @ %bb.2: @ %for.end
+; A9-NEXT: bx lr
entry:
%mul = shl nsw i32 %stride, 2
%idx.neg = sub i32 0, %mul
diff --git a/llvm/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll b/llvm/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll
index 099d7d5023ed49..b9670176c15ddc 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll
@@ -1,16 +1,5 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -opaque-pointers=0 < %s -loop-reduce -S | FileCheck %s
-; CHECK: bb1:
-; CHECK: load double, double addrspace(1)* [[IV:%[^,]+]]
-; CHECK: store double {{.*}}, double addrspace(1)* [[IV]]
-
-; CHECK-NOT: cast
-; Make sure the GEP has the right index type
-; CHECK: getelementptr double, double addrspace(1)* [[IV]], i16 1
-; CHECK: br {{.*}} label %bb1
-
-; Make sure the GEP has the right index type
-; CHECK: getelementptr double, double addrspace(1)* {{.*}}, i16
-
; This test tests several things. The load and store should use the
; same address instead of having it computed twice, and SCEVExpander should
@@ -22,67 +11,119 @@
target datalayout = "e-p:64:64:64-p1:16:16:16-n16:32:64"
define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double addrspace(1)* nocapture %p) nounwind {
+; CHECK-LABEL: define void @foo
+; CHECK-SAME: (i64 [[N:%.*]], i64 [[M:%.*]], i64 [[O:%.*]], i64 [[Q:%.*]], double addrspace(1)* nocapture [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP:%.*]] = icmp sgt i64 [[N]], 0
+; CHECK-NEXT: br i1 [[TMP]], label [[BB_NPH3:%.*]], label [[RETURN:%.*]]
+; CHECK: bb.nph:
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[LSR_IV3:%.*]] = phi double addrspace(1)* [ [[SCEVGEP4:%.*]], [[BB2:%.*]] ], [ [[LSR_IV:%.*]], [[BB_NPH:%.*]] ]
+; CHECK-NEXT: [[J_01:%.*]] = phi i64 [ [[TMP9:%.*]], [[BB2]] ], [ 0, [[BB_NPH]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = load double, double addrspace(1)* [[LSR_IV3]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = fdiv double [[TMP6]], 2.100000e+00
+; CHECK-NEXT: store double [[TMP7]], double addrspace(1)* [[LSR_IV3]], align 8
+; CHECK-NEXT: [[TMP9]] = add i64 [[J_01]], 1
+; CHECK-NEXT: br label [[BB2]]
+; CHECK: bb2:
+; CHECK-NEXT: [[SCEVGEP4]] = getelementptr double, double addrspace(1)* [[LSR_IV3]], i16 1
+; CHECK-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP9]], [[M]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[BB1]], label [[BB2_BB3_CRIT_EDGE:%.*]]
+; CHECK: bb2.bb3_crit_edge:
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[I_02:%.*]], 1
+; CHECK-NEXT: br label [[BB4:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i1, i1 addrspace(1)* [[LSR_IV1:%.*]], i16 [[TMP5:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 addrspace(1)* [[SCEVGEP2]] to double addrspace(1)*
+; CHECK-NEXT: [[TMP12:%.*]] = icmp slt i64 [[TMP11]], [[N]]
+; CHECK-NEXT: br i1 [[TMP12]], label [[BB2_PREHEADER:%.*]], label [[BB4_RETURN_CRIT_EDGE:%.*]]
+; CHECK: bb4.return_crit_edge:
+; CHECK-NEXT: br label [[BB4_RETURN_CRIT_EDGE_SPLIT:%.*]]
+; CHECK: bb4.return_crit_edge.split:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: bb.nph3:
+; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i64 [[M]], 0
+; CHECK-NEXT: br i1 [[TMP13]], label [[BB_NPH3_SPLIT:%.*]], label [[BB4_RETURN_CRIT_EDGE_SPLIT]]
+; CHECK: bb.nph3.split:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double addrspace(1)* [[P]], i16 -2989
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[Q]], [[O]]
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], [[N]]
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 37
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i16
+; CHECK-NEXT: [[TMP5]] = shl i16 [[TMP4]], 3
+; CHECK-NEXT: br label [[BB2_PREHEADER]]
+; CHECK: bb2.preheader:
+; CHECK-NEXT: [[LSR_IV]] = phi double addrspace(1)* [ [[SCEVGEP]], [[BB_NPH3_SPLIT]] ], [ [[TMP0]], [[BB4]] ]
+; CHECK-NEXT: [[I_02]] = phi i64 [ [[TMP11]], [[BB4]] ], [ 0, [[BB_NPH3_SPLIT]] ]
+; CHECK-NEXT: [[LSR_IV1]] = bitcast double addrspace(1)* [[LSR_IV]] to i1 addrspace(1)*
+; CHECK-NEXT: br i1 true, label [[BB_NPH]], label [[BB3]]
+; CHECK: return:
+; CHECK-NEXT: ret void
+;
entry:
- %tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1]
- br i1 %tmp, label %bb.nph3, label %return
+ %tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1]
+ br i1 %tmp, label %bb.nph3, label %return
bb.nph: ; preds = %bb2.preheader
- %tmp1 = mul i64 %tmp16, %i.02 ; <i64> [#uses=1]
- %tmp2 = mul i64 %tmp19, %i.02 ; <i64> [#uses=1]
- br label %bb1
+ %tmp1 = mul i64 %tmp16, %i.02 ; <i64> [#uses=1]
+ %tmp2 = mul i64 %tmp19, %i.02 ; <i64> [#uses=1]
+ br label %bb1
bb1: ; preds = %bb2, %bb.nph
- %j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ] ; <i64> [#uses=3]
- %tmp3 = add i64 %j.01, %tmp1 ; <i64> [#uses=1]
- %tmp4 = add i64 %j.01, %tmp2 ; <i64> [#uses=1]
- %z0 = add i64 %tmp3, 5203
- %tmp5 = getelementptr double, double addrspace(1)* %p, i64 %z0 ; <double addrspace(1)*> [#uses=1]
- %tmp6 = load double, double addrspace(1)* %tmp5, align 8 ; <double> [#uses=1]
- %tmp7 = fdiv double %tmp6, 2.100000e+00 ; <double> [#uses=1]
- %z1 = add i64 %tmp4, 5203
- %tmp8 = getelementptr double, double addrspace(1)* %p, i64 %z1 ; <double addrspace(1)*> [#uses=1]
- store double %tmp7, double addrspace(1)* %tmp8, align 8
- %tmp9 = add i64 %j.01, 1 ; <i64> [#uses=2]
- br label %bb2
+ %j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ] ; <i64> [#uses=3]
+ %tmp3 = add i64 %j.01, %tmp1 ; <i64> [#uses=1]
+ %tmp4 = add i64 %j.01, %tmp2 ; <i64> [#uses=1]
+ %z0 = add i64 %tmp3, 5203
+ %tmp5 = getelementptr double, double addrspace(1)* %p, i64 %z0 ; <double addrspace(1)*> [#uses=1]
+ %tmp6 = load double, double addrspace(1)* %tmp5, align 8 ; <double> [#uses=1]
+ %tmp7 = fdiv double %tmp6, 2.100000e+00 ; <double> [#uses=1]
+ %z1 = add i64 %tmp4, 5203
+ %tmp8 = getelementptr double, double addrspace(1)* %p, i64 %z1 ; <double addrspace(1)*> [#uses=1]
+ store double %tmp7, double addrspace(1)* %tmp8, align 8
+ %tmp9 = add i64 %j.01, 1 ; <i64> [#uses=2]
+ br label %bb2
bb2: ; preds = %bb1
- %tmp10 = icmp slt i64 %tmp9, %m ; <i1> [#uses=1]
- br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge
+ %tmp10 = icmp slt i64 %tmp9, %m ; <i1> [#uses=1]
+ br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge
bb2.bb3_crit_edge: ; preds = %bb2
- br label %bb3
+ br label %bb3
bb3: ; preds = %bb2.preheader, %bb2.bb3_crit_edge
- %tmp11 = add i64 %i.02, 1 ; <i64> [#uses=2]
- br label %bb4
+ %tmp11 = add i64 %i.02, 1 ; <i64> [#uses=2]
+ br label %bb4
bb4: ; preds = %bb3
- %tmp12 = icmp slt i64 %tmp11, %n ; <i1> [#uses=1]
- br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge
+ %tmp12 = icmp slt i64 %tmp11, %n ; <i1> [#uses=1]
+ br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge
bb4.return_crit_edge: ; preds = %bb4
- br label %bb4.return_crit_edge.split
+ br label %bb4.return_crit_edge.split
bb4.return_crit_edge.split: ; preds = %bb.nph3, %bb4.return_crit_edge
- br label %return
+ br label %return
bb.nph3: ; preds = %entry
- %tmp13 = icmp sgt i64 %m, 0 ; <i1> [#uses=1]
- %tmp14 = mul i64 %n, 37 ; <i64> [#uses=1]
- %tmp15 = mul i64 %tmp14, %o ; <i64> [#uses=1]
- %tmp16 = mul i64 %tmp15, %q ; <i64> [#uses=1]
- %tmp17 = mul i64 %n, 37 ; <i64> [#uses=1]
- %tmp18 = mul i64 %tmp17, %o ; <i64> [#uses=1]
- %tmp19 = mul i64 %tmp18, %q ; <i64> [#uses=1]
- br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split
+ %tmp13 = icmp sgt i64 %m, 0 ; <i1> [#uses=1]
+ %tmp14 = mul i64 %n, 37 ; <i64> [#uses=1]
+ %tmp15 = mul i64 %tmp14, %o ; <i64> [#uses=1]
+ %tmp16 = mul i64 %tmp15, %q ; <i64> [#uses=1]
+ %tmp17 = mul i64 %n, 37 ; <i64> [#uses=1]
+ %tmp18 = mul i64 %tmp17, %o ; <i64> [#uses=1]
+ %tmp19 = mul i64 %tmp18, %q ; <i64> [#uses=1]
+ br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split
bb.nph3.split: ; preds = %bb.nph3
- br label %bb2.preheader
+ br label %bb2.preheader
bb2.preheader: ; preds = %bb.nph3.split, %bb4
- %i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ] ; <i64> [#uses=3]
- br i1 true, label %bb.nph, label %bb3
+ %i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ] ; <i64> [#uses=3]
+ br i1 true, label %bb.nph, label %bb3
return: ; preds = %bb4.return_crit_edge.split, %entry
- ret void
+ ret void
}
diff --git a/llvm/test/Transforms/LoopStrengthReduce/addrec-gep.ll b/llvm/test/Transforms/LoopStrengthReduce/addrec-gep.ll
index 6bf066e11a87bd..9b44d2adb6324e 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/addrec-gep.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/addrec-gep.ll
@@ -1,10 +1,5 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -opaque-pointers=0 < %s -loop-reduce -S | FileCheck %s
-; CHECK: bb1:
-; CHECK: load double, double* [[IV:%[^,]+]]
-; CHECK: store double {{.*}}, double* [[IV]]
-; CHECK: getelementptr double, double*
-; CHECK-NOT: cast
-; CHECK: br {{.*}} label %bb1
; This test tests several things. The load and store should use the
; same address instead of having it computed twice, and SCEVExpander should
@@ -16,67 +11,117 @@
target datalayout = "e-p:64:64:64-n32:64"
define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double* nocapture %p) nounwind {
+; CHECK-LABEL: define void @foo
+; CHECK-SAME: (i64 [[N:%.*]], i64 [[M:%.*]], i64 [[O:%.*]], i64 [[Q:%.*]], double* nocapture [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP:%.*]] = icmp sgt i64 [[N]], 0
+; CHECK-NEXT: br i1 [[TMP]], label [[BB_NPH3:%.*]], label [[RETURN:%.*]]
+; CHECK: bb.nph:
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[LSR_IV3:%.*]] = phi double* [ [[SCEVGEP4:%.*]], [[BB2:%.*]] ], [ [[LSR_IV:%.*]], [[BB_NPH:%.*]] ]
+; CHECK-NEXT: [[J_01:%.*]] = phi i64 [ [[TMP9:%.*]], [[BB2]] ], [ 0, [[BB_NPH]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = load double, double* [[LSR_IV3]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = fdiv double [[TMP6]], 2.100000e+00
+; CHECK-NEXT: store double [[TMP7]], double* [[LSR_IV3]], align 8
+; CHECK-NEXT: [[TMP9]] = add i64 [[J_01]], 1
+; CHECK-NEXT: br label [[BB2]]
+; CHECK: bb2:
+; CHECK-NEXT: [[SCEVGEP4]] = getelementptr double, double* [[LSR_IV3]], i64 1
+; CHECK-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP9]], [[M]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[BB1]], label [[BB2_BB3_CRIT_EDGE:%.*]]
+; CHECK: bb2.bb3_crit_edge:
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[I_02:%.*]], 1
+; CHECK-NEXT: br label [[BB4:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i1, i1* [[LSR_IV1:%.*]], i64 [[TMP3:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1* [[SCEVGEP2]] to double*
+; CHECK-NEXT: [[TMP12:%.*]] = icmp slt i64 [[TMP11]], [[N]]
+; CHECK-NEXT: br i1 [[TMP12]], label [[BB2_PREHEADER:%.*]], label [[BB4_RETURN_CRIT_EDGE:%.*]]
+; CHECK: bb4.return_crit_edge:
+; CHECK-NEXT: br label [[BB4_RETURN_CRIT_EDGE_SPLIT:%.*]]
+; CHECK: bb4.return_crit_edge.split:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: bb.nph3:
+; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i64 [[M]], 0
+; CHECK-NEXT: br i1 [[TMP13]], label [[BB_NPH3_SPLIT:%.*]], label [[BB4_RETURN_CRIT_EDGE_SPLIT]]
+; CHECK: bb.nph3.split:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[P]], i64 5203
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[Q]], [[O]]
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], [[N]]
+; CHECK-NEXT: [[TMP3]] = mul i64 [[TMP2]], 296
+; CHECK-NEXT: br label [[BB2_PREHEADER]]
+; CHECK: bb2.preheader:
+; CHECK-NEXT: [[LSR_IV]] = phi double* [ [[SCEVGEP]], [[BB_NPH3_SPLIT]] ], [ [[TMP0]], [[BB4]] ]
+; CHECK-NEXT: [[I_02]] = phi i64 [ [[TMP11]], [[BB4]] ], [ 0, [[BB_NPH3_SPLIT]] ]
+; CHECK-NEXT: [[LSR_IV1]] = bitcast double* [[LSR_IV]] to i1*
+; CHECK-NEXT: br i1 true, label [[BB_NPH]], label [[BB3]]
+; CHECK: return:
+; CHECK-NEXT: ret void
+;
entry:
- %tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1]
- br i1 %tmp, label %bb.nph3, label %return
+ %tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1]
+ br i1 %tmp, label %bb.nph3, label %return
bb.nph: ; preds = %bb2.preheader
- %tmp1 = mul i64 %tmp16, %i.02 ; <i64> [#uses=1]
- %tmp2 = mul i64 %tmp19, %i.02 ; <i64> [#uses=1]
- br label %bb1
+ %tmp1 = mul i64 %tmp16, %i.02 ; <i64> [#uses=1]
+ %tmp2 = mul i64 %tmp19, %i.02 ; <i64> [#uses=1]
+ br label %bb1
bb1: ; preds = %bb2, %bb.nph
- %j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ] ; <i64> [#uses=3]
- %tmp3 = add i64 %j.01, %tmp1 ; <i64> [#uses=1]
- %tmp4 = add i64 %j.01, %tmp2 ; <i64> [#uses=1]
- %z0 = add i64 %tmp3, 5203
- %tmp5 = getelementptr double, double* %p, i64 %z0 ; <double*> [#uses=1]
- %tmp6 = load double, double* %tmp5, align 8 ; <double> [#uses=1]
- %tmp7 = fdiv double %tmp6, 2.100000e+00 ; <double> [#uses=1]
- %z1 = add i64 %tmp4, 5203
- %tmp8 = getelementptr double, double* %p, i64 %z1 ; <double*> [#uses=1]
- store double %tmp7, double* %tmp8, align 8
- %tmp9 = add i64 %j.01, 1 ; <i64> [#uses=2]
- br label %bb2
+ %j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ] ; <i64> [#uses=3]
+ %tmp3 = add i64 %j.01, %tmp1 ; <i64> [#uses=1]
+ %tmp4 = add i64 %j.01, %tmp2 ; <i64> [#uses=1]
+ %z0 = add i64 %tmp3, 5203
+ %tmp5 = getelementptr double, double* %p, i64 %z0 ; <double*> [#uses=1]
+ %tmp6 = load double, double* %tmp5, align 8 ; <double> [#uses=1]
+ %tmp7 = fdiv double %tmp6, 2.100000e+00 ; <double> [#uses=1]
+ %z1 = add i64 %tmp4, 5203
+ %tmp8 = getelementptr double, double* %p, i64 %z1 ; <double*> [#uses=1]
+ store double %tmp7, double* %tmp8, align 8
+ %tmp9 = add i64 %j.01, 1 ; <i64> [#uses=2]
+ br label %bb2
bb2: ; preds = %bb1
- %tmp10 = icmp slt i64 %tmp9, %m ; <i1> [#uses=1]
- br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge
+ %tmp10 = icmp slt i64 %tmp9, %m ; <i1> [#uses=1]
+ br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge
bb2.bb3_crit_edge: ; preds = %bb2
- br label %bb3
+ br label %bb3
bb3: ; preds = %bb2.preheader, %bb2.bb3_crit_edge
- %tmp11 = add i64 %i.02, 1 ; <i64> [#uses=2]
- br label %bb4
+ %tmp11 = add i64 %i.02, 1 ; <i64> [#uses=2]
+ br label %bb4
bb4: ; preds = %bb3
- %tmp12 = icmp slt i64 %tmp11, %n ; <i1> [#uses=1]
- br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge
+ %tmp12 = icmp slt i64 %tmp11, %n ; <i1> [#uses=1]
+ br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge
bb4.return_crit_edge: ; preds = %bb4
- br label %bb4.return_crit_edge.split
+ br label %bb4.return_crit_edge.split
bb4.return_crit_edge.split: ; preds = %bb.nph3, %bb4.return_crit_edge
- br label %return
+ br label %return
bb.nph3: ; preds = %entry
- %tmp13 = icmp sgt i64 %m, 0 ; <i1> [#uses=1]
- %tmp14 = mul i64 %n, 37 ; <i64> [#uses=1]
- %tmp15 = mul i64 %tmp14, %o ; <i64> [#uses=1]
- %tmp16 = mul i64 %tmp15, %q ; <i64> [#uses=1]
- %tmp17 = mul i64 %n, 37 ; <i64> [#uses=1]
- %tmp18 = mul i64 %tmp17, %o ; <i64> [#uses=1]
- %tmp19 = mul i64 %tmp18, %q ; <i64> [#uses=1]
- br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split
+ %tmp13 = icmp sgt i64 %m, 0 ; <i1> [#uses=1]
+ %tmp14 = mul i64 %n, 37 ; <i64> [#uses=1]
+ %tmp15 = mul i64 %tmp14, %o ; <i64> [#uses=1]
+ %tmp16 = mul i64 %tmp15, %q ; <i64> [#uses=1]
+ %tmp17 = mul i64 %n, 37 ; <i64> [#uses=1]
+ %tmp18 = mul i64 %tmp17, %o ; <i64> [#uses=1]
+ %tmp19 = mul i64 %tmp18, %q ; <i64> [#uses=1]
+ br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split
bb.nph3.split: ; preds = %bb.nph3
- br label %bb2.preheader
+ br label %bb2.preheader
bb2.preheader: ; preds = %bb.nph3.split, %bb4
- %i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ] ; <i64> [#uses=3]
- br i1 true, label %bb.nph, label %bb3
+ %i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ] ; <i64> [#uses=3]
+ br i1 true, label %bb.nph, label %bb3
return: ; preds = %bb4.return_crit_edge.split, %entry
- ret void
+ ret void
}
diff --git a/llvm/test/Transforms/LoopStrengthReduce/illegal-addr-modes.ll b/llvm/test/Transforms/LoopStrengthReduce/illegal-addr-modes.ll
index 3836030f3c0eef..6541b961c11c2e 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/illegal-addr-modes.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/illegal-addr-modes.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -opaque-pointers=0 < %s -loop-reduce -S | FileCheck %s
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -11,16 +12,42 @@ target triple = "thumbv6m-arm-none-eabi"
; "[LSR] Narrow search space by filtering non-optimal formulae with the
; same ScaledReg and Scale."
;
-; Due to a bug in ARMTargetLowering::isLegalAddressingMode LSR got
+; Due to a bug in ARMTargetLowering::isLegalAddressingMode LSR got
; 4*reg({0,+,-1}) and -4*reg({0,+,-1}) had the same cost for the Thumb1 target.
; Another issue was that LSR got that -1*reg was free for the Thumb1 target.
; Test case 01: -1*reg is not free for the Thumb1 target.
-;
-; CHECK-LABEL: @negativeOneCase
-; CHECK-NOT: mul
-; CHECK: ret i8
define i8* @negativeOneCase(i8* returned %a, i8* nocapture readonly %b, i32 %n) nounwind {
+; CHECK-LABEL: define i8* @negativeOneCase
+; CHECK-SAME: (i8* returned [[A:%.*]], i8* nocapture readonly [[B:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 -1
+; CHECK-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK: while.cond:
+; CHECK-NEXT: [[P_0:%.*]] = phi i8* [ [[ADD_PTR]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR:%.*]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[P_0]], i32 1
+; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, i8* [[P_0]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[SCEVGEP5]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_COND2_PREHEADER:%.*]], label [[WHILE_COND]]
+; CHECK: while.cond2.preheader:
+; CHECK-NEXT: br label [[WHILE_COND2:%.*]]
+; CHECK: while.cond2:
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[WHILE_BODY5:%.*]] ], [ 0, [[WHILE_COND2_PREHEADER]] ]
+; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, i8* [[B]], i32 [[LSR_IV]]
+; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, i8* [[INCDEC_PTR]], i32 [[LSR_IV]]
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[N]], [[LSR_IV]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[WHILE_END8:%.*]], label [[WHILE_BODY5]]
+; CHECK: while.body5:
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[SCEVGEP1]], align 1
+; CHECK-NEXT: store i8 [[TMP1]], i8* [[SCEVGEP3]], align 1
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
+; CHECK-NEXT: br label [[WHILE_COND2]]
+; CHECK: while.end8:
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, i8* [[INCDEC_PTR]], i32 [[N]]
+; CHECK-NEXT: store i8 0, i8* [[SCEVGEP4]], align 1
+; CHECK-NEXT: ret i8* [[A]]
+;
entry:
%add.ptr = getelementptr inbounds i8, i8* %a, i32 -1
br label %while.cond
@@ -58,11 +85,64 @@ while.end8: ; preds = %while.cond2
; Test case 02: 4*reg({0,+,-1}) and -4*reg({0,+,-1}) are not supported for
; the Thumb1 target.
-;
-; CHECK-LABEL: @negativeFourCase
-; CHECK-NOT: mul
-; CHECK: ret void
define void @negativeFourCase(i8* %ptr1, i32* %ptr2) nounwind {
+; CHECK-LABEL: define void @negativeFourCase
+; CHECK-SAME: (i8* [[PTR1:%.*]], i32* [[PTR2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_COND6_PREHEADER_US_I_I:%.*]]
+; CHECK: for.cond6.preheader.us.i.i:
+; CHECK-NEXT: [[ADDR_0108_US_I_I:%.*]] = phi i8* [ [[SCEVGEP_I_I:%.*]], [[IF_END48_US_I_I:%.*]] ], [ [[PTR1]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[INC49_US_I_I:%.*]] = phi i32 [ [[INC50_US_I_I:%.*]], [[IF_END48_US_I_I]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[C1_0104_US_I_I:%.*]] = phi i32* [ [[C0_0103_US_I_I:%.*]], [[IF_END48_US_I_I]] ], [ [[PTR2]], [[ENTRY]] ]
+; CHECK-NEXT: [[C0_0103_US_I_I]] = phi i32* [ [[C1_0104_US_I_I]], [[IF_END48_US_I_I]] ], [ [[PTR2]], [[ENTRY]] ]
+; CHECK-NEXT: [[C0_0103_US_I_I1:%.*]] = bitcast i32* [[C0_0103_US_I_I]] to i8*
+; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i32, i32* [[C1_0104_US_I_I]], i32 -1
+; CHECK-NEXT: [[SCEVGEP34:%.*]] = bitcast i32* [[SCEVGEP3]] to i8*
+; CHECK-NEXT: [[C1_0104_US_I_I7:%.*]] = bitcast i32* [[C1_0104_US_I_I]] to i8*
+; CHECK-NEXT: br label [[FOR_BODY8_US_I_I:%.*]]
+; CHECK: if.end48.us.i.i:
+; CHECK-NEXT: [[SCEVGEP_I_I]] = getelementptr i8, i8* [[ADDR_0108_US_I_I]], i32 256
+; CHECK-NEXT: [[INC50_US_I_I]] = add nuw nsw i32 [[INC49_US_I_I]], 1
+; CHECK-NEXT: [[EXITCOND110_I_I:%.*]] = icmp eq i32 [[INC50_US_I_I]], 256
+; CHECK-NEXT: br i1 [[EXITCOND110_I_I]], label [[EXIT_I:%.*]], label [[FOR_COND6_PREHEADER_US_I_I]]
+; CHECK: for.body8.us.i.i:
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[FOR_INC_US_I_I:%.*]] ], [ 0, [[FOR_COND6_PREHEADER_US_I_I]] ]
+; CHECK-NEXT: [[ADDR_198_US_I_I:%.*]] = phi i8* [ [[ADDR_0108_US_I_I]], [[FOR_COND6_PREHEADER_US_I_I]] ], [ [[INCDEC_PTR_US_I_I:%.*]], [[FOR_INC_US_I_I]] ]
+; CHECK-NEXT: [[INC_196_US_I_I:%.*]] = phi i32 [ 0, [[FOR_COND6_PREHEADER_US_I_I]] ], [ [[INC_2_US_I_I:%.*]], [[FOR_INC_US_I_I]] ]
+; CHECK-NEXT: [[INCDEC_PTR_US_I_I]] = getelementptr inbounds i8, i8* [[ADDR_198_US_I_I]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[ADDR_198_US_I_I]], align 1
+; CHECK-NEXT: [[CMP9_US_I_I:%.*]] = icmp eq i8 [[TMP0]], -1
+; CHECK-NEXT: br i1 [[CMP9_US_I_I]], label [[IF_END37_US_I_I:%.*]], label [[IF_ELSE_US_I_I:%.*]]
+; CHECK: if.else.us.i.i:
+; CHECK-NEXT: [[SCEVGEP10:%.*]] = getelementptr i8, i8* [[C1_0104_US_I_I7]], i32 [[LSR_IV]]
+; CHECK-NEXT: [[SCEVGEP1011:%.*]] = bitcast i8* [[SCEVGEP10]] to i32*
+; CHECK-NEXT: [[SCEVGEP12:%.*]] = getelementptr i32, i32* [[SCEVGEP1011]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[SCEVGEP12]], align 4
+; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, i8* [[C1_0104_US_I_I7]], i32 [[LSR_IV]]
+; CHECK-NEXT: [[SCEVGEP89:%.*]] = bitcast i8* [[SCEVGEP8]] to i32*
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[SCEVGEP89]], align 4
+; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, i8* [[SCEVGEP34]], i32 [[LSR_IV]]
+; CHECK-NEXT: [[SCEVGEP56:%.*]] = bitcast i8* [[SCEVGEP5]] to i32*
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[SCEVGEP56]], align 4
+; CHECK-NEXT: br label [[IF_END37_US_I_I]]
+; CHECK: if.end37.us.i.i:
+; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP3]], [[IF_ELSE_US_I_I]] ], [ 0, [[FOR_BODY8_US_I_I]] ]
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[C0_0103_US_I_I1]], i32 [[LSR_IV]]
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast i8* [[SCEVGEP]] to i32*
+; CHECK-NEXT: store i32 [[TMP4]], i32* [[SCEVGEP2]], align 4
+; CHECK-NEXT: [[INC_US_I_I:%.*]] = add nsw i32 [[INC_196_US_I_I]], 1
+; CHECK-NEXT: [[CMP38_US_I_I:%.*]] = icmp sgt i32 [[INC_196_US_I_I]], 6
+; CHECK-NEXT: br i1 [[CMP38_US_I_I]], label [[IF_THEN40_US_I_I:%.*]], label [[FOR_INC_US_I_I]]
+; CHECK: if.then40.us.i.i:
+; CHECK-NEXT: br label [[FOR_INC_US_I_I]]
+; CHECK: for.inc.us.i.i:
+; CHECK-NEXT: [[INC_2_US_I_I]] = phi i32 [ 0, [[IF_THEN40_US_I_I]] ], [ [[INC_US_I_I]], [[IF_END37_US_I_I]] ]
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i32 [[LSR_IV]], 4
+; CHECK-NEXT: [[EXITCOND_I_I:%.*]] = icmp eq i32 1024, [[LSR_IV_NEXT]]
+; CHECK-NEXT: br i1 [[EXITCOND_I_I]], label [[IF_END48_US_I_I]], label [[FOR_BODY8_US_I_I]]
+; CHECK: exit.i:
+; CHECK-NEXT: ret void
+;
entry:
br label %for.cond6.preheader.us.i.i
diff --git a/llvm/test/Transforms/LoopStrengthReduce/missing-phi-operand-update.ll b/llvm/test/Transforms/LoopStrengthReduce/missing-phi-operand-update.ll
index 2e8c1772ae10a6..ac47e4ee47dfb8 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/missing-phi-operand-update.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/missing-phi-operand-update.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; PR41445: This test checks the case when LSR split critical edge
; and phi node has other pending fixup operands
@@ -12,6 +13,103 @@ target triple = "x86_64-unknown-linux-gnu"
; All the other PHI inputs besides %tmp1 go to a new phi node.
; This test checks that LSR is still able to rewrite %tmp2, %tmp3, %tmp4.
define i32 @foo(i32* %A, i32 %t) {
+; CHECK-LABEL: define i32 @foo
+; CHECK-SAME: (i32* [[A:%.*]], i32 [[T:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP_32:%.*]]
+; CHECK: loop.exit.loopexitsplitsplitsplit:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LSR_IV:%.*]], -1
+; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLITSPLIT:%.*]]
+; CHECK: ifmerge.38.loop.exit.loopexitsplitsplit_crit_edge:
+; CHECK-NEXT: [[LSR_IV_LCSSA10:%.*]] = phi i64 [ [[LSR_IV]], [[IFMERGE_38:%.*]] ]
+; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLITSPLIT]]
+; CHECK: loop.exit.loopexitsplitsplit:
+; CHECK-NEXT: [[INDVARS_IV_LCSSA_PH_PH_PH:%.*]] = phi i64 [ [[LSR_IV_LCSSA10]], [[IFMERGE_38_LOOP_EXIT_LOOPEXITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[TMP0]], [[LOOP_EXIT_LOOPEXITSPLITSPLITSPLIT:%.*]] ]
+; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLIT:%.*]]
+; CHECK: ifmerge.42.loop.exit.loopexitsplit_crit_edge:
+; CHECK-NEXT: [[LSR_IV_LCSSA11:%.*]] = phi i64 [ [[LSR_IV]], [[IFMERGE_42:%.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[LSR_IV_LCSSA11]], 1
+; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLIT]]
+; CHECK: loop.exit.loopexitsplit:
+; CHECK-NEXT: [[INDVARS_IV_LCSSA_PH_PH:%.*]] = phi i64 [ [[TMP1]], [[IFMERGE_42_LOOP_EXIT_LOOPEXITSPLIT_CRIT_EDGE:%.*]] ], [ [[INDVARS_IV_LCSSA_PH_PH_PH]], [[LOOP_EXIT_LOOPEXITSPLITSPLIT]] ]
+; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT:%.*]]
+; CHECK: then.34.loop.exit.loopexit_crit_edge:
+; CHECK-NEXT: [[LSR_IV_LCSSA:%.*]] = phi i64 [ [[LSR_IV]], [[THEN_34:%.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[LSR_IV_LCSSA]], -2
+; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT]]
+; CHECK: loop.exit.loopexit:
+; CHECK-NEXT: [[INDVARS_IV_LCSSA_PH:%.*]] = phi i64 [ [[TMP2]], [[THEN_34_LOOP_EXIT_LOOPEXIT_CRIT_EDGE:%.*]] ], [ [[INDVARS_IV_LCSSA_PH_PH]], [[LOOP_EXIT_LOOPEXITSPLIT]] ]
+; CHECK-NEXT: br label [[LOOP_EXIT:%.*]]
+; CHECK: loop.exit:
+; CHECK-NEXT: [[INDVARS_IV_LCSSA:%.*]] = phi i64 [ 48, [[THEN_8:%.*]] ], [ 49, [[THEN_8_1:%.*]] ], [ [[INDVARS_IV_LCSSA_PH]], [[LOOP_EXIT_LOOPEXIT]] ]
+; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV_LCSSA]] to i32
+; CHECK-NEXT: br label [[FOR_END:%.*]]
+; CHECK: for.end:
+; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP]], [[LOOP_EXIT]] ], [ 50, [[THEN_8_1]] ], [ 50, [[IFMERGE_8:%.*]] ]
+; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
+; CHECK: loop.32:
+; CHECK-NEXT: [[LSR_IV]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[IFMERGE_46:%.*]] ], [ 2, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[I1_I64_0:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTIVLOOP_32:%.*]], [[IFMERGE_46]] ]
+; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i32, i32* [[A]], i64 [[LSR_IV]]
+; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i32, i32* [[SCEVGEP7]], i64 -1
+; CHECK-NEXT: [[GEPLOAD:%.*]] = load i32, i32* [[SCEVGEP8]], align 4
+; CHECK-NEXT: [[CMP_34:%.*]] = icmp sgt i32 [[GEPLOAD]], [[T]]
+; CHECK-NEXT: br i1 [[CMP_34]], label [[THEN_34]], label [[IFMERGE_34:%.*]]
+; CHECK: then.34:
+; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A]], i64 [[LSR_IV]]
+; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i32, i32* [[SCEVGEP5]], i64 -2
+; CHECK-NEXT: [[GEPLOAD18:%.*]] = load i32, i32* [[SCEVGEP6]], align 4
+; CHECK-NEXT: [[CMP_35:%.*]] = icmp slt i32 [[GEPLOAD18]], [[T]]
+; CHECK-NEXT: br i1 [[CMP_35]], label [[THEN_34_LOOP_EXIT_LOOPEXIT_CRIT_EDGE]], label [[IFMERGE_34]]
+; CHECK: ifmerge.34:
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[A]], i64 [[LSR_IV]]
+; CHECK-NEXT: [[GEPLOAD20:%.*]] = load i32, i32* [[SCEVGEP4]], align 4
+; CHECK-NEXT: [[CMP_38:%.*]] = icmp sgt i32 [[GEPLOAD20]], [[T]]
+; CHECK-NEXT: [[CMP_39:%.*]] = icmp slt i32 [[GEPLOAD]], [[T]]
+; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP_38]], [[CMP_39]]
+; CHECK-NEXT: br i1 [[OR_COND]], label [[LOOP_EXIT_LOOPEXITSPLITSPLITSPLIT]], label [[IFMERGE_38]]
+; CHECK: ifmerge.38:
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i32, i32* [[A]], i64 [[LSR_IV]]
+; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i32, i32* [[SCEVGEP2]], i64 1
+; CHECK-NEXT: [[GEPLOAD24:%.*]] = load i32, i32* [[SCEVGEP3]], align 4
+; CHECK-NEXT: [[CMP_42:%.*]] = icmp sgt i32 [[GEPLOAD24]], [[T]]
+; CHECK-NEXT: [[CMP_43:%.*]] = icmp slt i32 [[GEPLOAD20]], [[T]]
+; CHECK-NEXT: [[OR_COND55:%.*]] = and i1 [[CMP_42]], [[CMP_43]]
+; CHECK-NEXT: br i1 [[OR_COND55]], label [[IFMERGE_38_LOOP_EXIT_LOOPEXITSPLITSPLIT_CRIT_EDGE]], label [[IFMERGE_42]]
+; CHECK: ifmerge.42:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A]], i64 [[LSR_IV]]
+; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[SCEVGEP]], i64 2
+; CHECK-NEXT: [[GEPLOAD28:%.*]] = load i32, i32* [[SCEVGEP1]], align 4
+; CHECK-NEXT: [[CMP_46:%.*]] = icmp sgt i32 [[GEPLOAD28]], [[T]]
+; CHECK-NEXT: [[CMP_47:%.*]] = icmp slt i32 [[GEPLOAD24]], [[T]]
+; CHECK-NEXT: [[OR_COND56:%.*]] = and i1 [[CMP_46]], [[CMP_47]]
+; CHECK-NEXT: br i1 [[OR_COND56]], label [[IFMERGE_42_LOOP_EXIT_LOOPEXITSPLIT_CRIT_EDGE]], label [[IFMERGE_46]]
+; CHECK: ifmerge.46:
+; CHECK-NEXT: [[NEXTIVLOOP_32]] = add nuw nsw i64 [[I1_I64_0]], 1
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 4
+; CHECK-NEXT: [[CONDLOOP_32:%.*]] = icmp ult i64 [[NEXTIVLOOP_32]], 12
+; CHECK-NEXT: br i1 [[CONDLOOP_32]], label [[LOOP_32]], label [[LOOP_25:%.*]]
+; CHECK: loop.25:
+; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 49
+; CHECK-NEXT: [[GEPLOAD32:%.*]] = load i32, i32* [[ARRAYIDX31]], align 4
+; CHECK-NEXT: [[CMP_8:%.*]] = icmp sgt i32 [[GEPLOAD32]], [[T]]
+; CHECK-NEXT: br i1 [[CMP_8]], label [[THEN_8]], label [[IFMERGE_8]]
+; CHECK: then.8:
+; CHECK-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48
+; CHECK-NEXT: [[GEPLOAD34:%.*]] = load i32, i32* [[ARRAYIDX33]], align 4
+; CHECK-NEXT: [[CMP_15:%.*]] = icmp slt i32 [[GEPLOAD34]], [[T]]
+; CHECK-NEXT: br i1 [[CMP_15]], label [[LOOP_EXIT]], label [[IFMERGE_8]]
+; CHECK: ifmerge.8:
+; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 50
+; CHECK-NEXT: [[GEPLOAD32_1:%.*]] = load i32, i32* [[ARRAYIDX31_1]], align 4
+; CHECK-NEXT: [[CMP_8_1:%.*]] = icmp sgt i32 [[GEPLOAD32_1]], [[T]]
+; CHECK-NEXT: br i1 [[CMP_8_1]], label [[THEN_8_1]], label [[FOR_END]]
+; CHECK: then.8.1:
+; CHECK-NEXT: [[ARRAYIDX33_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 49
+; CHECK-NEXT: [[GEPLOAD34_1:%.*]] = load i32, i32* [[ARRAYIDX33_1]], align 4
+; CHECK-NEXT: [[CMP_15_1:%.*]] = icmp slt i32 [[GEPLOAD34_1]], [[T]]
+; CHECK-NEXT: br i1 [[CMP_15_1]], label [[LOOP_EXIT]], label [[FOR_END]]
+;
entry:
br label %loop.32
@@ -25,8 +123,6 @@ for.end: ; preds = %then.8.1, %ifmerge.
ret i32 %i.0.lcssa
; shl instruction will be dead eliminated when all it's uses will be rewritten.
-; CHECK-LABEL: loop.32:
-; CHECK-NOT: shl
loop.32: ; preds = %ifmerge.46, %entry
%i1.i64.0 = phi i64 [ 0, %entry ], [ %nextivloop.32, %ifmerge.46 ]
%tmp1 = shl i64 %i1.i64.0, 2
@@ -36,7 +132,6 @@ loop.32: ; preds = %ifmerge.46, %entry
%cmp.34 = icmp sgt i32 %gepload, %t
br i1 %cmp.34, label %then.34, label %ifmerge.34
-; CHECK-LABEL: then.34:
then.34: ; preds = %loop.32
%arrayIdx17 = getelementptr inbounds i32, i32* %A, i64 %tmp1
%gepload18 = load i32, i32* %arrayIdx17, align 4
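For reference, a minimal sketch of how checks like these are regenerated
(assuming a local build with opt and llc in ./build/bin; the paths below
are illustrative, not part of the commit):

  # regenerate opt-based checks for an IR-to-IR test
  llvm/utils/update_test_checks.py --opt-binary=build/bin/opt \
      llvm/test/Transforms/LoopStrengthReduce/addrec-gep.ll
  # regenerate llc-based checks for a codegen test
  llvm/utils/update_llc_test_checks.py --llc-binary=build/bin/llc \
      llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll

Each script reads the RUN lines in the test file, runs the named tool, and
rewrites the CHECK/A9 assertions in place; the "UTC_ARGS: --version 3" note
records the generation arguments so that later regenerations reuse them.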