[llvm] 8c5352c - [LV] Add additional cost and folding test coverage. (NFC)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 14 14:19:48 PST 2026
Author: Florian Hahn
Date: 2026-01-14T22:19:11Z
New Revision: 8c5352cf3e14ec0c56f592091899d229de8436a7
URL: https://github.com/llvm/llvm-project/commit/8c5352cf3e14ec0c56f592091899d229de8436a7
DIFF: https://github.com/llvm/llvm-project/commit/8c5352cf3e14ec0c56f592091899d229de8436a7.diff
LOG: [LV] Add additional cost and folding test coverage. (NFC)
Added:
llvm/test/Transforms/LoopVectorize/AArch64/hoist-predicated-loads-scalable.ll
llvm/test/Transforms/LoopVectorize/select-folds.ll
Modified:
llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
llvm/test/Transforms/LoopVectorize/cast-induction.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/hoist-predicated-loads-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/hoist-predicated-loads-scalable.ll
new file mode 100644
index 0000000000000..308f6dbd12232
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/hoist-predicated-loads-scalable.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -p loop-vectorize -S %s | FileCheck %s
+
+target triple = "arm64-apple-macosx"
+
+define void @test_predicated_store(ptr %src, ptr %dst) #0 { ; loop with a constant-false branch; both successor blocks store zeros into %dst
+; CHECK-LABEL: define void @test_predicated_store(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP_SRC]], align 8
+; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[L]], 10
+; CHECK-NEXT: br i1 false, label %[[IF_END:.*]], label %[[IF_THEN:.*]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV]]
+; CHECK-NEXT: store i64 0, ptr [[GEP_DST]], align 8
+; CHECK-NEXT: [[GEP_DST_OFF:%.*]] = getelementptr i8, ptr [[GEP_DST]], i64 -31984
+; CHECK-NEXT: store i64 0, ptr [[GEP_DST_OFF]], align 8
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: [[GEP_DST_1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV]]
+; CHECK-NEXT: store i64 0, ptr [[GEP_DST_1]], align 8
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 20
+; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 256
+; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %gep.src = getelementptr inbounds i64, ptr %src, i64 %iv
+ %l = load i64, ptr %gep.src, align 8
+ %c = icmp eq i64 %l, 10 ; not consumed by the branch below, which tests the literal false
+ br i1 false, label %if.end, label %if.then ; condition is constant false, so the false successor %if.then is taken
+
+if.then:
+ %gep.dst = getelementptr i64, ptr %dst, i64 %iv
+ store i64 0, ptr %gep.dst, align 8
+ %gep.dst.off = getelementptr i8, ptr %gep.dst, i64 -31984 ; i8-typed GEP: offset is in bytes, 31984 bytes below %gep.dst
+ store i64 0, ptr %gep.dst.off, align 8
+ br label %loop.latch
+
+if.end:
+ %gep.dst.1 = getelementptr i64, ptr %dst, i64 %iv ; same address computation as %gep.dst in %if.then
+ store i64 0, ptr %gep.dst.1, align 8
+ br label %loop.latch
+
+loop.latch:
+ %iv.next = add i64 %iv, 20 ; induction variable advances by 20 per iteration
+ %ec = icmp ne i64 %iv.next, 256 ; true means keep looping (see branch below)
+ br i1 %ec, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+attributes #0 = { "target-cpu"="neoverse-512tvb" }
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
index 3813560d9300a..51be3699719e9 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
@@ -1069,6 +1069,265 @@ exit:
ret void
}
+define void @replicated_load_wide_store_derived_iv_zext_and(ptr noalias %src, ptr %dst, i32 %step) { ; copies floats from %src, indexed by zext of a derived i32 IV, to consecutive slots of %dst
+; I64-LABEL: define void @replicated_load_wide_store_derived_iv_zext_and(
+; I64-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i32 [[STEP:%.*]]) {
+; I64-NEXT: [[ENTRY:.*:]]
+; I64-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
+; I64: [[VECTOR_SCEVCHECK]]:
+; I64-NEXT: [[TMP0:%.*]] = trunc i32 [[STEP]] to i1
+; I64-NEXT: [[MUL:%.*]] = call { i1, i1 } @llvm.umul.with.overflow.i1(i1 [[TMP0]], i1 false)
+; I64-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i1, i1 } [[MUL]], 0
+; I64-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i1, i1 } [[MUL]], 1
+; I64-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i1 [[MUL_RESULT]], i1 false
+; I64-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[MUL_OVERFLOW]]
+; I64-NEXT: [[TMP3:%.*]] = or i1 [[TMP2]], [[TMP0]]
+; I64-NEXT: [[TMP4:%.*]] = sext i1 [[TMP0]] to i32
+; I64-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP4]]
+; I64-NEXT: [[TMP5:%.*]] = sub i32 0, [[STEP]]
+; I64-NEXT: [[TMP6:%.*]] = icmp slt i32 [[STEP]], 0
+; I64-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 [[STEP]]
+; I64-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP7]], i32 128)
+; I64-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
+; I64-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
+; I64-NEXT: [[TMP8:%.*]] = sub i32 0, [[MUL_RESULT2]]
+; I64-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[TMP8]], 0
+; I64-NEXT: [[TMP10:%.*]] = select i1 [[TMP6]], i1 [[TMP9]], i1 false
+; I64-NEXT: [[TMP11:%.*]] = or i1 [[TMP10]], [[MUL_OVERFLOW3]]
+; I64-NEXT: [[TMP12:%.*]] = or i1 [[TMP3]], [[IDENT_CHECK]]
+; I64-NEXT: [[TMP13:%.*]] = or i1 [[TMP12]], [[TMP11]]
+; I64-NEXT: br i1 [[TMP13]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; I64: [[VECTOR_PH]]:
+; I64-NEXT: [[TMP14:%.*]] = mul i32 128, [[STEP]]
+; I64-NEXT: br label %[[VECTOR_BODY:.*]]
+; I64: [[VECTOR_BODY]]:
+; I64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; I64-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32
+; I64-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[DOTCAST]], [[STEP]]
+; I64-NEXT: [[TMP15:%.*]] = mul i32 0, [[STEP]]
+; I64-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], [[TMP15]]
+; I64-NEXT: [[TMP17:%.*]] = mul i32 1, [[STEP]]
+; I64-NEXT: [[TMP18:%.*]] = add i32 [[OFFSET_IDX]], [[TMP17]]
+; I64-NEXT: [[TMP19:%.*]] = mul i32 2, [[STEP]]
+; I64-NEXT: [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], [[TMP19]]
+; I64-NEXT: [[TMP21:%.*]] = mul i32 3, [[STEP]]
+; I64-NEXT: [[TMP22:%.*]] = add i32 [[OFFSET_IDX]], [[TMP21]]
+; I64-NEXT: [[TMP23:%.*]] = mul i32 4, [[STEP]]
+; I64-NEXT: [[TMP24:%.*]] = add i32 [[OFFSET_IDX]], [[TMP23]]
+; I64-NEXT: [[TMP25:%.*]] = mul i32 5, [[STEP]]
+; I64-NEXT: [[TMP26:%.*]] = add i32 [[OFFSET_IDX]], [[TMP25]]
+; I64-NEXT: [[TMP27:%.*]] = mul i32 6, [[STEP]]
+; I64-NEXT: [[TMP28:%.*]] = add i32 [[OFFSET_IDX]], [[TMP27]]
+; I64-NEXT: [[TMP29:%.*]] = mul i32 7, [[STEP]]
+; I64-NEXT: [[TMP30:%.*]] = add i32 [[OFFSET_IDX]], [[TMP29]]
+; I64-NEXT: [[TMP31:%.*]] = zext i32 [[TMP16]] to i64
+; I64-NEXT: [[TMP32:%.*]] = zext i32 [[TMP18]] to i64
+; I64-NEXT: [[TMP33:%.*]] = zext i32 [[TMP20]] to i64
+; I64-NEXT: [[TMP34:%.*]] = zext i32 [[TMP22]] to i64
+; I64-NEXT: [[TMP35:%.*]] = zext i32 [[TMP24]] to i64
+; I64-NEXT: [[TMP36:%.*]] = zext i32 [[TMP26]] to i64
+; I64-NEXT: [[TMP37:%.*]] = zext i32 [[TMP28]] to i64
+; I64-NEXT: [[TMP38:%.*]] = zext i32 [[TMP30]] to i64
+; I64-NEXT: [[TMP39:%.*]] = getelementptr float, ptr [[SRC]], i64 [[TMP31]]
+; I64-NEXT: [[TMP40:%.*]] = getelementptr float, ptr [[SRC]], i64 [[TMP32]]
+; I64-NEXT: [[TMP41:%.*]] = getelementptr float, ptr [[SRC]], i64 [[TMP33]]
+; I64-NEXT: [[TMP42:%.*]] = getelementptr float, ptr [[SRC]], i64 [[TMP34]]
+; I64-NEXT: [[TMP43:%.*]] = getelementptr float, ptr [[SRC]], i64 [[TMP35]]
+; I64-NEXT: [[TMP44:%.*]] = getelementptr float, ptr [[SRC]], i64 [[TMP36]]
+; I64-NEXT: [[TMP45:%.*]] = getelementptr float, ptr [[SRC]], i64 [[TMP37]]
+; I64-NEXT: [[TMP46:%.*]] = getelementptr float, ptr [[SRC]], i64 [[TMP38]]
+; I64-NEXT: [[TMP47:%.*]] = load float, ptr [[TMP39]], align 4
+; I64-NEXT: [[TMP48:%.*]] = load float, ptr [[TMP40]], align 4
+; I64-NEXT: [[TMP49:%.*]] = load float, ptr [[TMP41]], align 4
+; I64-NEXT: [[TMP50:%.*]] = load float, ptr [[TMP42]], align 4
+; I64-NEXT: [[TMP51:%.*]] = insertelement <4 x float> poison, float [[TMP47]], i32 0
+; I64-NEXT: [[TMP52:%.*]] = insertelement <4 x float> [[TMP51]], float [[TMP48]], i32 1
+; I64-NEXT: [[TMP53:%.*]] = insertelement <4 x float> [[TMP52]], float [[TMP49]], i32 2
+; I64-NEXT: [[TMP54:%.*]] = insertelement <4 x float> [[TMP53]], float [[TMP50]], i32 3
+; I64-NEXT: [[TMP55:%.*]] = load float, ptr [[TMP43]], align 4
+; I64-NEXT: [[TMP56:%.*]] = load float, ptr [[TMP44]], align 4
+; I64-NEXT: [[TMP57:%.*]] = load float, ptr [[TMP45]], align 4
+; I64-NEXT: [[TMP58:%.*]] = load float, ptr [[TMP46]], align 4
+; I64-NEXT: [[TMP59:%.*]] = insertelement <4 x float> poison, float [[TMP55]], i32 0
+; I64-NEXT: [[TMP60:%.*]] = insertelement <4 x float> [[TMP59]], float [[TMP56]], i32 1
+; I64-NEXT: [[TMP61:%.*]] = insertelement <4 x float> [[TMP60]], float [[TMP57]], i32 2
+; I64-NEXT: [[TMP62:%.*]] = insertelement <4 x float> [[TMP61]], float [[TMP58]], i32 3
+; I64-NEXT: [[TMP63:%.*]] = getelementptr float, ptr [[DST]], i64 [[INDEX]]
+; I64-NEXT: [[TMP64:%.*]] = getelementptr float, ptr [[TMP63]], i64 4
+; I64-NEXT: store <4 x float> [[TMP54]], ptr [[TMP63]], align 4
+; I64-NEXT: store <4 x float> [[TMP62]], ptr [[TMP64]], align 4
+; I64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; I64-NEXT: [[TMP65:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; I64-NEXT: br i1 [[TMP65]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; I64: [[MIDDLE_BLOCK]]:
+; I64-NEXT: br label %[[SCALAR_PH]]
+; I64: [[SCALAR_PH]]:
+;
+; I32-LABEL: define void @replicated_load_wide_store_derived_iv_zext_and(
+; I32-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i32 [[STEP:%.*]]) {
+; I32-NEXT: [[ENTRY:.*]]:
+; I32-NEXT: br label %[[LOOP:.*]]
+; I32: [[LOOP]]:
+; I32-NEXT: [[IV_0:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_0_NEXT:%.*]], %[[LOOP]] ]
+; I32-NEXT: [[IV_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
+; I32-NEXT: [[IV_EXT:%.*]] = zext i32 [[IV_1]] to i64
+; I32-NEXT: [[GEP_SRC:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_EXT]]
+; I32-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
+; I32-NEXT: [[GEP_DST:%.*]] = getelementptr float, ptr [[DST]], i64 [[IV_0]]
+; I32-NEXT: store float [[L]], ptr [[GEP_DST]], align 4
+; I32-NEXT: [[IV_1_ADD:%.*]] = and i32 [[IV_1]], 1
+; I32-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1_ADD]], [[STEP]]
+; I32-NEXT: [[IV_0_NEXT]] = add i64 [[IV_0]], 1
+; I32-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_0]], 128
+; I32-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; I32: [[EXIT]]:
+; I32-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv.0 = phi i64 [ 0, %entry ], [ %iv.0.next, %loop ] ; primary i64 counter, used to index %dst
+ %iv.1 = phi i32 [ 0, %entry ], [ %iv.1.next, %loop ] ; secondary i32 value, used (zero-extended) to index %src
+ %iv.ext = zext i32 %iv.1 to i64 ; extends the unmasked %iv.1; the AND below feeds only the recurrence
+ %gep.src = getelementptr float, ptr %src, i64 %iv.ext
+ %l = load float, ptr %gep.src, align 4
+ %gep.dst = getelementptr float, ptr %dst, i64 %iv.0
+ store float %l, ptr %gep.dst, align 4
+ %iv.1.add = and i32 %iv.1, 1 ; despite the ".add" suffix this is a bitwise AND with 1
+ %iv.1.next = add i32 %iv.1.add, %step ; next %iv.1 = (%iv.1 & 1) + %step
+ %iv.0.next = add i64 %iv.0, 1
+ %ec = icmp eq i64 %iv.0, 128 ; exit test compares %iv.0, not %iv.0.next
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @replicated_load_wide_store_derived_iv_zext_and2(ptr noalias %dst, ptr noalias %src, i32 %step) { ; copies floats from %src, indexed by zext of (%iv.1 & 1), to consecutive slots of %dst
+; I64-LABEL: define void @replicated_load_wide_store_derived_iv_zext_and2(
+; I64-SAME: ptr noalias [[DST:%.*]], ptr noalias [[SRC:%.*]], i32 [[STEP:%.*]]) {
+; I64-NEXT: [[ENTRY:.*:]]
+; I64-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
+; I64: [[VECTOR_SCEVCHECK]]:
+; I64-NEXT: [[TMP0:%.*]] = trunc i32 [[STEP]] to i1
+; I64-NEXT: [[MUL:%.*]] = call { i1, i1 } @llvm.umul.with.overflow.i1(i1 [[TMP0]], i1 false)
+; I64-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i1, i1 } [[MUL]], 0
+; I64-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i1, i1 } [[MUL]], 1
+; I64-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i1 [[MUL_RESULT]], i1 false
+; I64-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[MUL_OVERFLOW]]
+; I64-NEXT: [[TMP3:%.*]] = or i1 [[TMP2]], [[TMP0]]
+; I64-NEXT: [[TMP4:%.*]] = sext i1 [[TMP0]] to i32
+; I64-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP4]]
+; I64-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[IDENT_CHECK]]
+; I64-NEXT: br i1 [[TMP5]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; I64: [[VECTOR_PH]]:
+; I64-NEXT: [[TMP6:%.*]] = mul i32 128, [[STEP]]
+; I64-NEXT: br label %[[VECTOR_BODY:.*]]
+; I64: [[VECTOR_BODY]]:
+; I64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; I64-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32
+; I64-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[DOTCAST]], [[STEP]]
+; I64-NEXT: [[TMP7:%.*]] = mul i32 0, [[STEP]]
+; I64-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], [[TMP7]]
+; I64-NEXT: [[TMP9:%.*]] = mul i32 1, [[STEP]]
+; I64-NEXT: [[TMP10:%.*]] = add i32 [[OFFSET_IDX]], [[TMP9]]
+; I64-NEXT: [[TMP11:%.*]] = mul i32 2, [[STEP]]
+; I64-NEXT: [[TMP12:%.*]] = add i32 [[OFFSET_IDX]], [[TMP11]]
+; I64-NEXT: [[TMP13:%.*]] = mul i32 3, [[STEP]]
+; I64-NEXT: [[TMP14:%.*]] = add i32 [[OFFSET_IDX]], [[TMP13]]
+; I64-NEXT: [[TMP15:%.*]] = mul i32 4, [[STEP]]
+; I64-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], [[TMP15]]
+; I64-NEXT: [[TMP17:%.*]] = mul i32 5, [[STEP]]
+; I64-NEXT: [[TMP18:%.*]] = add i32 [[OFFSET_IDX]], [[TMP17]]
+; I64-NEXT: [[TMP19:%.*]] = mul i32 6, [[STEP]]
+; I64-NEXT: [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], [[TMP19]]
+; I64-NEXT: [[TMP21:%.*]] = mul i32 7, [[STEP]]
+; I64-NEXT: [[TMP22:%.*]] = add i32 [[OFFSET_IDX]], [[TMP21]]
+; I64-NEXT: [[TMP23:%.*]] = zext i32 [[TMP8]] to i64
+; I64-NEXT: [[TMP24:%.*]] = zext i32 [[TMP10]] to i64
+; I64-NEXT: [[TMP25:%.*]] = zext i32 [[TMP12]] to i64
+; I64-NEXT: [[TMP26:%.*]] = zext i32 [[TMP14]] to i64
+; I64-NEXT: [[TMP27:%.*]] = zext i32 [[TMP16]] to i64
+; I64-NEXT: [[TMP28:%.*]] = zext i32 [[TMP18]] to i64
+; I64-NEXT: [[TMP29:%.*]] = zext i32 [[TMP20]] to i64
+; I64-NEXT: [[TMP30:%.*]] = zext i32 [[TMP22]] to i64
+; I64-NEXT: [[TMP31:%.*]] = getelementptr float, ptr [[SRC]], i64 [[TMP23]]
+; I64-NEXT: [[TMP32:%.*]] = getelementptr float, ptr [[SRC]], i64 [[TMP24]]
+; I64-NEXT: [[TMP33:%.*]] = getelementptr float, ptr [[SRC]], i64 [[TMP25]]
+; I64-NEXT: [[TMP34:%.*]] = getelementptr float, ptr [[SRC]], i64 [[TMP26]]
+; I64-NEXT: [[TMP35:%.*]] = getelementptr float, ptr [[SRC]], i64 [[TMP27]]
+; I64-NEXT: [[TMP36:%.*]] = getelementptr float, ptr [[SRC]], i64 [[TMP28]]
+; I64-NEXT: [[TMP37:%.*]] = getelementptr float, ptr [[SRC]], i64 [[TMP29]]
+; I64-NEXT: [[TMP38:%.*]] = getelementptr float, ptr [[SRC]], i64 [[TMP30]]
+; I64-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP31]], align 4
+; I64-NEXT: [[TMP40:%.*]] = load float, ptr [[TMP32]], align 4
+; I64-NEXT: [[TMP41:%.*]] = load float, ptr [[TMP33]], align 4
+; I64-NEXT: [[TMP42:%.*]] = load float, ptr [[TMP34]], align 4
+; I64-NEXT: [[TMP43:%.*]] = insertelement <4 x float> poison, float [[TMP39]], i32 0
+; I64-NEXT: [[TMP44:%.*]] = insertelement <4 x float> [[TMP43]], float [[TMP40]], i32 1
+; I64-NEXT: [[TMP45:%.*]] = insertelement <4 x float> [[TMP44]], float [[TMP41]], i32 2
+; I64-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP42]], i32 3
+; I64-NEXT: [[TMP47:%.*]] = load float, ptr [[TMP35]], align 4
+; I64-NEXT: [[TMP48:%.*]] = load float, ptr [[TMP36]], align 4
+; I64-NEXT: [[TMP49:%.*]] = load float, ptr [[TMP37]], align 4
+; I64-NEXT: [[TMP50:%.*]] = load float, ptr [[TMP38]], align 4
+; I64-NEXT: [[TMP51:%.*]] = insertelement <4 x float> poison, float [[TMP47]], i32 0
+; I64-NEXT: [[TMP52:%.*]] = insertelement <4 x float> [[TMP51]], float [[TMP48]], i32 1
+; I64-NEXT: [[TMP53:%.*]] = insertelement <4 x float> [[TMP52]], float [[TMP49]], i32 2
+; I64-NEXT: [[TMP54:%.*]] = insertelement <4 x float> [[TMP53]], float [[TMP50]], i32 3
+; I64-NEXT: [[TMP55:%.*]] = getelementptr float, ptr [[DST]], i64 [[INDEX]]
+; I64-NEXT: [[TMP56:%.*]] = getelementptr float, ptr [[TMP55]], i64 4
+; I64-NEXT: store <4 x float> [[TMP46]], ptr [[TMP55]], align 4
+; I64-NEXT: store <4 x float> [[TMP54]], ptr [[TMP56]], align 4
+; I64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; I64-NEXT: [[TMP57:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; I64-NEXT: br i1 [[TMP57]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; I64: [[MIDDLE_BLOCK]]:
+; I64-NEXT: br label %[[SCALAR_PH]]
+; I64: [[SCALAR_PH]]:
+;
+; I32-LABEL: define void @replicated_load_wide_store_derived_iv_zext_and2(
+; I32-SAME: ptr noalias [[DST:%.*]], ptr noalias [[SRC:%.*]], i32 [[STEP:%.*]]) {
+; I32-NEXT: [[ENTRY:.*]]:
+; I32-NEXT: br label %[[LOOP:.*]]
+; I32: [[LOOP]]:
+; I32-NEXT: [[IV_0:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_0_NEXT:%.*]], %[[LOOP]] ]
+; I32-NEXT: [[IV_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
+; I32-NEXT: [[IV_1_AND:%.*]] = and i32 [[IV_1]], 1
+; I32-NEXT: [[IV_1_EXT:%.*]] = zext i32 [[IV_1_AND]] to i64
+; I32-NEXT: [[GEP_SRC:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_1_EXT]]
+; I32-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
+; I32-NEXT: [[GEP_DST:%.*]] = getelementptr float, ptr [[DST]], i64 [[IV_0]]
+; I32-NEXT: store float [[L]], ptr [[GEP_DST]], align 4
+; I32-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1_AND]], [[STEP]]
+; I32-NEXT: [[IV_0_NEXT]] = add i64 [[IV_0]], 1
+; I32-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_0]], 128
+; I32-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; I32: [[EXIT]]:
+; I32-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv.0 = phi i64 [ 0, %entry ], [ %iv.0.next, %loop ] ; primary i64 counter, used to index %dst
+ %iv.1 = phi i32 [ 0, %entry ], [ %iv.1.next, %loop ] ; secondary i32 value feeding the masked %src index
+ %iv.1.and = and i32 %iv.1, 1 ; mask is applied before the zext, so the %src index is 0 or 1
+ %iv.1.ext = zext i32 %iv.1.and to i64
+ %gep.src = getelementptr float, ptr %src, i64 %iv.1.ext
+ %l = load float, ptr %gep.src, align 4
+ %gep.dst = getelementptr float, ptr %dst, i64 %iv.0
+ store float %l, ptr %gep.dst, align 4
+ %iv.1.next = add i32 %iv.1.and, %step ; next %iv.1 = (%iv.1 & 1) + %step
+ %iv.0.next = add i64 %iv.0, 1
+ %ec = icmp eq i64 %iv.0, 128 ; exit test compares %iv.0, not %iv.0.next
+ br i1 %ec, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret void
+}
+
attributes #0 = { "target-cpu"="znver2" }
!0 = distinct !{!0, !1}
diff --git a/llvm/test/Transforms/LoopVectorize/cast-induction.ll b/llvm/test/Transforms/LoopVectorize/cast-induction.ll
index 0b58bb65e2bc5..2764a61728fe3 100644
--- a/llvm/test/Transforms/LoopVectorize/cast-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/cast-induction.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S %s | FileCheck --check-prefix=VF4 %s
; RUN: opt -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -S %s | FileCheck --check-prefix=IC2 %s
@@ -8,19 +9,46 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
@a = common global [2048 x i32] zeroinitializer, align 16
define void @example12() {
-; VF4-LABEL: @example12(
-; VF4-LABEL: vector.body:
-; VF4: [[VEC_IND:%.+]] = phi <4 x i32>
-; VF4: store <4 x i32> [[VEC_IND]]
-; VF4: middle.block:
+; VF4-LABEL: define void @example12() {
+; VF4-NEXT: [[ENTRY:.*:]]
+; VF4-NEXT: br label %[[VECTOR_PH:.*]]
+; VF4: [[VECTOR_PH]]:
+; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF4: [[VECTOR_BODY]]:
+; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 [[INDEX]]
+; VF4-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP0]], align 4
+; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; VF4-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; VF4-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; VF4: [[MIDDLE_BLOCK]]:
+; VF4-NEXT: br label %[[EXIT:.*]]
+; VF4: [[EXIT]]:
+; VF4-NEXT: ret void
;
-; IC2-LABEL: @example12(
-; IC2-LABEL: vector.body:
-; IC2-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ]
-; IC2: [[TRUNC:%.+]] = trunc i64 [[INDEX]] to i32
-; IC2-NEXT: [[TRUNC1:%.+]] = add i32 [[TRUNC]], 1
-; IC2: store i32 [[TRUNC]],
-; IC2-NEXT: store i32 [[TRUNC1]],
+; IC2-LABEL: define void @example12() {
+; IC2-NEXT: [[ENTRY:.*:]]
+; IC2-NEXT: br label %[[VECTOR_PH:.*]]
+; IC2: [[VECTOR_PH]]:
+; IC2-NEXT: br label %[[VECTOR_BODY:.*]]
+; IC2: [[VECTOR_BODY]]:
+; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
+; IC2-NEXT: [[TMP1:%.*]] = trunc i64 [[INDEX]] to i32
+; IC2-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; IC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 [[INDEX]]
+; IC2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 [[TMP0]]
+; IC2-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4
+; IC2-NEXT: store i32 [[TMP2]], ptr [[TMP4]], align 4
+; IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; IC2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; IC2-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IC2: [[MIDDLE_BLOCK]]:
+; IC2-NEXT: br label %[[EXIT:.*]]
+; IC2: [[EXIT]]:
+; IC2-NEXT: ret void
;
entry:
br label %loop
@@ -40,19 +68,70 @@ exit:
}
define void @redundant_iv_cast(ptr %dst) {
-; VF4-LABEL: @redundant_iv_cast
-; VF4: vector.body:
-; VF4: [[VEC_IND:%.+]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, %vector.ph ], [ [[VEC_IND_NEXT:%.+]], %vector.body ]
-; VF4: store <4 x i16> [[VEC_IND]]
-; VF4: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
+; VF4-LABEL: define void @redundant_iv_cast(
+; VF4-SAME: ptr [[DST:%.*]]) {
+; VF4-NEXT: [[ENTRY:.*:]]
+; VF4-NEXT: br label %[[VECTOR_PH:.*]]
+; VF4: [[VECTOR_PH]]:
+; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF4: [[VECTOR_BODY]]:
+; VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF4-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
+; VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[DST]], i16 [[OFFSET_IDX]]
+; VF4-NEXT: store <4 x i16> [[VEC_IND]], ptr [[TMP0]], align 2
+; VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
+; VF4-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 10000
+; VF4-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; VF4: [[MIDDLE_BLOCK]]:
+; VF4-NEXT: br label %[[SCALAR_PH:.*]]
+; VF4: [[SCALAR_PH]]:
+; VF4-NEXT: br label %[[LOOP:.*]]
+; VF4: [[LOOP]]:
+; VF4-NEXT: [[J_0:%.*]] = phi i16 [ 10000, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[LOOP]] ]
+; VF4-NEXT: [[EXT:%.*]] = zext i16 [[J_0]] to i32
+; VF4-NEXT: [[TRUNC:%.*]] = trunc i32 [[EXT]] to i16
+; VF4-NEXT: [[GEP:%.*]] = getelementptr inbounds i16, ptr [[DST]], i16 [[J_0]]
+; VF4-NEXT: store i16 [[TRUNC]], ptr [[GEP]], align 2
+; VF4-NEXT: [[TMP2:%.*]] = icmp eq i16 10000, [[J_0]]
+; VF4-NEXT: [[INC]] = add i16 [[J_0]], 1
+; VF4-NEXT: br i1 [[TMP2]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; VF4: [[EXIT]]:
+; VF4-NEXT: ret void
;
-; IC2-LABEL: @redundant_iv_cast
-; IC2: vector.body:
-; IC2-NEXT: [[CAN_IV:%.+]] = phi i32 [ 0, %vector.ph ], [ [[CAN_IV_NEXT:%.+]], %vector.body ]
-; IC2-NEXT: [[OFFSET_IDX:%.+]] = trunc i32 [[CAN_IV]] to i16
-; IC2-NEXT: [[P1:%.+]] = add i16 [[OFFSET_IDX]], 1
-; IC2: store i16 [[OFFSET_IDX]]
-; IC2-NEXT: store i16 [[P1]]
+; IC2-LABEL: define void @redundant_iv_cast(
+; IC2-SAME: ptr [[DST:%.*]]) {
+; IC2-NEXT: [[ENTRY:.*:]]
+; IC2-NEXT: br label %[[VECTOR_PH:.*]]
+; IC2: [[VECTOR_PH]]:
+; IC2-NEXT: br label %[[VECTOR_BODY:.*]]
+; IC2: [[VECTOR_BODY]]:
+; IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC2-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
+; IC2-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 1
+; IC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[DST]], i16 [[OFFSET_IDX]]
+; IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[DST]], i16 [[TMP0]]
+; IC2-NEXT: store i16 [[OFFSET_IDX]], ptr [[TMP1]], align 2
+; IC2-NEXT: store i16 [[TMP0]], ptr [[TMP2]], align 2
+; IC2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; IC2-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 10000
+; IC2-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; IC2: [[MIDDLE_BLOCK]]:
+; IC2-NEXT: br label %[[SCALAR_PH:.*]]
+; IC2: [[SCALAR_PH]]:
+; IC2-NEXT: br label %[[LOOP:.*]]
+; IC2: [[LOOP]]:
+; IC2-NEXT: [[J_0:%.*]] = phi i16 [ 10000, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[LOOP]] ]
+; IC2-NEXT: [[EXT:%.*]] = zext i16 [[J_0]] to i32
+; IC2-NEXT: [[TRUNC:%.*]] = trunc i32 [[EXT]] to i16
+; IC2-NEXT: [[GEP:%.*]] = getelementptr inbounds i16, ptr [[DST]], i16 [[J_0]]
+; IC2-NEXT: store i16 [[TRUNC]], ptr [[GEP]], align 2
+; IC2-NEXT: [[TMP4:%.*]] = icmp eq i16 10000, [[J_0]]
+; IC2-NEXT: [[INC]] = add i16 [[J_0]], 1
+; IC2-NEXT: br i1 [[TMP4]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; IC2: [[EXIT]]:
+; IC2-NEXT: ret void
;
entry:
br label %loop
@@ -74,37 +153,73 @@ exit:
define void @cast_variable_step(i64 %step) {
-; VF4-LABEL: @cast_variable_step(
-; VF4-LABEL: vector.body:
-; VF4: [[VEC_IND:%.+]] = phi <4 x i32>
-; VF4: store <4 x i32> [[VEC_IND]]
-; VF4: middle.block:
+; VF4-LABEL: define void @cast_variable_step(
+; VF4-SAME: i64 [[STEP:%.*]]) {
+; VF4-NEXT: [[ENTRY:.*:]]
+; VF4-NEXT: br label %[[VECTOR_PH:.*]]
+; VF4: [[VECTOR_PH]]:
+; VF4-NEXT: [[TMP0:%.*]] = trunc i64 [[STEP]] to i32
+; VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
+; VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; VF4-NEXT: [[TMP1:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[BROADCAST_SPLAT]]
+; VF4-NEXT: [[INDUCTION:%.*]] = add <4 x i32> splat (i32 10), [[TMP1]]
+; VF4-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0]], 2
+; VF4-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
+; VF4-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
+; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF4: [[VECTOR_BODY]]:
+; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 [[INDEX]]
+; VF4-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP3]], align 4
+; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; VF4-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; VF4-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; VF4: [[MIDDLE_BLOCK]]:
+; VF4-NEXT: br label %[[EXIT:.*]]
+; VF4: [[EXIT]]:
+; VF4-NEXT: ret void
;
-; IC2-LABEL: @cast_variable_step(
-; IC2: [[TRUNC_STEP:%.+]] = trunc i64 %step to i32
-; IC2: br label %vector.body
-
-; IC2-LABEL: vector.body:
-; IC2-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ]
-; IC2: [[MUL:%.+]] = mul i64 %index, %step
-; IC2-NEXT: [[OFFSET_IDX:%.+]] = add i64 10, [[MUL]]
-; IC2-NEXT: [[TRUNC_OFF:%.+]] = trunc i64 [[OFFSET_IDX]] to i32
-; IC2-NEXT: [[STEP1:%.+]] = mul i32 1, [[TRUNC_STEP]]
-; IC2-NEXT: [[T1:%.+]] = add i32 [[TRUNC_OFF]], [[STEP1]]
-; IC2: store i32 [[TRUNC_OFF]],
-; IC2-NEXT: store i32 [[T1]],
+; IC2-LABEL: define void @cast_variable_step(
+; IC2-SAME: i64 [[STEP:%.*]]) {
+; IC2-NEXT: [[ENTRY:.*:]]
+; IC2-NEXT: br label %[[VECTOR_PH:.*]]
+; IC2: [[VECTOR_PH]]:
+; IC2-NEXT: [[TMP0:%.*]] = trunc i64 [[STEP]] to i32
+; IC2-NEXT: br label %[[VECTOR_BODY:.*]]
+; IC2: [[VECTOR_BODY]]:
+; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; IC2-NEXT: [[TMP2:%.*]] = mul i64 [[INDEX]], [[STEP]]
+; IC2-NEXT: [[OFFSET_IDX:%.*]] = add i64 10, [[TMP2]]
+; IC2-NEXT: [[TMP3:%.*]] = trunc i64 [[OFFSET_IDX]] to i32
+; IC2-NEXT: [[TMP4:%.*]] = mul i32 1, [[TMP0]]
+; IC2-NEXT: [[TMP5:%.*]] = add i32 [[TMP3]], [[TMP4]]
+; IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 [[INDEX]]
+; IC2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 [[TMP1]]
+; IC2-NEXT: store i32 [[TMP3]], ptr [[TMP6]], align 4
+; IC2-NEXT: store i32 [[TMP5]], ptr [[TMP7]], align 4
+; IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; IC2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; IC2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; IC2: [[MIDDLE_BLOCK]]:
+; IC2-NEXT: br label %[[EXIT:.*]]
+; IC2: [[EXIT]]:
+; IC2-NEXT: ret void
;
+
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
- %iv.2 = phi i64 [ 10, %entry ], [ %iv.2.next, %loop ]
+ %iv.1 = phi i64 [ 10, %entry ], [ %iv.1.next, %loop ]
%gep = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %iv
- %iv.2.trunc = trunc i64 %iv.2 to i32
- store i32 %iv.2.trunc, ptr %gep, align 4
+ %iv.1.trunc = trunc i64 %iv.1 to i32
+ store i32 %iv.1.trunc, ptr %gep, align 4
%iv.next = add i64 %iv, 1
- %iv.2.next = add i64 %iv.2, %step
+ %iv.1.next = add i64 %iv.1, %step
%exitcond = icmp eq i64 %iv.next, 1024
br i1 %exitcond, label %exit, label %loop
@@ -113,17 +228,78 @@ exit:
}
define void @cast_induction_tail_folding(ptr %A) {
-; VF4-LABEL: @cast_induction_tail_folding(
-; VF4-LABEL: vector.body:
-; VF4-NEXT: br i1 true, label %pred.store.if, label %pred.store.continue
-
-; IC2-LABEL: @cast_induction_tail_folding(
-; IC2: [[INDEX:%.+]] = phi i32 [ 0, %vector.ph ]
-; IC2-NEXT: [[INDEX0:%.+]] = add i32 [[INDEX]], 0
-; IC2-NEXT: [[INDEX1:%.+]] = add i32 [[INDEX]], 1
-; IC2-NEXT: = icmp ule i32 [[INDEX0]], 2
-; IC2-NEXT: = icmp ule i32 [[INDEX1]], 2
+; VF4-LABEL: define void @cast_induction_tail_folding(
+; VF4-SAME: ptr [[A:%.*]]) {
+; VF4-NEXT: [[ENTRY:.*:]]
+; VF4-NEXT: br label %[[VECTOR_PH:.*]]
+; VF4: [[VECTOR_PH]]:
+; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF4: [[VECTOR_BODY]]:
+; VF4-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; VF4: [[PRED_STORE_IF]]:
+; VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 0
+; VF4-NEXT: store i32 0, ptr [[TMP0]], align 4
+; VF4-NEXT: br label %[[PRED_STORE_CONTINUE]]
+; VF4: [[PRED_STORE_CONTINUE]]:
+; VF4-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; VF4: [[PRED_STORE_IF1]]:
+; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 1
+; VF4-NEXT: store i32 1, ptr [[TMP1]], align 4
+; VF4-NEXT: br label %[[PRED_STORE_CONTINUE2]]
+; VF4: [[PRED_STORE_CONTINUE2]]:
+; VF4-NEXT: br i1 true, label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; VF4: [[PRED_STORE_IF3]]:
+; VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 2
+; VF4-NEXT: store i32 2, ptr [[TMP2]], align 4
+; VF4-NEXT: br label %[[PRED_STORE_CONTINUE4]]
+; VF4: [[PRED_STORE_CONTINUE4]]:
+; VF4-NEXT: br i1 false, label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; VF4: [[PRED_STORE_IF5]]:
+; VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 3
+; VF4-NEXT: store i32 3, ptr [[TMP3]], align 4
+; VF4-NEXT: br label %[[PRED_STORE_CONTINUE6]]
+; VF4: [[PRED_STORE_CONTINUE6]]:
+; VF4-NEXT: br label %[[MIDDLE_BLOCK:.*]]
+; VF4: [[MIDDLE_BLOCK]]:
+; VF4-NEXT: br label %[[EXIT:.*]]
+; VF4: [[EXIT]]:
+; VF4-NEXT: ret void
+;
+; IC2-LABEL: define void @cast_induction_tail_folding(
+; IC2-SAME: ptr [[A:%.*]]) {
+; IC2-NEXT: [[ENTRY:.*:]]
+; IC2-NEXT: br label %[[VECTOR_PH:.*]]
+; IC2: [[VECTOR_PH]]:
+; IC2-NEXT: br label %[[VECTOR_BODY:.*]]
+; IC2: [[VECTOR_BODY]]:
+; IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
+; IC2-NEXT: [[INDEX0:%.*]] = add i32 [[INDEX]], 0
+; IC2-NEXT: [[INDEX1:%.*]] = add i32 [[INDEX]], 1
+; IC2-NEXT: [[TMP2:%.*]] = icmp ule i32 [[INDEX0]], 2
+; IC2-NEXT: [[TMP3:%.*]] = icmp ule i32 [[INDEX1]], 2
+; IC2-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; IC2: [[PRED_STORE_IF]]:
+; IC2-NEXT: [[TMP4:%.*]] = sext i32 [[INDEX0]] to i64
+; IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
+; IC2-NEXT: store i32 [[INDEX0]], ptr [[TMP5]], align 4
+; IC2-NEXT: br label %[[PRED_STORE_CONTINUE]]
+; IC2: [[PRED_STORE_CONTINUE]]:
+; IC2-NEXT: br i1 [[TMP3]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
+; IC2: [[PRED_STORE_IF1]]:
+; IC2-NEXT: [[TMP6:%.*]] = sext i32 [[INDEX1]] to i64
+; IC2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
+; IC2-NEXT: store i32 [[INDEX1]], ptr [[TMP7]], align 4
+; IC2-NEXT: br label %[[PRED_STORE_CONTINUE2]]
+; IC2: [[PRED_STORE_CONTINUE2]]:
+; IC2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; IC2-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4
+; IC2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; IC2: [[MIDDLE_BLOCK]]:
+; IC2-NEXT: br label %[[EXIT:.*]]
+; IC2: [[EXIT]]:
+; IC2-NEXT: ret void
;
+
entry:
br label %loop
@@ -140,3 +316,102 @@ loop:
exit:
ret void
}
+
+define void @test_start_zext(i32 %start, ptr %dst) {
+; VF4-LABEL: define void @test_start_zext(
+; VF4-SAME: i32 [[START:%.*]], ptr [[DST:%.*]]) {
+; VF4-NEXT: [[ENTRY:.*:]]
+; VF4-NEXT: [[START_EXT:%.*]] = zext i32 [[START]] to i64
+; VF4-NEXT: [[TMP0:%.*]] = sub i64 100, [[START_EXT]]
+; VF4-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
+; VF4: [[VECTOR_SCEVCHECK]]:
+; VF4-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[START]], 1
+; VF4-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF4: [[VECTOR_PH]]:
+; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF4: [[VECTOR_BODY]]:
+; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF4-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[DST]], i64 [[INDEX]]
+; VF4-NEXT: store <4 x float> zeroinitializer, ptr [[TMP1]], align 4
+; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; VF4-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
+; VF4-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; VF4: [[MIDDLE_BLOCK]]:
+; VF4-NEXT: br label %[[SCALAR_PH]]
+; VF4: [[SCALAR_PH]]:
+; VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 97, %[[MIDDLE_BLOCK]] ], [ [[START_EXT]], %[[VECTOR_SCEVCHECK]] ]
+; VF4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 96, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
+; VF4-NEXT: br label %[[LOOP:.*]]
+; VF4: [[LOOP]]:
+; VF4-NEXT: [[IV_0:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_0_NEXT:%.*]], %[[LOOP]] ]
+; VF4-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
+; VF4-NEXT: [[GEP_DST:%.*]] = getelementptr float, ptr [[DST]], i64 [[IV_1]]
+; VF4-NEXT: store float 0.000000e+00, ptr [[GEP_DST]], align 4
+; VF4-NEXT: [[IV_1_NEXT]] = add i64 [[IV_1]], [[START_EXT]]
+; VF4-NEXT: [[IV_0_NEXT]] = add i64 [[IV_0]], 1
+; VF4-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_0_NEXT]], 100
+; VF4-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; VF4: [[EXIT]]:
+; VF4-NEXT: ret void
+;
+; IC2-LABEL: define void @test_start_zext(
+; IC2-SAME: i32 [[START:%.*]], ptr [[DST:%.*]]) {
+; IC2-NEXT: [[ENTRY:.*]]:
+; IC2-NEXT: [[START_EXT:%.*]] = zext i32 [[START]] to i64
+; IC2-NEXT: [[TMP0:%.*]] = sub i64 100, [[START_EXT]]
+; IC2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
+; IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC2: [[VECTOR_PH]]:
+; IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2
+; IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; IC2-NEXT: [[TMP1:%.*]] = add i64 [[START_EXT]], [[N_VEC]]
+; IC2-NEXT: [[TMP2:%.*]] = mul i64 [[N_VEC]], [[START_EXT]]
+; IC2-NEXT: br label %[[VECTOR_BODY:.*]]
+; IC2: [[VECTOR_BODY]]:
+; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[START_EXT]]
+; IC2-NEXT: [[TMP3:%.*]] = mul i64 1, [[START_EXT]]
+; IC2-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], [[TMP3]]
+; IC2-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[DST]], i64 [[OFFSET_IDX]]
+; IC2-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[DST]], i64 [[TMP4]]
+; IC2-NEXT: store float 0.000000e+00, ptr [[TMP5]], align 4
+; IC2-NEXT: store float 0.000000e+00, ptr [[TMP6]], align 4
+; IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; IC2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IC2-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; IC2: [[MIDDLE_BLOCK]]:
+; IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; IC2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; IC2: [[SCALAR_PH]]:
+; IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[START_EXT]], %[[ENTRY]] ]
+; IC2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; IC2-NEXT: br label %[[LOOP:.*]]
+; IC2: [[LOOP]]:
+; IC2-NEXT: [[IV_0:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_0_NEXT:%.*]], %[[LOOP]] ]
+; IC2-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
+; IC2-NEXT: [[GEP_DST:%.*]] = getelementptr float, ptr [[DST]], i64 [[IV_1]]
+; IC2-NEXT: store float 0.000000e+00, ptr [[GEP_DST]], align 4
+; IC2-NEXT: [[IV_1_NEXT]] = add i64 [[IV_1]], [[START_EXT]]
+; IC2-NEXT: [[IV_0_NEXT]] = add i64 [[IV_0]], 1
+; IC2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_0_NEXT]], 100
+; IC2-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
+; IC2: [[EXIT]]:
+; IC2-NEXT: ret void
+;
+entry:
+ %start.ext = zext i32 %start to i64
+ br label %loop
+
+loop:
+ %iv.0 = phi i64 [ %start.ext, %entry ], [ %iv.0.next, %loop ]
+ %iv.1 = phi i64 [ 0, %entry ], [ %iv.1.next, %loop ]
+ %gep.dst = getelementptr float, ptr %dst, i64 %iv.1
+ store float 0.0, ptr %gep.dst, align 4
+ %iv.1.next = add i64 %iv.1, %start.ext
+ %iv.0.next = add i64 %iv.0, 1
+ %ec = icmp eq i64 %iv.0.next, 100
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/select-folds.ll b/llvm/test/Transforms/LoopVectorize/select-folds.ll
new file mode 100644
index 0000000000000..fc926d9b8d3a4
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/select-folds.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s
+
+define i32 @select_not_cond_true_false(ptr %src, i64 %n) {
+; CHECK-LABEL: define i32 @select_not_cond_true_false(
+; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i8> [[WIDE_LOAD]] to <4 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> zeroinitializer, <4 x i1> splat (i1 true)
+; CHECK-NEXT: [[TMP5]] = zext <4 x i1> [[TMP3]] to <4 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP4]], splat (i32 1)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP6]], i32 3
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[SEL_ZEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[L_TRUNC:%.*]] = trunc i8 [[L]] to i1
+; CHECK-NEXT: [[NOT:%.*]] = xor i1 [[L_TRUNC]], true
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT]], i1 true, i1 false
+; CHECK-NEXT: [[SEL_ZEXT]] = zext i1 [[SEL]] to i32
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[RECUR]], 1
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], %[[LOOP]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[OR_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %recur = phi i32 [ 0, %entry ], [ %sel.zext, %loop ]
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep = getelementptr inbounds i8, ptr %src, i64 %iv
+ %l = load i8, ptr %gep, align 1
+ %l.trunc = trunc i8 %l to i1
+ %not = xor i1 %l.trunc, true
+ %sel = select i1 %not, i1 true, i1 false
+ %sel.zext = zext i1 %sel to i32
+ %or = or i32 %recur, 1
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret i32 %or
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
More information about the llvm-commits mailing list