[llvm] [LV] Regen a test with UTC (PR #139235)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Fri May 9 02:55:27 PDT 2025
https://github.com/artagnon created https://github.com/llvm/llvm-project/pull/139235
None
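The checks below were regenerated with utils/update_test_checks.py using the UTC_ARGS recorded in the test's NOTE line. A minimal sketch of the invocation, assuming opt has been built under ./build/bin (adjust --opt-binary to your build directory):

  llvm/utils/update_test_checks.py --opt-binary=build/bin/opt \
      --check-globals none --filter-out-after "^middle.block" --version 5 \
      llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll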
>From 493ad3939a53dd1d72dac3739177423bcc55e6bc Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Fri, 9 May 2025 10:53:42 +0100
Subject: [PATCH] [LV] Regen a test with UTC
---
.../X86/drop-poison-generating-flags.ll | 584 +++++++++++-------
1 file changed, 351 insertions(+), 233 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
index 8beb467572826..207f3e8ba915e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^middle.block" --version 5
; RUN: opt %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s
; Make sure that integer poison-generating flags (i.e., nuw/nsw, exact and inbounds)
@@ -18,18 +19,31 @@ target triple = "x86_64-pc-linux-gnu"
; Drop poison-generating flags from 'sub' and 'getelementptr' feeding a masked load.
; Test for PR52111.
-define void @drop_scalar_nuw_nsw(ptr noalias nocapture readonly %input,
- ptr %output) local_unnamed_addr #0 {
-; CHECK-LABEL: @drop_scalar_nuw_nsw(
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
-; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
+define void @drop_scalar_nuw_nsw(ptr noalias nocapture readonly %input, ptr %output) local_unnamed_addr #0 {
+; CHECK-LABEL: define void @drop_scalar_nuw_nsw(
+; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP3]], i32 0
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP4]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0:![0-9]+]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0
+; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+;
entry:
br label %loop.header
@@ -57,18 +71,31 @@ loop.exit:
}
; Variant with getelementptr nusw.
-define void @drop_scalar_gep_nusw(ptr noalias nocapture readonly %input,
- ptr %output) local_unnamed_addr #0 {
-; CHECK-LABEL: @drop_scalar_gep_nusw(
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
-; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
+define void @drop_scalar_gep_nusw(ptr noalias nocapture readonly %input, ptr %output) local_unnamed_addr #0 {
+; CHECK-LABEL: define void @drop_scalar_gep_nusw(
+; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP3]], i32 0
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP4]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nusw float, ptr [[OUTPUT]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr nusw float, ptr [[TMP5]], i32 0
+; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+;
entry:
br label %loop.header
@@ -96,18 +123,31 @@ loop.exit:
}
; Variant with getelementptr nuw.
-define void @drop_scalar_gep_nuw(ptr noalias nocapture readonly %input,
- ptr %output) local_unnamed_addr #0 {
-; CHECK-LABEL: @drop_scalar_gep_nuw(
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
-; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
+define void @drop_scalar_gep_nuw(ptr noalias nocapture readonly %input, ptr %output) local_unnamed_addr #0 {
+; CHECK-LABEL: define void @drop_scalar_gep_nuw(
+; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP3]], i32 0
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP4]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nuw float, ptr [[OUTPUT]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr nuw float, ptr [[TMP5]], i32 0
+; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+;
entry:
br label %loop.header
@@ -136,18 +176,31 @@ loop.exit:
; Drop poison-generating flags from 'sub' and 'getelementptr' feeding a masked load.
; In this case, 'sub' and 'getelementptr' are not guarded by the predicate.
-define void @drop_nonpred_scalar_nuw_nsw(ptr noalias nocapture readonly %input,
- ptr %output) local_unnamed_addr #0 {
-; CHECK-LABEL: @drop_nonpred_scalar_nuw_nsw(
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
-; CHECK: [[TMP5:%.*]] = sub i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
+define void @drop_nonpred_scalar_nuw_nsw(ptr noalias nocapture readonly %input, ptr %output) local_unnamed_addr #0 {
+; CHECK-LABEL: define void @drop_nonpred_scalar_nuw_nsw(
+; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP2]], splat (i1 true)
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP4]], i32 4, <4 x i1> [[TMP3]], <4 x float> poison), !invariant.load [[META0]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0
+; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+;
entry:
br label %loop.header
@@ -175,18 +228,31 @@ loop.exit:
}
; Preserve poison-generating flags from vector 'sub', 'mul' and 'getelementptr' feeding a masked gather.
-define void @preserve_vector_nuw_nsw(ptr noalias nocapture readonly %input,
- ptr %output) local_unnamed_addr #0 {
-; CHECK-LABEL: @preserve_vector_nuw_nsw(
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
-; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1)
-; CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw <4 x i64> [[TMP5]], splat (i64 2)
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[INPUT:%.*]], <4 x i64> [[TMP6]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP7]], i32 4, <4 x i1> [[TMP8]], <4 x float> poison), !invariant.load !0
+define void @preserve_vector_nuw_nsw(ptr noalias nocapture readonly %input, ptr %output) local_unnamed_addr #0 {
+; CHECK-LABEL: define void @preserve_vector_nuw_nsw(
+; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
+; CHECK-NEXT: [[TMP2:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw <4 x i64> [[TMP2]], splat (i64 2)
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[INPUT]], <4 x i64> [[TMP3]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_GATHER]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0
+; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+;
entry:
br label %loop.header
@@ -215,20 +281,35 @@ loop.exit:
}
; Drop poison-generating flags from vector 'sub' and 'gep' feeding a masked load.
-define void @drop_vector_nuw_nsw(ptr noalias nocapture readonly %input,
- ptr %output, ptr noalias %ptrs) local_unnamed_addr #0 {
-; CHECK-LABEL: @drop_vector_nuw_nsw(
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
-; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i64> [[VEC_IND]], splat (i64 1)
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr float, ptr [[INPUT:%.*]], <4 x i64> [[TMP6]]
-; CHECK: [[TMP10:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x ptr> [[TMP7]], i32 0
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[TMP11]], i32 0
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP12]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0
+define void @drop_vector_nuw_nsw(ptr noalias nocapture readonly %input, ptr %output, ptr noalias %ptrs) local_unnamed_addr #0 {
+; CHECK-LABEL: define void @drop_vector_nuw_nsw(
+; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]], ptr noalias [[PTRS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], <4 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0
+; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[TMP4]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP7]], i32 4, <4 x i1> [[TMP5]], <4 x float> poison), !invariant.load [[META0]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0
+; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP9]], align 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+;
entry:
br label %loop.header
@@ -260,16 +341,26 @@ loop.exit:
; Preserve poison-generating flags from 'sub', which is not contributing to any address computation
; of any masked load/store/gather/scatter.
define void @preserve_nuw_nsw_no_addr(ptr %output) local_unnamed_addr #0 {
-; CHECK-LABEL: @preserve_nuw_nsw_no_addr(
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
-; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1)
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> zeroinitializer, <4 x i64> [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
-; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP8]], align 4
+; CHECK-LABEL: define void @preserve_nuw_nsw_no_addr(
+; CHECK-SAME: ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> zeroinitializer, <4 x i64> [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
+; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+;
entry:
br label %loop.header
@@ -295,21 +386,34 @@ loop.exit:
}
; Drop poison-generating flags from 'sdiv' and 'getelementptr' feeding a masked load.
-define void @drop_scalar_exact(ptr noalias nocapture readonly %input,
- ptr %output) local_unnamed_addr #0 {
-; CHECK-LABEL: @drop_scalar_exact(
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
-; CHECK: [[TMP4:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1)
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP4]], [[TMP6]]
-; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
-; CHECK-NEXT: [[TMP8:%.*]] = sdiv i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP8]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP9]], i32 0
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP11]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0
+define void @drop_scalar_exact(ptr noalias nocapture readonly %input, ptr %output) local_unnamed_addr #0 {
+; CHECK-LABEL: define void @drop_scalar_exact(
+; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP0]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = sdiv i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP7]], i32 4, <4 x i1> [[TMP4]], <4 x float> poison), !invariant.load [[META0]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0
+; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP9]], align 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+;
entry:
br label %loop.header
@@ -341,14 +445,14 @@ loop.exit:
define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 {
; CHECK-LABEL: define void @drop_zext_nneg(
-; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[P1:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[P1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i32> [[VEC_IND]] to <4 x i64>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[TMP1]], i32 0
@@ -356,36 +460,13 @@ define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 {
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP4]], i32 8, <4 x i1> [[TMP0]], <4 x double> poison)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 3
-; CHECK-NEXT: store double [[TMP6]], ptr [[P1]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 3
+; CHECK-NEXT: store double [[TMP5]], ptr [[P1]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
-; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: br label [[BODY:%.*]]
-; CHECK: body:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[NEXT:%.*]], [[ELSE:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[IV]] to i32
-; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE]]
-; CHECK: then:
-; CHECK-NEXT: [[ZEXT:%.*]] = zext nneg i32 [[TMP8]] to i64
-; CHECK-NEXT: [[IDX1:%.*]] = getelementptr double, ptr [[P]], i64 [[ZEXT]]
-; CHECK-NEXT: [[IDX2:%.*]] = getelementptr double, ptr [[P]], i64 [[ZEXT]]
-; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[IDX2]], align 8
-; CHECK-NEXT: br label [[ELSE]]
-; CHECK: else:
-; CHECK-NEXT: [[PHI:%.*]] = phi double [ [[TMP9]], [[THEN]] ], [ 0.000000e+00, [[BODY]] ]
-; CHECK-NEXT: store double [[PHI]], ptr [[P1]], align 8
-; CHECK-NEXT: [[NEXT]] = add i64 [[IV]], 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[NEXT]], 1024
-; CHECK-NEXT: br i1 [[CMP]], label [[EXIT]], label [[BODY]], !llvm.loop [[LOOP18:![0-9]+]]
-; CHECK: exit:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
;
entry:
br label %body
@@ -415,20 +496,32 @@ exit:
}
; Preserve poison-generating flags from 'sdiv' and 'getelementptr' feeding a masked gather.
-define void @preserve_vector_exact_no_addr(ptr noalias nocapture readonly %input,
- ptr %output) local_unnamed_addr #0 {
-; CHECK-LABEL: @preserve_vector_exact_no_addr(
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
-; CHECK: [[TMP4:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1)
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP4]], [[TMP6]]
-; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
-; CHECK-NEXT: [[TMP8:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2)
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[INPUT:%.*]], <4 x i64> [[TMP8]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP9]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0
+define void @preserve_vector_exact_no_addr(ptr noalias nocapture readonly %input, ptr %output) local_unnamed_addr #0 {
+; CHECK-LABEL: define void @preserve_vector_exact_no_addr(
+; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP0]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[INPUT]], <4 x i64> [[TMP5]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP6]], i32 4, <4 x i1> [[TMP4]], <4 x float> poison), !invariant.load [[META0]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_GATHER]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
+; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
;
entry:
br label %loop.header
@@ -462,16 +555,26 @@ loop.exit:
; Preserve poison-generating flags from 'sdiv', which is not contributing to any address computation
; of any masked load/store/gather/scatter.
define void @preserve_exact_no_addr(ptr %output) local_unnamed_addr #0 {
-; CHECK-LABEL: @preserve_exact_no_addr(
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
-; CHECK: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2)
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> zeroinitializer, <4 x i64> [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
-; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP8]], align 4
+; CHECK-LABEL: define void @preserve_exact_no_addr(
+; CHECK-SAME: ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> zeroinitializer, <4 x i64> [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
+; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+;
entry:
br label %loop.header
@@ -498,30 +601,30 @@ loop.exit:
; Make sure we don't vectorize a loop with a phi feeding a poison value to
; a masked load/gather.
-define void @dont_vectorize_poison_phi(ptr noalias nocapture readonly %input,
-; CHECK-LABEL: @dont_vectorize_poison_phi(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
-; CHECK: loop.header:
-; CHECK-NEXT: [[POISON:%.*]] = phi i64 [ poison, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[IF_END:%.*]] ]
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_INC]], [[IF_END]] ]
+define void @dont_vectorize_poison_phi(ptr noalias nocapture readonly %input, ptr %output) local_unnamed_addr #0 {
+; CHECK-LABEL: define void @dont_vectorize_poison_phi(
+; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[POISON:%.*]] = phi i64 [ poison, %[[ENTRY]] ], [ [[IV_INC:%.*]], %[[IF_END:.*]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_INC]], %[[IF_END]] ]
; CHECK-NEXT: [[I23:%.*]] = icmp eq i64 [[IV]], 0
-; CHECK-NEXT: br i1 [[I23]], label [[IF_END]], label [[IF_THEN:%.*]]
-; CHECK: if.then:
-; CHECK-NEXT: [[I29:%.*]] = getelementptr inbounds float, ptr [[INPUT:%.*]], i64 [[POISON]]
-; CHECK-NEXT: [[I30:%.*]] = load float, ptr [[I29]], align 4, !invariant.load !0
-; CHECK-NEXT: br label [[IF_END]]
-; CHECK: if.end:
-; CHECK-NEXT: [[I34:%.*]] = phi float [ 0.000000e+00, [[LOOP_HEADER]] ], [ [[I30]], [[IF_THEN]] ]
-; CHECK-NEXT: [[I35:%.*]] = getelementptr inbounds float, ptr [[OUTPUT:%.*]], i64 [[IV]]
+; CHECK-NEXT: br i1 [[I23]], label %[[IF_END]], label %[[IF_THEN:.*]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[I29:%.*]] = getelementptr inbounds float, ptr [[INPUT]], i64 [[POISON]]
+; CHECK-NEXT: [[I30:%.*]] = load float, ptr [[I29]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT: br label %[[IF_END]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: [[I34:%.*]] = phi float [ 0.000000e+00, %[[LOOP_HEADER]] ], [ [[I30]], %[[IF_THEN]] ]
+; CHECK-NEXT: [[I35:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[IV]]
; CHECK-NEXT: store float [[I34]], ptr [[I35]], align 4
; CHECK-NEXT: [[IV_INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_INC]], 4
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_EXIT:%.*]], label [[LOOP_HEADER]]
-; CHECK: loop.exit:
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP_EXIT:.*]], label %[[LOOP_HEADER]]
+; CHECK: [[LOOP_EXIT]]:
; CHECK-NEXT: ret void
;
- ptr %output) local_unnamed_addr #0 {
entry:
br label %loop.header
@@ -554,50 +657,58 @@ loop.exit:
; Note that the then block has UB, but I could not find any other way to
; construct a suitable test case.
define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) {
-; CHECK-LABEL: @pr70590_recipe_without_underlying_instr(
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.+]] ], [ [[INDEX_NEXT:%.*]], [[PRED_SREM_CONTINUE6:%.*]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_SREM_CONTINUE6]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i64> [[VEC_IND]],
-; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true)
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
-; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_SREM_IF:%.*]], label [[PRED_SREM_CONTINUE:%.*]]
-; CHECK: pred.srem.if:
-; CHECK-NEXT: [[TMP4:%.*]] = srem i64 3, 0
-; CHECK-NEXT: br label [[PRED_SREM_CONTINUE]]
-; CHECK: pred.srem.continue:
-; CHECK-NEXT: [[TMP5:%.*]] = phi i64 [ poison, %vector.body ], [ [[TMP4]], [[PRED_SREM_IF]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
-; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_SREM_IF1:%.*]], label [[PRED_SREM_CONTINUE2:%.*]]
-; CHECK: pred.srem.if1:
-; CHECK-NEXT: [[TMP7:%.*]] = srem i64 3, 0
-; CHECK-NEXT: br label [[PRED_SREM_CONTINUE2]]
-; CHECK: pred.srem.continue2:
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
-; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_SREM_IF3:%.*]], label [[PRED_SREM_CONTINUE4:%.*]]
-; CHECK: pred.srem.if3:
+; CHECK-LABEL: define void @pr70590_recipe_without_underlying_instr(
+; CHECK-SAME: i64 [[N:%.*]], ptr noalias [[DST:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SREM_CONTINUE6:.*]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_SREM_CONTINUE6]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
+; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SREM_IF:.*]], label %[[PRED_SREM_CONTINUE:.*]]
+; CHECK: [[PRED_SREM_IF]]:
+; CHECK-NEXT: [[TMP3:%.*]] = srem i64 3, 0
+; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE]]
+; CHECK: [[PRED_SREM_CONTINUE]]:
+; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP3]], %[[PRED_SREM_IF]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
+; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_SREM_IF1:.*]], label %[[PRED_SREM_CONTINUE2:.*]]
+; CHECK: [[PRED_SREM_IF1]]:
+; CHECK-NEXT: [[TMP6:%.*]] = srem i64 3, 0
+; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE2]]
+; CHECK: [[PRED_SREM_CONTINUE2]]:
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
+; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_SREM_IF3:.*]], label %[[PRED_SREM_CONTINUE4:.*]]
+; CHECK: [[PRED_SREM_IF3]]:
+; CHECK-NEXT: [[TMP8:%.*]] = srem i64 3, 0
+; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE4]]
+; CHECK: [[PRED_SREM_CONTINUE4]]:
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
+; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_SREM_IF5:.*]], label %[[PRED_SREM_CONTINUE6]]
+; CHECK: [[PRED_SREM_IF5]]:
; CHECK-NEXT: [[TMP10:%.*]] = srem i64 3, 0
-; CHECK-NEXT: br label [[PRED_SREM_CONTINUE4]]
-; CHECK: pred.srem.continue4:
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
-; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_SREM_IF5:%.*]], label [[PRED_SREM_CONTINUE6]]
-; CHECK: pred.srem.if5:
-; CHECK-NEXT: [[TMP13:%.*]] = srem i64 3, 0
-; CHECK-NEXT: br label [[PRED_SREM_CONTINUE6]]
-; CHECK: pred.srem.continue6:
-; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP5]], -3
-; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], [[TMP15]]
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP16]]
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]]
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr %dst, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i32 0
-; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP20]], align 4
+; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE6]]
+; CHECK: [[PRED_SREM_CONTINUE6]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP4]], -3
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP14]], align 1
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP15]], i32 0
+; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP16]], align 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: br i1 true, label %middle.block, label %vector.body
-; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+;
entry:
br label %loop.header
@@ -632,22 +743,29 @@ exit:
; FIXME: at the moment, inbounds is dropped from both the GEP feeding the vector load and store
define void @Bgep_inbounds_unconditionally_due_to_store(ptr noalias %B, ptr readonly %C) #0 {
; CHECK-LABEL: define void @Bgep_inbounds_unconditionally_due_to_store(
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr %C, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 20)
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr %B, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP4]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[WIDE_LOAD2]], splat (float 2.000000e+00)
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> splat (float 3.300000e+01), <4 x float> [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 0
-; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4
+; CHECK-SAME: ptr noalias [[B:%.*]], ptr readonly [[C:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 20)
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP3]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[WIDE_LOAD1]], splat (float 2.000000e+00)
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x float> splat (float 3.300000e+01), <4 x float> [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 0
+; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
-; CHECK-NEXT: br i1 [[TMP9]], label %middle.block, label %vector.body
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+;
entry:
br label %loop.body