[llvm] [VPlan] Fix miscompile after PR #142433. (PR #147398)
James Y Knight via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 8 07:52:41 PDT 2025
https://github.com/jyknight updated https://github.com/llvm/llvm-project/pull/147398
>From 7bfb250100048dcb7fad412bf24defb8b254a901 Mon Sep 17 00:00:00 2001
From: James Y Knight <jyknight at google.com>
Date: Mon, 7 Jul 2025 12:52:01 -0400
Subject: [PATCH 1/4] Fix miscompile after PR #142433.
Commit aa2402931908317f5cc19b164ef17c5a74f2ae67, "[VPlan] Unroll VPReplicateRecipe by VF" cloned a VPReplicateRecipe without transferring the flags from the original. This caused incorrect flags to be emitted on the new instructions, which resulted in later passes introducing miscompiles.
---
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 2dd43c092ff7a..b89cd21595efd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -486,6 +486,7 @@ static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
auto *New =
new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
/*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
+ New->transferFlags(*RepR);
New->insertBefore(RepR);
return New;
}
>From 874dfafa1ee3c275a58aad88a5f75137f32a2eaf Mon Sep 17 00:00:00 2001
From: James Y Knight <jyknight at google.com>
Date: Mon, 7 Jul 2025 17:11:20 -0400
Subject: [PATCH 2/4] Add test.
---
.../Transforms/LoopVectorize/X86/pr147398.ll | 126 ++++++++++++++++++
1 file changed, 126 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/X86/pr147398.ll
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr147398.ll b/llvm/test/Transforms/LoopVectorize/X86/pr147398.ll
new file mode 100644
index 0000000000000..d772cb1e817bd
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr147398.ll
@@ -0,0 +1,126 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-linux-gnu"
+
+define i64 @test(ptr %arg2, i64 %dim) #0 {
+; CHECK-LABEL: define i64 @test(
+; CHECK-SAME: ptr [[ARG2:%.*]], i64 [[DIM:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[START:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 1)
+; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> splat (i1 true), <4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP2]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP3]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> [[TMP10]], i64 [[TMP7]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i64> [[TMP11]], i64 [[TMP8]], i32 2
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i64> [[TMP12]], i64 [[TMP9]], i32 3
+; CHECK-NEXT: [[TMP14:%.*]] = trunc <4 x i64> [[TMP13]] to <4 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt <4 x i32> zeroinitializer, [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP15]], <4 x double> splat (double 1.000000e+00), <4 x double> zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> zeroinitializer, <4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP5]], [[TMP17]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP17]], <4 x double> zeroinitializer, <4 x double> [[TMP16]]
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP18]], i32 0
+; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 0
+; CHECK-NEXT: store double [[TMP20]], ptr null, align 8
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
+; CHECK: [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP18]], i32 1
+; CHECK-NEXT: br i1 [[TMP21]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK: [[PRED_STORE_IF1]]:
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 1
+; CHECK-NEXT: store double [[TMP22]], ptr null, align 8
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
+; CHECK: [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP18]], i32 2
+; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK: [[PRED_STORE_IF3]]:
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 2
+; CHECK-NEXT: store double [[TMP24]], ptr null, align 8
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
+; CHECK: [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP18]], i32 3
+; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
+; CHECK: [[PRED_STORE_IF5]]:
+; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 3
+; CHECK-NEXT: store double [[TMP26]], ptr null, align 8
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
+; CHECK: [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[LOOP_EXIT:.*]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[START]] ]
+; CHECK-NEXT: br label %[[LOOP_BODY:.*]]
+; CHECK: [[LOOP_BODY]]:
+; CHECK-NEXT: [[INVAR_021:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INVAR_INC11:%.*]], %[[AFTER:.*]] ]
+; CHECK-NEXT: br i1 false, label %[[AFTER]], label %[[CHECK_TRUE:.*]]
+; CHECK: [[LOOP_EXIT]]:
+; CHECK-NEXT: ret i64 0
+; CHECK: [[AFTER]]:
+; CHECK-NEXT: [[RET_VALUE_ADDR_0:%.*]] = phi double [ [[TMP32:%.*]], %[[CHECK_TRUE]] ], [ 0.000000e+00, %[[LOOP_BODY]] ]
+; CHECK-NEXT: store double [[RET_VALUE_ADDR_0]], ptr null, align 8
+; CHECK-NEXT: [[INVAR_INC11]] = add i64 [[INVAR_021]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INVAR_021]], 1
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP_EXIT]], label %[[LOOP_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[CHECK_TRUE]]:
+; CHECK-NEXT: [[TMP27:%.*]] = sub nsw i64 [[INVAR_021]], 0
+; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
+; CHECK-NEXT: [[TMP29:%.*]] = icmp sgt i32 0, [[TMP28]]
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr [16 x [16 x double]], ptr [[ARG2]], i64 0, i64 [[DIM]], i64 [[TMP27]]
+; CHECK-NEXT: [[TMP31:%.*]] = load double, ptr [[TMP30]], align 8
+; CHECK-NEXT: [[TMP32]] = select i1 [[TMP29]], double 1.000000e+00, double 0.000000e+00
+; CHECK-NEXT: br label %[[AFTER]]
+;
+start:
+ br label %loop_body
+
+loop_body: ; preds = %after, %start
+ %invar.021 = phi i64 [ 0, %start ], [ %invar.inc11, %after ]
+ br i1 false, label %after, label %check_true
+
+loop_exit: ; preds = %after
+ ret i64 0
+
+after: ; preds = %check_true, %loop_body
+ %ret_value_addr.0 = phi double [ %5, %check_true ], [ 0.000000e+00, %loop_body ]
+ store double %ret_value_addr.0, ptr null, align 8
+ %invar.inc11 = add i64 %invar.021, 1
+ %exitcond = icmp eq i64 %invar.021, 1
+ br i1 %exitcond, label %loop_exit, label %loop_body
+
+check_true: ; preds = %loop_body
+ %0 = sub nsw i64 %invar.021, 0
+ %1 = trunc i64 %0 to i32
+ %2 = icmp sgt i32 0, %1
+ %3 = getelementptr [16 x [16 x double]], ptr %arg2, i64 0, i64 %dim, i64 %0
+ %4 = load double, ptr %3, align 8
+ %5 = select i1 %2, double 1.000000e+00, double 0.000000e+00
+ br label %after
+}
+
+attributes #0 = { "target-features"="+avx" }
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
>From 2da6f8975954636a875ab258288fc5485ec23b4e Mon Sep 17 00:00:00 2001
From: James Y Knight <jyknight at google.com>
Date: Tue, 8 Jul 2025 10:35:42 -0400
Subject: [PATCH 3/4] Replace test with a copy of drop_vector_nuw_nsw built
under avx.
---
.../X86/drop-poison-generating-flags.ll | 90 +++++++++++--
.../Transforms/LoopVectorize/X86/pr147398.ll | 126 ------------------
2 files changed, 82 insertions(+), 134 deletions(-)
delete mode 100644 llvm/test/Transforms/LoopVectorize/X86/pr147398.ll
diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
index ff9cf682b6e9b..0739683221fe7 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
@@ -338,6 +338,79 @@ loop.exit:
ret void
}
+; Same as @drop_vector_nuw_nsw, except built with avx1; in this case,
+; we make scalar clones of the 'sub' operation. These clones also need
+; cleared flags.
+define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input, ptr %output, ptr noalias %ptrs) local_unnamed_addr #1 {
+; CHECK-LABEL: define void @drop_nonvector_nuw_nsw_avx1(
+; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]], ptr noalias [[PTRS:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP6:%.*]] = sub nuw nsw i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = sub nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = sub nuw nsw i64 [[TMP2]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = sub nuw nsw i64 [[TMP3]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[INPUT]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[INPUT]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[INPUT]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[INPUT]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x ptr> [[TMP14]], ptr [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x ptr> [[TMP15]], ptr [[TMP12]], i32 2
+; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x ptr> [[TMP16]], ptr [[TMP13]], i32 3
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0
+; CHECK-NEXT: store <4 x ptr> [[TMP17]], ptr [[TMP18]], align 8
+; CHECK-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr float, ptr [[TMP10]], i32 0
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP20]], i32 4, <4 x i1> [[TMP19]], <4 x float> poison), !invariant.load [[META0]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 0
+; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP22]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
+ %i23 = icmp eq i64 %iv, 0
+ %gep = getelementptr inbounds ptr, ptr %ptrs, i64 %iv
+ %i27 = sub nuw nsw i64 %iv, 1
+ %i29 = getelementptr inbounds float, ptr %input, i64 %i27
+ store ptr %i29, ptr %gep
+ br i1 %i23, label %if.end, label %if.then
+
+if.then:
+ %i30 = load float, ptr %i29, align 4, !invariant.load !0
+ br label %if.end
+
+if.end:
+ %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
+ %i35 = getelementptr inbounds float, ptr %output, i64 %iv
+ store float %i34, ptr %i35, align 4
+ %iv.inc = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.inc, 4
+ br i1 %exitcond, label %loop.exit, label %loop.header
+
+loop.exit:
+ ret void
+}
+
; Preserve poison-generating flags from 'sub', which is not contributing to any address computation
; of any masked load/store/gather/scatter.
define void @preserve_nuw_nsw_no_addr(ptr %output) local_unnamed_addr #0 {
@@ -358,7 +431,7 @@ define void @preserve_nuw_nsw_no_addr(ptr %output) local_unnamed_addr #0 {
; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
;
entry:
@@ -411,7 +484,7 @@ define void @drop_scalar_exact(ptr noalias nocapture readonly %input, ptr %outpu
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP9]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
;
entry:
@@ -465,7 +538,7 @@ define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
;
entry:
@@ -520,7 +593,7 @@ define void @preserve_vector_exact_no_addr(ptr noalias nocapture readonly %input
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
;
entry:
@@ -572,7 +645,7 @@ define void @preserve_exact_no_addr(ptr %output) local_unnamed_addr #0 {
; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
;
entry:
@@ -720,7 +793,7 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) {
; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP16]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
;
@@ -820,7 +893,7 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst
; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP11]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
+; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
;
@@ -879,7 +952,7 @@ define void @Bgep_inbounds_unconditionally_due_to_store(ptr noalias %B, ptr read
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
-; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
;
@@ -911,5 +984,6 @@ exit:
}
attributes #0 = { noinline nounwind uwtable "target-features"="+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl" }
+attributes #1 = { "target-features"="+avx" }
!0 = !{}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr147398.ll b/llvm/test/Transforms/LoopVectorize/X86/pr147398.ll
deleted file mode 100644
index d772cb1e817bd..0000000000000
--- a/llvm/test/Transforms/LoopVectorize/X86/pr147398.ll
+++ /dev/null
@@ -1,126 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
-
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-linux-gnu"
-
-define i64 @test(ptr %arg2, i64 %dim) #0 {
-; CHECK-LABEL: define i64 @test(
-; CHECK-SAME: ptr [[ARG2:%.*]], i64 [[DIM:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[START:.*]]:
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 1)
-; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> splat (i1 true), <4 x i1> zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP2]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP3]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> [[TMP10]], i64 [[TMP7]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i64> [[TMP11]], i64 [[TMP8]], i32 2
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i64> [[TMP12]], i64 [[TMP9]], i32 3
-; CHECK-NEXT: [[TMP14:%.*]] = trunc <4 x i64> [[TMP13]] to <4 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt <4 x i32> zeroinitializer, [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP15]], <4 x double> splat (double 1.000000e+00), <4 x double> zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> zeroinitializer, <4 x i1> zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP5]], [[TMP17]]
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP17]], <4 x double> zeroinitializer, <4 x double> [[TMP16]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP18]], i32 0
-; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
-; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 0
-; CHECK-NEXT: store double [[TMP20]], ptr null, align 8
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
-; CHECK: [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP18]], i32 1
-; CHECK-NEXT: br i1 [[TMP21]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
-; CHECK: [[PRED_STORE_IF1]]:
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 1
-; CHECK-NEXT: store double [[TMP22]], ptr null, align 8
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
-; CHECK: [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP18]], i32 2
-; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
-; CHECK: [[PRED_STORE_IF3]]:
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 2
-; CHECK-NEXT: store double [[TMP24]], ptr null, align 8
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
-; CHECK: [[PRED_STORE_CONTINUE4]]:
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP18]], i32 3
-; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
-; CHECK: [[PRED_STORE_IF5]]:
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 3
-; CHECK-NEXT: store double [[TMP26]], ptr null, align 8
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
-; CHECK: [[PRED_STORE_CONTINUE6]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
-; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[LOOP_EXIT:.*]]
-; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[START]] ]
-; CHECK-NEXT: br label %[[LOOP_BODY:.*]]
-; CHECK: [[LOOP_BODY]]:
-; CHECK-NEXT: [[INVAR_021:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INVAR_INC11:%.*]], %[[AFTER:.*]] ]
-; CHECK-NEXT: br i1 false, label %[[AFTER]], label %[[CHECK_TRUE:.*]]
-; CHECK: [[LOOP_EXIT]]:
-; CHECK-NEXT: ret i64 0
-; CHECK: [[AFTER]]:
-; CHECK-NEXT: [[RET_VALUE_ADDR_0:%.*]] = phi double [ [[TMP32:%.*]], %[[CHECK_TRUE]] ], [ 0.000000e+00, %[[LOOP_BODY]] ]
-; CHECK-NEXT: store double [[RET_VALUE_ADDR_0]], ptr null, align 8
-; CHECK-NEXT: [[INVAR_INC11]] = add i64 [[INVAR_021]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INVAR_021]], 1
-; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP_EXIT]], label %[[LOOP_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK: [[CHECK_TRUE]]:
-; CHECK-NEXT: [[TMP27:%.*]] = sub nsw i64 [[INVAR_021]], 0
-; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
-; CHECK-NEXT: [[TMP29:%.*]] = icmp sgt i32 0, [[TMP28]]
-; CHECK-NEXT: [[TMP30:%.*]] = getelementptr [16 x [16 x double]], ptr [[ARG2]], i64 0, i64 [[DIM]], i64 [[TMP27]]
-; CHECK-NEXT: [[TMP31:%.*]] = load double, ptr [[TMP30]], align 8
-; CHECK-NEXT: [[TMP32]] = select i1 [[TMP29]], double 1.000000e+00, double 0.000000e+00
-; CHECK-NEXT: br label %[[AFTER]]
-;
-start:
- br label %loop_body
-
-loop_body: ; preds = %after, %start
- %invar.021 = phi i64 [ 0, %start ], [ %invar.inc11, %after ]
- br i1 false, label %after, label %check_true
-
-loop_exit: ; preds = %after
- ret i64 0
-
-after: ; preds = %check_true, %loop_body
- %ret_value_addr.0 = phi double [ %5, %check_true ], [ 0.000000e+00, %loop_body ]
- store double %ret_value_addr.0, ptr null, align 8
- %invar.inc11 = add i64 %invar.021, 1
- %exitcond = icmp eq i64 %invar.021, 1
- br i1 %exitcond, label %loop_exit, label %loop_body
-
-check_true: ; preds = %loop_body
- %0 = sub nsw i64 %invar.021, 0
- %1 = trunc i64 %0 to i32
- %2 = icmp sgt i32 0, %1
- %3 = getelementptr [16 x [16 x double]], ptr %arg2, i64 0, i64 %dim, i64 %0
- %4 = load double, ptr %3, align 8
- %5 = select i1 %2, double 1.000000e+00, double 0.000000e+00
- br label %after
-}
-
-attributes #0 = { "target-features"="+avx" }
-;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-;.
>From 53207947576f4a38f3cee7b825d5e2208d2de25c Mon Sep 17 00:00:00 2001
From: James Y Knight <jyknight at google.com>
Date: Tue, 8 Jul 2025 10:52:27 -0400
Subject: [PATCH 4/4] Oops, the "expected" output was from the pre-fix opt in
last commit.
---
.../X86/drop-poison-generating-flags.ll | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
index 0739683221fe7..49d33d3087ed2 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
@@ -357,14 +357,14 @@ define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input,
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP6:%.*]] = sub nuw nsw i64 [[TMP0]], 1
-; CHECK-NEXT: [[TMP7:%.*]] = sub nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = sub nuw nsw i64 [[TMP2]], 1
-; CHECK-NEXT: [[TMP9:%.*]] = sub nuw nsw i64 [[TMP3]], 1
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[INPUT]], i64 [[TMP6]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[INPUT]], i64 [[TMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[INPUT]], i64 [[TMP8]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[INPUT]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP2]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP3]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP10]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x ptr> [[TMP14]], ptr [[TMP11]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x ptr> [[TMP15]], ptr [[TMP12]], i32 2
More information about the llvm-commits
mailing list