[llvm] [VPlan] Fix miscompile after PR #142433. (PR #147398)

James Y Knight via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 8 07:52:41 PDT 2025


https://github.com/jyknight updated https://github.com/llvm/llvm-project/pull/147398

>From 7bfb250100048dcb7fad412bf24defb8b254a901 Mon Sep 17 00:00:00 2001
From: James Y Knight <jyknight at google.com>
Date: Mon, 7 Jul 2025 12:52:01 -0400
Subject: [PATCH 1/4] Fix miscompile after PR #142433.

Commit aa2402931908317f5cc19b164ef17c5a74f2ae67, "[VPlan] Unroll VPReplicateRecipe by VF" cloned a VPReplicateRecipe without transferring the flags from the original. This caused incorrect flags to be emitted on the new instructions, which resulted in later passes introducing miscompiles.
---
 llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 2dd43c092ff7a..b89cd21595efd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -486,6 +486,7 @@ static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
   auto *New =
       new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
                             /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
+  New->transferFlags(*RepR);
   New->insertBefore(RepR);
   return New;
 }

>From 874dfafa1ee3c275a58aad88a5f75137f32a2eaf Mon Sep 17 00:00:00 2001
From: James Y Knight <jyknight at google.com>
Date: Mon, 7 Jul 2025 17:11:20 -0400
Subject: [PATCH 2/4] Add test.

---
 .../Transforms/LoopVectorize/X86/pr147398.ll  | 126 ++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/X86/pr147398.ll

diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr147398.ll b/llvm/test/Transforms/LoopVectorize/X86/pr147398.ll
new file mode 100644
index 0000000000000..d772cb1e817bd
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr147398.ll
@@ -0,0 +1,126 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-linux-gnu"
+
+define i64 @test(ptr %arg2, i64 %dim) #0 {
+; CHECK-LABEL: define i64 @test(
+; CHECK-SAME: ptr [[ARG2:%.*]], i64 [[DIM:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[START:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 1)
+; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> splat (i1 true), <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP2]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x i64> [[TMP10]], i64 [[TMP7]], i32 1
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i64> [[TMP11]], i64 [[TMP8]], i32 2
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x i64> [[TMP12]], i64 [[TMP9]], i32 3
+; CHECK-NEXT:    [[TMP14:%.*]] = trunc <4 x i64> [[TMP13]] to <4 x i32>
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp sgt <4 x i32> zeroinitializer, [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = select <4 x i1> [[TMP15]], <4 x double> splat (double 1.000000e+00), <4 x double> zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> zeroinitializer, <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP18:%.*]] = or <4 x i1> [[TMP5]], [[TMP17]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP17]], <4 x double> zeroinitializer, <4 x double> [[TMP16]]
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP18]], i32 0
+; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 0
+; CHECK-NEXT:    store double [[TMP20]], ptr null, align 8
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x i1> [[TMP18]], i32 1
+; CHECK-NEXT:    br i1 [[TMP21]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK:       [[PRED_STORE_IF1]]:
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 1
+; CHECK-NEXT:    store double [[TMP22]], ptr null, align 8
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; CHECK:       [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x i1> [[TMP18]], i32 2
+; CHECK-NEXT:    br i1 [[TMP23]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK:       [[PRED_STORE_IF3]]:
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 2
+; CHECK-NEXT:    store double [[TMP24]], ptr null, align 8
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; CHECK:       [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <4 x i1> [[TMP18]], i32 3
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_IF5]]:
+; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 3
+; CHECK-NEXT:    store double [[TMP26]], ptr null, align 8
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; CHECK:       [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
+; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[LOOP_EXIT:.*]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[START]] ]
+; CHECK-NEXT:    br label %[[LOOP_BODY:.*]]
+; CHECK:       [[LOOP_BODY]]:
+; CHECK-NEXT:    [[INVAR_021:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INVAR_INC11:%.*]], %[[AFTER:.*]] ]
+; CHECK-NEXT:    br i1 false, label %[[AFTER]], label %[[CHECK_TRUE:.*]]
+; CHECK:       [[LOOP_EXIT]]:
+; CHECK-NEXT:    ret i64 0
+; CHECK:       [[AFTER]]:
+; CHECK-NEXT:    [[RET_VALUE_ADDR_0:%.*]] = phi double [ [[TMP32:%.*]], %[[CHECK_TRUE]] ], [ 0.000000e+00, %[[LOOP_BODY]] ]
+; CHECK-NEXT:    store double [[RET_VALUE_ADDR_0]], ptr null, align 8
+; CHECK-NEXT:    [[INVAR_INC11]] = add i64 [[INVAR_021]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INVAR_021]], 1
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[LOOP_EXIT]], label %[[LOOP_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[CHECK_TRUE]]:
+; CHECK-NEXT:    [[TMP27:%.*]] = sub nsw i64 [[INVAR_021]], 0
+; CHECK-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
+; CHECK-NEXT:    [[TMP29:%.*]] = icmp sgt i32 0, [[TMP28]]
+; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr [16 x [16 x double]], ptr [[ARG2]], i64 0, i64 [[DIM]], i64 [[TMP27]]
+; CHECK-NEXT:    [[TMP31:%.*]] = load double, ptr [[TMP30]], align 8
+; CHECK-NEXT:    [[TMP32]] = select i1 [[TMP29]], double 1.000000e+00, double 0.000000e+00
+; CHECK-NEXT:    br label %[[AFTER]]
+;
+start:
+  br label %loop_body
+
+loop_body:                                        ; preds = %after, %start
+  %invar.021 = phi i64 [ 0, %start ], [ %invar.inc11, %after ]
+  br i1 false, label %after, label %check_true
+
+loop_exit:                                        ; preds = %after
+  ret i64 0
+
+after:                                            ; preds = %check_true, %loop_body
+  %ret_value_addr.0 = phi double [ %5, %check_true ], [ 0.000000e+00, %loop_body ]
+  store double %ret_value_addr.0, ptr null, align 8
+  %invar.inc11 = add i64 %invar.021, 1
+  %exitcond = icmp eq i64 %invar.021, 1
+  br i1 %exitcond, label %loop_exit, label %loop_body
+
+check_true:                                       ; preds = %loop_body
+  %0 = sub nsw i64 %invar.021, 0
+  %1 = trunc i64 %0 to i32
+  %2 = icmp sgt i32 0, %1
+  %3 = getelementptr [16 x [16 x double]], ptr %arg2, i64 0, i64 %dim, i64 %0
+  %4 = load double, ptr %3, align 8
+  %5 = select i1 %2, double 1.000000e+00, double 0.000000e+00
+  br label %after
+}
+
+attributes #0 = { "target-features"="+avx" }
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.

>From 2da6f8975954636a875ab258288fc5485ec23b4e Mon Sep 17 00:00:00 2001
From: James Y Knight <jyknight at google.com>
Date: Tue, 8 Jul 2025 10:35:42 -0400
Subject: [PATCH 3/4] Replace test with a copy of drop_vector_nuw_nsw built
 under avx.

---
 .../X86/drop-poison-generating-flags.ll       |  90 +++++++++++--
 .../Transforms/LoopVectorize/X86/pr147398.ll  | 126 ------------------
 2 files changed, 82 insertions(+), 134 deletions(-)
 delete mode 100644 llvm/test/Transforms/LoopVectorize/X86/pr147398.ll

diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
index ff9cf682b6e9b..0739683221fe7 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
@@ -338,6 +338,79 @@ loop.exit:
   ret void
 }
 
+; Same as @drop_vector_nuw_nsw, except built with avx1; in this case,
+; we make scalar clones of the 'sub' operation. These clones also need
+; their poison-generating flags cleared.
+define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input, ptr %output, ptr noalias %ptrs) local_unnamed_addr #1 {
+; CHECK-LABEL: define void @drop_nonvector_nuw_nsw_avx1(
+; CHECK-SAME: ptr noalias readonly captures(none) [[INPUT:%.*]], ptr [[OUTPUT:%.*]], ptr noalias [[PTRS:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP6:%.*]] = sub nuw nsw i64 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = sub nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = sub nuw nsw i64 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP9:%.*]] = sub nuw nsw i64 [[TMP3]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[INPUT]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[INPUT]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[INPUT]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[INPUT]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP10]], i32 0
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x ptr> [[TMP14]], ptr [[TMP11]], i32 1
+; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x ptr> [[TMP15]], ptr [[TMP12]], i32 2
+; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x ptr> [[TMP16]], ptr [[TMP13]], i32 3
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0
+; CHECK-NEXT:    store <4 x ptr> [[TMP17]], ptr [[TMP18]], align 8
+; CHECK-NEXT:    [[TMP19:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr float, ptr [[TMP10]], i32 0
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP20]], i32 4, <4 x i1> [[TMP19]], <4 x float> poison), !invariant.load [[META0]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 0
+; CHECK-NEXT:    store <4 x float> [[PREDPHI]], ptr [[TMP22]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
+  %i23 = icmp eq i64 %iv, 0
+  %gep = getelementptr inbounds ptr, ptr %ptrs, i64 %iv
+  %i27 = sub nuw nsw i64 %iv, 1
+  %i29 = getelementptr inbounds float, ptr %input, i64 %i27
+  store ptr %i29, ptr %gep
+  br i1 %i23, label %if.end, label %if.then
+
+if.then:
+  %i30 = load float, ptr %i29, align 4, !invariant.load !0
+  br label %if.end
+
+if.end:
+  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
+  %i35 = getelementptr inbounds float, ptr %output, i64 %iv
+  store float %i34, ptr %i35, align 4
+  %iv.inc = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.inc, 4
+  br i1 %exitcond, label %loop.exit, label %loop.header
+
+loop.exit:
+  ret void
+}
+
 ; Preserve poison-generating flags from 'sub', which is not contributing to any address computation
 ; of any masked load/store/gather/scatter.
 define void @preserve_nuw_nsw_no_addr(ptr %output) local_unnamed_addr #0 {
@@ -358,7 +431,7 @@ define void @preserve_nuw_nsw_no_addr(ptr %output) local_unnamed_addr #0 {
 ; CHECK-NEXT:    store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ;
 entry:
@@ -411,7 +484,7 @@ define void @drop_scalar_exact(ptr noalias nocapture readonly %input, ptr %outpu
 ; CHECK-NEXT:    store <4 x float> [[PREDPHI]], ptr [[TMP9]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ;
 entry:
@@ -465,7 +538,7 @@ define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 {
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ;
 entry:
@@ -520,7 +593,7 @@ define void @preserve_vector_exact_no_addr(ptr noalias nocapture readonly %input
 ; CHECK-NEXT:    store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ;
 entry:
@@ -572,7 +645,7 @@ define void @preserve_exact_no_addr(ptr %output) local_unnamed_addr #0 {
 ; CHECK-NEXT:    store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
+; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ;
 entry:
@@ -720,7 +793,7 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) {
 ; CHECK-NEXT:    store <4 x i8> [[PREDPHI]], ptr [[TMP16]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
+; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ;
 
@@ -820,7 +893,7 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst
 ; CHECK-NEXT:    store <4 x i8> [[PREDPHI]], ptr [[TMP11]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
+; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ;
 
@@ -879,7 +952,7 @@ define void @Bgep_inbounds_unconditionally_due_to_store(ptr noalias %B, ptr read
 ; CHECK-NEXT:    store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
-; CHECK-NEXT:    br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ;
 
@@ -911,5 +984,6 @@ exit:
 }
 
 attributes #0 = { noinline nounwind uwtable "target-features"="+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl" }
+attributes #1 = { "target-features"="+avx" }
 
 !0 = !{}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr147398.ll b/llvm/test/Transforms/LoopVectorize/X86/pr147398.ll
deleted file mode 100644
index d772cb1e817bd..0000000000000
--- a/llvm/test/Transforms/LoopVectorize/X86/pr147398.ll
+++ /dev/null
@@ -1,126 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -passes=loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
-
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-linux-gnu"
-
-define i64 @test(ptr %arg2, i64 %dim) #0 {
-; CHECK-LABEL: define i64 @test(
-; CHECK-SAME: ptr [[ARG2:%.*]], i64 [[DIM:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:  [[START:.*]]:
-; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
-; CHECK:       [[VECTOR_PH]]:
-; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
-; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 1)
-; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> splat (i1 true), <4 x i1> zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP0]], 0
-; CHECK-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP2]], 0
-; CHECK-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP3]], 0
-; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i32 0
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x i64> [[TMP10]], i64 [[TMP7]], i32 1
-; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i64> [[TMP11]], i64 [[TMP8]], i32 2
-; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x i64> [[TMP12]], i64 [[TMP9]], i32 3
-; CHECK-NEXT:    [[TMP14:%.*]] = trunc <4 x i64> [[TMP13]] to <4 x i32>
-; CHECK-NEXT:    [[TMP15:%.*]] = icmp sgt <4 x i32> zeroinitializer, [[TMP14]]
-; CHECK-NEXT:    [[TMP16:%.*]] = select <4 x i1> [[TMP15]], <4 x double> splat (double 1.000000e+00), <4 x double> zeroinitializer
-; CHECK-NEXT:    [[TMP17:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> zeroinitializer, <4 x i1> zeroinitializer
-; CHECK-NEXT:    [[TMP18:%.*]] = or <4 x i1> [[TMP5]], [[TMP17]]
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP17]], <4 x double> zeroinitializer, <4 x double> [[TMP16]]
-; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP18]], i32 0
-; CHECK-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
-; CHECK:       [[PRED_STORE_IF]]:
-; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 0
-; CHECK-NEXT:    store double [[TMP20]], ptr null, align 8
-; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
-; CHECK:       [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x i1> [[TMP18]], i32 1
-; CHECK-NEXT:    br i1 [[TMP21]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
-; CHECK:       [[PRED_STORE_IF1]]:
-; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 1
-; CHECK-NEXT:    store double [[TMP22]], ptr null, align 8
-; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
-; CHECK:       [[PRED_STORE_CONTINUE2]]:
-; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x i1> [[TMP18]], i32 2
-; CHECK-NEXT:    br i1 [[TMP23]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
-; CHECK:       [[PRED_STORE_IF3]]:
-; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 2
-; CHECK-NEXT:    store double [[TMP24]], ptr null, align 8
-; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
-; CHECK:       [[PRED_STORE_CONTINUE4]]:
-; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <4 x i1> [[TMP18]], i32 3
-; CHECK-NEXT:    br i1 [[TMP25]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
-; CHECK:       [[PRED_STORE_IF5]]:
-; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 3
-; CHECK-NEXT:    store double [[TMP26]], ptr null, align 8
-; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
-; CHECK:       [[PRED_STORE_CONTINUE6]]:
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    br label %[[LOOP_EXIT:.*]]
-; CHECK:       [[SCALAR_PH]]:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[START]] ]
-; CHECK-NEXT:    br label %[[LOOP_BODY:.*]]
-; CHECK:       [[LOOP_BODY]]:
-; CHECK-NEXT:    [[INVAR_021:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INVAR_INC11:%.*]], %[[AFTER:.*]] ]
-; CHECK-NEXT:    br i1 false, label %[[AFTER]], label %[[CHECK_TRUE:.*]]
-; CHECK:       [[LOOP_EXIT]]:
-; CHECK-NEXT:    ret i64 0
-; CHECK:       [[AFTER]]:
-; CHECK-NEXT:    [[RET_VALUE_ADDR_0:%.*]] = phi double [ [[TMP32:%.*]], %[[CHECK_TRUE]] ], [ 0.000000e+00, %[[LOOP_BODY]] ]
-; CHECK-NEXT:    store double [[RET_VALUE_ADDR_0]], ptr null, align 8
-; CHECK-NEXT:    [[INVAR_INC11]] = add i64 [[INVAR_021]], 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INVAR_021]], 1
-; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[LOOP_EXIT]], label %[[LOOP_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK:       [[CHECK_TRUE]]:
-; CHECK-NEXT:    [[TMP27:%.*]] = sub nsw i64 [[INVAR_021]], 0
-; CHECK-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
-; CHECK-NEXT:    [[TMP29:%.*]] = icmp sgt i32 0, [[TMP28]]
-; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr [16 x [16 x double]], ptr [[ARG2]], i64 0, i64 [[DIM]], i64 [[TMP27]]
-; CHECK-NEXT:    [[TMP31:%.*]] = load double, ptr [[TMP30]], align 8
-; CHECK-NEXT:    [[TMP32]] = select i1 [[TMP29]], double 1.000000e+00, double 0.000000e+00
-; CHECK-NEXT:    br label %[[AFTER]]
-;
-start:
-  br label %loop_body
-
-loop_body:                                        ; preds = %after, %start
-  %invar.021 = phi i64 [ 0, %start ], [ %invar.inc11, %after ]
-  br i1 false, label %after, label %check_true
-
-loop_exit:                                        ; preds = %after
-  ret i64 0
-
-after:                                            ; preds = %check_true, %loop_body
-  %ret_value_addr.0 = phi double [ %5, %check_true ], [ 0.000000e+00, %loop_body ]
-  store double %ret_value_addr.0, ptr null, align 8
-  %invar.inc11 = add i64 %invar.021, 1
-  %exitcond = icmp eq i64 %invar.021, 1
-  br i1 %exitcond, label %loop_exit, label %loop_body
-
-check_true:                                       ; preds = %loop_body
-  %0 = sub nsw i64 %invar.021, 0
-  %1 = trunc i64 %0 to i32
-  %2 = icmp sgt i32 0, %1
-  %3 = getelementptr [16 x [16 x double]], ptr %arg2, i64 0, i64 %dim, i64 %0
-  %4 = load double, ptr %3, align 8
-  %5 = select i1 %2, double 1.000000e+00, double 0.000000e+00
-  br label %after
-}
-
-attributes #0 = { "target-features"="+avx" }
-;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-;.

>From 53207947576f4a38f3cee7b825d5e2208d2de25c Mon Sep 17 00:00:00 2001
From: James Y Knight <jyknight at google.com>
Date: Tue, 8 Jul 2025 10:52:27 -0400
Subject: [PATCH 4/4] Oops, the "expected" output was from the pre-fix opt in
 last commit.

---
 .../X86/drop-poison-generating-flags.ll          | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
index 0739683221fe7..49d33d3087ed2 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
@@ -357,14 +357,14 @@ define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input,
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP6:%.*]] = sub nuw nsw i64 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP7:%.*]] = sub nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = sub nuw nsw i64 [[TMP2]], 1
-; CHECK-NEXT:    [[TMP9:%.*]] = sub nuw nsw i64 [[TMP3]], 1
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[INPUT]], i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[INPUT]], i64 [[TMP7]]
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[INPUT]], i64 [[TMP8]]
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[INPUT]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP9:%.*]] = sub i64 [[TMP3]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP9]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP10]], i32 0
 ; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x ptr> [[TMP14]], ptr [[TMP11]], i32 1
 ; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x ptr> [[TMP15]], ptr [[TMP12]], i32 2



More information about the llvm-commits mailing list