[llvm] [BasicTTIImpl] Don't unroll loops with vector insts (PR #147982)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 10 08:15:08 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-backend-powerpc
Author: Ramkumar Ramachandra (artagnon)
Changes:
For loops containing vector instructions, the vectorizer should already have picked a profitable interleave count, so unrolling them again at this stage is counter-productive. AArch64 and RISCV already perform this check in their getUnrollingPreferences overrides; this patch moves it into the base implementation and drops the now-redundant AArch64 copy.
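
The core of the change is an early bail-out in BasicTTIImplBase::getUnrollingPreferences. A condensed sketch of the new check, extracted from the BasicTTIImpl.h hunk below (a fragment of the surrounding method, not standalone code):

```cpp
// Before computing any unroll counts, scan the loop and bail out as soon
// as any instruction produces a vector value: the loop was vectorized,
// and the vectorizer's interleaving decision should stand.
for (BasicBlock *BB : L->blocks()) {
  for (Instruction &I : *BB) {
    if (I.getType()->isVectorTy())
      return; // don't unroll vectorized loops
  }
}
```

When the bail-out fires and an OptimizationRemarkEmitter is available, the patch also emits a remark under the pass name "TTI" explaining that unrolling was skipped; presumably (not verified against this patch) it can be surfaced with opt's -pass-remarks=TTI.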
---
Full diff: https://github.com/llvm/llvm-project/pull/147982.diff
5 Files Affected:
- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+18-4)
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (-19)
- (modified) llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll (+6-43)
- (modified) llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll (+6-43)
- (modified) llvm/test/Transforms/LoopUnroll/X86/partial.ll (+1-2)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 829b84e02542d..6c6a3c4a779af 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -718,14 +718,28 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
else
return;
- // Scan the loop: don't unroll loops with calls.
for (BasicBlock *BB : L->blocks()) {
for (Instruction &I : *BB) {
- if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
- if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
+ // Don't unroll loops containing vectorized instructions, as the
+ // vectorizer would have already picked a profitable interleave count.
+ if (I.getType()->isVectorTy()) {
+ if (ORE) {
+ ORE->emit([&]() {
+ return OptimizationRemark("TTI", "DontUnroll", L->getStartLoc(),
+ L->getHeader())
+ << "advising against unrolling the loop because it "
+ "contains a "
+ << ore::NV("vectorized instruction", &I);
+ });
+ }
+ return;
+ }
+
+ // Don't unroll loops with calls, as this could prevent inlining.
+ if (auto *Call = dyn_cast<CallBase>(&I)) {
+ if (const Function *F = Call->getCalledFunction())
if (!thisT()->isLoweredToCall(F))
continue;
- }
if (ORE) {
ORE->emit([&]() {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 20e7726558117..cd0059050200c 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4894,25 +4894,6 @@ void AArch64TTIImpl::getUnrollingPreferences(
// Disable partial & runtime unrolling on -Os.
UP.PartialOptSizeThreshold = 0;
- // Scan the loop: don't unroll loops with calls as this could prevent
- // inlining. Don't unroll vector loops either, as they don't benefit much from
- // unrolling.
- for (auto *BB : L->getBlocks()) {
- for (auto &I : *BB) {
- // Don't unroll vectorised loop.
- if (I.getType()->isVectorTy())
- return;
-
- if (isa<CallBase>(I)) {
- if (isa<CallInst>(I) || isa<InvokeInst>(I))
- if (const Function *F = cast<CallBase>(I).getCalledFunction())
- if (!isLoweredToCall(F))
- continue;
- return;
- }
- }
- }
-
// Apply subtarget-specific unrolling preferences.
switch (ST->getProcFamily()) {
case AArch64Subtarget::AppleA14:
diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll
index 456875ecb7d1f..e8c8701990f7c 100644
--- a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll
+++ b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll
@@ -19,57 +19,20 @@ define ptr @f(ptr returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967280
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[X:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[N_VEC]], -16
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 4
-; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP2]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP1]], 1
-; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]]
-; CHECK: vector.ph.new:
-; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP2]], [[XTRAITER]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND12:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH_NEW]] ], [ [[VEC_IND_NEXT13_1:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND12:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT13:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND12]]
; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i32> [[TMP4]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <16 x i32> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 [[INDEX]]
; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 1
-; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add nuw nsw i64 [[INDEX]], 16
-; CHECK-NEXT: [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], splat (i32 16)
-; CHECK-NEXT: [[TMP9:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND_NEXT13]]
-; CHECK-NEXT: [[TMP10:%.*]] = and <16 x i32> [[TMP9]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <16 x i32> [[TMP10]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49)
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_NEXT]]
-; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP13]], align 1
-; CHECK-NEXT: [[INDEX_NEXT_1]] = add i64 [[INDEX]], 32
-; CHECK-NEXT: [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND12]], splat (i32 32)
-; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
-; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT:%.*]], label [[VECTOR_BODY]]
-; CHECK: middle.block.unr-lcssa.loopexit:
-; CHECK-NEXT: [[INDEX_UNR_PH:%.*]] = phi i64 [ [[INDEX_NEXT_1]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND12_UNR_PH:%.*]] = phi <16 x i32> [ [[VEC_IND_NEXT13_1]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: br label [[MIDDLE_BLOCK_UNR_LCSSA]]
-; CHECK: middle.block.unr-lcssa:
-; CHECK-NEXT: [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT: [[VEC_IND12_UNR:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND12_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]], label [[MIDDLE_BLOCK:%.*]]
-; CHECK: vector.body.epil.preheader:
-; CHECK-NEXT: br label [[VECTOR_BODY_EPIL:%.*]]
-; CHECK: vector.body.epil:
-; CHECK-NEXT: [[TMP14:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND12_UNR]]
-; CHECK-NEXT: [[TMP15:%.*]] = and <16 x i32> [[TMP14]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP16:%.*]] = icmp eq <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP16]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49)
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_UNR]]
-; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr [[TMP18]], align 1
-; CHECK-NEXT: br label [[MIDDLE_BLOCK]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
+; CHECK-NEXT: [[VEC_IND_NEXT13]] = add <16 x i32> [[VEC_IND12]], splat (i32 16)
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER]]
diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
index cd4198f8160f7..ebf4b487f2e01 100644
--- a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
+++ b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
@@ -19,57 +19,20 @@ define ptr @f(ptr returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967280
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> undef, i32 [[X:%.*]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[N_VEC]], -16
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 4
-; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP2]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP1]], 1
-; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]]
-; CHECK: vector.ph.new:
-; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP2]], [[XTRAITER]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND12:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH_NEW]] ], [ [[VEC_IND_NEXT13_1:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND12:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT13:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND12]]
; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i32> [[TMP4]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <16 x i32> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 [[INDEX]]
; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 1
-; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add nuw nsw i64 [[INDEX]], 16
-; CHECK-NEXT: [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], splat (i32 16)
-; CHECK-NEXT: [[TMP9:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND_NEXT13]]
-; CHECK-NEXT: [[TMP10:%.*]] = and <16 x i32> [[TMP9]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <16 x i32> [[TMP10]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49)
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_NEXT]]
-; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP13]], align 1
-; CHECK-NEXT: [[INDEX_NEXT_1]] = add i64 [[INDEX]], 32
-; CHECK-NEXT: [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND12]], splat (i32 32)
-; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
-; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT:%.*]], label [[VECTOR_BODY]]
-; CHECK: middle.block.unr-lcssa.loopexit:
-; CHECK-NEXT: [[INDEX_UNR_PH:%.*]] = phi i64 [ [[INDEX_NEXT_1]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND12_UNR_PH:%.*]] = phi <16 x i32> [ [[VEC_IND_NEXT13_1]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: br label [[MIDDLE_BLOCK_UNR_LCSSA]]
-; CHECK: middle.block.unr-lcssa:
-; CHECK-NEXT: [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT: [[VEC_IND12_UNR:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND12_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]], label [[MIDDLE_BLOCK:%.*]]
-; CHECK: vector.body.epil.preheader:
-; CHECK-NEXT: br label [[VECTOR_BODY_EPIL:%.*]]
-; CHECK: vector.body.epil:
-; CHECK-NEXT: [[TMP14:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND12_UNR]]
-; CHECK-NEXT: [[TMP15:%.*]] = and <16 x i32> [[TMP14]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP16:%.*]] = icmp eq <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP16]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49)
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_UNR]]
-; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr [[TMP18]], align 1
-; CHECK-NEXT: br label [[MIDDLE_BLOCK]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
+; CHECK-NEXT: [[VEC_IND_NEXT13]] = add <16 x i32> [[VEC_IND12]], splat (i32 16)
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER]]
diff --git a/llvm/test/Transforms/LoopUnroll/X86/partial.ll b/llvm/test/Transforms/LoopUnroll/X86/partial.ll
index eec3b4bbaebf0..7372d515125ed 100644
--- a/llvm/test/Transforms/LoopUnroll/X86/partial.ll
+++ b/llvm/test/Transforms/LoopUnroll/X86/partial.ll
@@ -58,8 +58,7 @@ vector.body: ; preds = %vector.body, %entry
; CHECK-LABEL: @bar
; CHECK: fadd
; CHECK-NEXT: fmul
-; CHECK: fadd
-; CHECK-NEXT: fmul
+; CHECK-NOT: fadd
; CHECK-NOUNRL-LABEL: @bar
; CHECK-NOUNRL: fadd
``````````
https://github.com/llvm/llvm-project/pull/147982