[llvm] [VPlan] Don't cost FOR splice if unused in legacy cost model (PR #131486)

via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 15 19:37:01 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Luke Lau (lukel97)

<details>
<summary>Changes</summary>

Fixes #<!-- -->131359

After #<!-- -->129645, a first-order recurrence will no longer have it's splice costed if the VPInstruction::FirstOrderRecurrenceSplice has no users and is dead.

The legacy cost model didn't account for this, so update this to avoid the "VPlan cost model and legacy cost model disagreed" assertion.

Alternatively we could also account for this in planContainsAdditionalSimplifications


---
Full diff: https://github.com/llvm/llvm-project/pull/131486.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+5) 
- (added) llvm/test/Transforms/LoopVectorize/X86/pr131359.ll (+61) 


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 08e125eca591e..f6b91000a2e7b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6541,6 +6541,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
       // TODO: Consider vscale_range info.
       if (VF.isScalable() && VF.getKnownMinValue() == 1)
         return InstructionCost::getInvalid();
+      // If a FOR has no users inside the loop we won't generate a splice.
+      if (none_of(Phi->users(), [this](User *U) {
+            return TheLoop->contains(cast<Instruction>(U));
+          }))
+        return 0;
       SmallVector<int> Mask(VF.getKnownMinValue());
       std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
       return TTI.getShuffleCost(TargetTransformInfo::SK_Splice,
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr131359.ll b/llvm/test/Transforms/LoopVectorize/X86/pr131359.ll
new file mode 100644
index 0000000000000..2d0c0ce891ae7
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr131359.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-vectorize -S %s | FileCheck %s
+
+; Make sure the legacy cost model doesn't add a cost for a splice when the
+; first-order recurrence isn't used inside the loop. The VPlan cost model
+; eliminates the dead VPInstruction::FirstOrderRecurrenceSplice so the two cost
+; models would go out of sync otherwise.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64"
+
+define void @h() {
+; CHECK-LABEL: define void @h() {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, %[[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
+; CHECK-NEXT:    br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3
+; CHECK-NEXT:    br i1 false, label %[[F_EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 40, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[FOR_COND_I:.*]]
+; CHECK:       [[FOR_COND_I]]:
+; CHECK-NEXT:    [[D_0_I:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[E_0_I:%.*]], %[[FOR_COND_I]] ]
+; CHECK-NEXT:    [[E_0_I]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC_I:%.*]], %[[FOR_COND_I]] ]
+; CHECK-NEXT:    [[INC_I]] = add i32 [[E_0_I]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[E_0_I]], 43
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT_I]], label %[[F_EXIT]], label %[[FOR_COND_I]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[F_EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.cond.i
+
+for.cond.i:
+  %d.0.i = phi i32 [ 0, %entry ], [ %e.0.i, %for.cond.i ]
+  %e.0.i = phi i32 [ 0, %entry ], [ %inc.i, %for.cond.i ]
+  %inc.i = add i32 %e.0.i, 1
+  %exitcond.not.i = icmp eq i32 %e.0.i, 43
+  br i1 %exitcond.not.i, label %f.exit, label %for.cond.i
+
+f.exit:
+  ret void
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.

``````````

</details>


https://github.com/llvm/llvm-project/pull/131486


More information about the llvm-commits mailing list