[llvm] [VPlan] Don't cost FOR splice if unused in legacy cost model (PR #131486)

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 15 19:36:29 PDT 2025


https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/131486

Fixes #131359

After #129645, a first-order recurrence will no longer have its splice costed if the VPInstruction::FirstOrderRecurrenceSplice has no users and is dead.

The legacy cost model didn't account for this, so update this to avoid the "VPlan cost model and legacy cost model disagreed" assertion.

Alternatively we could also account for this in planContainsAdditionalSimplifications


>From 3e3a16732e3ec81ad781e4ac7b1a4d4fc32e0c6c Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Sun, 16 Mar 2025 10:32:57 +0800
Subject: [PATCH] [VPlan] Don't cost FOR splice if unused in legacy cost model

Fixes #131359

After #129645, a first-order recurrence will no longer have its splice costed if the VPInstruction::FirstOrderRecurrenceSplice has no users and is dead.

The legacy cost model didn't account for this, so update this to avoid the "VPlan cost model and legacy cost model disagreed" assertion.

Alternatively we could also account for this in planContainsAdditionalSimplifications
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |  5 ++
 .../Transforms/LoopVectorize/X86/pr131359.ll  | 61 +++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/X86/pr131359.ll

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 08e125eca591e..f6b91000a2e7b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6541,6 +6541,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
       // TODO: Consider vscale_range info.
       if (VF.isScalable() && VF.getKnownMinValue() == 1)
         return InstructionCost::getInvalid();
+      // If a FOR has no users inside the loop we won't generate a splice.
+      if (none_of(Phi->users(), [this](User *U) {
+            return TheLoop->contains(cast<Instruction>(U));
+          }))
+        return 0;
       SmallVector<int> Mask(VF.getKnownMinValue());
       std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
       return TTI.getShuffleCost(TargetTransformInfo::SK_Splice,
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr131359.ll b/llvm/test/Transforms/LoopVectorize/X86/pr131359.ll
new file mode 100644
index 0000000000000..2d0c0ce891ae7
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr131359.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-vectorize -S %s | FileCheck %s
+
+; Make sure the legacy cost model doesn't add a cost for a splice when the
+; first-order recurrence isn't used inside the loop. The VPlan cost model
+; eliminates the dead VPInstruction::FirstOrderRecurrenceSplice so the two cost
+; models would go out of sync otherwise.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64"
+
+define void @h() {
+; CHECK-LABEL: define void @h() {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, %[[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
+; CHECK-NEXT:    br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3
+; CHECK-NEXT:    br i1 false, label %[[F_EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 40, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[FOR_COND_I:.*]]
+; CHECK:       [[FOR_COND_I]]:
+; CHECK-NEXT:    [[D_0_I:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[E_0_I:%.*]], %[[FOR_COND_I]] ]
+; CHECK-NEXT:    [[E_0_I]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC_I:%.*]], %[[FOR_COND_I]] ]
+; CHECK-NEXT:    [[INC_I]] = add i32 [[E_0_I]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[E_0_I]], 43
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT_I]], label %[[F_EXIT]], label %[[FOR_COND_I]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[F_EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.cond.i
+
+for.cond.i:
+  %d.0.i = phi i32 [ 0, %entry ], [ %e.0.i, %for.cond.i ]
+  %e.0.i = phi i32 [ 0, %entry ], [ %inc.i, %for.cond.i ]
+  %inc.i = add i32 %e.0.i, 1
+  %exitcond.not.i = icmp eq i32 %e.0.i, 43
+  br i1 %exitcond.not.i, label %f.exit, label %for.cond.i
+
+f.exit:
+  ret void
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.



More information about the llvm-commits mailing list