[llvm] [VPlan] Only apply forced cost to recipes with underlying values. (PR #168372)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 21 02:17:17 PST 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/168372
From 3aecdf12b9b2a4c70f3d37bc2b7cbb2f1cfa9c4d Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 17 Nov 2025 14:06:43 +0000
Subject: [PATCH] [VPlan] Only apply forced cost to recipes with underlying values.
Only apply forced instruction costs to recipes with underlying values to
match the legacy cost model. A VPlan may have a number of additional
VPInstructions without underlying values that are not considered for its
cost, and assigning forced costs to them would incorrectly inflate its
cost.
This fixes a cost divergence between legacy and VPlan-based cost models
with forced instruction costs.
---
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 11 +-
.../AArch64/force-target-instruction-cost.ll | 118 ++++++++++++++++++
2 files changed, 126 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index cf95b4eac9d75..e91ab4fcafee4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -278,9 +278,14 @@ InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
RecipeCost = 0;
} else {
RecipeCost = computeCost(VF, Ctx);
- if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
- RecipeCost.isValid())
- RecipeCost = InstructionCost(ForceTargetInstructionCost);
+ RecipeCost = computeCost(VF, Ctx);
+ if (ForceTargetInstructionCost.getNumOccurrences() > 0 &&
+ RecipeCost.isValid()) {
+ if (UI)
+ RecipeCost = InstructionCost(ForceTargetInstructionCost);
+ else
+ RecipeCost = InstructionCost(0);
+ }
}
LLVM_DEBUG({
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
index 29bbd015eed1f..a780b6409b93e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
@@ -380,6 +380,124 @@ for.end:
ret void
}
+define void @loop_with_freeze_and_conditional_srem(ptr %dst, ptr %keyinfo, ptr %invariant.ptr, i32 %divisor) #1 {
+; COMMON-LABEL: define void @loop_with_freeze_and_conditional_srem(
+; COMMON-SAME: ptr [[DST:%.*]], ptr [[KEYINFO:%.*]], ptr [[INVARIANT_PTR:%.*]], i32 [[DIVISOR:%.*]]) {
+; COMMON-NEXT: [[ENTRY:.*:]]
+; COMMON-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
+; COMMON: [[VECTOR_MEMCHECK]]:
+; COMMON-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 4
+; COMMON-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[KEYINFO]], i64 4
+; COMMON-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[INVARIANT_PTR]], i64 4
+; COMMON-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
+; COMMON-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[KEYINFO]], [[SCEVGEP]]
+; COMMON-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; COMMON-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]]
+; COMMON-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[INVARIANT_PTR]], [[SCEVGEP]]
+; COMMON-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
+; COMMON-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
+; COMMON-NEXT: [[BOUND06:%.*]] = icmp ult ptr [[KEYINFO]], [[SCEVGEP2]]
+; COMMON-NEXT: [[BOUND17:%.*]] = icmp ult ptr [[INVARIANT_PTR]], [[SCEVGEP1]]
+; COMMON-NEXT: [[FOUND_CONFLICT8:%.*]] = and i1 [[BOUND06]], [[BOUND17]]
+; COMMON-NEXT: [[CONFLICT_RDX9:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT8]]
+; COMMON-NEXT: br i1 [[CONFLICT_RDX9]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; COMMON: [[VECTOR_PH]]:
+; COMMON-NEXT: br label %[[VECTOR_BODY:.*]]
+; COMMON: [[VECTOR_BODY]]:
+; COMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE23:.*]] ]
+; COMMON-NEXT: [[TMP0:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META16:![0-9]+]]
+; COMMON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
+; COMMON-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; COMMON-NEXT: [[TMP1:%.*]] = freeze <4 x i32> [[BROADCAST_SPLAT]]
+; COMMON-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
+; COMMON-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP2]], splat (i1 true)
+; COMMON-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
+; COMMON-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; COMMON: [[PRED_STORE_IF]]:
+; COMMON-NEXT: [[TMP5:%.*]] = srem i32 1, [[DIVISOR]]
+; COMMON-NEXT: store i32 [[TMP5]], ptr [[DST]], align 4, !alias.scope [[META19:![0-9]+]], !noalias [[META21:![0-9]+]]
+; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE]]
+; COMMON: [[PRED_STORE_CONTINUE]]:
+; COMMON-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
+; COMMON-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11:.*]]
+; COMMON: [[PRED_STORE_IF10]]:
+; COMMON-NEXT: [[TMP7:%.*]] = srem i32 1, [[DIVISOR]]
+; COMMON-NEXT: store i32 [[TMP7]], ptr [[DST]], align 4, !alias.scope [[META19]], !noalias [[META21]]
+; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE11]]
+; COMMON: [[PRED_STORE_CONTINUE11]]:
+; COMMON-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
+; COMMON-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]]
+; COMMON: [[PRED_STORE_IF12]]:
+; COMMON-NEXT: [[TMP9:%.*]] = srem i32 1, [[DIVISOR]]
+; COMMON-NEXT: store i32 [[TMP9]], ptr [[DST]], align 4, !alias.scope [[META19]], !noalias [[META21]]
+; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE13]]
+; COMMON: [[PRED_STORE_CONTINUE13]]:
+; COMMON-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
+; COMMON-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]]
+; COMMON: [[PRED_STORE_IF14]]:
+; COMMON-NEXT: [[TMP11:%.*]] = srem i32 1, [[DIVISOR]]
+; COMMON-NEXT: store i32 [[TMP11]], ptr [[DST]], align 4, !alias.scope [[META19]], !noalias [[META21]]
+; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE15]]
+; COMMON: [[PRED_STORE_CONTINUE15]]:
+; COMMON-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
+; COMMON-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]]
+; COMMON: [[PRED_STORE_IF16]]:
+; COMMON-NEXT: store i32 0, ptr [[KEYINFO]], align 4, !alias.scope [[META23:![0-9]+]], !noalias [[META16]]
+; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE17]]
+; COMMON: [[PRED_STORE_CONTINUE17]]:
+; COMMON-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
+; COMMON-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]]
+; COMMON: [[PRED_STORE_IF18]]:
+; COMMON-NEXT: store i32 0, ptr [[KEYINFO]], align 4, !alias.scope [[META23]], !noalias [[META16]]
+; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE19]]
+; COMMON: [[PRED_STORE_CONTINUE19]]:
+; COMMON-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
+; COMMON-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]]
+; COMMON: [[PRED_STORE_IF20]]:
+; COMMON-NEXT: store i32 0, ptr [[KEYINFO]], align 4, !alias.scope [[META23]], !noalias [[META16]]
+; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE21]]
+; COMMON: [[PRED_STORE_CONTINUE21]]:
+; COMMON-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
+; COMMON-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23]]
+; COMMON: [[PRED_STORE_IF22]]:
+; COMMON-NEXT: store i32 0, ptr [[KEYINFO]], align 4, !alias.scope [[META23]], !noalias [[META16]]
+; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE23]]
+; COMMON: [[PRED_STORE_CONTINUE23]]:
+; COMMON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; COMMON-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
+; COMMON-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; COMMON: [[MIDDLE_BLOCK]]:
+; COMMON-NEXT: br label %[[SCALAR_PH]]
+; COMMON: [[SCALAR_PH]]:
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop.latch, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %loaded = load i32, ptr %invariant.ptr, align 4
+ %frozen = freeze i32 %loaded
+ %cmp = icmp eq i32 %frozen, 0
+ br i1 %cmp, label %if.zero, label %if.nonzero
+
+if.zero: ; preds = %loop
+ store i32 0, ptr %keyinfo, align 4
+ br label %loop.latch
+
+if.nonzero: ; preds = %loop
+ %rem = srem i32 1, %divisor
+ store i32 %rem, ptr %dst, align 4
+ br label %loop.latch
+
+loop.latch: ; preds = %if.nonzero, %if.zero
+ %iv.next = add i64 %iv, 1
+ %exitcond = icmp eq i64 %iv, 32
+ br i1 %exitcond, label %exit, label %loop
+
+exit: ; preds = %loop.latch
+ ret void
+}
+
attributes #0 = { "target-features"="+neon,+sve" vscale_range(1,16) }
declare void @llvm.assume(i1 noundef)
More information about the llvm-commits mailing list