[llvm] eef5ea0 - [VPlan] Account for dead FOR splice simplification in cost model (#131486)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 17 09:00:58 PDT 2025
Author: Luke Lau
Date: 2025-03-18T00:00:54+08:00
New Revision: eef5ea0c42fc07ef2c948be59b57d0df8ec801ca
URL: https://github.com/llvm/llvm-project/commit/eef5ea0c42fc07ef2c948be59b57d0df8ec801ca
DIFF: https://github.com/llvm/llvm-project/commit/eef5ea0c42fc07ef2c948be59b57d0df8ec801ca.diff
LOG: [VPlan] Account for dead FOR splice simplification in cost model (#131486)
Fixes #131359
After #129645, a first-order recurrence will no longer have it's splice
costed if the VPInstruction::FirstOrderRecurrenceSplice has no users and
is dead.
The legacy cost model didn't account for this, so this accounts for it
in planContainsAdditionalSimplifications to avoid the "VPlan cost model
and legacy cost model disagreed" assertion.
Added:
llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b26b36f91a393..0e38ccc565ea2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7467,6 +7467,16 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
}
continue;
}
+ // Unused FOR splices are removed by VPlan transforms, so the VPlan-based
+ // cost model won't cost it whilst the legacy will.
+ if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R)) {
+ if (none_of(FOR->users(), [](VPUser *U) {
+ auto *VPI = dyn_cast<VPInstruction>(U);
+ return VPI && VPI->getOpcode() ==
+ VPInstruction::FirstOrderRecurrenceSplice;
+ }))
+ return true;
+ }
// The VPlan-based cost model is more accurate for partial reduction and
// comparing against the legacy cost isn't desirable.
if (isa<VPPartialReductionRecipe>(&R))
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll b/llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll
new file mode 100644
index 0000000000000..bcfa212cf3644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -p loop-vectorize -S %s | FileCheck %s
+
+; If a FOR isn't used the VPInstruction::FirstOrderRecurrenceSplice will be dead
+; and won't be costed in the VPlan cost model. Make sure we account for this
+; simplifcation in comparison to the legacy cost model.
+
+target triple = "x86_64"
+
+define void @no_use() {
+; CHECK-LABEL: define void @no_use() {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, %[[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
+; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3
+; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 40, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[E_0_I:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[E_0_I]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC_I:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[INC_I]] = add i32 [[E_0_I]], 1
+; CHECK-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[E_0_I]], 43
+; CHECK-NEXT: br i1 [[EXITCOND_NOT_I]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %for = phi i32 [ 0, %entry ], [ %iv, %loop ]
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.next = add i32 %iv, 1
+ %exitcond.not.i = icmp eq i32 %iv, 43
+ br i1 %exitcond.not.i, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @dead_use() {
+; CHECK-LABEL: define void @dead_use() {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, %[[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
+; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3
+; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 40, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[D_0_I:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[E_0_I:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[E_0_I]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC_I:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[DEAD:%.*]] = add i32 [[D_0_I]], 1
+; CHECK-NEXT: [[INC_I]] = add i32 [[E_0_I]], 1
+; CHECK-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[E_0_I]], 43
+; CHECK-NEXT: br i1 [[EXITCOND_NOT_I]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %for = phi i32 [ 0, %entry ], [ %iv, %loop ]
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %dead = add i32 %for, 1
+ %iv.next = add i32 %iv, 1
+ %exitcond.not.i = icmp eq i32 %iv, 43
+ br i1 %exitcond.not.i, label %exit, label %loop
+
+exit:
+ ret void
+}
More information about the llvm-commits
mailing list