[llvm] e724226 - [VPlan] Return cost of 0 for VPWidenCastRecipe without underlying value.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 25 12:33:06 PDT 2024
Author: Florian Hahn
Date: 2024-10-25T21:25:44+02:00
New Revision: e724226da753f10fd36fbb0ea392f04ab0fdbdab
URL: https://github.com/llvm/llvm-project/commit/e724226da753f10fd36fbb0ea392f04ab0fdbdab
DIFF: https://github.com/llvm/llvm-project/commit/e724226da753f10fd36fbb0ea392f04ab0fdbdab.diff
LOG: [VPlan] Return cost of 0 for VPWidenCastRecipe without underlying value.
In some cases, VPWidenCastRecipes are created but not considered in the
legacy cost model, including truncates/extends when evaluating a reduction
in a smaller type. Return 0 for such casts for now, to avoid divergences
between VPlan and legacy cost models.
Fixes https://github.com/llvm/llvm-project/issues/113526.
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 0eb4f7c7c88cee..2080b77157b6ca 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1524,6 +1524,11 @@ void VPWidenCastRecipe::execute(VPTransformState &State) {
InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
+ // TODO: In some cases, VPWidenCastRecipes are created but not considered in
+ // the legacy cost model, including truncates/extends when evaluating a
+ // reduction in a smaller type.
+ if (!getUnderlyingValue())
+ return 0;
// Computes the CastContextHint from a recipes that may access memory.
auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
if (VF.isScalar())
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index 73647919aac360..29e54fabad0c1b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -1037,6 +1037,71 @@ exit:
ret i64 %red.mul
}
+; Test case for https://github.com/llvm/llvm-project/issues/113526.
+define i32 @narrowed_reduction(ptr %a, i1 %cmp) #0 {
+; CHECK-LABEL: @narrowed_reduction(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP:%.*]] to i32
+; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = and <16 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i32> [[VEC_PHI1]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = or <16 x i32> [[TMP0]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP1]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP6]] = zext <16 x i1> [[TMP4]] to <16 x i32>
+; CHECK-NEXT: [[TMP7]] = zext <16 x i1> [[TMP5]] to <16 x i32>
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i32> [[TMP7]] to <16 x i1>
+; CHECK-NEXT: [[BIN_RDX:%.*]] = or <16 x i1> [[TMP10]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[BIN_RDX]])
+; CHECK-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
+; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OR13:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[OR13]], 1
+; CHECK-NEXT: [[OR]] = or i32 [[AND]], [[CONV]]
+; CHECK-NEXT: [[INC]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 0
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[LOOP]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[OR_LCSSA]]
+;
+entry:
+ %conv = zext i1 %cmp to i32
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 1, %entry ], [ %inc, %loop ]
+ %or13 = phi i32 [ 0, %entry ], [ %or, %loop ]
+ %and = and i32 %or13, 1
+ %or = or i32 %and, %conv
+ %inc = add i32 %iv, 1
+ %ec = icmp eq i32 %iv, 0
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret i32 %or
+}
+
declare void @llvm.assume(i1 noundef) #0
attributes #0 = { "target-cpu"="penryn" }
More information about the llvm-commits
mailing list