[llvm] 3200385 - [VPlan] Return cost of PHI for scalar VFs in computeCost for FORs.

Thu Nov 21 13:11:43 PST 2024

Author: Florian Hahn
Date: 2024-11-21T21:11:21Z
New Revision: 320038579d3c23b78f99618b71640f51423fe321

URL: https://github.com/llvm/llvm-project/commit/320038579d3c23b78f99618b71640f51423fe321
DIFF: https://github.com/llvm/llvm-project/commit/320038579d3c23b78f99618b71640f51423fe321.diff

LOG: [VPlan] Return cost of PHI for scalar VFs in computeCost for FORs.

This fixes a crash when the VF is scalar.

Fixes https://github.com/llvm/llvm-project/issues/116375.

Added: 
    llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll

Modified: 
    llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 71aca3be9e5dcb..24cf4666c62ce3 100644

--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3317,6 +3317,10 @@ void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
 InstructionCost
 VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
                                              VPCostContext &Ctx) const {
+  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+  if (VF.isScalar())
+    return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
+
   if (VF.isScalable() && VF.getKnownMinValue() == 1)
     return InstructionCost::getInvalid();
 
@@ -3325,7 +3329,6 @@ VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
   Type *VectorTy =
       ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
 
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Splice,
                                 cast<VectorType>(VectorTy), Mask, CostKind,
                                 VF.getKnownMinValue() - 1);

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll
new file mode 100644
index 00000000000000..757d9e75a339b2
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes="loop-vectorize" -pass-remarks-output=%t.yaml -S %s | FileCheck %s
+; RUN: FileCheck --input-file=%t.yaml --check-prefix=REMARKS %s
+
+; REMARKS: the cost-model indicates that vectorization is not beneficial
+
+; Test for https://github.com/llvm/llvm-project/issues/116375.
+define void @test_i24_load_for(ptr noalias %src, ptr %dst) {
+; CHECK-LABEL: define void @test_i24_load_for(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[FOR:%.*]] = phi i24 [ 0, %[[ENTRY]] ], [ [[FOR_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
+; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr inbounds i24, ptr [[SRC]], i16 [[IV]]
+; CHECK-NEXT:    [[FOR_NEXT]] = load i24, ptr [[GEP_SRC]], align 1
+; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr inbounds i24, ptr [[DST]], i16 [[IV]]
+; CHECK-NEXT:    store i24 [[FOR]], ptr [[GEP_DST]], align 4
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ]
+  %for = phi i24 [ 0, %entry ], [ %for.next, %loop ]
+  %iv.next = add i16 %iv, 1
+  %gep.src = getelementptr inbounds i24, ptr %src, i16 %iv
+  %for.next = load i24, ptr %gep.src, align 1
+  %gep.dst = getelementptr inbounds i24, ptr %dst, i16 %iv
+  store i24 %for, ptr %gep.dst
+  %ec = icmp eq i16 %iv.next, 1000
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}