[llvm] dd94537 - [LV] Update call widening decision when scalarizing calls.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 3 06:13:08 PDT 2024
Author: Florian Hahn
Date: 2024-09-03T14:12:41+01:00
New Revision: dd94537b40cfb8c480df27c08fc715ce91ba4089
URL: https://github.com/llvm/llvm-project/commit/dd94537b40cfb8c480df27c08fc715ce91ba4089
DIFF: https://github.com/llvm/llvm-project/commit/dd94537b40cfb8c480df27c08fc715ce91ba4089.diff
LOG: [LV] Update call widening decision when scalarizing calls.
collectInstsToScalarize may decide to scalarize a call. If so, we have
to update the widening decision for the call; otherwise the call won't
be scalarized as expected during VPlan construction.
This issue was uncovered by f82543d509.
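To make the mechanics concrete, below is a minimal standalone sketch of
the pattern the patch follows. The types here (CallId, VF, Kind,
Decision) are simplified stand-ins, not LLVM's real API; the actual code
keys CallWideningDecisions by a {CallInst *, ElementCount} pair and
stores a richer per-call decision record.

    // Standalone model of the fix: once the predicated-instruction
    // discount logic commits to scalarizing a call, the previously
    // recorded widening decision must be rewritten so later phases see
    // "scalarize" and the scalar cost. All names are hypothetical.
    #include <iostream>
    #include <map>
    #include <utility>

    enum class Kind { Widen, Scalarize };

    struct Decision {
      Kind K;
      unsigned Cost;
    };

    using CallId = unsigned;
    using VF = unsigned;

    int main() {
      // Widening decisions recorded earlier, keyed by (call, VF).
      std::map<std::pair<CallId, VF>, Decision> CallWideningDecisions = {
          {{/*call*/ 1, /*VF*/ 4}, {Kind::Widen, /*vector cost*/ 10}}};

      // Scalar costs computed for instructions we decided to scalarize;
      // entry 1 is the call above, entry 2 some non-call instruction.
      std::map<CallId, unsigned> ScalarCosts = {{1, 6}, {2, 3}};
      VF CurVF = 4;

      // Mirror of the committed loop: for each scalarized instruction
      // that is a call with a recorded decision, flip the decision to
      // Scalarize and substitute the computed scalar cost.
      for (const auto &[Id, Cost] : ScalarCosts) {
        auto It = CallWideningDecisions.find({Id, CurVF});
        if (It == CallWideningDecisions.end())
          continue; // not a call we recorded a decision for
        It->second = {Kind::Scalarize, Cost};
      }

      const Decision &D = CallWideningDecisions.at({1, CurVF});
      std::cout << "call 1 at VF=4: "
                << (D.K == Kind::Scalarize ? "Scalarize" : "Widen")
                << ", cost " << D.Cost << "\n";
    }

The point is the same as in the diff below: without the update, VPlan
construction would still read the stale CM_Widen-style decision and the
call would not be scalarized as the cost model intended.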
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a64848c1fdcebd..17050b2b433caa 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5386,8 +5386,18 @@ void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) {
// 3. Emulated masked memrefs, if a hacked cost is needed.
if (!isScalarAfterVectorization(&I, VF) && !VF.isScalable() &&
!useEmulatedMaskMemRefHack(&I, VF) &&
- computePredInstDiscount(&I, ScalarCosts, VF) >= 0)
+ computePredInstDiscount(&I, ScalarCosts, VF) >= 0) {
ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end());
+ // Check if we decided to scalarize a call. If so, update the widening
+ // decision of the call to CM_Scalarize with the computed scalar cost.
+ for (const auto &[I, _] : ScalarCosts) {
+ auto *CI = dyn_cast<CallInst>(I);
+ if (!CI || !CallWideningDecisions.contains({CI, VF}))
+ continue;
+ CallWideningDecisions[{CI, VF}].Kind = CM_Scalarize;
+ CallWideningDecisions[{CI, VF}].Cost = ScalarCosts[CI];
+ }
+ }
// Remember that BB will remain after vectorization.
PredicatedBBsAfterVectorization[VF].insert(BB);
for (auto *Pred : predecessors(BB)) {
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
index 0c3a8d342af1a8..a3f9459f2fc67e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
@@ -126,6 +126,59 @@ exit:
ret void
}
+define void @call_scalarized(ptr noalias %src, ptr noalias %dst, double %0) {
+; CHECK-LABEL: define void @call_scalarized(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], double [[TMP0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 100, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[IV_NEXT]]
+; CHECK-NEXT: [[L:%.*]] = load double, ptr [[GEP_SRC]], align 8
+; CHECK-NEXT: [[CMP295:%.*]] = fcmp ugt double [[TMP0]], 0.000000e+00
+; CHECK-NEXT: [[CMP299:%.*]] = fcmp ugt double [[L]], 0.000000e+00
+; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP295]], [[CMP299]]
+; CHECK-NEXT: br i1 [[OR_COND]], label %[[LOOP_LATCH]], label %[[THEN:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[L]])
+; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV_NEXT]]
+; CHECK-NEXT: store double [[SQRT]], ptr [[GEP_DST]], align 8
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 100, %entry ], [ %iv.next, %loop.latch ]
+ %iv.next = add i64 %iv, -1
+ %gep.src = getelementptr double, ptr %src, i64 %iv.next
+ %l = load double, ptr %gep.src, align 8
+ %cmp295 = fcmp ugt double %0, 0.000000e+00
+ %cmp299 = fcmp ugt double %l, 0.000000e+00
+ %or.cond = or i1 %cmp295, %cmp299
+ br i1 %or.cond, label %loop.latch, label %then
+
+then:
+ %sqrt = call double @llvm.sqrt.f64(double %l)
+ %gep.dst = getelementptr double, ptr %dst, i64 %iv.next
+ store double %sqrt, ptr %gep.dst, align 8
+ br label %loop.latch
+
+loop.latch:
+ %tobool.not = icmp eq i64 %iv.next, 0
+ br i1 %tobool.not, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+declare double @llvm.sqrt.f64(double) #0
declare double @llvm.powi.f64.i32(double, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
;.