[llvm] 8ec7ea3 - [CostModel] make default size cost for libcalls small (again)
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 14 05:49:02 PST 2020
Author: Sanjay Patel
Date: 2020-11-14T08:15:35-05:00
New Revision: 8ec7ea3ddce7379e13e8dfb4a5260a6d2004aa1c
URL: https://github.com/llvm/llvm-project/commit/8ec7ea3ddce7379e13e8dfb4a5260a6d2004aa1c
DIFF: https://github.com/llvm/llvm-project/commit/8ec7ea3ddce7379e13e8dfb4a5260a6d2004aa1c.diff
LOG: [CostModel] make default size cost for libcalls small (again)
This was changed recently with D90554 / f7eac51b9b3f
...because we had a regression testing blindspot for intrinsics
that are expected to be lowered to libcalls.
In general, we want the *size* cost for a scalar call to be cheap
even if the other costs are expensive - we expect it to just be
a branch with some optional stack manipulation.
It is likely that we will want to carve out some
exceptions/overrides to this rule as follow-up patches for
calls that have some general and/or target-specific difference
to the expected lowering.
This was noticed as a regression in unrolling, so we have a test
for that now along with a couple of direct cost model tests.
If the assumed scalarization costs for the oversized vector
calls are not realistic, that would be another follow-up
refinement of the cost models.
Added:
Modified:
llvm/include/llvm/CodeGen/BasicTTIImpl.h
llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
llvm/test/Transforms/LoopUnroll/AArch64/thresholdO3-cost-model.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 663c9460cfba..cd9b85279c19 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1307,8 +1307,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
VecOpTy = dyn_cast<VectorType>(Tys[VecTyIndex]);
}
+ // Library call cost - other than size, make it expensive.
+ unsigned SingleCallCost = CostKind == TTI::TCK_CodeSize ? 1 : 10;
SmallVector<unsigned, 2> ISDs;
- unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
switch (IID) {
default: {
// Assume that we need to scalarize this intrinsic.
diff --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
index e8a648d4c470..66bb10e8474f 100644
--- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
@@ -98,8 +98,8 @@ define void @log2(float %a, <16 x float> %va) {
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'log2'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.log2.f32(float %a)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'log2'
diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
index b44424e46805..9cd8614764dc 100644
--- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
@@ -129,8 +129,8 @@ define void @log2(float %a, <16 x float> %va) {
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'log2'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.log2.f32(float %a)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'log2'
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/thresholdO3-cost-model.ll b/llvm/test/Transforms/LoopUnroll/AArch64/thresholdO3-cost-model.ll
index 33ca2771d286..668bc69e5a0c 100644
--- a/llvm/test/Transforms/LoopUnroll/AArch64/thresholdO3-cost-model.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/thresholdO3-cost-model.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-unroll -unroll-threshold=300 -S %s | FileCheck %s
-; This test was full unrolled and simplified at -O3 with clang 11.
+; This test was fully unrolled and simplified at -O3 with clang 11.
; Changes to the cost model may cause that decision to differ.
; We would not necessarily view the difference as a regression,
; but we should be aware that cost model changes can affect an
@@ -20,45 +20,30 @@ define i32 @tripcount_11() {
; CHECK-NEXT: do.body6.preheader:
; CHECK-NEXT: br label [[DO_BODY6:%.*]]
; CHECK: for.cond:
-; CHECK-NEXT: [[CMP5_NOT:%.*]] = icmp eq i32 [[DIV20:%.*]], 0
-; CHECK-NEXT: br i1 [[CMP5_NOT]], label [[FOR_END:%.*]], label [[DO_BODY6]]
+; CHECK-NEXT: br i1 true, label [[FOR_COND_1:%.*]], label [[IF_THEN11:%.*]]
; CHECK: do.body6:
-; CHECK-NEXT: [[I_021:%.*]] = phi i32 [ [[DIV20]], [[FOR_COND:%.*]] ], [ 1024, [[DO_BODY6_PREHEADER:%.*]] ]
-; CHECK-NEXT: [[OR_I:%.*]] = or i32 [[I_021]], 1
-; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[OR_I]], i1 true)
-; CHECK-NEXT: [[SHL_I:%.*]] = shl i32 [[I_021]], [[T0]]
-; CHECK-NEXT: [[AND_I:%.*]] = lshr i32 [[SHL_I]], 26
-; CHECK-NEXT: [[T1:%.*]] = trunc i32 [[AND_I]] to i8
-; CHECK-NEXT: [[CONV3_I:%.*]] = and i8 [[T1]], 31
-; CHECK-NEXT: [[AND4_I:%.*]] = lshr i32 [[SHL_I]], 11
-; CHECK-NEXT: [[CONV6_I:%.*]] = and i32 [[AND4_I]], 32767
-; CHECK-NEXT: [[IDXPROM_I:%.*]] = zext i8 [[CONV3_I]] to i64
-; CHECK-NEXT: [[ARRAYIDX_I7:%.*]] = getelementptr inbounds [33 x i16], [33 x i16]* @tab_log2, i64 0, i64 [[IDXPROM_I]]
-; CHECK-NEXT: [[T2:%.*]] = load i16, i16* [[ARRAYIDX_I7]], align 2
-; CHECK-NEXT: [[CONV7_I:%.*]] = zext i16 [[T2]] to i32
-; CHECK-NEXT: [[NARROW_I:%.*]] = add nuw nsw i8 [[CONV3_I]], 1
-; CHECK-NEXT: [[T3:%.*]] = zext i8 [[NARROW_I]] to i64
-; CHECK-NEXT: [[ARRAYIDX11_I:%.*]] = getelementptr inbounds [33 x i16], [33 x i16]* @tab_log2, i64 0, i64 [[T3]]
-; CHECK-NEXT: [[T4:%.*]] = load i16, i16* [[ARRAYIDX11_I]], align 2
-; CHECK-NEXT: [[CONV12_I:%.*]] = zext i16 [[T4]] to i32
-; CHECK-NEXT: [[SUB16_I:%.*]] = sub nsw i32 [[CONV12_I]], [[CONV7_I]]
-; CHECK-NEXT: [[MUL_I8:%.*]] = mul nsw i32 [[CONV6_I]], [[SUB16_I]]
-; CHECK-NEXT: [[SHR17_I:%.*]] = ashr i32 [[MUL_I8]], 15
-; CHECK-NEXT: [[CONV_I:%.*]] = shl nuw nsw i32 [[T0]], 15
-; CHECK-NEXT: [[SHL20_I:%.*]] = xor i32 [[CONV_I]], 1015808
-; CHECK-NEXT: [[ADD18_I:%.*]] = add nuw nsw i32 [[SHL20_I]], [[CONV7_I]]
-; CHECK-NEXT: [[ADD21_I:%.*]] = add nsw i32 [[ADD18_I]], [[SHR17_I]]
-; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[I_021]] to double
-; CHECK-NEXT: [[T5:%.*]] = tail call double @llvm.log2.f64(double [[CONV]])
-; CHECK-NEXT: [[CONV8:%.*]] = fptosi double [[T5]] to i32
-; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[CONV8]], 15
-; CHECK-NEXT: [[ADD:%.*]] = or i32 [[MUL]], 4
-; CHECK-NEXT: [[CMP9:%.*]] = icmp eq i32 [[ADD21_I]], [[ADD]]
-; CHECK-NEXT: [[DIV20]] = lshr i32 [[I_021]], 1
-; CHECK-NEXT: br i1 [[CMP9]], label [[FOR_COND]], label [[IF_THEN11:%.*]]
+; CHECK-NEXT: br i1 true, label [[FOR_COND:%.*]], label [[IF_THEN11]]
; CHECK: if.then11:
; CHECK-NEXT: unreachable
-; CHECK: for.end:
+; CHECK: for.cond.1:
+; CHECK-NEXT: br i1 true, label [[FOR_COND_2:%.*]], label [[IF_THEN11]]
+; CHECK: for.cond.2:
+; CHECK-NEXT: br i1 true, label [[FOR_COND_3:%.*]], label [[IF_THEN11]]
+; CHECK: for.cond.3:
+; CHECK-NEXT: br i1 true, label [[FOR_COND_4:%.*]], label [[IF_THEN11]]
+; CHECK: for.cond.4:
+; CHECK-NEXT: br i1 true, label [[FOR_COND_5:%.*]], label [[IF_THEN11]]
+; CHECK: for.cond.5:
+; CHECK-NEXT: br i1 true, label [[FOR_COND_6:%.*]], label [[IF_THEN11]]
+; CHECK: for.cond.6:
+; CHECK-NEXT: br i1 true, label [[FOR_COND_7:%.*]], label [[IF_THEN11]]
+; CHECK: for.cond.7:
+; CHECK-NEXT: br i1 true, label [[FOR_COND_8:%.*]], label [[IF_THEN11]]
+; CHECK: for.cond.8:
+; CHECK-NEXT: br i1 true, label [[FOR_COND_9:%.*]], label [[IF_THEN11]]
+; CHECK: for.cond.9:
+; CHECK-NEXT: br i1 true, label [[FOR_COND_10:%.*]], label [[IF_THEN11]]
+; CHECK: for.cond.10:
; CHECK-NEXT: ret i32 0
;
do.body6.preheader:
More information about the llvm-commits mailing list