[llvm] 55c6008 - [SLP][AArch64] Incorrectly estimated intrinsic as a function call.
Dinar Temirbulatov via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 3 11:46:35 PST 2023
Author: Dinar Temirbulatov
Date: 2023-01-03T19:45:24Z
New Revision: 55c600819f92ed33bef868b5056b699915d645fa
URL: https://github.com/llvm/llvm-project/commit/55c600819f92ed33bef868b5056b699915d645fa
DIFF: https://github.com/llvm/llvm-project/commit/55c600819f92ed33bef868b5056b699915d645fa.diff
LOG: [SLP][AArch64] Incorrectly estimated intrinsic as a function call.
We incorrectly treat an intrinsic as a function call, and this prevents us from
taking the opportunity to vectorize. On AArch64 Cortex-A53 we think that
llvm.fmuladd.f64 is a function call, which is wrong.
Differential Revision: https://reviews.llvm.org/D140392
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/fmulladd.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f11dc59d02f4..947c4568e997 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7567,9 +7567,30 @@ InstructionCost BoUpSLP::getSpillCost() const {
continue;
}
+ auto NoCallIntrinsic = [this](Instruction *I) {
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->isAssumeLikeIntrinsic())
+ return true;
+ FastMathFlags FMF;
+ SmallVector<Type *, 4> Tys;
+ for (auto &ArgOp : II->args())
+ Tys.push_back(ArgOp->getType());
+ if (auto *FPMO = dyn_cast<FPMathOperator>(II))
+ FMF = FPMO->getFastMathFlags();
+ IntrinsicCostAttributes ICA(II->getIntrinsicID(), II->getType(), Tys,
+ FMF);
+ InstructionCost IntrCost =
+ TTI->getIntrinsicInstrCost(ICA, TTI::TCK_RecipThroughput);
+ InstructionCost CallCost = TTI->getCallInstrCost(
+ nullptr, II->getType(), Tys, TTI::TCK_RecipThroughput);
+ if (IntrCost < CallCost)
+ return true;
+ }
+ return false;
+ };
+
// Debug information does not impact spill cost.
- if ((isa<CallInst>(&*PrevInstIt) &&
- !isa<DbgInfoIntrinsic>(&*PrevInstIt)) &&
+ if (isa<CallInst>(&*PrevInstIt) && !NoCallIntrinsic(&*PrevInstIt) &&
&*PrevInstIt != PrevInst)
NumCalls++;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/fmulladd.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/fmulladd.ll
index 16df20505323..24d2ed97d0fa 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/fmulladd.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/fmulladd.ll
@@ -8,35 +8,30 @@ define void @foo(ptr %d, ptr %e) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x double], ptr [[D:%.*]], i64 2
-; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x double], ptr [[D]], i64 0, i64 3
-; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX3]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX3]], align 8
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x double], ptr [[E:%.*]], i64 3
-; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ARRAYIDX4]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = tail call double @llvm.fmuladd.f64(double [[TMP1]], double [[TMP2]], double [[TMP0]])
-; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [4 x double], ptr [[D]], i64 2, i64 1
-; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[ARRAYIDX8]], align 8
-; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [4 x double], ptr [[E]], i64 3, i64 1
-; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX12]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = tail call double @llvm.fmuladd.f64(double [[TMP1]], double [[TMP5]], double [[TMP4]])
; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [4 x double], ptr [[D]], i64 1, i64 3
-; CHECK-NEXT: [[TMP7:%.*]] = load double, ptr [[ARRAYIDX15]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX15]], align 8
; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [4 x double], ptr [[D]], i64 3, i64 2
-; CHECK-NEXT: [[TMP8:%.*]] = load double, ptr [[ARRAYIDX17]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ARRAYIDX17]], align 8
; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [4 x double], ptr [[E]], i64 2, i64 3
-; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[ARRAYIDX19]], align 8
-; CHECK-NEXT: [[TMP10:%.*]] = tail call double @llvm.fmuladd.f64(double [[TMP8]], double [[TMP9]], double [[TMP7]])
+; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr [[ARRAYIDX19]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = tail call double @llvm.fmuladd.f64(double [[TMP2]], double [[TMP3]], double [[TMP1]])
; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [4 x double], ptr [[D]], i64 3, i64 3
-; CHECK-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX21]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX21]], align 8
; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [4 x double], ptr [[E]], i64 3, i64 3
-; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[ARRAYIDX23]], align 8
-; CHECK-NEXT: [[TMP13:%.*]] = tail call double @llvm.fmuladd.f64(double [[TMP11]], double [[TMP12]], double [[TMP10]])
-; CHECK-NEXT: [[TMP14:%.*]] = load ptr, ptr @a, align 8
-; CHECK-NEXT: store double [[TMP3]], ptr [[TMP14]], align 8
-; CHECK-NEXT: [[F_SROA_4_0__SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 8
-; CHECK-NEXT: store double [[TMP6]], ptr [[F_SROA_4_0__SROA_IDX]], align 8
-; CHECK-NEXT: [[F_SROA_539_0__SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 40
-; CHECK-NEXT: store double [[TMP13]], ptr [[F_SROA_539_0__SROA_IDX]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = load double, ptr [[ARRAYIDX23]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = tail call double @llvm.fmuladd.f64(double [[TMP5]], double [[TMP6]], double [[TMP4]])
+; CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr @a, align 8
+; CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP12]], <2 x double> [[TMP10]], <2 x double> [[TMP9]])
+; CHECK-NEXT: store <2 x double> [[TMP13]], ptr [[TMP8]], align 8
+; CHECK-NEXT: [[F_SROA_539_0__SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 40
+; CHECK-NEXT: store double [[TMP7]], ptr [[F_SROA_539_0__SROA_IDX]], align 8
; CHECK-NEXT: store i32 6, ptr @b, align 4
; CHECK-NEXT: ret void
;
More information about the llvm-commits
mailing list