[llvm] 255e441 - X86: Do not return invalid cost for fp16 conversion (#114128)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 29 17:16:20 PDT 2024
Author: Matthias Braun
Date: 2024-10-29T17:16:17-07:00
New Revision: 255e441613e39a391e9f85d6a605cc9e46dcf273
URL: https://github.com/llvm/llvm-project/commit/255e441613e39a391e9f85d6a605cc9e46dcf273
DIFF: https://github.com/llvm/llvm-project/commit/255e441613e39a391e9f85d6a605cc9e46dcf273.diff
LOG: X86: Do not return invalid cost for fp16 conversion (#114128)
`X86TTIImpl::getCastInstrCost` returned an invalid instruction cost for
conversions from/to fp16 when no hardware support is available, which
triggered asserts. This change instead returns a large (arbitrary)
number to model the fact that libcalls are used to implement the
conversion.
This also simplifies the code by only reporting costs for the scalar
fp16 conversion; vectorized costs are left to the fallback path, which
assumes scalarization.
This is a follow-up to the assertion issues reported for the changes in
#113195.
Added:
Modified:
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index bae223243b3dc9..520284d1d7a488 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3068,6 +3068,13 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
if (auto KindCost = Entry->Cost[CostKind])
return *KindCost;
}
+
+ if ((ISD == ISD::FP_ROUND && SimpleDstTy == MVT::f16) ||
+ (ISD == ISD::FP_EXTEND && SimpleSrcTy == MVT::f16)) {
+ // fp16 conversions not covered by any table entries require a libcall.
+ // Return a large (arbitrary) number to model this.
+ return InstructionCost(64);
+ }
}
// Fall back to legalized types.
@@ -3174,11 +3181,6 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
TTI::CastContextHint::None, CostKind);
}
- if (ISD == ISD::FP_ROUND && LTDest.second.getScalarType() == MVT::f16) {
- // Conversion requires a libcall.
- return InstructionCost::getInvalid();
- }
-
// TODO: Allow non-throughput costs that aren't binary.
auto AdjustCost = [&CostKind](InstructionCost Cost,
InstructionCost N = 1) -> InstructionCost {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll b/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll
index bcea147d724f53..f23043f0c47f4a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll
@@ -453,14 +453,9 @@ define void @fpround_v16xf32_v16xf16(ptr %s0, ptr %d0) {
;
; CHECK-F16C-LABEL: define void @fpround_v16xf32_v16xf16(
; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
-; CHECK-F16C-NEXT: [[S8:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 8
-; CHECK-F16C-NEXT: [[D8:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 8
-; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[S0]], align 4
-; CHECK-F16C-NEXT: [[TMP2:%.*]] = fptrunc <8 x float> [[TMP1]] to <8 x half>
-; CHECK-F16C-NEXT: [[TMP3:%.*]] = load <8 x float>, ptr [[S8]], align 4
-; CHECK-F16C-NEXT: [[TMP4:%.*]] = fptrunc <8 x float> [[TMP3]] to <8 x half>
-; CHECK-F16C-NEXT: store <8 x half> [[TMP2]], ptr [[D0]], align 2
-; CHECK-F16C-NEXT: store <8 x half> [[TMP4]], ptr [[D8]], align 2
+; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[S0]], align 4
+; CHECK-F16C-NEXT: [[TMP2:%.*]] = fptrunc <16 x float> [[TMP1]] to <16 x half>
+; CHECK-F16C-NEXT: store <16 x half> [[TMP2]], ptr [[D0]], align 2
; CHECK-F16C-NEXT: ret void
;
; CHECK-AVX512-LABEL: define void @fpround_v16xf32_v16xf16(
More information about the llvm-commits
mailing list