[llvm] X86: Do not return invalid cost for fp16 conversion (PR #114128)

Tue Oct 29 13:33:12 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-x86

Author: Matthias Braun (MatzeB)

<details>
<summary>Changes</summary>

Returning invalid instruction when converting from/to fp16 in `X86TTIImpl::getCastInstrCost` when there is no hardware support available was triggering asserts. This changes the code to return a large number instead.

---
Full diff: https://github.com/llvm/llvm-project/pull/114128.diff


2 Files Affected:

- (modified) llvm/lib/Target/X86/X86TargetTransformInfo.cpp (+7-5) 
- (modified) llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll (+3-8) 


``````````diff

diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index bae223243b3dc9..ef16636a2ea544 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3068,6 +3068,13 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
         if (auto KindCost = Entry->Cost[CostKind])
           return *KindCost;
     }
+
+    if ((ISD == ISD::FP_ROUND && SimpleDstTy == MVT::f16) ||
+        (ISD == ISD::FP_EXTEND && SimpleSrcTy == MVT::f16)) {
+      // fp16 conversions not covered yet require a libcall, return a
+      // large (arbitrary) number.
+      return InstructionCost(64);
+    }
   }
 
   // Fall back to legalized types.
@@ -3174,11 +3181,6 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
                             TTI::CastContextHint::None, CostKind);
   }
 
-  if (ISD == ISD::FP_ROUND && LTDest.second.getScalarType() == MVT::f16) {
-    // Conversion requires a libcall.
-    return InstructionCost::getInvalid();
-  }
-
   // TODO: Allow non-throughput costs that aren't binary.
   auto AdjustCost = [&CostKind](InstructionCost Cost,
                                 InstructionCost N = 1) -> InstructionCost {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll b/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll
index bcea147d724f53..f23043f0c47f4a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll
@@ -453,14 +453,9 @@ define void @fpround_v16xf32_v16xf16(ptr %s0, ptr %d0) {
 ;
 ; CHECK-F16C-LABEL: define void @fpround_v16xf32_v16xf16(
 ; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
-; CHECK-F16C-NEXT:    [[S8:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 8
-; CHECK-F16C-NEXT:    [[D8:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 8
-; CHECK-F16C-NEXT:    [[TMP1:%.*]] = load <8 x float>, ptr [[S0]], align 4
-; CHECK-F16C-NEXT:    [[TMP2:%.*]] = fptrunc <8 x float> [[TMP1]] to <8 x half>
-; CHECK-F16C-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr [[S8]], align 4
-; CHECK-F16C-NEXT:    [[TMP4:%.*]] = fptrunc <8 x float> [[TMP3]] to <8 x half>
-; CHECK-F16C-NEXT:    store <8 x half> [[TMP2]], ptr [[D0]], align 2
-; CHECK-F16C-NEXT:    store <8 x half> [[TMP4]], ptr [[D8]], align 2
+; CHECK-F16C-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr [[S0]], align 4
+; CHECK-F16C-NEXT:    [[TMP2:%.*]] = fptrunc <16 x float> [[TMP1]] to <16 x half>
+; CHECK-F16C-NEXT:    store <16 x half> [[TMP2]], ptr [[D0]], align 2
 ; CHECK-F16C-NEXT:    ret void
 ;
 ; CHECK-AVX512-LABEL: define void @fpround_v16xf32_v16xf16(

``````````

</details>


https://github.com/llvm/llvm-project/pull/114128