[llvm] [SelectionDAG] Add `STRICT_BF16_TO_FP` and `STRICT_FP_TO_BF16` (PR #80056)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 2 15:36:55 PST 2024
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/80056
>From a7db81478d6dba7db4f3ff90917133ea93aa0f50 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Sat, 2 Mar 2024 18:36:38 -0500
Subject: [PATCH] [SelectionDAG] Add `STRICT_BF16_TO_FP` and
`STRICT_FP_TO_BF16`
This patch adds the support for `STRICT_BF16_TO_FP` and `STRICT_FP_TO_BF16`.
Fix #78540.
---
llvm/include/llvm/CodeGen/ISDOpcodes.h | 2 +
llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 2 +
llvm/include/llvm/IR/RuntimeLibcalls.def | 2 +
.../include/llvm/Target/TargetSelectionDAG.td | 13 ++
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 37 +++-
.../SelectionDAG/LegalizeFloatTypes.cpp | 41 ++--
.../SelectionDAG/LegalizeIntegerTypes.cpp | 1 +
.../SelectionDAG/SelectionDAGDumper.cpp | 2 +
llvm/lib/CodeGen/TargetLoweringBase.cpp | 5 +
llvm/lib/Target/X86/X86ISelLowering.cpp | 9 +-
llvm/test/CodeGen/X86/bfloat-constrained.ll | 180 ++++++++++++++++++
11 files changed, 270 insertions(+), 24 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/bfloat-constrained.ll
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index ad876c5db4509a..ef0fec270a43c4 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -921,6 +921,8 @@ enum NodeType {
/// has native conversions.
BF16_TO_FP,
FP_TO_BF16,
+ STRICT_BF16_TO_FP,
+ STRICT_FP_TO_BF16,
/// Perform various unary floating-point operations inspired by libm. For
/// FPOWI, the result is undefined if the integer operand doesn't fit into
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 3130f6c4dce598..d1015630b05d12 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -698,6 +698,8 @@ END_TWO_BYTE_PACK()
return false;
case ISD::STRICT_FP16_TO_FP:
case ISD::STRICT_FP_TO_FP16:
+ case ISD::STRICT_BF16_TO_FP:
+ case ISD::STRICT_FP_TO_BF16:
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 19dea60bebf9be..353342203a3919 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -304,6 +304,8 @@ HANDLE_LIBCALL(FEGETMODE, "fegetmode")
HANDLE_LIBCALL(FESETMODE, "fesetmode")
// Conversion
+HANDLE_LIBCALL(FPEXT_BF16_F32, "__extendbfsf2")
+HANDLE_LIBCALL(FPEXT_BF16_F64, "__extendbfdf2")
HANDLE_LIBCALL(FPEXT_F32_PPCF128, "__gcc_stoq")
HANDLE_LIBCALL(FPEXT_F64_PPCF128, "__gcc_dtoq")
HANDLE_LIBCALL(FPEXT_F80_F128, "__extendxftf2")
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 5f8bf0d448105d..d84c2d30e44726 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -541,6 +541,8 @@ def fp_to_sint_sat : SDNode<"ISD::FP_TO_SINT_SAT" , SDTFPToIntSatOp>;
def fp_to_uint_sat : SDNode<"ISD::FP_TO_UINT_SAT" , SDTFPToIntSatOp>;
def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;
+def bf16_to_fp : SDNode<"ISD::BF16_TO_FP" , SDTIntToFPOp>;
+def fp_to_bf16 : SDNode<"ISD::FP_TO_BF16" , SDTFPToIntOp>;
def strict_fadd : SDNode<"ISD::STRICT_FADD",
SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>;
@@ -620,6 +622,11 @@ def strict_f16_to_fp : SDNode<"ISD::STRICT_FP16_TO_FP",
def strict_fp_to_f16 : SDNode<"ISD::STRICT_FP_TO_FP16",
SDTFPToIntOp, [SDNPHasChain]>;
+def strict_bf16_to_fp : SDNode<"ISD::STRICT_BF16_TO_FP",
+ SDTIntToFPOp, [SDNPHasChain]>;
+def strict_fp_to_bf16 : SDNode<"ISD::STRICT_FP_TO_BF16",
+ SDTFPToIntOp, [SDNPHasChain]>;
+
def strict_fsetcc : SDNode<"ISD::STRICT_FSETCC", SDTSetCC, [SDNPHasChain]>;
def strict_fsetccs : SDNode<"ISD::STRICT_FSETCCS", SDTSetCC, [SDNPHasChain]>;
@@ -1591,6 +1598,12 @@ def any_f16_to_fp : PatFrags<(ops node:$src),
def any_fp_to_f16 : PatFrags<(ops node:$src),
[(fp_to_f16 node:$src),
(strict_fp_to_f16 node:$src)]>;
+def any_bf16_to_fp : PatFrags<(ops node:$src),
+ [(bf16_to_fp node:$src),
+ (strict_bf16_to_fp node:$src)]>;
+def any_fp_to_bf16 : PatFrags<(ops node:$src),
+ [(fp_to_bf16 node:$src),
+ (strict_fp_to_bf16 node:$src)]>;
multiclass binary_atomic_op_ord {
def NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val),
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f5b7752f7ecc8e..d3dbb9f224bce0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1047,6 +1047,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Node->getOperand(0).getValueType());
break;
case ISD::STRICT_FP_TO_FP16:
+ case ISD::STRICT_FP_TO_BF16:
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::STRICT_LRINT:
@@ -3286,6 +3287,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Op);
break;
}
+ case ISD::STRICT_FP_TO_BF16:
+ // We don't support this expansion for now.
+ break;
case ISD::FP_TO_BF16: {
SDValue Op = Node->getOperand(0);
if (Op.getValueType() != MVT::f32)
@@ -3645,14 +3649,14 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
}
break;
+ case ISD::STRICT_BF16_TO_FP:
case ISD::STRICT_FP16_TO_FP:
if (Node->getValueType(0) != MVT::f32) {
// We can extend to types bigger than f32 in two steps without changing
// the result. Since "f16 -> f32" is much more commonly available, give
// CodeGen the option of emitting that before resorting to a libcall.
- SDValue Res =
- DAG.getNode(ISD::STRICT_FP16_TO_FP, dl, {MVT::f32, MVT::Other},
- {Node->getOperand(0), Node->getOperand(1)});
+ SDValue Res = DAG.getNode(Node->getOpcode(), dl, {MVT::f32, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
Res = DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
{Node->getValueType(0), MVT::Other},
{Res.getValue(1), Res});
@@ -4651,6 +4655,16 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false).first);
}
break;
+ case ISD::STRICT_BF16_TO_FP:
+ if (Node->getValueType(0) == MVT::f32) {
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(
+ DAG, RTLIB::FPEXT_BF16_F32, MVT::f32, Node->getOperand(1),
+ CallOptions, SDLoc(Node), Node->getOperand(0));
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ }
+ break;
case ISD::STRICT_FP16_TO_FP: {
if (Node->getValueType(0) == MVT::f32) {
TargetLowering::MakeLibCallOptions CallOptions;
@@ -4792,12 +4806,17 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
break;
}
case ISD::STRICT_FP_EXTEND:
- case ISD::STRICT_FP_TO_FP16: {
- RTLIB::Libcall LC =
- Node->getOpcode() == ISD::STRICT_FP_TO_FP16
- ? RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16)
- : RTLIB::getFPEXT(Node->getOperand(1).getValueType(),
- Node->getValueType(0));
+ case ISD::STRICT_FP_TO_FP16:
+ case ISD::STRICT_FP_TO_BF16: {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (Node->getOpcode() == ISD::STRICT_FP_TO_FP16)
+ LC = RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16);
+ else if (Node->getOpcode() == ISD::STRICT_FP_TO_BF16)
+ LC = RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::bf16);
+ else
+ LC = RTLIB::getFPEXT(Node->getOperand(1).getValueType(),
+ Node->getValueType(0));
+
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall");
TargetLowering::MakeLibCallOptions CallOptions;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index f0a04589fbfdc2..3332c02ec72358 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -918,6 +918,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_FP_TO_FP16:
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
case ISD::FP_TO_BF16:
+ case ISD::STRICT_FP_TO_BF16:
case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
case ISD::STRICT_FP_TO_SINT:
@@ -970,6 +971,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 ||
N->getOpcode() == ISD::STRICT_FP_TO_FP16 ||
N->getOpcode() == ISD::FP_TO_BF16 ||
+ N->getOpcode() == ISD::STRICT_FP_TO_BF16 ||
N->getOpcode() == ISD::STRICT_FP_ROUND);
bool IsStrict = N->isStrictFPOpcode();
@@ -980,7 +982,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
if (N->getOpcode() == ISD::FP_TO_FP16 ||
N->getOpcode() == ISD::STRICT_FP_TO_FP16)
FloatRVT = MVT::f16;
- else if (N->getOpcode() == ISD::FP_TO_BF16)
+ else if (N->getOpcode() == ISD::FP_TO_BF16 ||
+ N->getOpcode() == ISD::STRICT_FP_TO_BF16)
FloatRVT = MVT::bf16;
RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
@@ -2193,13 +2196,11 @@ static ISD::NodeType GetPromotionOpcodeStrict(EVT OpVT, EVT RetVT) {
if (RetVT == MVT::f16)
return ISD::STRICT_FP_TO_FP16;
- if (OpVT == MVT::bf16) {
- // TODO: return ISD::STRICT_BF16_TO_FP;
- }
+ if (OpVT == MVT::bf16)
+ return ISD::STRICT_BF16_TO_FP;
- if (RetVT == MVT::bf16) {
- // TODO: return ISD::STRICT_FP_TO_BF16;
- }
+ if (RetVT == MVT::bf16)
+ return ISD::STRICT_FP_TO_BF16;
report_fatal_error("Attempt at an invalid promotion-related conversion");
}
@@ -2999,10 +3000,16 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
EVT SVT = N->getOperand(0).getValueType();
if (N->isStrictFPOpcode()) {
- assert(RVT == MVT::f16);
- SDValue Res =
- DAG.getNode(ISD::STRICT_FP_TO_FP16, SDLoc(N), {MVT::i16, MVT::Other},
- {N->getOperand(0), N->getOperand(1)});
+ // FIXME: assume we only have two f16 variants for now.
+ unsigned Opcode;
+ if (RVT == MVT::f16)
+ Opcode = ISD::STRICT_FP_TO_FP16;
+ else if (RVT == MVT::bf16)
+ Opcode = ISD::STRICT_FP_TO_BF16;
+ else
+ llvm_unreachable("unknown half type");
+ SDValue Res = DAG.getNode(Opcode, SDLoc(N), {MVT::i16, MVT::Other},
+ {N->getOperand(0), N->getOperand(1)});
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
@@ -3192,10 +3199,16 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 1 : 0));
if (IsStrict) {
- assert(SVT == MVT::f16);
+ unsigned Opcode;
+ if (SVT == MVT::f16)
+ Opcode = ISD::STRICT_FP16_TO_FP;
+ else if (SVT == MVT::bf16)
+ Opcode = ISD::STRICT_BF16_TO_FP;
+ else
+ llvm_unreachable("unknown half type");
SDValue Res =
- DAG.getNode(ISD::STRICT_FP16_TO_FP, SDLoc(N),
- {N->getValueType(0), MVT::Other}, {N->getOperand(0), Op});
+ DAG.getNode(Opcode, SDLoc(N), {N->getValueType(0), MVT::Other},
+ {N->getOperand(0), Op});
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
ReplaceValueWith(SDValue(N, 0), Res);
return SDValue();
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 6e55acd22bb37e..909c669abd120b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -165,6 +165,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_FP16:
Res = PromoteIntRes_FP_TO_FP16_BF16(N);
break;
+ case ISD::STRICT_FP_TO_BF16:
case ISD::STRICT_FP_TO_FP16:
Res = PromoteIntRes_STRICT_FP_TO_FP16_BF16(N);
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 0fbd999694f104..18ca17e53dac38 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -380,7 +380,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FP_TO_FP16: return "fp_to_fp16";
case ISD::STRICT_FP_TO_FP16: return "strict_fp_to_fp16";
case ISD::BF16_TO_FP: return "bf16_to_fp";
+ case ISD::STRICT_BF16_TO_FP: return "strict_bf16_to_fp";
case ISD::FP_TO_BF16: return "fp_to_bf16";
+ case ISD::STRICT_FP_TO_BF16: return "strict_fp_to_bf16";
case ISD::LROUND: return "lround";
case ISD::STRICT_LROUND: return "strict_lround";
case ISD::LLROUND: return "llround";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 646c0c345e54e0..99acfd3bafa661 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -307,6 +307,11 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
} else if (OpVT == MVT::f80) {
if (RetVT == MVT::f128)
return FPEXT_F80_F128;
+ } else if (OpVT == MVT::bf16) {
+ if (RetVT == MVT::f32)
+ return FPEXT_BF16_F32;
+ if (RetVT == MVT::f64)
+ return FPEXT_BF16_F64;
}
return UNKNOWN_LIBCALL;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6de0ae8a206482..e3323d918c700e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -424,7 +424,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16,
- ISD::STRICT_FP_TO_FP16}) {
+ ISD::STRICT_FP_TO_FP16, ISD::STRICT_FP_TO_BF16}) {
// Special handling for half-precision floating point conversions.
// If we don't have F16C support, then lower half float conversions
// into library calls.
@@ -437,6 +437,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(Op, MVT::f128, Expand);
}
+ // FIXME: If a target has F16C, it needs to be Custom.
+ setOperationAction(ISD::STRICT_FP_TO_BF16, MVT::f32, Expand);
+ setOperationAction(ISD::STRICT_FP_TO_BF16, MVT::f64, Expand);
+
+ setOperationAction(ISD::STRICT_BF16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::STRICT_BF16_TO_FP, MVT::f64, Expand);
+
for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
diff --git a/llvm/test/CodeGen/X86/bfloat-constrained.ll b/llvm/test/CodeGen/X86/bfloat-constrained.ll
new file mode 100644
index 00000000000000..7ff86e979f1852
--- /dev/null
+++ b/llvm/test/CodeGen/X86/bfloat-constrained.ll
@@ -0,0 +1,180 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefix=X64
+
+ at a = global bfloat 0xR0000, align 2
+ at b = global bfloat 0xR0000, align 2
+ at c = global bfloat 0xR0000, align 2
+
+define float @bfloat_to_float() strictfp {
+; X32-LABEL: bfloat_to_float:
+; X32: # %bb.0:
+; X32-NEXT: subl $12, %esp
+; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: movzwl a, %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll __extendbfsf2
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: retl
+;
+; X64-LABEL: bfloat_to_float:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: movq a at GOTPCREL(%rip), %rax
+; X64-NEXT: movzwl (%rax), %edi
+; X64-NEXT: callq __extendbfsf2 at PLT
+; X64-NEXT: popq %rax
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+ %1 = load bfloat, ptr @a, align 2
+ %2 = tail call float @llvm.experimental.constrained.fpext.f32.bfloat(bfloat %1, metadata !"fpexcept.strict") #0
+ ret float %2
+}
+
+define double @bfloat_to_double() strictfp {
+; X32-LABEL: bfloat_to_double:
+; X32: # %bb.0:
+; X32-NEXT: subl $12, %esp
+; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: movzwl a, %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll __extendbfsf2
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: retl
+;
+; X64-LABEL: bfloat_to_double:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: movq a at GOTPCREL(%rip), %rax
+; X64-NEXT: movzwl (%rax), %edi
+; X64-NEXT: callq __extendbfsf2 at PLT
+; X64-NEXT: cvtss2sd %xmm0, %xmm0
+; X64-NEXT: popq %rax
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+ %1 = load bfloat, ptr @a, align 2
+ %2 = tail call double @llvm.experimental.constrained.fpext.f64.bfloat(bfloat %1, metadata !"fpexcept.strict") #0
+ ret double %2
+}
+
+define void @float_to_bfloat(float %0) strictfp {
+; X32-LABEL: float_to_bfloat:
+; X32: # %bb.0:
+; X32-NEXT: subl $12, %esp
+; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NEXT: fstps (%esp)
+; X32-NEXT: wait
+; X32-NEXT: calll __truncsfbf2
+; X32-NEXT: movw %ax, a
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: retl
+;
+; X64-LABEL: float_to_bfloat:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: callq __truncsfbf2 at PLT
+; X64-NEXT: movq a at GOTPCREL(%rip), %rcx
+; X64-NEXT: movw %ax, (%rcx)
+; X64-NEXT: popq %rax
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+ %2 = tail call bfloat @llvm.experimental.constrained.fptrunc.bfloat.f32(float %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ store bfloat %2, ptr @a, align 2
+ ret void
+}
+
+define void @double_to_bfloat(double %0) strictfp {
+; X32-LABEL: double_to_bfloat:
+; X32: # %bb.0:
+; X32-NEXT: subl $12, %esp
+; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: fldl {{[0-9]+}}(%esp)
+; X32-NEXT: fstpl (%esp)
+; X32-NEXT: wait
+; X32-NEXT: calll __truncdfbf2
+; X32-NEXT: movw %ax, a
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: retl
+;
+; X64-LABEL: double_to_bfloat:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: callq __truncdfbf2 at PLT
+; X64-NEXT: movq a at GOTPCREL(%rip), %rcx
+; X64-NEXT: movw %ax, (%rcx)
+; X64-NEXT: popq %rax
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+ %2 = tail call bfloat @llvm.experimental.constrained.fptrunc.bfloat.f64(double %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ store bfloat %2, ptr @a, align 2
+ ret void
+}
+
+define void @add() strictfp {
+; X32-LABEL: add:
+; X32: # %bb.0:
+; X32-NEXT: subl $12, %esp
+; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: movzwl a, %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll __extendbfsf2
+; X32-NEXT: fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X32-NEXT: wait
+; X32-NEXT: movzwl b, %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll __extendbfsf2
+; X32-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X32-NEXT: faddp %st, %st(1)
+; X32-NEXT: fstps (%esp)
+; X32-NEXT: wait
+; X32-NEXT: calll __truncsfbf2
+; X32-NEXT: movw %ax, c
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: retl
+;
+; X64-LABEL: add:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: movq a at GOTPCREL(%rip), %rax
+; X64-NEXT: movzwl (%rax), %edi
+; X64-NEXT: callq __extendbfsf2 at PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movq b at GOTPCREL(%rip), %rax
+; X64-NEXT: movzwl (%rax), %edi
+; X64-NEXT: callq __extendbfsf2 at PLT
+; X64-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64-NEXT: callq __truncsfbf2 at PLT
+; X64-NEXT: movq c at GOTPCREL(%rip), %rcx
+; X64-NEXT: movw %ax, (%rcx)
+; X64-NEXT: popq %rax
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+ %1 = load bfloat, ptr @a, align 2
+ %2 = tail call float @llvm.experimental.constrained.fpext.f32.bfloat(bfloat %1, metadata !"fpexcept.strict") #0
+ %3 = load bfloat, ptr @b, align 2
+ %4 = tail call float @llvm.experimental.constrained.fpext.f32.bfloat(bfloat %3, metadata !"fpexcept.strict") #0
+ %5 = tail call float @llvm.experimental.constrained.fadd.f32(float %2, float %4, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ %6 = tail call bfloat @llvm.experimental.constrained.fptrunc.bfloat.f32(float %5, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ store bfloat %6, ptr @c, align 2
+ ret void
+}
+
+declare float @llvm.experimental.constrained.fpext.f32.bfloat(bfloat, metadata)
+declare double @llvm.experimental.constrained.fpext.f64.bfloat(bfloat, metadata)
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare bfloat @llvm.experimental.constrained.fptrunc.bfloat.f32(float, metadata, metadata)
+declare bfloat @llvm.experimental.constrained.fptrunc.bfloat.f64(double, metadata, metadata)
+
+attributes #0 = { strictfp }
+
More information about the llvm-commits
mailing list