[llvm] a636928 - [SelectionDAG] Add expansion for llvm.convert.from.arbitrary.fp (#179318)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 01:40:53 PST 2026
Author: Dmitry Sidorov
Date: 2026-03-04T10:40:47+01:00
New Revision: a636928bb4db9d78e32af0a27e048687506af841
URL: https://github.com/llvm/llvm-project/commit/a636928bb4db9d78e32af0a27e048687506af841
DIFF: https://github.com/llvm/llvm-project/commit/a636928bb4db9d78e32af0a27e048687506af841.diff
LOG: [SelectionDAG] Add expansion for llvm.convert.from.arbitrary.fp (#179318)
The expansion converts an arbitrary FP value, represented as an integer,
using the following algorithm:
1. Extract sign, exponent, and mantissa bit fields via masks and shifts.
2. Classify the input (zero, denormal, normal, Inf, NaN) using the
exponent and mantissa fields.
3. Normal path: adjust the exponent bias and left-shift the
mantissa to fit the wider destination format.
4. Denormal path: normalize by finding the MSB position of the
mantissa (via count-leading-zeros), computing the correct exponent from
that position, stripping the leading 1 (which becomes implicit), and
shifting the fraction into the destination mantissa field.
5. Assemble the destination IEEE bit pattern (sign | exponent |
mantissa) and select among the normal, denormal, and special-value
results.
Currently only conversions from OCP floats are covered; in LLVM terms
these are: Float8E5M2, Float8E4M3FN, Float6E3M2FN, Float6E2M3FN,
Float4E2M1FN.
OCP spec:
https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
AI has assisted in X86 E2E testing.
Added:
llvm/test/CodeGen/AMDGPU/arbitrary-fp-to-float.ll
llvm/test/CodeGen/NVPTX/arbitrary-fp-to-float.ll
llvm/test/CodeGen/X86/arbitrary-fp-convert-error.ll
llvm/test/CodeGen/X86/arbitrary-fp-to-float.ll
Modified:
llvm/include/llvm/ADT/APFloat.h
llvm/include/llvm/CodeGen/ISDOpcodes.h
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
llvm/lib/CodeGen/TargetLoweringBase.cpp
llvm/lib/Support/APFloat.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index b9b942bdc7968..f1645729c115c 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -412,6 +412,10 @@ class APFloatBase {
/// format interpretation for llvm.convert.to.arbitrary.fp and
/// llvm.convert.from.arbitrary.fp intrinsics.
LLVM_ABI static bool isValidArbitraryFPFormat(StringRef Format);
+
+ /// Returns the fltSemantics for a given arbitrary FP format string,
+ /// or nullptr if invalid.
+ LLVM_ABI static const fltSemantics *getArbitraryFPSemantics(StringRef Format);
};
namespace detail {
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index b8c6788e0bc03..a846aad90bc2b 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1014,6 +1014,12 @@ enum NodeType {
STRICT_BF16_TO_FP,
STRICT_FP_TO_BF16,
+ /// CONVERT_FROM_ARBITRARY_FP - This operator converts a value in an arbitrary
+ /// floating-point format, represented as an integer, to a native FP type.
+ /// The first operand is the integer containing the source FP bits.
+ /// The second operand is a constant indicating the source FP semantics.
+ CONVERT_FROM_ARBITRARY_FP,
+
/// Perform various unary floating-point operations inspired by libm. For
/// FPOWI, the result is undefined if the integer operand doesn't fit into
/// sizeof(int).
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index eb20e7982a102..a7aefc64bd500 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3528,6 +3528,243 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Op);
break;
}
+ case ISD::CONVERT_FROM_ARBITRARY_FP: {
+ // Expand conversion from arbitrary FP format stored in an integer to a
+ // native IEEE float type using integer bit manipulation.
+ //
+ // TODO: currently only conversions from FP4, FP6 and FP8 formats from OCP
+ // specification are expanded. Remaining arbitrary FP types: Float8E4M3,
+ // Float8E3M4, Float8E5M2FNUZ, Float8E4M3FNUZ, Float8E4M3B11FNUZ,
+ // Float8E8M0FNU.
+ EVT DstVT = Node->getValueType(0);
+ // Operand 0 carries the raw source bits; operand 1 is the semantics enum.
+ SDValue IntVal = Node->getOperand(0);
+ const uint64_t SemEnum = Node->getConstantOperandVal(1);
+ const auto Sem = static_cast<APFloatBase::Semantics>(SemEnum);
+
+ // Supported source formats.
+ switch (Sem) {
+ case APFloatBase::S_Float8E5M2:
+ case APFloatBase::S_Float8E4M3FN:
+ case APFloatBase::S_Float6E3M2FN:
+ case APFloatBase::S_Float6E2M3FN:
+ case APFloatBase::S_Float4E2M1FN:
+ break;
+ default:
+ DAG.getContext()->emitError("CONVERT_FROM_ARBITRARY_FP: not implemented "
+ "source format (semantics enum " +
+ Twine(SemEnum) + ")");
+ Results.push_back(DAG.getPOISON(DstVT));
+ break;
+ }
+ if (!Results.empty())
+ break; // The break above only left the inner switch; exit the case here.
+ // Source format parameters.
+ const fltSemantics &SrcSem = APFloatBase::EnumToSemantics(Sem);
+
+ const unsigned SrcBits = APFloat::getSizeInBits(SrcSem);
+ const unsigned SrcPrecision = APFloat::semanticsPrecision(SrcSem);
+ const unsigned SrcMant = SrcPrecision - 1;
+ const unsigned SrcExp = SrcBits - SrcMant - 1;
+ const int SrcBias = 1 - APFloat::semanticsMinExponent(SrcSem);
+
+ const fltNonfiniteBehavior NFBehavior = SrcSem.nonFiniteBehavior;
+ const fltNanEncoding NanEnc = SrcSem.nanEncoding;
+
+ // Destination format parameters.
+ const fltSemantics &DstSem = DstVT.getFltSemantics();
+ // NOTE(review): widths presume sign + exp + (precision - 1) mantissa bits.
+ const unsigned DstBits = APFloat::getSizeInBits(DstSem);
+ const unsigned DstMant = APFloat::semanticsPrecision(DstSem) - 1;
+ const unsigned DstExpBits = DstBits - DstMant - 1;
+ const int DstMinExp = APFloat::semanticsMinExponent(DstSem);
+ const int DstBias = 1 - DstMinExp;
+ const uint64_t DstExpAllOnes = (1ULL << DstExpBits) - 1;
+
+ // Work in an integer type matching the destination float width.
+ // Use zero-extend to preserve the raw bit-pattern.
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), DstBits);
+ SDValue Src = DAG.getZExtOrTrunc(IntVal, dl, IntVT);
+
+ EVT SetCCVT = getSetCCResultType(IntVT);
+
+ SDValue Zero = DAG.getConstant(0, dl, IntVT);
+ SDValue One = DAG.getConstant(1, dl, IntVT);
+
+ // Extract bit fields.
+ const uint64_t MantMask = (SrcMant > 0) ? ((1ULL << SrcMant) - 1) : 0;
+ const uint64_t ExpMask = (1ULL << SrcExp) - 1;
+
+ SDValue MantField = DAG.getNode(ISD::AND, dl, IntVT, Src,
+ DAG.getConstant(MantMask, dl, IntVT));
+
+ SDValue ExpField =
+ DAG.getNode(ISD::AND, dl, IntVT,
+ DAG.getNode(ISD::SRL, dl, IntVT, Src,
+ DAG.getShiftAmountConstant(SrcMant, IntVT, dl)),
+ DAG.getConstant(ExpMask, dl, IntVT));
+
+ SDValue SignBit =
+ DAG.getNode(ISD::SRL, dl, IntVT, Src,
+ DAG.getShiftAmountConstant(SrcBits - 1, IntVT, dl));
+
+ // Precompute sign shifted to MSB of destination.
+ SDValue SignShifted =
+ DAG.getNode(ISD::SHL, dl, IntVT, SignBit,
+ DAG.getShiftAmountConstant(DstBits - 1, IntVT, dl));
+
+ // Classify the input value based on compile-time format properties.
+ SDValue ExpAllOnes = DAG.getConstant(ExpMask, dl, IntVT);
+ SDValue IsExpAllOnes =
+ DAG.getSetCC(dl, SetCCVT, ExpField, ExpAllOnes, ISD::SETEQ);
+ SDValue IsExpZero = DAG.getSetCC(dl, SetCCVT, ExpField, Zero, ISD::SETEQ);
+ SDValue IsMantZero = DAG.getSetCC(dl, SetCCVT, MantField, Zero, ISD::SETEQ);
+ SDValue IsMantNonZero =
+ DAG.getSetCC(dl, SetCCVT, MantField, Zero, ISD::SETNE);
+
+ // NaN detection.
+ SDValue IsNaN;
+ if (NFBehavior == fltNonfiniteBehavior::FiniteOnly) {
+ // FiniteOnly formats (E2M1FN, E3M2FN, E2M3FN) never produce NaN.
+ IsNaN = DAG.getBoolConstant(false, dl, SetCCVT, IntVT);
+ } else if (NFBehavior == fltNonfiniteBehavior::IEEE754) {
+ // E5M2 produces NaN when exp == all-ones AND mantissa != 0.
+ IsNaN = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantNonZero);
+ } else {
+ // NanOnly + AllOnes (E4M3FN): NaN when all exp and mantissa bits are 1.
+ assert(NanEnc == fltNanEncoding::AllOnes);
+ SDValue MantAllOnes = DAG.getConstant(MantMask, dl, IntVT);
+ SDValue IsMantAllOnes =
+ DAG.getSetCC(dl, SetCCVT, MantField, MantAllOnes, ISD::SETEQ);
+ IsNaN = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantAllOnes);
+ }
+
+ // Inf detection.
+ SDValue IsInf;
+ if (NFBehavior == fltNonfiniteBehavior::IEEE754) {
+ // E5M2: Inf when exp == all-ones AND mantissa == 0.
+ IsInf = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantZero);
+ } else {
+ // NanOnly and FiniteOnly formats have no Inf representation.
+ IsInf = DAG.getBoolConstant(false, dl, SetCCVT, IntVT);
+ }
+
+ // Zero detection.
+ SDValue IsZero = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpZero, IsMantZero);
+
+ // Denorm detection: exp == 0 AND mant != 0.
+ SDValue IsDenorm =
+ DAG.getNode(ISD::AND, dl, SetCCVT, IsExpZero, IsMantNonZero);
+
+ // Normal value conversion.
+ // dst_exp = exp_field + (DstBias - SrcBias)
+ // dst_mant = mant << (DstMant - SrcMant)
+ const int BiasAdjust = DstBias - SrcBias;
+ SDValue NormDstExp = DAG.getNode(
+ ISD::ADD, dl, IntVT, ExpField,
+ DAG.getConstant(APInt(DstBits, BiasAdjust, true), dl, IntVT));
+
+ SDValue NormDstMant;
+ if (DstMant > SrcMant) {
+ SDValue NormDstMantShift =
+ DAG.getShiftAmountConstant(DstMant - SrcMant, IntVT, dl);
+ NormDstMant =
+ DAG.getNode(ISD::SHL, dl, IntVT, MantField, NormDstMantShift);
+ } else {
+ NormDstMant = MantField;
+ }
+
+ // Assemble normal result.
+ SDValue DstMantShift = DAG.getShiftAmountConstant(DstMant, IntVT, dl);
+ SDValue NormExpShifted =
+ DAG.getNode(ISD::SHL, dl, IntVT, NormDstExp, DstMantShift);
+ SDValue NormResult = DAG.getNode(
+ ISD::OR, dl, IntVT,
+ DAG.getNode(ISD::OR, dl, IntVT, SignShifted, NormExpShifted),
+ NormDstMant);
+
+ // Denormal value conversion.
+ // For a denormal source (exp_field == 0, mant != 0), normalize by finding
+ // the MSB position of mant using CTLZ, then compute the correct
+ // exponent and mantissa for the destination format.
+ SDValue DenormResult;
+ {
+ const unsigned IntVTBits = DstBits;
+ SDValue LeadingZeros =
+ DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, IntVT, MantField);
+
+ // dst_exp_denorm = (IntVTBits + DstBias - SrcBias - SrcMant) -
+ // LeadingZeros
+ const int DenormExpConst =
+ (int)IntVTBits + DstBias - SrcBias - (int)SrcMant;
+ SDValue DenormDstExp = DAG.getNode(
+ ISD::SUB, dl, IntVT,
+ DAG.getConstant(APInt(DstBits, DenormExpConst, true), dl, IntVT),
+ LeadingZeros);
+
+ // MSB position of the mantissa (0-indexed from LSB).
+ SDValue MantMSB =
+ DAG.getNode(ISD::SUB, dl, IntVT,
+ DAG.getConstant(IntVTBits - 1, dl, IntVT), LeadingZeros);
+
+ // leading_one = 1 << MantMSB
+ SDValue LeadingOne = DAG.getNode(ISD::SHL, dl, IntVT, One, MantMSB);
+
+ // frac = mant XOR leading_one (strip the implicit 1)
+ SDValue Frac = DAG.getNode(ISD::XOR, dl, IntVT, MantField, LeadingOne);
+
+ // shift_amount = DstMant - MantMSB
+ // = DstMant - (IntVTBits - 1 - LeadingZeros)
+ // = LeadingZeros - (IntVTBits - 1 - DstMant)
+ const unsigned ShiftSub = IntVTBits - 1 - DstMant; // always >= 0
+ SDValue ShiftAmount = DAG.getNode(ISD::SUB, dl, IntVT, LeadingZeros,
+ DAG.getConstant(ShiftSub, dl, IntVT));
+
+ SDValue DenormDstMant =
+ DAG.getNode(ISD::SHL, dl, IntVT, Frac, ShiftAmount);
+
+ // Assemble denorm as sign | (denorm_dst_exp << DstMant) | denorm_dst_mant
+ SDValue DenormExpShifted =
+ DAG.getNode(ISD::SHL, dl, IntVT, DenormDstExp, DstMantShift);
+ DenormResult = DAG.getNode(
+ ISD::OR, dl, IntVT,
+ DAG.getNode(ISD::OR, dl, IntVT, SignShifted, DenormExpShifted),
+ DenormDstMant);
+ }
+
+ // Select between normal and denorm paths.
+ SDValue FiniteResult =
+ DAG.getSelect(dl, IntVT, IsDenorm, DenormResult, NormResult);
+
+ // Build special-value results.
+ // NaN -> canonical quiet NaN: sign=0, exp=all-ones, qNaN bit set.
+ // Encoding: (DstExpAllOnes << DstMant) | (1 << (DstMant - 1))
+ const uint64_t QNaNBit = (DstMant > 0) ? (1ULL << (DstMant - 1)) : 0;
+ SDValue NaNResult =
+ DAG.getConstant((DstExpAllOnes << DstMant) | QNaNBit, dl, IntVT);
+ // NOTE(review): the NaN/Inf constants are built with 64-bit shifts; a
+ // destination with DstMant >= 64 (e.g. f128) would overflow - TODO confirm.
+ // Inf -> destination Inf: sign | (DstExpAllOnes << DstMant).
+ SDValue InfResult =
+ DAG.getNode(ISD::OR, dl, IntVT, SignShifted,
+ DAG.getConstant(DstExpAllOnes << DstMant, dl, IntVT));
+
+ // Zero -> signed zero.
+ // Sign bit only.
+ SDValue ZeroResult = SignShifted;
+
+ // Final selection goes in order: NaN takes priority, then Inf, then Zero.
+ SDValue Result = FiniteResult;
+ Result = DAG.getSelect(dl, IntVT, IsZero, ZeroResult, Result);
+ Result = DAG.getSelect(dl, IntVT, IsInf, InfResult, Result);
+ Result = DAG.getSelect(dl, IntVT, IsNaN, NaNResult, Result);
+
+ // Bitcast integer result to destination float type.
+ Result = DAG.getNode(ISD::BITCAST, dl, DstVT, Result);
+
+ Results.push_back(Result);
+ break;
+ }
case ISD::FCANONICALIZE: {
// This implements llvm.canonicalize.f* by multiplication with 1.0, as
// suggested in
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index bc04a198bb85b..25f4f75eaedea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2768,6 +2768,9 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: R = SoftPromoteHalfRes_XINT_TO_FP(N); break;
+ case ISD::CONVERT_FROM_ARBITRARY_FP:
+ R = SoftPromoteHalfRes_CONVERT_FROM_ARBITRARY_FP(N);
+ break;
case ISD::POISON:
case ISD::UNDEF: R = SoftPromoteHalfRes_UNDEF(N); break;
case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
@@ -3055,6 +3058,19 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_XINT_TO_FP(SDNode *N) {
return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
}
+SDValue
+DAGTypeLegalizer::SoftPromoteHalfRes_CONVERT_FROM_ARBITRARY_FP(SDNode *N) {
+ EVT OVT = N->getValueType(0); // original result type of the node
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDLoc dl(N);
+ // Re-emit the conversion with result type NVT, keeping both operands as-is.
+ SDValue Res = DAG.getNode(ISD::CONVERT_FROM_ARBITRARY_FP, dl, NVT,
+ N->getOperand(0), N->getOperand(1));
+
+ // Round the value to the softened type; the result is returned as an i16.
+ return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
+}
+
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UNDEF(SDNode *N) {
return DAG.getUNDEF(MVT::i16);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 0d5cba405d6e3..85eb59e5449e4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2076,6 +2076,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::FP16_TO_FP:
case ISD::VP_UINT_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
+ case ISD::CONVERT_FROM_ARBITRARY_FP:
+ Res = PromoteIntOp_CONVERT_FROM_ARBITRARY_FP(N);
+ break;
case ISD::STRICT_FP16_TO_FP:
case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
@@ -2685,6 +2688,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
ZExtPromotedInteger(N->getOperand(0))), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_FROM_ARBITRARY_FP(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0)),
+ N->getOperand(1)), // operand 1 is passed through unchanged
+ 0); // NOTE(review): no ZExt here, unlike PromoteIntOp_UINT_TO_FP - confirm.
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
ZExtPromotedInteger(N->getOperand(1))), 0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index da592e3cad0f5..a8ffb66a9d911 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -397,6 +397,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue PromoteIntOp_TRUNCATE(SDNode *N);
SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_CONVERT_FROM_ARBITRARY_FP(SDNode *N);
SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
SDValue PromoteIntOp_VP_ZERO_EXTEND(SDNode *N);
SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
@@ -787,6 +788,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SoftPromoteHalfRes_FNEG(SDNode *N);
SDValue SoftPromoteHalfRes_AssertNoFPClass(SDNode *N);
SDValue SoftPromoteHalfRes_XINT_TO_FP(SDNode *N);
+ SDValue SoftPromoteHalfRes_CONVERT_FROM_ARBITRARY_FP(SDNode *N);
SDValue SoftPromoteHalfRes_UNDEF(SDNode *N);
SDValue SoftPromoteHalfRes_VECREDUCE(SDNode *N);
SDValue SoftPromoteHalfRes_VECREDUCE_SEQ(SDNode *N);
@@ -838,6 +840,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
+ SDValue ScalarizeVecRes_CONVERT_FROM_ARBITRARY_FP(SDNode *N);
SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index ab1e2a3398ce9..c00fbe79c6d64 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -460,6 +460,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::USUBO:
case ISD::SMULO:
case ISD::UMULO:
+ case ISD::CONVERT_FROM_ARBITRARY_FP:
case ISD::FCANONICALIZE:
case ISD::FFREXP:
case ISD::FMODF:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index aeb9d4d7bdc1d..564bf3b7f152e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -62,6 +62,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
+ case ISD::CONVERT_FROM_ARBITRARY_FP:
+ R = ScalarizeVecRes_CONVERT_FROM_ARBITRARY_FP(N);
+ break;
case ISD::AssertZext:
case ISD::AssertSext:
case ISD::FPOWI:
@@ -478,6 +481,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
N->getOperand(1));
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_FROM_ARBITRARY_FP(SDNode *N) {
+ SDLoc DL(N);
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op.getValueType();
+ // The result needs scalarizing, but it's not a given that the source does.
+ // See similar logic in ScalarizeVecRes_UnaryOp.
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ Op = GetScalarizedVector(Op); // source is scalarized too: use its scalar
+ } else {
+ EVT VT = OpVT.getVectorElementType();
+ Op = DAG.getExtractVectorElt(DL, VT, Op, 0); // otherwise take element 0
+ }
+ return DAG.getNode(ISD::CONVERT_FROM_ARBITRARY_FP, DL,
+ N->getValueType(0).getVectorElementType(), Op,
+ N->getOperand(1)); // operand 1 is forwarded unchanged
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N) {
SDValue Op = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op,
@@ -818,6 +838,7 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
break;
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
+ case ISD::CONVERT_FROM_ARBITRARY_FP:
Res = ScalarizeVecOp_UnaryOpWithExtraInput(N);
break;
case ISD::STRICT_SINT_TO_FP:
@@ -1382,6 +1403,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_UINT_TO_FP:
case ISD::FCANONICALIZE:
case ISD::AssertNoFPClass:
+ case ISD::CONVERT_FROM_ARBITRARY_FP:
SplitVecRes_UnaryOp(N, Lo, Hi);
break;
case ISD::ADDRSPACECAST:
@@ -2783,7 +2805,8 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
const SDNodeFlags Flags = N->getFlags();
unsigned Opcode = N->getOpcode();
if (N->getNumOperands() <= 2) {
- if (Opcode == ISD::FP_ROUND || Opcode == ISD::AssertNoFPClass) {
+ if (Opcode == ISD::FP_ROUND || Opcode == ISD::AssertNoFPClass ||
+ Opcode == ISD::CONVERT_FROM_ARBITRARY_FP) {
Lo = DAG.getNode(Opcode, dl, LoVT, Lo, N->getOperand(1), Flags);
Hi = DAG.getNode(Opcode, dl, HiVT, Hi, N->getOperand(1), Flags);
} else {
@@ -3596,7 +3619,10 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
break;
case ISD::STRICT_FP_ROUND:
case ISD::VP_FP_ROUND:
- case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
+ case ISD::FP_ROUND:
+ case ISD::CONVERT_FROM_ARBITRARY_FP:
+ Res = SplitVecOp_FP_ROUND(N);
+ break;
case ISD::FCOPYSIGN: Res = SplitVecOp_FPOpDifferentTypes(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
@@ -4732,8 +4758,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
Lo = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Lo, MaskLo, EVLLo);
Hi = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Hi, MaskHi, EVLHi);
} else {
- Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
- Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+ Lo = DAG.getNode(N->getOpcode(), DL, OutVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(N->getOpcode(), DL, OutVT, Hi, N->getOperand(1));
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
@@ -5142,6 +5168,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_UINT_TO_FP:
case ISD::ZERO_EXTEND:
case ISD::VP_ZERO_EXTEND:
+ case ISD::CONVERT_FROM_ARBITRARY_FP:
Res = WidenVecRes_Convert(N);
break;
@@ -7278,6 +7305,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::UINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::TRUNCATE:
+ case ISD::CONVERT_FROM_ARBITRARY_FP:
Res = WidenVecOp_Convert(N);
break;
@@ -7499,7 +7527,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
} else {
- if (Opcode == ISD::FP_ROUND)
+ if (Opcode == ISD::FP_ROUND || Opcode == ISD::CONVERT_FROM_ARBITRARY_FP)
Res = DAG.getNode(Opcode, dl, WideVT, InOp, N->getOperand(1));
else
Res = DAG.getNode(Opcode, dl, WideVT, InOp);
@@ -7523,9 +7551,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
ReplaceValueWith(SDValue(N, 1), NewChain);
} else {
- for (unsigned i = 0; i < NumElts; ++i)
- Ops[i] = DAG.getNode(Opcode, dl, EltVT,
- DAG.getExtractVectorElt(dl, InEltVT, InOp, i));
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Elt = DAG.getExtractVectorElt(dl, InEltVT, InOp, i);
+ if (Opcode == ISD::FP_ROUND || Opcode == ISD::CONVERT_FROM_ARBITRARY_FP)
+ Ops[i] = DAG.getNode(Opcode, dl, EltVT, Elt, N->getOperand(1));
+ else
+ Ops[i] = DAG.getNode(Opcode, dl, EltVT, Elt);
+ }
}
return DAG.getBuildVector(VT, dl, Ops);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 392e53b99c64e..3e6fdd7bbf9fe 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7148,6 +7148,31 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.getValueType(VT.getScalarType())));
return;
}
+ case Intrinsic::convert_from_arbitrary_fp: {
+ // Read the format name from argument 1 and map it to a semantics enum.
+ EVT DstVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(1))->getMetadata();
+ StringRef FormatStr = cast<MDString>(MD)->getString();
+ const fltSemantics *SrcSem =
+ APFloatBase::getArbitraryFPSemantics(FormatStr);
+ if (!SrcSem) { // nullptr: the format string was not recognized
+ DAG.getContext()->emitError(
+ "convert_from_arbitrary_fp: not implemented format '" + FormatStr +
+ "'");
+ setValue(&I, DAG.getPOISON(DstVT));
+ return;
+ }
+ APFloatBase::Semantics SemEnum = APFloatBase::SemanticsToEnum(*SrcSem);
+ // Argument 0 is the integer holding the source FP bits.
+ SDValue IntVal = getValue(I.getArgOperand(0));
+
+ // Emit the node; the semantics enum travels as an i32 target constant.
+ SDValue SemConst =
+ DAG.getTargetConstant(static_cast<int>(SemEnum), sdl, MVT::i32);
+ setValue(&I, DAG.getNode(ISD::CONVERT_FROM_ARBITRARY_FP, sdl, DstVT, IntVal,
+ SemConst));
+ return;
+ }
case Intrinsic::set_rounding:
Res = DAG.getNode(ISD::SET_ROUNDING, sdl, MVT::Other,
{getRoot(), getValue(I.getArgOperand(0))});
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 9453036455727..571830cc57b52 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -435,6 +435,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_BF16_TO_FP: return "strict_bf16_to_fp";
case ISD::FP_TO_BF16: return "fp_to_bf16";
case ISD::STRICT_FP_TO_BF16: return "strict_fp_to_bf16";
+ case ISD::CONVERT_FROM_ARBITRARY_FP: return "convert_from_arbitrary_fp";
case ISD::LROUND: return "lround";
case ISD::STRICT_LROUND: return "strict_lround";
case ISD::LLROUND: return "llround";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 7ab6d82c5ccda..684ba3161f48c 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1149,7 +1149,7 @@ void TargetLoweringBase::initActions() {
ISD::FASIN, ISD::FATAN,
ISD::FCOSH, ISD::FSINH,
ISD::FTANH, ISD::FATAN2,
- ISD::FMULADD},
+ ISD::FMULADD, ISD::CONVERT_FROM_ARBITRARY_FP},
VT, Expand);
// Overflow operations default to expand
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 6439e140a4d30..f72b3c56fabdc 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
@@ -6085,6 +6086,18 @@ bool APFloatBase::isValidArbitraryFPFormat(StringRef Format) {
return llvm::is_contained(ValidFormats, Format);
}
+const fltSemantics *APFloatBase::getArbitraryFPSemantics(StringRef Format) {
+ // TODO: extend to remaining arbitrary FP types: Float8E4M3, Float8E3M4,
+ // Float8E5M2FNUZ, Float8E4M3FNUZ, Float8E4M3B11FNUZ, Float8E8M0FNU.
+ return StringSwitch<const fltSemantics *>(Format) // exact-name lookup
+ .Case("Float8E5M2", &semFloat8E5M2)
+ .Case("Float8E4M3FN", &semFloat8E4M3FN)
+ .Case("Float4E2M1FN", &semFloat4E2M1FN)
+ .Case("Float6E3M2FN", &semFloat6E3M2FN)
+ .Case("Float6E2M3FN", &semFloat6E2M3FN)
+ .Default(nullptr); // any other format string yields nullptr
+}
+
APFloat::Storage::~Storage() {
if (usesLayout<IEEEFloat>(*semantics)) {
IEEE.~IEEEFloat();
diff --git a/llvm/test/CodeGen/AMDGPU/arbitrary-fp-to-float.ll b/llvm/test/CodeGen/AMDGPU/arbitrary-fp-to-float.ll
new file mode 100644
index 0000000000000..e7bb8825fec05
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/arbitrary-fp-to-float.ll
@@ -0,0 +1,646 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck %s
+
+; Test llvm.convert.from.arbitrary.fp intrinsic expansion.
+
+declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
+declare float @llvm.convert.from.arbitrary.fp.f32.i6(i6, metadata)
+declare float @llvm.convert.from.arbitrary.fp.f32.i4(i4, metadata)
+declare <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4>, metadata)
+
+declare half @llvm.convert.from.arbitrary.fp.f16.i8(i8, metadata)
+declare double @llvm.convert.from.arbitrary.fp.f64.i8(i8, metadata)
+
+; Float8E5M2
+; Layout: sign(1) exp(5) mant(2), bias=15
+; Supports: Inf, NaN, signed zero, denormals
+
+; Float8E5M2 normal: 0_01111_00 = 1.0
+define float @from_f8e5m2_normal() {
+; CHECK-LABEL: from_f8e5m2_normal:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 60, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 zero: 0_00000_00 = +0.0
+define float @from_f8e5m2_zero() {
+; CHECK-LABEL: from_f8e5m2_zero:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 negative zero: 1_00000_00 = -0.0 (i8 -128 is bit pattern 0x80)
+define float @from_f8e5m2_neg_zero() {
+; CHECK-LABEL: from_f8e5m2_neg_zero:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_bfrev_b32_e32 v0, 1
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -128, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 denorm: 0_00000_01 = 2^(-16)
+define float @from_f8e5m2_denorm() {
+; CHECK-LABEL: from_f8e5m2_denorm:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0x37800000
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 +Inf: 0_11111_00
+define float @from_f8e5m2_inf() {
+; CHECK-LABEL: from_f8e5m2_inf:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0x7f800000
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 124, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 NaN: 0_11111_01
+define float @from_f8e5m2_nan() {
+; CHECK-LABEL: from_f8e5m2_nan:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0x7fc00000
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 max: 0_11110_11 = 57344
+define float @from_f8e5m2_max() {
+; CHECK-LABEL: from_f8e5m2_max:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0x47600000
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 123, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 negative: 1_01111_00 = -1.0 (i8 -68 is bit pattern 0xBC)
+define float @from_f8e5m2_neg() {
+; CHECK-LABEL: from_f8e5m2_neg:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, -1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -68, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 runtime arg test
+define float @from_f8e5m2_dynamic(i8 %x) {
+; CHECK-LABEL: from_f8e5m2_dynamic:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; CHECK-NEXT: v_and_b32_e32 v1, 3, v0
+; CHECK-NEXT: v_lshlrev_b32_e32 v3, 24, v0
+; CHECK-NEXT: v_bfe_u32 v0, v0, 2, 5
+; CHECK-NEXT: v_lshlrev_b32_e32 v2, 21, v1
+; CHECK-NEXT: v_and_b32_e32 v3, 0x80000000, v3
+; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v0
+; CHECK-NEXT: v_or3_b32 v2, v4, v3, v2
+; CHECK-NEXT: v_ffbh_u32_e32 v4, v1
+; CHECK-NEXT: v_sub_u32_e32 v5, 31, v4
+; CHECK-NEXT: v_lshlrev_b32_e64 v5, v5, 1
+; CHECK-NEXT: v_xor_b32_e32 v5, v1, v5
+; CHECK-NEXT: v_add_u32_e32 v6, -8, v4
+; CHECK-NEXT: v_sub_u32_e32 v4, 0x8e, v4
+; CHECK-NEXT: v_lshlrev_b32_e32 v5, v6, v5
+; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v4
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; CHECK-NEXT: v_add_u32_e32 v2, 0x38000000, v2
+; CHECK-NEXT: v_or3_b32 v4, v3, v4, v5
+; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc
+; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5]
+; CHECK-NEXT: v_or_b32_e32 v4, v0, v1
+; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
+; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5]
+; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
+; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], 31, v0
+; CHECK-NEXT: v_or_b32_e32 v0, 0x7f800000, v3
+; CHECK-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
+; CHECK-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
+; CHECK-NEXT: v_mov_b32_e32 v1, 0x7fc00000
+; CHECK-NEXT: s_and_b64 vcc, s[6:7], vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E4M3FN (NanOnly, NanEncoding=AllOnes)
+; Layout: sign(1) exp(4) mant(3), maxExp=8, minExp=-6, bias=7
+; Only 0_1111_111 and 1_1111_111 are NaN; all other exp=15 values are finite.
+
+; Float8E4M3FN normal: 0_0111_000 = 1.0
+define float @from_f8e4m3fn_normal() {
+; CHECK-LABEL: from_f8e4m3fn_normal:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 56, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float8E4M3FN NaN: 0_1111_111
+define float @from_f8e4m3fn_nan() {
+; CHECK-LABEL: from_f8e4m3fn_nan:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0x7fc00000
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 127, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float8E4M3FN not-NaN: 0_1111_110 = 448
+; Despite exp=all-ones, this is a valid finite number (max value)
+define float @from_f8e4m3fn_max() {
+; CHECK-LABEL: from_f8e4m3fn_max:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0x43e00000
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 126, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float8E4M3FN not-NaN: 0_1111_101 = 416
+; exp=all-ones but mant!=all-ones so this is finite
+define float @from_f8e4m3fn_not_nan() {
+; CHECK-LABEL: from_f8e4m3fn_not_nan:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0x43d00000
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float8E4M3FN zero: 0_0000_000 = +0.0
+define float @from_f8e4m3fn_zero() {
+; CHECK-LABEL: from_f8e4m3fn_zero:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float8E4M3FN denorm: 0_0000_001 = 2^(-9)
+define float @from_f8e4m3fn_denorm() {
+; CHECK-LABEL: from_f8e4m3fn_denorm:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0x3b000000
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float8E4M3FN runtime arg test
+define float @from_f8e4m3fn_dynamic(i8 %x) {
+; CHECK-LABEL: from_f8e4m3fn_dynamic:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; CHECK-NEXT: v_and_b32_e32 v1, 7, v0
+; CHECK-NEXT: v_lshlrev_b32_e32 v3, 24, v0
+; CHECK-NEXT: v_bfe_u32 v0, v0, 3, 4
+; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v1
+; CHECK-NEXT: v_and_b32_e32 v3, 0x80000000, v3
+; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v0
+; CHECK-NEXT: v_or3_b32 v2, v4, v3, v2
+; CHECK-NEXT: v_ffbh_u32_e32 v4, v1
+; CHECK-NEXT: v_sub_u32_e32 v5, 31, v4
+; CHECK-NEXT: v_lshlrev_b32_e64 v5, v5, 1
+; CHECK-NEXT: v_xor_b32_e32 v5, v1, v5
+; CHECK-NEXT: v_add_u32_e32 v6, -8, v4
+; CHECK-NEXT: v_sub_u32_e32 v4, 0x95, v4
+; CHECK-NEXT: v_lshlrev_b32_e32 v5, v6, v5
+; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v4
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; CHECK-NEXT: v_add_u32_e32 v2, 0x3c000000, v2
+; CHECK-NEXT: v_or3_b32 v4, v3, v4, v5
+; CHECK-NEXT: s_and_b64 vcc, s[4:5], vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; CHECK-NEXT: v_or_b32_e32 v4, v0, v1
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1
+; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 15, v0
+; CHECK-NEXT: v_mov_b32_e32 v0, 0x7fc00000
+; CHECK-NEXT: s_and_b64 vcc, s[4:5], vcc
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float6E3M2FN (FiniteOnly)
+; Layout: sign(1) exp(3) mant(2), bias=3, maxExp=4
+; No Inf, no NaN. All bit patterns are finite.
+
+; Float6E3M2FN normal: 0_011_00 = 1.0
+define float @from_f6e3m2fn_normal() {
+; CHECK-LABEL: from_f6e3m2fn_normal:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 12, metadata !"Float6E3M2FN")
+ ret float %r
+}
+
+; Float6E3M2FN max: 0_111_11 = 28.0
+define float @from_f6e3m2fn_max() {
+; CHECK-LABEL: from_f6e3m2fn_max:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0x41e00000
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E3M2FN")
+ ret float %r
+}
+
+; Float6E3M2FN denorm: 0_000_01 = 0.0625
+define float @from_f6e3m2fn_denorm() {
+; CHECK-LABEL: from_f6e3m2fn_denorm:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0x3d800000
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E3M2FN")
+ ret float %r
+}
+
+; Float6E3M2FN zero: 0_000_00 = +0.0
+define float @from_f6e3m2fn_zero() {
+; CHECK-LABEL: from_f6e3m2fn_zero:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E3M2FN")
+ ret float %r
+}
+
+; Float6E3M2FN negative: 1_011_00 = -1.0 (i6 -20 is bit pattern 0b101100)
+define float @from_f6e3m2fn_neg() {
+; CHECK-LABEL: from_f6e3m2fn_neg:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, -1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 -20, metadata !"Float6E3M2FN")
+ ret float %r
+}
+
+; Float6E3M2FN runtime arg test
+define float @from_f6e3m2fn_dynamic(i6 %x) {
+; CHECK-LABEL: from_f6e3m2fn_dynamic:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; CHECK-NEXT: v_and_b32_e32 v1, 3, v0
+; CHECK-NEXT: v_lshlrev_b32_e32 v3, 26, v0
+; CHECK-NEXT: v_bfe_u32 v0, v0, 2, 3
+; CHECK-NEXT: v_lshlrev_b32_e32 v2, 21, v1
+; CHECK-NEXT: v_and_b32_e32 v3, 0x80000000, v3
+; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v0
+; CHECK-NEXT: v_or3_b32 v2, v4, v3, v2
+; CHECK-NEXT: v_ffbh_u32_e32 v4, v1
+; CHECK-NEXT: v_sub_u32_e32 v5, 31, v4
+; CHECK-NEXT: v_lshlrev_b32_e64 v5, v5, 1
+; CHECK-NEXT: v_xor_b32_e32 v5, v1, v5
+; CHECK-NEXT: v_add_u32_e32 v6, -8, v4
+; CHECK-NEXT: v_sub_u32_e32 v4, 0x9a, v4
+; CHECK-NEXT: v_lshlrev_b32_e32 v5, v6, v5
+; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v4
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; CHECK-NEXT: v_add_u32_e32 v2, 0x3e000000, v2
+; CHECK-NEXT: v_or3_b32 v4, v3, v4, v5
+; CHECK-NEXT: s_and_b64 vcc, s[4:5], vcc
+; CHECK-NEXT: v_or_b32_e32 v0, v0, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E3M2FN")
+ ret float %r
+}
+
+; Float6E2M3FN (FiniteOnly)
+; Layout: sign(1) exp(2) mant(3), bias=1, maxExp=2
+; No Inf, no NaN. All bit patterns are finite.
+
+; Float6E2M3FN normal: 0_01_000 = 1.0
+define float @from_f6e2m3fn_normal() {
+; CHECK-LABEL: from_f6e2m3fn_normal:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 8, metadata !"Float6E2M3FN")
+ ret float %r
+}
+
+; Float6E2M3FN max: 0_11_111 = 7.5
+define float @from_f6e2m3fn_max() {
+; CHECK-LABEL: from_f6e2m3fn_max:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0x40f00000
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E2M3FN")
+ ret float %r
+}
+
+; Float6E2M3FN denorm: 0_00_001 = 0.125
+define float @from_f6e2m3fn_denorm() {
+; CHECK-LABEL: from_f6e2m3fn_denorm:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0x3e000000
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E2M3FN")
+ ret float %r
+}
+
+; Float6E2M3FN zero: 0_00_000 = +0.0
+define float @from_f6e2m3fn_zero() {
+; CHECK-LABEL: from_f6e2m3fn_zero:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E2M3FN")
+ ret float %r
+}
+
+; Float6E2M3FN runtime arg test
+define float @from_f6e2m3fn_dynamic(i6 %x) {
+; CHECK-LABEL: from_f6e2m3fn_dynamic:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; CHECK-NEXT: v_and_b32_e32 v1, 7, v0
+; CHECK-NEXT: v_lshlrev_b32_e32 v3, 26, v0
+; CHECK-NEXT: v_bfe_u32 v0, v0, 3, 2
+; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v1
+; CHECK-NEXT: v_and_b32_e32 v3, 0x80000000, v3
+; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v0
+; CHECK-NEXT: v_or3_b32 v2, v4, v3, v2
+; CHECK-NEXT: v_ffbh_u32_e32 v4, v1
+; CHECK-NEXT: v_sub_u32_e32 v5, 31, v4
+; CHECK-NEXT: v_lshlrev_b32_e64 v5, v5, 1
+; CHECK-NEXT: v_xor_b32_e32 v5, v1, v5
+; CHECK-NEXT: v_add_u32_e32 v6, -8, v4
+; CHECK-NEXT: v_sub_u32_e32 v4, 0x9b, v4
+; CHECK-NEXT: v_lshlrev_b32_e32 v5, v6, v5
+; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v4
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; CHECK-NEXT: v_add_u32_e32 v2, 0.5, v2
+; CHECK-NEXT: v_or3_b32 v4, v3, v4, v5
+; CHECK-NEXT: s_and_b64 vcc, s[4:5], vcc
+; CHECK-NEXT: v_or_b32_e32 v0, v0, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E2M3FN")
+ ret float %r
+}
+
+; Float4E2M1FN (FiniteOnly)
+; Layout: sign(1) exp(2) mant(1), bias=1, maxExp=2
+; No Inf, no NaN.
+
+; Float4E2M1FN normal: 0_01_0 = 1.0
+define float @from_f4e2m1fn_normal() {
+; CHECK-LABEL: from_f4e2m1fn_normal:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 2, metadata !"Float4E2M1FN")
+ ret float %r
+}
+
+; Float4E2M1FN denorm: 0_00_1 = 0.5
+define float @from_f4e2m1fn_denorm() {
+; CHECK-LABEL: from_f4e2m1fn_denorm:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0.5
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 1, metadata !"Float4E2M1FN")
+ ret float %r
+}
+
+; Float4E2M1FN max: 0_11_1 = 6.0
+define float @from_f4e2m1fn_max() {
+; CHECK-LABEL: from_f4e2m1fn_max:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0x40c00000
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 7, metadata !"Float4E2M1FN")
+ ret float %r
+}
+
+; Float4E2M1FN runtime arg test
+define float @from_f4e2m1fn_dynamic(i4 %x) {
+; CHECK-LABEL: from_f4e2m1fn_dynamic:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_and_b32_e32 v1, 0xffff, v0
+; CHECK-NEXT: v_and_b32_e32 v2, 1, v1
+; CHECK-NEXT: v_lshlrev_b32_e32 v4, 28, v1
+; CHECK-NEXT: v_bfe_u32 v1, v1, 1, 2
+; CHECK-NEXT: v_lshlrev_b32_e32 v3, 22, v2
+; CHECK-NEXT: v_and_b32_e32 v4, 0x80000000, v4
+; CHECK-NEXT: v_lshlrev_b32_e32 v5, 23, v1
+; CHECK-NEXT: v_or3_b32 v3, v5, v4, v3
+; CHECK-NEXT: v_ffbh_u32_e32 v5, v2
+; CHECK-NEXT: v_sub_u32_e32 v6, 31, v5
+; CHECK-NEXT: v_lshlrev_b32_e64 v6, v6, 1
+; CHECK-NEXT: v_xor_b32_e32 v6, v2, v6
+; CHECK-NEXT: v_add_u32_e32 v7, -8, v5
+; CHECK-NEXT: v_sub_u32_e32 v5, 0x9d, v5
+; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
+; CHECK-NEXT: v_lshlrev_b32_e32 v6, v7, v6
+; CHECK-NEXT: v_lshlrev_b32_e32 v5, 23, v5
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
+; CHECK-NEXT: v_add_u32_e32 v3, 0.5, v3
+; CHECK-NEXT: v_or3_b32 v5, v4, v5, v6
+; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
+; CHECK-NEXT: v_or_b32_e32 v1, v1, v2
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 %x, metadata !"Float4E2M1FN")
+ ret float %r
+}
+
+; Float8E5M2 to f16: 1.0
+define half @from_f8e5m2_to_f16() {
+; CHECK-LABEL: from_f8e5m2_to_f16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call half @llvm.convert.from.arbitrary.fp.f16.i8(i8 60, metadata !"Float8E5M2")
+ ret half %r
+}
+
+; Float8E5M2 to f64: 1.0
+define double @from_f8e5m2_to_f64() {
+; CHECK-LABEL: from_f8e5m2_to_f64:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: v_mov_b32_e32 v1, 0x3ff00000
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call double @llvm.convert.from.arbitrary.fp.f64.i8(i8 60, metadata !"Float8E5M2")
+ ret double %r
+}
+
+; Vector test: Float4E2M1FN <4 x i4> -> <4 x float>
+define <4 x float> @fp4_to_f32_vec(<4 x i4> %x) {
+; CHECK-LABEL: fp4_to_f32_vec:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_and_b32_e32 v4, 0xffff, v0
+; CHECK-NEXT: v_and_b32_e32 v5, 1, v4
+; CHECK-NEXT: v_lshlrev_b32_e32 v7, 28, v4
+; CHECK-NEXT: v_bfe_u32 v4, v4, 1, 2
+; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v5
+; CHECK-NEXT: v_and_b32_e32 v7, 0x80000000, v7
+; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v4
+; CHECK-NEXT: v_or3_b32 v6, v8, v7, v6
+; CHECK-NEXT: v_ffbh_u32_e32 v8, v5
+; CHECK-NEXT: v_sub_u32_e32 v9, 31, v8
+; CHECK-NEXT: v_lshlrev_b32_e64 v9, v9, 1
+; CHECK-NEXT: v_xor_b32_e32 v9, v5, v9
+; CHECK-NEXT: v_add_u32_e32 v10, -8, v8
+; CHECK-NEXT: v_sub_u32_e32 v8, 0x9d, v8
+; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
+; CHECK-NEXT: v_lshlrev_b32_e32 v9, v10, v9
+; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v8
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
+; CHECK-NEXT: v_add_u32_e32 v6, 0.5, v6
+; CHECK-NEXT: v_or3_b32 v8, v7, v8, v9
+; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
+; CHECK-NEXT: v_or_b32_e32 v4, v4, v5
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; CHECK-NEXT: v_and_b32_e32 v4, 0xffff, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
+; CHECK-NEXT: v_and_b32_e32 v5, 1, v4
+; CHECK-NEXT: v_lshlrev_b32_e32 v7, 28, v4
+; CHECK-NEXT: v_bfe_u32 v4, v4, 1, 2
+; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v5
+; CHECK-NEXT: v_and_b32_e32 v7, 0x80000000, v7
+; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v4
+; CHECK-NEXT: v_or3_b32 v6, v8, v7, v6
+; CHECK-NEXT: v_ffbh_u32_e32 v8, v5
+; CHECK-NEXT: v_sub_u32_e32 v9, 31, v8
+; CHECK-NEXT: v_lshlrev_b32_e64 v9, v9, 1
+; CHECK-NEXT: v_xor_b32_e32 v9, v5, v9
+; CHECK-NEXT: v_add_u32_e32 v10, -8, v8
+; CHECK-NEXT: v_sub_u32_e32 v8, 0x9d, v8
+; CHECK-NEXT: v_and_b32_e32 v1, 1, v1
+; CHECK-NEXT: v_lshlrev_b32_e32 v9, v10, v9
+; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v8
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v1
+; CHECK-NEXT: v_add_u32_e32 v6, 0.5, v6
+; CHECK-NEXT: v_or3_b32 v8, v7, v8, v9
+; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
+; CHECK-NEXT: v_or_b32_e32 v4, v4, v5
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v6, v8, vcc
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; CHECK-NEXT: v_and_b32_e32 v4, 0xffff, v2
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; CHECK-NEXT: v_and_b32_e32 v5, 1, v4
+; CHECK-NEXT: v_lshlrev_b32_e32 v7, 28, v4
+; CHECK-NEXT: v_bfe_u32 v4, v4, 1, 2
+; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v5
+; CHECK-NEXT: v_and_b32_e32 v7, 0x80000000, v7
+; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v4
+; CHECK-NEXT: v_or3_b32 v6, v8, v7, v6
+; CHECK-NEXT: v_ffbh_u32_e32 v8, v5
+; CHECK-NEXT: v_sub_u32_e32 v9, 31, v8
+; CHECK-NEXT: v_lshlrev_b32_e64 v9, v9, 1
+; CHECK-NEXT: v_xor_b32_e32 v9, v5, v9
+; CHECK-NEXT: v_add_u32_e32 v10, -8, v8
+; CHECK-NEXT: v_sub_u32_e32 v8, 0x9d, v8
+; CHECK-NEXT: v_and_b32_e32 v2, 1, v2
+; CHECK-NEXT: v_lshlrev_b32_e32 v9, v10, v9
+; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v8
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v2
+; CHECK-NEXT: v_add_u32_e32 v6, 0.5, v6
+; CHECK-NEXT: v_or3_b32 v8, v7, v8, v9
+; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
+; CHECK-NEXT: v_or_b32_e32 v4, v4, v5
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v8, vcc
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; CHECK-NEXT: v_and_b32_e32 v4, 0xffff, v3
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; CHECK-NEXT: v_and_b32_e32 v5, 1, v4
+; CHECK-NEXT: v_lshlrev_b32_e32 v7, 28, v4
+; CHECK-NEXT: v_bfe_u32 v4, v4, 1, 2
+; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v5
+; CHECK-NEXT: v_and_b32_e32 v7, 0x80000000, v7
+; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v4
+; CHECK-NEXT: v_or3_b32 v6, v8, v7, v6
+; CHECK-NEXT: v_ffbh_u32_e32 v8, v5
+; CHECK-NEXT: v_sub_u32_e32 v9, 31, v8
+; CHECK-NEXT: v_lshlrev_b32_e64 v9, v9, 1
+; CHECK-NEXT: v_xor_b32_e32 v9, v5, v9
+; CHECK-NEXT: v_add_u32_e32 v10, -8, v8
+; CHECK-NEXT: v_sub_u32_e32 v8, 0x9d, v8
+; CHECK-NEXT: v_and_b32_e32 v3, 1, v3
+; CHECK-NEXT: v_lshlrev_b32_e32 v9, v10, v9
+; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v8
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v3
+; CHECK-NEXT: v_add_u32_e32 v6, 0.5, v6
+; CHECK-NEXT: v_or3_b32 v8, v7, v8, v9
+; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5]
+; CHECK-NEXT: v_or_b32_e32 v4, v4, v5
+; CHECK-NEXT: v_cndmask_b32_e32 v3, v6, v8, vcc
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %r = call <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4> %x, metadata !"Float4E2M1FN")
+ ret <4 x float> %r
+}
diff --git a/llvm/test/CodeGen/NVPTX/arbitrary-fp-to-float.ll b/llvm/test/CodeGen/NVPTX/arbitrary-fp-to-float.ll
new file mode 100644
index 0000000000000..aff1bd385308d
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/arbitrary-fp-to-float.ll
@@ -0,0 +1,761 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64-unknown-unknown | FileCheck %s
+
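+; Test llvm.convert.from.arbitrary.fp intrinsic expansion.
+; Results for constant inputs appear below as decimal i32 bit patterns,
+; e.g. 1065353216 = 0x3F800000 (1.0) and 2143289344 = 0x7FC00000 (NaN).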
+
+declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
+declare float @llvm.convert.from.arbitrary.fp.f32.i6(i6, metadata)
+declare float @llvm.convert.from.arbitrary.fp.f32.i4(i4, metadata)
+declare <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4>, metadata)
+
+declare half @llvm.convert.from.arbitrary.fp.f16.i8(i8, metadata)
+declare double @llvm.convert.from.arbitrary.fp.f64.i8(i8, metadata)
+
+; Float8E5M2
+; Layout: sign(1) exp(5) mant(2), bias=15
+; Supports: Inf, NaN, signed zero, denormals
+
+; Float8E5M2 normal: 0_01111_00 = 1.0
+define float @from_f8e5m2_normal() {
+; CHECK-LABEL: from_f8e5m2_normal(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 1065353216;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 60, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 zero: 0_00000_00 = +0.0
+define float @from_f8e5m2_zero() {
+; CHECK-LABEL: from_f8e5m2_zero(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 0;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 negative zero: 1_00000_00 = -0.0 (i8 -128 is bit pattern 0x80)
+define float @from_f8e5m2_neg_zero() {
+; CHECK-LABEL: from_f8e5m2_neg_zero(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], -2147483648;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -128, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 denorm: 0_00000_01 = 2^(-16)
+define float @from_f8e5m2_denorm() {
+; CHECK-LABEL: from_f8e5m2_denorm(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 931135488;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 +Inf: 0_11111_00
+define float @from_f8e5m2_inf() {
+; CHECK-LABEL: from_f8e5m2_inf(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 2139095040;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 124, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 NaN: 0_11111_01
+define float @from_f8e5m2_nan() {
+; CHECK-LABEL: from_f8e5m2_nan(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 2143289344;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 max: 0_11110_11 = 57344
+define float @from_f8e5m2_max() {
+; CHECK-LABEL: from_f8e5m2_max(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 1197473792;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 123, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 negative: 1_01111_00 = -1.0 (i8 -68 is bit pattern 0xBC)
+define float @from_f8e5m2_neg() {
+; CHECK-LABEL: from_f8e5m2_neg(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], -1082130432;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -68, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 runtime arg test
+define float @from_f8e5m2_dynamic(i8 %x) {
+; CHECK-LABEL: from_f8e5m2_dynamic(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<6>;
+; CHECK-NEXT: .reg .b32 %r<31>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %r1, [from_f8e5m2_dynamic_param_0];
+; CHECK-NEXT: shl.b32 %r2, %r1, 24;
+; CHECK-NEXT: and.b32 %r3, %r2, -2147483648;
+; CHECK-NEXT: and.b32 %r4, %r1, 3;
+; CHECK-NEXT: clz.b32 %r5, %r4;
+; CHECK-NEXT: sub.s32 %r6, 142, %r5;
+; CHECK-NEXT: shl.b32 %r7, %r6, 23;
+; CHECK-NEXT: or.b32 %r8, %r3, %r7;
+; CHECK-NEXT: sub.s32 %r9, 31, %r5;
+; CHECK-NEXT: mov.b32 %r10, 1;
+; CHECK-NEXT: shl.b32 %r11, %r10, %r9;
+; CHECK-NEXT: xor.b32 %r12, %r4, %r11;
+; CHECK-NEXT: add.s32 %r13, %r5, -8;
+; CHECK-NEXT: shl.b32 %r14, %r12, %r13;
+; CHECK-NEXT: or.b32 %r15, %r8, %r14;
+; CHECK-NEXT: bfe.u32 %r16, %r1, 2, 5;
+; CHECK-NEXT: shl.b32 %r17, %r16, 23;
+; CHECK-NEXT: or.b32 %r18, %r17, %r3;
+; CHECK-NEXT: shl.b32 %r19, %r4, 21;
+; CHECK-NEXT: or.b32 %r20, %r18, %r19;
+; CHECK-NEXT: add.s32 %r21, %r20, 939524096;
+; CHECK-NEXT: setp.ne.b32 %p1, %r4, 0;
+; CHECK-NEXT: selp.b32 %r22, %r15, %r21, %p1;
+; CHECK-NEXT: setp.eq.b32 %p2, %r16, 0;
+; CHECK-NEXT: selp.b32 %r23, %r22, %r21, %p2;
+; CHECK-NEXT: or.b32 %r24, %r16, %r4;
+; CHECK-NEXT: setp.eq.b32 %p3, %r24, 0;
+; CHECK-NEXT: selp.b32 %r25, %r3, %r23, %p3;
+; CHECK-NEXT: setp.eq.b32 %p4, %r4, 0;
+; CHECK-NEXT: or.b32 %r26, %r3, 2139095040;
+; CHECK-NEXT: selp.b32 %r27, %r26, %r25, %p4;
+; CHECK-NEXT: setp.eq.b32 %p5, %r16, 31;
+; CHECK-NEXT: selp.b32 %r28, %r27, %r25, %p5;
+; CHECK-NEXT: selp.b32 %r29, 2143289344, %r28, %p1;
+; CHECK-NEXT: selp.b32 %r30, %r29, %r28, %p5;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r30;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E4M3FN (NanOnly, NanEncoding=AllOnes)
+; Layout: sign(1) exp(4) mant(3), maxExp=8, minExp=-6, bias=7
+; Only 0_1111_111 and 1_1111_111 are NaN; all other exp=15 values are finite.
+
+; Float8E4M3FN normal: 0_0111_000 = 1.0
+define float @from_f8e4m3fn_normal() {
+; CHECK-LABEL: from_f8e4m3fn_normal(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 1065353216;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 56, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float8E4M3FN NaN: 0_1111_111
+define float @from_f8e4m3fn_nan() {
+; CHECK-LABEL: from_f8e4m3fn_nan(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 2143289344;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 127, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float8E4M3FN not-NaN: 0_1111_110 = 448
+; Despite exp=all-ones, this is a valid finite number (max value)
+define float @from_f8e4m3fn_max() {
+; CHECK-LABEL: from_f8e4m3fn_max(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 1138753536;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 126, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float8E4M3FN not-NaN: 0_1111_101 = 416
+; exp=all-ones but mant!=all-ones so this is finite
+define float @from_f8e4m3fn_not_nan() {
+; CHECK-LABEL: from_f8e4m3fn_not_nan(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 1137704960;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float8E4M3FN zero: 0_0000_000 = +0.0
+define float @from_f8e4m3fn_zero() {
+; CHECK-LABEL: from_f8e4m3fn_zero(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 0;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float8E4M3FN denorm: 0_0000_001 = 2^(-9)
+define float @from_f8e4m3fn_denorm() {
+; CHECK-LABEL: from_f8e4m3fn_denorm(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 989855744;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float8E4M3FN runtime arg test
+define float @from_f8e4m3fn_dynamic(i8 %x) {
+; CHECK-LABEL: from_f8e4m3fn_dynamic(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<6>;
+; CHECK-NEXT: .reg .b32 %r<28>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %r1, [from_f8e4m3fn_dynamic_param_0];
+; CHECK-NEXT: shl.b32 %r2, %r1, 24;
+; CHECK-NEXT: and.b32 %r3, %r2, -2147483648;
+; CHECK-NEXT: and.b32 %r4, %r1, 7;
+; CHECK-NEXT: clz.b32 %r5, %r4;
+; CHECK-NEXT: sub.s32 %r6, 149, %r5;
+; CHECK-NEXT: shl.b32 %r7, %r6, 23;
+; CHECK-NEXT: or.b32 %r8, %r3, %r7;
+; CHECK-NEXT: sub.s32 %r9, 31, %r5;
+; CHECK-NEXT: mov.b32 %r10, 1;
+; CHECK-NEXT: shl.b32 %r11, %r10, %r9;
+; CHECK-NEXT: xor.b32 %r12, %r4, %r11;
+; CHECK-NEXT: add.s32 %r13, %r5, -8;
+; CHECK-NEXT: shl.b32 %r14, %r12, %r13;
+; CHECK-NEXT: or.b32 %r15, %r8, %r14;
+; CHECK-NEXT: bfe.u32 %r16, %r1, 3, 4;
+; CHECK-NEXT: shl.b32 %r17, %r16, 23;
+; CHECK-NEXT: or.b32 %r18, %r17, %r3;
+; CHECK-NEXT: shl.b32 %r19, %r4, 20;
+; CHECK-NEXT: or.b32 %r20, %r18, %r19;
+; CHECK-NEXT: add.s32 %r21, %r20, 1006632960;
+; CHECK-NEXT: setp.ne.b32 %p1, %r4, 0;
+; CHECK-NEXT: selp.b32 %r22, %r15, %r21, %p1;
+; CHECK-NEXT: setp.eq.b32 %p2, %r16, 0;
+; CHECK-NEXT: selp.b32 %r23, %r22, %r21, %p2;
+; CHECK-NEXT: or.b32 %r24, %r16, %r4;
+; CHECK-NEXT: setp.eq.b32 %p3, %r24, 0;
+; CHECK-NEXT: selp.b32 %r25, %r3, %r23, %p3;
+; CHECK-NEXT: setp.eq.b32 %p4, %r4, 7;
+; CHECK-NEXT: selp.b32 %r26, 2143289344, %r25, %p4;
+; CHECK-NEXT: setp.eq.b32 %p5, %r16, 15;
+; CHECK-NEXT: selp.b32 %r27, %r26, %r25, %p5;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r27;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float6E3M2FN (FiniteOnly)
+; Layout: sign(1) exp(3) mant(2), bias=3, maxExp=4
+; No Inf, no NaN. All bit patterns are finite.
+
+; Float6E3M2FN normal: 0_011_00 = 1.0
+define float @from_f6e3m2fn_normal() {
+; CHECK-LABEL: from_f6e3m2fn_normal(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 1065353216;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 12, metadata !"Float6E3M2FN")
+ ret float %r
+}
+
+; Float6E3M2FN max: 0_111_11 = 28.0
+define float @from_f6e3m2fn_max() {
+; CHECK-LABEL: from_f6e3m2fn_max(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 1105199104;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E3M2FN")
+ ret float %r
+}
+
+; Float6E3M2FN denorm: 0_000_01 = 0.0625
+define float @from_f6e3m2fn_denorm() {
+; CHECK-LABEL: from_f6e3m2fn_denorm(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 1031798784;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E3M2FN")
+ ret float %r
+}
+
+; Float6E3M2FN zero: 0_000_00 = +0.0
+define float @from_f6e3m2fn_zero() {
+; CHECK-LABEL: from_f6e3m2fn_zero(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 0;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E3M2FN")
+ ret float %r
+}
+
+; Float6E3M2FN negative: 1_011_00 = -1.0
+define float @from_f6e3m2fn_neg() {
+; CHECK-LABEL: from_f6e3m2fn_neg(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], -1082130432;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 -20, metadata !"Float6E3M2FN")
+ ret float %r
+}
+
+; Float6E3M2FN runtime arg test
+define float @from_f6e3m2fn_dynamic(i6 %x) {
+; CHECK-LABEL: from_f6e3m2fn_dynamic(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<4>;
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<26>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [from_f6e3m2fn_dynamic_param_0+1];
+; CHECK-NEXT: shl.b16 %rs2, %rs1, 8;
+; CHECK-NEXT: ld.param.b8 %rs3, [from_f6e3m2fn_dynamic_param_0];
+; CHECK-NEXT: or.b16 %rs4, %rs2, %rs3;
+; CHECK-NEXT: cvt.u32.u16 %r1, %rs4;
+; CHECK-NEXT: shl.b32 %r2, %r1, 26;
+; CHECK-NEXT: and.b32 %r3, %r2, -2147483648;
+; CHECK-NEXT: and.b32 %r4, %r1, 3;
+; CHECK-NEXT: clz.b32 %r5, %r4;
+; CHECK-NEXT: sub.s32 %r6, 154, %r5;
+; CHECK-NEXT: shl.b32 %r7, %r6, 23;
+; CHECK-NEXT: or.b32 %r8, %r3, %r7;
+; CHECK-NEXT: sub.s32 %r9, 31, %r5;
+; CHECK-NEXT: mov.b32 %r10, 1;
+; CHECK-NEXT: shl.b32 %r11, %r10, %r9;
+; CHECK-NEXT: xor.b32 %r12, %r4, %r11;
+; CHECK-NEXT: add.s32 %r13, %r5, -8;
+; CHECK-NEXT: shl.b32 %r14, %r12, %r13;
+; CHECK-NEXT: or.b32 %r15, %r8, %r14;
+; CHECK-NEXT: bfe.u32 %r16, %r1, 2, 3;
+; CHECK-NEXT: shl.b32 %r17, %r16, 23;
+; CHECK-NEXT: or.b32 %r18, %r17, %r3;
+; CHECK-NEXT: shl.b32 %r19, %r4, 21;
+; CHECK-NEXT: or.b32 %r20, %r18, %r19;
+; CHECK-NEXT: add.s32 %r21, %r20, 1040187392;
+; CHECK-NEXT: setp.ne.b32 %p1, %r4, 0;
+; CHECK-NEXT: selp.b32 %r22, %r15, %r21, %p1;
+; CHECK-NEXT: setp.eq.b32 %p2, %r16, 0;
+; CHECK-NEXT: selp.b32 %r23, %r22, %r21, %p2;
+; CHECK-NEXT: or.b32 %r24, %r16, %r4;
+; CHECK-NEXT: setp.eq.b32 %p3, %r24, 0;
+; CHECK-NEXT: selp.b32 %r25, %r3, %r23, %p3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r25;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E3M2FN")
+ ret float %r
+}
+
+; Float6E2M3FN (FiniteOnly)
+; Layout: sign(1) exp(2) mant(3), bias=1, maxExp=2
+; No Inf, no NaN. All bit patterns are finite.
+
+; Float6E2M3FN normal: 0_01_000 = 1.0
+define float @from_f6e2m3fn_normal() {
+; CHECK-LABEL: from_f6e2m3fn_normal(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 1065353216;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 8, metadata !"Float6E2M3FN")
+ ret float %r
+}
+
+; Float6E2M3FN max: 0_11_111 = 7.5
+define float @from_f6e2m3fn_max() {
+; CHECK-LABEL: from_f6e2m3fn_max(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 1089470464;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E2M3FN")
+ ret float %r
+}
+
+; Float6E2M3FN denorm: 0_00_001 = 0.125
+define float @from_f6e2m3fn_denorm() {
+; CHECK-LABEL: from_f6e2m3fn_denorm(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 1040187392;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E2M3FN")
+ ret float %r
+}
+
+; Float6E2M3FN zero: 0_00_000 = +0.0
+define float @from_f6e2m3fn_zero() {
+; CHECK-LABEL: from_f6e2m3fn_zero(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 0;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E2M3FN")
+ ret float %r
+}
+
+; Float6E2M3FN runtime arg test
+define float @from_f6e2m3fn_dynamic(i6 %x) {
+; CHECK-LABEL: from_f6e2m3fn_dynamic(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<4>;
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<26>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [from_f6e2m3fn_dynamic_param_0+1];
+; CHECK-NEXT: shl.b16 %rs2, %rs1, 8;
+; CHECK-NEXT: ld.param.b8 %rs3, [from_f6e2m3fn_dynamic_param_0];
+; CHECK-NEXT: or.b16 %rs4, %rs2, %rs3;
+; CHECK-NEXT: cvt.u32.u16 %r1, %rs4;
+; CHECK-NEXT: shl.b32 %r2, %r1, 26;
+; CHECK-NEXT: and.b32 %r3, %r2, -2147483648;
+; CHECK-NEXT: and.b32 %r4, %r1, 7;
+; CHECK-NEXT: clz.b32 %r5, %r4;
+; CHECK-NEXT: sub.s32 %r6, 155, %r5;
+; CHECK-NEXT: shl.b32 %r7, %r6, 23;
+; CHECK-NEXT: or.b32 %r8, %r3, %r7;
+; CHECK-NEXT: sub.s32 %r9, 31, %r5;
+; CHECK-NEXT: mov.b32 %r10, 1;
+; CHECK-NEXT: shl.b32 %r11, %r10, %r9;
+; CHECK-NEXT: xor.b32 %r12, %r4, %r11;
+; CHECK-NEXT: add.s32 %r13, %r5, -8;
+; CHECK-NEXT: shl.b32 %r14, %r12, %r13;
+; CHECK-NEXT: or.b32 %r15, %r8, %r14;
+; CHECK-NEXT: bfe.u32 %r16, %r1, 3, 2;
+; CHECK-NEXT: shl.b32 %r17, %r16, 23;
+; CHECK-NEXT: or.b32 %r18, %r17, %r3;
+; CHECK-NEXT: shl.b32 %r19, %r4, 20;
+; CHECK-NEXT: or.b32 %r20, %r18, %r19;
+; CHECK-NEXT: add.s32 %r21, %r20, 1056964608;
+; CHECK-NEXT: setp.ne.b32 %p1, %r4, 0;
+; CHECK-NEXT: selp.b32 %r22, %r15, %r21, %p1;
+; CHECK-NEXT: setp.eq.b32 %p2, %r16, 0;
+; CHECK-NEXT: selp.b32 %r23, %r22, %r21, %p2;
+; CHECK-NEXT: or.b32 %r24, %r16, %r4;
+; CHECK-NEXT: setp.eq.b32 %p3, %r24, 0;
+; CHECK-NEXT: selp.b32 %r25, %r3, %r23, %p3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r25;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E2M3FN")
+ ret float %r
+}
+
+; Float4E2M1FN (FiniteOnly)
+; Layout: sign(1) exp(2) mant(1), bias=1, maxExp=2
+; No Inf, no NaN. All bit patterns are finite.
+
+; Float4E2M1FN normal: 0_01_0 = 1.0
+define float @from_f4e2m1fn_normal() {
+; CHECK-LABEL: from_f4e2m1fn_normal(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 1065353216;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 2, metadata !"Float4E2M1FN")
+ ret float %r
+}
+
+; Float4E2M1FN denorm: 0_00_1 = 0.5
+define float @from_f4e2m1fn_denorm() {
+; CHECK-LABEL: from_f4e2m1fn_denorm(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 1056964608;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 1, metadata !"Float4E2M1FN")
+ ret float %r
+}
+
+; Float4E2M1FN max: 0_11_1 = 6.0
+define float @from_f4e2m1fn_max() {
+; CHECK-LABEL: from_f4e2m1fn_max(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b32 [func_retval0], 1086324736;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 7, metadata !"Float4E2M1FN")
+ ret float %r
+}
+
+; Float4E2M1FN runtime arg test
+define float @from_f4e2m1fn_dynamic(i4 %x) {
+; CHECK-LABEL: from_f4e2m1fn_dynamic(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<4>;
+; CHECK-NEXT: .reg .b16 %rs<6>;
+; CHECK-NEXT: .reg .b32 %r<26>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [from_f4e2m1fn_dynamic_param_0+1];
+; CHECK-NEXT: shl.b16 %rs2, %rs1, 8;
+; CHECK-NEXT: ld.param.b8 %rs3, [from_f4e2m1fn_dynamic_param_0];
+; CHECK-NEXT: or.b16 %rs4, %rs2, %rs3;
+; CHECK-NEXT: cvt.u32.u16 %r1, %rs4;
+; CHECK-NEXT: shl.b32 %r2, %r1, 28;
+; CHECK-NEXT: and.b32 %r3, %r2, -2147483648;
+; CHECK-NEXT: and.b32 %r4, %r1, 1;
+; CHECK-NEXT: clz.b32 %r5, %r4;
+; CHECK-NEXT: sub.s32 %r6, 157, %r5;
+; CHECK-NEXT: shl.b32 %r7, %r6, 23;
+; CHECK-NEXT: or.b32 %r8, %r3, %r7;
+; CHECK-NEXT: sub.s32 %r9, 31, %r5;
+; CHECK-NEXT: mov.b32 %r10, 1;
+; CHECK-NEXT: shl.b32 %r11, %r10, %r9;
+; CHECK-NEXT: xor.b32 %r12, %r4, %r11;
+; CHECK-NEXT: add.s32 %r13, %r5, -8;
+; CHECK-NEXT: shl.b32 %r14, %r12, %r13;
+; CHECK-NEXT: or.b32 %r15, %r8, %r14;
+; CHECK-NEXT: bfe.u32 %r16, %r1, 1, 2;
+; CHECK-NEXT: shl.b32 %r17, %r16, 23;
+; CHECK-NEXT: or.b32 %r18, %r17, %r3;
+; CHECK-NEXT: shl.b32 %r19, %r4, 22;
+; CHECK-NEXT: or.b32 %r20, %r18, %r19;
+; CHECK-NEXT: add.s32 %r21, %r20, 1056964608;
+; CHECK-NEXT: and.b16 %rs5, %rs3, 1;
+; CHECK-NEXT: setp.ne.b16 %p1, %rs5, 0;
+; CHECK-NEXT: selp.b32 %r22, %r15, %r21, %p1;
+; CHECK-NEXT: setp.eq.b32 %p2, %r16, 0;
+; CHECK-NEXT: selp.b32 %r23, %r22, %r21, %p2;
+; CHECK-NEXT: or.b32 %r24, %r16, %r4;
+; CHECK-NEXT: setp.eq.b32 %p3, %r24, 0;
+; CHECK-NEXT: selp.b32 %r25, %r3, %r23, %p3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r25;
+; CHECK-NEXT: ret;
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 %x, metadata !"Float4E2M1FN")
+ ret float %r
+}
+
+; Float8E5M2 to f16: 1.0
+define half @from_f8e5m2_to_f16() {
+; CHECK-LABEL: from_f8e5m2_to_f16(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b16 [func_retval0], 0x3C00;
+; CHECK-NEXT: ret;
+ %r = call half @llvm.convert.from.arbitrary.fp.f16.i8(i8 60, metadata !"Float8E5M2")
+ ret half %r
+}
+
+; Float8E5M2 to f64: 1.0
+define double @from_f8e5m2_to_f64() {
+; CHECK-LABEL: from_f8e5m2_to_f64(
+; CHECK: {
+; CHECK-EMPTY:
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: st.param.b64 [func_retval0], 4607182418800017408;
+; CHECK-NEXT: ret;
+ %r = call double @llvm.convert.from.arbitrary.fp.f64.i8(i8 60, metadata !"Float8E5M2")
+ ret double %r
+}
+
+; Vector test: Float4E2M1FN <4 x i4> -> <4 x float>
+define <4 x float> @fp4_to_f32_vec(<4 x i4> %x) {
+; CHECK-LABEL: fp4_to_f32_vec(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<13>;
+; CHECK-NEXT: .reg .b32 %r<101>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %r1, [fp4_to_f32_vec_param_0+2];
+; CHECK-NEXT: shl.b32 %r2, %r1, 16;
+; CHECK-NEXT: ld.param.b16 %r3, [fp4_to_f32_vec_param_0];
+; CHECK-NEXT: prmt.b32 %r4, %r3, 0, 0x7771U;
+; CHECK-NEXT: shl.b32 %r5, %r4, 28;
+; CHECK-NEXT: and.b32 %r6, %r5, -2147483648;
+; CHECK-NEXT: and.b32 %r7, %r4, 1;
+; CHECK-NEXT: clz.b32 %r8, %r7;
+; CHECK-NEXT: sub.s32 %r9, 157, %r8;
+; CHECK-NEXT: shl.b32 %r10, %r9, 23;
+; CHECK-NEXT: or.b32 %r11, %r6, %r10;
+; CHECK-NEXT: sub.s32 %r12, 31, %r8;
+; CHECK-NEXT: mov.b32 %r13, 1;
+; CHECK-NEXT: shl.b32 %r14, %r13, %r12;
+; CHECK-NEXT: xor.b32 %r15, %r7, %r14;
+; CHECK-NEXT: add.s32 %r16, %r8, -8;
+; CHECK-NEXT: shl.b32 %r17, %r15, %r16;
+; CHECK-NEXT: or.b32 %r18, %r11, %r17;
+; CHECK-NEXT: bfe.u32 %r19, %r4, 1, 2;
+; CHECK-NEXT: shl.b32 %r20, %r19, 23;
+; CHECK-NEXT: or.b32 %r21, %r20, %r6;
+; CHECK-NEXT: shl.b32 %r22, %r7, 22;
+; CHECK-NEXT: or.b32 %r23, %r21, %r22;
+; CHECK-NEXT: add.s32 %r24, %r23, 1056964608;
+; CHECK-NEXT: setp.ne.b32 %p1, %r7, 0;
+; CHECK-NEXT: selp.b32 %r25, %r18, %r24, %p1;
+; CHECK-NEXT: setp.eq.b32 %p2, %r19, 0;
+; CHECK-NEXT: selp.b32 %r26, %r25, %r24, %p2;
+; CHECK-NEXT: or.b32 %r27, %r19, %r7;
+; CHECK-NEXT: setp.eq.b32 %p3, %r27, 0;
+; CHECK-NEXT: selp.b32 %r28, %r6, %r26, %p3;
+; CHECK-NEXT: prmt.b32 %r29, %r3, 0, 0x7770U;
+; CHECK-NEXT: shl.b32 %r30, %r29, 28;
+; CHECK-NEXT: and.b32 %r31, %r30, -2147483648;
+; CHECK-NEXT: and.b32 %r32, %r29, 1;
+; CHECK-NEXT: clz.b32 %r33, %r32;
+; CHECK-NEXT: sub.s32 %r34, 157, %r33;
+; CHECK-NEXT: shl.b32 %r35, %r34, 23;
+; CHECK-NEXT: or.b32 %r36, %r31, %r35;
+; CHECK-NEXT: sub.s32 %r37, 31, %r33;
+; CHECK-NEXT: shl.b32 %r38, %r13, %r37;
+; CHECK-NEXT: xor.b32 %r39, %r32, %r38;
+; CHECK-NEXT: add.s32 %r40, %r33, -8;
+; CHECK-NEXT: shl.b32 %r41, %r39, %r40;
+; CHECK-NEXT: or.b32 %r42, %r36, %r41;
+; CHECK-NEXT: bfe.u32 %r43, %r29, 1, 2;
+; CHECK-NEXT: shl.b32 %r44, %r43, 23;
+; CHECK-NEXT: or.b32 %r45, %r44, %r31;
+; CHECK-NEXT: shl.b32 %r46, %r32, 22;
+; CHECK-NEXT: or.b32 %r47, %r45, %r46;
+; CHECK-NEXT: add.s32 %r48, %r47, 1056964608;
+; CHECK-NEXT: setp.ne.b32 %p4, %r32, 0;
+; CHECK-NEXT: selp.b32 %r49, %r42, %r48, %p4;
+; CHECK-NEXT: setp.eq.b32 %p5, %r43, 0;
+; CHECK-NEXT: selp.b32 %r50, %r49, %r48, %p5;
+; CHECK-NEXT: or.b32 %r51, %r43, %r32;
+; CHECK-NEXT: setp.eq.b32 %p6, %r51, 0;
+; CHECK-NEXT: selp.b32 %r52, %r31, %r50, %p6;
+; CHECK-NEXT: prmt.b32 %r53, %r2, 0, 0x7773U;
+; CHECK-NEXT: shl.b32 %r54, %r53, 28;
+; CHECK-NEXT: and.b32 %r55, %r54, -2147483648;
+; CHECK-NEXT: and.b32 %r56, %r53, 1;
+; CHECK-NEXT: clz.b32 %r57, %r56;
+; CHECK-NEXT: sub.s32 %r58, 157, %r57;
+; CHECK-NEXT: shl.b32 %r59, %r58, 23;
+; CHECK-NEXT: or.b32 %r60, %r55, %r59;
+; CHECK-NEXT: sub.s32 %r61, 31, %r57;
+; CHECK-NEXT: shl.b32 %r62, %r13, %r61;
+; CHECK-NEXT: xor.b32 %r63, %r56, %r62;
+; CHECK-NEXT: add.s32 %r64, %r57, -8;
+; CHECK-NEXT: shl.b32 %r65, %r63, %r64;
+; CHECK-NEXT: or.b32 %r66, %r60, %r65;
+; CHECK-NEXT: bfe.u32 %r67, %r53, 1, 2;
+; CHECK-NEXT: shl.b32 %r68, %r67, 23;
+; CHECK-NEXT: or.b32 %r69, %r68, %r55;
+; CHECK-NEXT: shl.b32 %r70, %r56, 22;
+; CHECK-NEXT: or.b32 %r71, %r69, %r70;
+; CHECK-NEXT: add.s32 %r72, %r71, 1056964608;
+; CHECK-NEXT: setp.ne.b32 %p7, %r56, 0;
+; CHECK-NEXT: selp.b32 %r73, %r66, %r72, %p7;
+; CHECK-NEXT: setp.eq.b32 %p8, %r67, 0;
+; CHECK-NEXT: selp.b32 %r74, %r73, %r72, %p8;
+; CHECK-NEXT: or.b32 %r75, %r67, %r56;
+; CHECK-NEXT: setp.eq.b32 %p9, %r75, 0;
+; CHECK-NEXT: selp.b32 %r76, %r55, %r74, %p9;
+; CHECK-NEXT: prmt.b32 %r77, %r2, 0, 0x7772U;
+; CHECK-NEXT: shl.b32 %r78, %r77, 28;
+; CHECK-NEXT: and.b32 %r79, %r78, -2147483648;
+; CHECK-NEXT: and.b32 %r80, %r77, 1;
+; CHECK-NEXT: clz.b32 %r81, %r80;
+; CHECK-NEXT: sub.s32 %r82, 157, %r81;
+; CHECK-NEXT: shl.b32 %r83, %r82, 23;
+; CHECK-NEXT: or.b32 %r84, %r79, %r83;
+; CHECK-NEXT: sub.s32 %r85, 31, %r81;
+; CHECK-NEXT: shl.b32 %r86, %r13, %r85;
+; CHECK-NEXT: xor.b32 %r87, %r80, %r86;
+; CHECK-NEXT: add.s32 %r88, %r81, -8;
+; CHECK-NEXT: shl.b32 %r89, %r87, %r88;
+; CHECK-NEXT: or.b32 %r90, %r84, %r89;
+; CHECK-NEXT: bfe.u32 %r91, %r77, 1, 2;
+; CHECK-NEXT: shl.b32 %r92, %r91, 23;
+; CHECK-NEXT: or.b32 %r93, %r92, %r79;
+; CHECK-NEXT: shl.b32 %r94, %r80, 22;
+; CHECK-NEXT: or.b32 %r95, %r93, %r94;
+; CHECK-NEXT: add.s32 %r96, %r95, 1056964608;
+; CHECK-NEXT: setp.ne.b32 %p10, %r80, 0;
+; CHECK-NEXT: selp.b32 %r97, %r90, %r96, %p10;
+; CHECK-NEXT: setp.eq.b32 %p11, %r91, 0;
+; CHECK-NEXT: selp.b32 %r98, %r97, %r96, %p11;
+; CHECK-NEXT: or.b32 %r99, %r91, %r80;
+; CHECK-NEXT: setp.eq.b32 %p12, %r99, 0;
+; CHECK-NEXT: selp.b32 %r100, %r79, %r98, %p12;
+; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r52, %r28, %r100, %r76};
+; CHECK-NEXT: ret;
+ %r = call <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4> %x, metadata !"Float4E2M1FN")
+ ret <4 x float> %r
+}
diff --git a/llvm/test/CodeGen/X86/arbitrary-fp-convert-error.ll b/llvm/test/CodeGen/X86/arbitrary-fp-convert-error.ll
new file mode 100644
index 0000000000000..86fd0a5d22e0d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/arbitrary-fp-convert-error.ll
@@ -0,0 +1,76 @@
+; RUN: split-file %s %t
+; RUN: not llc < %t/float8e4m3.ll -mtriple=x86_64-unknown-unknown 2>&1 | FileCheck %s --check-prefix=E4M3
+; RUN: not llc < %t/float8e3m4.ll -mtriple=x86_64-unknown-unknown 2>&1 | FileCheck %s --check-prefix=E3M4
+; RUN: not llc < %t/float8e5m2fnuz.ll -mtriple=x86_64-unknown-unknown 2>&1 | FileCheck %s --check-prefix=E5M2FNUZ
+; RUN: not llc < %t/float8e4m3fnuz.ll -mtriple=x86_64-unknown-unknown 2>&1 | FileCheck %s --check-prefix=E4M3FNUZ
+; RUN: not llc < %t/float8e4m3b11fnuz.ll -mtriple=x86_64-unknown-unknown 2>&1 | FileCheck %s --check-prefix=E4M3B11FNUZ
+; RUN: not llc < %t/float8e8m0fnu.ll -mtriple=x86_64-unknown-unknown 2>&1 | FileCheck %s --check-prefix=E8M0FNU
+
+; Test that llvm.convert.from.arbitrary.fp emits an error for formats that pass
+; verifier validation but are not yet implemented in SelectionDAGBuilder.
+
+;--- float8e4m3.ll
+; E4M3: error: convert_from_arbitrary_fp: not implemented format 'Float8E4M3'
+
+declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
+
+define float @from_f8e4m3(i8 %v) {
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(
+ i8 %v, metadata !"Float8E4M3")
+ ret float %r
+}
+
+;--- float8e3m4.ll
+; E3M4: error: convert_from_arbitrary_fp: not implemented format 'Float8E3M4'
+
+declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
+
+define float @from_f8e3m4(i8 %v) {
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(
+ i8 %v, metadata !"Float8E3M4")
+ ret float %r
+}
+
+;--- float8e5m2fnuz.ll
+; E5M2FNUZ: error: convert_from_arbitrary_fp: not implemented format 'Float8E5M2FNUZ'
+
+declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
+
+define float @from_f8e5m2fnuz(i8 %v) {
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(
+ i8 %v, metadata !"Float8E5M2FNUZ")
+ ret float %r
+}
+
+;--- float8e4m3fnuz.ll
+; E4M3FNUZ: error: convert_from_arbitrary_fp: not implemented format 'Float8E4M3FNUZ'
+
+declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
+
+define float @from_f8e4m3fnuz(i8 %v) {
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(
+ i8 %v, metadata !"Float8E4M3FNUZ")
+ ret float %r
+}
+
+;--- float8e4m3b11fnuz.ll
+; E4M3B11FNUZ: error: convert_from_arbitrary_fp: not implemented format 'Float8E4M3B11FNUZ'
+
+declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
+
+define float @from_f8e4m3b11fnuz(i8 %v) {
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(
+ i8 %v, metadata !"Float8E4M3B11FNUZ")
+ ret float %r
+}
+
+;--- float8e8m0fnu.ll
+; E8M0FNU: error: convert_from_arbitrary_fp: not implemented format 'Float8E8M0FNU'
+
+declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
+
+define float @from_f8e8m0fnu(i8 %v) {
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(
+ i8 %v, metadata !"Float8E8M0FNU")
+ ret float %r
+}
diff --git a/llvm/test/CodeGen/X86/arbitrary-fp-to-float.ll b/llvm/test/CodeGen/X86/arbitrary-fp-to-float.ll
new file mode 100644
index 0000000000000..e60fed64fd179
--- /dev/null
+++ b/llvm/test/CodeGen/X86/arbitrary-fp-to-float.ll
@@ -0,0 +1,727 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+; Test llvm.convert.from.arbitrary.fp intrinsic expansion.
+
+declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
+declare float @llvm.convert.from.arbitrary.fp.f32.i6(i6, metadata)
+declare float @llvm.convert.from.arbitrary.fp.f32.i4(i4, metadata)
+declare <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4>, metadata)
+
+declare half @llvm.convert.from.arbitrary.fp.f16.i8(i8, metadata)
+declare double @llvm.convert.from.arbitrary.fp.f64.i8(i8, metadata)
+
+; Float8E5M2
+; Layout: sign(1) exp(5) mant(2), bias=15
+; Supports: Inf, NaN, signed zero, denormals
+
+; Float8E5M2 normal: 0_01111_00 = 1.0
+define float @from_f8e5m2_normal() {
+; CHECK-LABEL: from_f8e5m2_normal:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 60, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 zero: 0_00000_00 = +0.0
+define float @from_f8e5m2_zero() {
+; CHECK-LABEL: from_f8e5m2_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 negative zero: 1_00000_00 = -0.0
+define float @from_f8e5m2_neg_zero() {
+; CHECK-LABEL: from_f8e5m2_neg_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [-0.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -128, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 denorm: 0_00000_01 = 2^(-16)
+define float @from_f8e5m2_denorm() {
+; CHECK-LABEL: from_f8e5m2_denorm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.52587891E-5,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 +Inf: 0_11111_00
+define float @from_f8e5m2_inf() {
+; CHECK-LABEL: from_f8e5m2_inf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [+Inf,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 124, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 NaN: 0_11111_01
+define float @from_f8e5m2_nan() {
+; CHECK-LABEL: from_f8e5m2_nan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 max: 0_11110_11 = 57344
+define float @from_f8e5m2_max() {
+; CHECK-LABEL: from_f8e5m2_max:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [5.7344E+4,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 123, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 negative: 1_01111_00 = -1.0
+define float @from_f8e5m2_neg() {
+; CHECK-LABEL: from_f8e5m2_neg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -68, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E5M2 runtime arg test
+define float @from_f8e5m2_dynamic(i8 %x) {
+; CHECK-LABEL: from_f8e5m2_dynamic:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %edx
+; CHECK-NEXT: andl $3, %edx
+; CHECK-NEXT: movl %edx, %ecx
+; CHECK-NEXT: shll $21, %ecx
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andl $-128, %eax
+; CHECK-NEXT: shll $24, %eax
+; CHECK-NEXT: shrl $2, %edi
+; CHECK-NEXT: andl $31, %edi
+; CHECK-NEXT: movl %edi, %esi
+; CHECK-NEXT: shll $23, %esi
+; CHECK-NEXT: orl %eax, %esi
+; CHECK-NEXT: leal 939524096(%rcx,%rsi), %esi
+; CHECK-NEXT: bsrl %edx, %r8d
+; CHECK-NEXT: movl %edx, %r9d
+; CHECK-NEXT: btcl %r8d, %r9d
+; CHECK-NEXT: xorl $31, %r8d
+; CHECK-NEXT: leal -8(%r8), %ecx
+; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT: shll %cl, %r9d
+; CHECK-NEXT: movl $142, %ecx
+; CHECK-NEXT: subl %r8d, %ecx
+; CHECK-NEXT: shll $23, %ecx
+; CHECK-NEXT: orl %eax, %ecx
+; CHECK-NEXT: orl %r9d, %ecx
+; CHECK-NEXT: testl %edx, %edx
+; CHECK-NEXT: sete %dl
+; CHECK-NEXT: setne %r8b
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: sete %r9b
+; CHECK-NEXT: testb %r8b, %r9b
+; CHECK-NEXT: cmovel %esi, %ecx
+; CHECK-NEXT: testb %dl, %r9b
+; CHECK-NEXT: cmovnel %eax, %ecx
+; CHECK-NEXT: orl $2139095040, %eax # imm = 0x7F800000
+; CHECK-NEXT: cmpl $31, %edi
+; CHECK-NEXT: sete %sil
+; CHECK-NEXT: testb %dl, %sil
+; CHECK-NEXT: cmovel %ecx, %eax
+; CHECK-NEXT: testb %r8b, %sil
+; CHECK-NEXT: movl $2143289344, %ecx # imm = 0x7FC00000
+; CHECK-NEXT: cmovel %eax, %ecx
+; CHECK-NEXT: movd %ecx, %xmm0
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E5M2")
+ ret float %r
+}
+
+; Float8E4M3FN (NanOnly, NanEncoding=AllOnes)
+; Layout: sign(1) exp(4) mant(3), maxExp=8, minExp=-6, bias=7
+; Only 0_1111_111 and 1_1111_111 are NaN; all other exp=15 values are finite.
+
+; Float8E4M3FN normal: 0_0111_000 = 1.0
+define float @from_f8e4m3fn_normal() {
+; CHECK-LABEL: from_f8e4m3fn_normal:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 56, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float8E4M3FN NaN: 0_1111_111
+define float @from_f8e4m3fn_nan() {
+; CHECK-LABEL: from_f8e4m3fn_nan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 127, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float8E4M3FN not-NaN: 0_1111_110 = 448
+; Despite exp=all-ones, this is a valid finite number (max value)
+define float @from_f8e4m3fn_max() {
+; CHECK-LABEL: from_f8e4m3fn_max:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.48E+2,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 126, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float8E4M3FN not-NaN: 0_1111_101 = 416
+; exp=all-ones but mant!=all-ones so this is finite
+define float @from_f8e4m3fn_not_nan() {
+; CHECK-LABEL: from_f8e4m3fn_not_nan:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.16E+2,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float8E4M3FN zero: 0_0000_000 = +0.0
+define float @from_f8e4m3fn_zero() {
+; CHECK-LABEL: from_f8e4m3fn_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float8E4M3FN denorm: 0_0000_001 = 2^(-9)
+define float @from_f8e4m3fn_denorm() {
+; CHECK-LABEL: from_f8e4m3fn_denorm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.953125E-3,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float8E4M3FN runtime arg test
+define float @from_f8e4m3fn_dynamic(i8 %x) {
+; CHECK-LABEL: from_f8e4m3fn_dynamic:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andl $7, %eax
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: shll $20, %ecx
+; CHECK-NEXT: movl %edi, %edx
+; CHECK-NEXT: andl $-128, %edx
+; CHECK-NEXT: shll $24, %edx
+; CHECK-NEXT: shrl $3, %edi
+; CHECK-NEXT: andl $15, %edi
+; CHECK-NEXT: movl %edi, %esi
+; CHECK-NEXT: shll $23, %esi
+; CHECK-NEXT: orl %edx, %esi
+; CHECK-NEXT: leal 1006632960(%rcx,%rsi), %esi
+; CHECK-NEXT: bsrl %eax, %r8d
+; CHECK-NEXT: movl %eax, %r9d
+; CHECK-NEXT: btcl %r8d, %r9d
+; CHECK-NEXT: xorl $31, %r8d
+; CHECK-NEXT: leal -8(%r8), %ecx
+; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT: shll %cl, %r9d
+; CHECK-NEXT: movl $149, %ecx
+; CHECK-NEXT: subl %r8d, %ecx
+; CHECK-NEXT: shll $23, %ecx
+; CHECK-NEXT: orl %edx, %ecx
+; CHECK-NEXT: orl %r9d, %ecx
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: sete %r8b
+; CHECK-NEXT: setne %r9b
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: sete %r10b
+; CHECK-NEXT: testb %r9b, %r10b
+; CHECK-NEXT: cmovel %esi, %ecx
+; CHECK-NEXT: testb %r8b, %r10b
+; CHECK-NEXT: cmovnel %edx, %ecx
+; CHECK-NEXT: cmpl $7, %eax
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: cmpl $15, %edi
+; CHECK-NEXT: sete %dl
+; CHECK-NEXT: testb %al, %dl
+; CHECK-NEXT: movl $2143289344, %eax # imm = 0x7FC00000
+; CHECK-NEXT: cmovel %ecx, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E4M3FN")
+ ret float %r
+}
+
+; Float6E3M2FN (FiniteOnly)
+; Layout: sign(1) exp(3) mant(2), bias=3, maxExp=4
+; No Inf, no NaN. All bit patterns are finite.
+
+; Float6E3M2FN normal: 0_011_00 = 1.0
+define float @from_f6e3m2fn_normal() {
+; CHECK-LABEL: from_f6e3m2fn_normal:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 12, metadata !"Float6E3M2FN")
+ ret float %r
+}
+
+; Float6E3M2FN max: 0_111_11 = 28.0
+define float @from_f6e3m2fn_max() {
+; CHECK-LABEL: from_f6e3m2fn_max:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [2.8E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E3M2FN")
+ ret float %r
+}
+
+; Float6E3M2FN denorm: 0_000_01 = 0.0625
+define float @from_f6e3m2fn_denorm() {
+; CHECK-LABEL: from_f6e3m2fn_denorm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [6.25E-2,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E3M2FN")
+ ret float %r
+}
+
+; Float6E3M2FN zero: i6 0 = 0_000_00 = +0.0.
+; The checks expect the result to be produced by zeroing xmm0 (xorps).
+define float @from_f6e3m2fn_zero() {
+; CHECK-LABEL: from_f6e3m2fn_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E3M2FN")
+ ret float %r
+}
+
+; Float6E3M2FN negative: 1_011_00 = -1.0.
+; The bit pattern is 44 when read as unsigned, which is written as i6 -20 in
+; the IR below; the set sign bit must carry over to the f32 result.
+define float @from_f6e3m2fn_neg() {
+; CHECK-LABEL: from_f6e3m2fn_neg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 -20, metadata !"Float6E3M2FN")
+ ret float %r
+}
+
+; Float6E3M2FN runtime arg test: with a non-constant input the expansion is
+; emitted inline as integer code. The expected sequence:
+;  - extracts the 2-bit mantissa (andl $3) and the 3-bit exponent (shrl $2,
+;    andl $7), and moves the sign from bit 5 to bit 31 (andl $-32, shll $26);
+;  - normal path: mantissa << 21 and exponent << 23, plus 1040187392
+;    (0x3E000000 = (127 - 3) << 23) to rebias the exponent for f32;
+;  - denormal path: bsrl finds the mantissa MSB, btcl clears it, xorl $31
+;    turns the bit index into a leading-zero count (ctlz); the remaining
+;    fraction is shifted left by ctlz - 8 and the f32 exponent is 154 - ctlz;
+;  - selection: the denormal result is kept when exponent == 0 and
+;    mantissa != 0, the bare sign bit (signed zero) is used when both are 0,
+;    and the normal result otherwise.
+; No NaN/Inf select is expected because the format is finite-only.
+define float @from_f6e3m2fn_dynamic(i6 %x) {
+; CHECK-LABEL: from_f6e3m2fn_dynamic:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %edx
+; CHECK-NEXT: andl $3, %edx
+; CHECK-NEXT: movl %edx, %ecx
+; CHECK-NEXT: shll $21, %ecx
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andl $-32, %eax
+; CHECK-NEXT: shll $26, %eax
+; CHECK-NEXT: shrl $2, %edi
+; CHECK-NEXT: andl $7, %edi
+; CHECK-NEXT: movl %edi, %esi
+; CHECK-NEXT: shll $23, %esi
+; CHECK-NEXT: orl %eax, %esi
+; CHECK-NEXT: leal 1040187392(%rcx,%rsi), %esi
+; CHECK-NEXT: bsrl %edx, %r8d
+; CHECK-NEXT: movl %edx, %r9d
+; CHECK-NEXT: btcl %r8d, %r9d
+; CHECK-NEXT: xorl $31, %r8d
+; CHECK-NEXT: leal -8(%r8), %ecx
+; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT: shll %cl, %r9d
+; CHECK-NEXT: movl $154, %ecx
+; CHECK-NEXT: subl %r8d, %ecx
+; CHECK-NEXT: shll $23, %ecx
+; CHECK-NEXT: orl %eax, %ecx
+; CHECK-NEXT: orl %r9d, %ecx
+; CHECK-NEXT: testl %edx, %edx
+; CHECK-NEXT: sete %dl
+; CHECK-NEXT: setne %r8b
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: sete %dil
+; CHECK-NEXT: testb %r8b, %dil
+; CHECK-NEXT: cmovel %esi, %ecx
+; CHECK-NEXT: testb %dl, %dil
+; CHECK-NEXT: cmovnel %eax, %ecx
+; CHECK-NEXT: movd %ecx, %xmm0
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E3M2FN")
+ ret float %r
+}
+
+; Float6E2M3FN (FiniteOnly)
+; Layout: sign(1) exp(2) mant(3), bias=1, maxExp=2
+; No Inf, no NaN. All bit patterns are finite.
+
+; Float6E2M3FN normal: i6 8 = 0_01_000 (exponent field 1 equals the bias,
+; mantissa 0) = 1.0.
+; The input is a constant, so the checks expect nothing but a load of the f32
+; constant 1.0.
+define float @from_f6e2m3fn_normal() {
+; CHECK-LABEL: from_f6e2m3fn_normal:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 8, metadata !"Float6E2M3FN")
+ ret float %r
+}
+
+; Float6E2M3FN max: i6 31 = 0_11_111 (exponent field 3 - bias 1 = 2,
+; significand 1.875) = 1.875 * 2^2 = 7.5.
+; Same integer input as from_f6e3m2fn_max; only the format metadata differs.
+define float @from_f6e2m3fn_max() {
+; CHECK-LABEL: from_f6e2m3fn_max:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [7.5E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E2M3FN")
+ ret float %r
+}
+
+; Float6E2M3FN denorm: i6 1 = 0_00_001 (exponent field 0, mantissa 1)
+; = 0.125 * 2^(1 - 1) = 0.125.
+; This is the smallest positive denormal of the format.
+define float @from_f6e2m3fn_denorm() {
+; CHECK-LABEL: from_f6e2m3fn_denorm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.25E-1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E2M3FN")
+ ret float %r
+}
+
+; Float6E2M3FN zero: i6 0 = 0_00_000 = +0.0.
+; The checks expect the result to be produced by zeroing xmm0 (xorps).
+define float @from_f6e2m3fn_zero() {
+; CHECK-LABEL: from_f6e2m3fn_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E2M3FN")
+ ret float %r
+}
+
+; Float6E2M3FN runtime arg test: with a non-constant input the expansion is
+; emitted inline as integer code. The expected sequence:
+;  - extracts the 3-bit mantissa (andl $7) and the 2-bit exponent (shrl $3,
+;    andl $3), and moves the sign from bit 5 to bit 31 (andl $-32, shll $26);
+;  - normal path: mantissa << 20 and exponent << 23, plus 1056964608
+;    (0x3F000000 = (127 - 1) << 23) to rebias the exponent for f32;
+;  - denormal path: bsrl finds the mantissa MSB, btcl clears it, xorl $31
+;    turns the bit index into a leading-zero count (ctlz); the remaining
+;    fraction is shifted left by ctlz - 8 and the f32 exponent is 155 - ctlz;
+;  - selection: the denormal result is kept when exponent == 0 and
+;    mantissa != 0, the bare sign bit (signed zero) is used when both are 0,
+;    and the normal result otherwise.
+; No NaN/Inf select is expected because the format is finite-only.
+define float @from_f6e2m3fn_dynamic(i6 %x) {
+; CHECK-LABEL: from_f6e2m3fn_dynamic:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %edx
+; CHECK-NEXT: andl $7, %edx
+; CHECK-NEXT: movl %edx, %ecx
+; CHECK-NEXT: shll $20, %ecx
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andl $-32, %eax
+; CHECK-NEXT: shll $26, %eax
+; CHECK-NEXT: shrl $3, %edi
+; CHECK-NEXT: andl $3, %edi
+; CHECK-NEXT: movl %edi, %esi
+; CHECK-NEXT: shll $23, %esi
+; CHECK-NEXT: orl %eax, %esi
+; CHECK-NEXT: leal 1056964608(%rcx,%rsi), %esi
+; CHECK-NEXT: bsrl %edx, %r8d
+; CHECK-NEXT: movl %edx, %r9d
+; CHECK-NEXT: btcl %r8d, %r9d
+; CHECK-NEXT: xorl $31, %r8d
+; CHECK-NEXT: leal -8(%r8), %ecx
+; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT: shll %cl, %r9d
+; CHECK-NEXT: movl $155, %ecx
+; CHECK-NEXT: subl %r8d, %ecx
+; CHECK-NEXT: shll $23, %ecx
+; CHECK-NEXT: orl %eax, %ecx
+; CHECK-NEXT: orl %r9d, %ecx
+; CHECK-NEXT: testl %edx, %edx
+; CHECK-NEXT: sete %dl
+; CHECK-NEXT: setne %r8b
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: sete %dil
+; CHECK-NEXT: testb %r8b, %dil
+; CHECK-NEXT: cmovel %esi, %ecx
+; CHECK-NEXT: testb %dl, %dil
+; CHECK-NEXT: cmovnel %eax, %ecx
+; CHECK-NEXT: movd %ecx, %xmm0
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E2M3FN")
+ ret float %r
+}
+
+; Float4E2M1FN (FiniteOnly)
+; Layout: sign(1) exp(2) mant(1), bias=1, maxExp=2
+; No Inf, no NaN.
+
+; Float4E2M1FN normal: i4 2 = 0_01_0 (exponent field 1 equals the bias,
+; mantissa 0) = 1.0.
+; The input is a constant, so the checks expect nothing but a load of the f32
+; constant 1.0.
+define float @from_f4e2m1fn_normal() {
+; CHECK-LABEL: from_f4e2m1fn_normal:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 2, metadata !"Float4E2M1FN")
+ ret float %r
+}
+
+; Float4E2M1FN denorm: i4 1 = 0_00_1 (exponent field 0, mantissa 1)
+; = 0.5 * 2^(1 - 1) = 0.5.
+; With a single mantissa bit this is the only positive denormal of the format.
+define float @from_f4e2m1fn_denorm() {
+; CHECK-LABEL: from_f4e2m1fn_denorm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [5.0E-1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 1, metadata !"Float4E2M1FN")
+ ret float %r
+}
+
+; Float4E2M1FN max: i4 7 = 0_11_1 (exponent field 3 - bias 1 = 2,
+; significand 1.5) = 1.5 * 2^2 = 6.0.
+; Finite-only format: the all-ones exponent encodes an ordinary value.
+define float @from_f4e2m1fn_max() {
+; CHECK-LABEL: from_f4e2m1fn_max:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [6.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 7, metadata !"Float4E2M1FN")
+ ret float %r
+}
+
+; Float4E2M1FN runtime arg test: with a non-constant input the expansion is
+; emitted inline as integer code. The expected sequence:
+;  - extracts the 1-bit mantissa (andl $1) and the 2-bit exponent (shrl,
+;    andl $3), and moves the sign from bit 3 to bit 31 (andl $-8, shll $28);
+;  - normal path: mantissa << 22 and exponent << 23, plus 1056964608
+;    (0x3F000000 = (127 - 1) << 23) to rebias the exponent for f32;
+;  - denormal path: bsrl finds the mantissa MSB, btcl clears it, xorl $31
+;    turns the bit index into a leading-zero count (ctlz); the remaining
+;    fraction is shifted left by ctlz - 8 and the f32 exponent is 157 - ctlz;
+;  - selection: the denormal result is kept when exponent == 0 and
+;    mantissa != 0, the bare sign bit (signed zero) is used when both are 0,
+;    and the normal result otherwise.
+; No NaN/Inf select is expected because the format is finite-only.
+define float @from_f4e2m1fn_dynamic(i4 %x) {
+; CHECK-LABEL: from_f4e2m1fn_dynamic:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %edx
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: movl %edx, %ecx
+; CHECK-NEXT: shll $22, %ecx
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andl $-8, %eax
+; CHECK-NEXT: shll $28, %eax
+; CHECK-NEXT: shrl %edi
+; CHECK-NEXT: andl $3, %edi
+; CHECK-NEXT: movl %edi, %esi
+; CHECK-NEXT: shll $23, %esi
+; CHECK-NEXT: orl %eax, %esi
+; CHECK-NEXT: leal 1056964608(%rcx,%rsi), %esi
+; CHECK-NEXT: bsrl %edx, %r8d
+; CHECK-NEXT: movl %edx, %r9d
+; CHECK-NEXT: btcl %r8d, %r9d
+; CHECK-NEXT: xorl $31, %r8d
+; CHECK-NEXT: leal -8(%r8), %ecx
+; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT: shll %cl, %r9d
+; CHECK-NEXT: movl $157, %ecx
+; CHECK-NEXT: subl %r8d, %ecx
+; CHECK-NEXT: shll $23, %ecx
+; CHECK-NEXT: orl %eax, %ecx
+; CHECK-NEXT: orl %r9d, %ecx
+; CHECK-NEXT: testl %edx, %edx
+; CHECK-NEXT: sete %dl
+; CHECK-NEXT: setne %r8b
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: sete %dil
+; CHECK-NEXT: testb %r8b, %dil
+; CHECK-NEXT: cmovel %esi, %ecx
+; CHECK-NEXT: testb %dl, %dil
+; CHECK-NEXT: cmovnel %eax, %ecx
+; CHECK-NEXT: movd %ecx, %xmm0
+; CHECK-NEXT: retq
+ %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 %x, metadata !"Float4E2M1FN")
+ ret float %r
+}
+
+; Float8E5M2 to f16: 1.0
+; Checks a destination type other than f32: the constant input i8 60 (0x3C) is
+; converted to half, and the checks expect the result to be loaded from a
+; constant-pool entry with pinsrw.
+define half @from_f8e5m2_to_f16() {
+; CHECK-LABEL: from_f8e5m2_to_f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %r = call half @llvm.convert.from.arbitrary.fp.f16.i8(i8 60, metadata !"Float8E5M2")
+ ret half %r
+}
+
+; Float8E5M2 to f64: 1.0
+; Same constant input as the f16 test (i8 60), converted to double; the checks
+; expect a load of the f64 constant 1.0.
+define double @from_f8e5m2_to_f64() {
+; CHECK-LABEL: from_f8e5m2_to_f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+ %r = call double @llvm.convert.from.arbitrary.fp.f64.i8(i8 60, metadata !"Float8E5M2")
+ ret double %r
+}
+
+; Declaration of the bfloat-returning i8 overload called by
+; from_f8e5m2_to_bf16.
+declare bfloat @llvm.convert.from.arbitrary.fp.bf16.i8(i8, metadata)
+
+; Float8E5M2 to bf16: 1.0
+; bf16 has: sign(1) exp(8) mant(7), bias=127
+; The constant input i8 60 (0x3C) is converted to bfloat. The checks expect
+; the value to be materialized as the f32 constant 1.0 and then narrowed to
+; bf16 through the __truncsfbf2 libcall, hence the stack adjustment around the
+; call.
+define bfloat @from_f8e5m2_to_bf16() {
+; CHECK-LABEL: from_f8e5m2_to_bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq __truncsfbf2@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+ %r = call bfloat @llvm.convert.from.arbitrary.fp.bf16.i8(i8 60, metadata !"Float8E5M2")
+ ret bfloat %r
+}
+
+; Vector test: Float4E2M1FN <4 x i4> -> <4 x float>
+; The checks expect the conversion to be scalarized: each of the four lanes is
+; moved to a general-purpose register (pshufd/movd), converted with the same
+; integer sequence as in from_f4e2m1fn_dynamic, and the four results are
+; reassembled into one vector with punpckldq/punpcklqdq.
+define <4 x float> @fp4_to_f32_vec(<4 x i4> %x) {
+; CHECK-LABEL: fp4_to_f32_vec:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
+; CHECK-NEXT: movd %xmm1, %esi
+; CHECK-NEXT: movl %esi, %edi
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: shll $22, %eax
+; CHECK-NEXT: movl %esi, %edx
+; CHECK-NEXT: andl $-8, %edx
+; CHECK-NEXT: shll $28, %edx
+; CHECK-NEXT: shrl %esi
+; CHECK-NEXT: andl $3, %esi
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: shll $23, %ecx
+; CHECK-NEXT: orl %edx, %ecx
+; CHECK-NEXT: leal 1056964608(%rax,%rcx), %r8d
+; CHECK-NEXT: bsrl %edi, %r9d
+; CHECK-NEXT: movl %edi, %r10d
+; CHECK-NEXT: btcl %r9d, %r10d
+; CHECK-NEXT: xorl $31, %r9d
+; CHECK-NEXT: leal -8(%r9), %ecx
+; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT: shll %cl, %r10d
+; CHECK-NEXT: movl $157, %eax
+; CHECK-NEXT: movl $157, %ecx
+; CHECK-NEXT: subl %r9d, %ecx
+; CHECK-NEXT: shll $23, %ecx
+; CHECK-NEXT: orl %edx, %ecx
+; CHECK-NEXT: orl %r10d, %ecx
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: sete %dil
+; CHECK-NEXT: setne %r9b
+; CHECK-NEXT: testl %esi, %esi
+; CHECK-NEXT: sete %sil
+; CHECK-NEXT: testb %r9b, %sil
+; CHECK-NEXT: cmovel %r8d, %ecx
+; CHECK-NEXT: testb %dil, %sil
+; CHECK-NEXT: cmovnel %edx, %ecx
+; CHECK-NEXT: movd %ecx, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
+; CHECK-NEXT: movd %xmm2, %esi
+; CHECK-NEXT: movl %esi, %edi
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: movl %edi, %ecx
+; CHECK-NEXT: shll $22, %ecx
+; CHECK-NEXT: movl %esi, %edx
+; CHECK-NEXT: andl $-8, %edx
+; CHECK-NEXT: shll $28, %edx
+; CHECK-NEXT: shrl %esi
+; CHECK-NEXT: andl $3, %esi
+; CHECK-NEXT: movl %esi, %r8d
+; CHECK-NEXT: shll $23, %r8d
+; CHECK-NEXT: orl %edx, %r8d
+; CHECK-NEXT: leal 1056964608(%rcx,%r8), %r8d
+; CHECK-NEXT: bsrl %edi, %r9d
+; CHECK-NEXT: movl %edi, %r10d
+; CHECK-NEXT: btcl %r9d, %r10d
+; CHECK-NEXT: xorl $31, %r9d
+; CHECK-NEXT: leal -8(%r9), %ecx
+; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT: shll %cl, %r10d
+; CHECK-NEXT: movl $157, %ecx
+; CHECK-NEXT: subl %r9d, %ecx
+; CHECK-NEXT: shll $23, %ecx
+; CHECK-NEXT: orl %edx, %ecx
+; CHECK-NEXT: orl %r10d, %ecx
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: sete %dil
+; CHECK-NEXT: setne %r9b
+; CHECK-NEXT: testl %esi, %esi
+; CHECK-NEXT: sete %sil
+; CHECK-NEXT: testb %r9b, %sil
+; CHECK-NEXT: cmovel %r8d, %ecx
+; CHECK-NEXT: testb %dil, %sil
+; CHECK-NEXT: cmovnel %edx, %ecx
+; CHECK-NEXT: movd %ecx, %xmm2
+; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-NEXT: movd %xmm0, %esi
+; CHECK-NEXT: movl %esi, %edi
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: movl %edi, %ecx
+; CHECK-NEXT: shll $22, %ecx
+; CHECK-NEXT: movl %esi, %edx
+; CHECK-NEXT: andl $-8, %edx
+; CHECK-NEXT: shll $28, %edx
+; CHECK-NEXT: shrl %esi
+; CHECK-NEXT: andl $3, %esi
+; CHECK-NEXT: movl %esi, %r8d
+; CHECK-NEXT: shll $23, %r8d
+; CHECK-NEXT: orl %edx, %r8d
+; CHECK-NEXT: leal 1056964608(%rcx,%r8), %r8d
+; CHECK-NEXT: bsrl %edi, %r9d
+; CHECK-NEXT: movl %edi, %r10d
+; CHECK-NEXT: btcl %r9d, %r10d
+; CHECK-NEXT: xorl $31, %r9d
+; CHECK-NEXT: leal -8(%r9), %ecx
+; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT: shll %cl, %r10d
+; CHECK-NEXT: movl $157, %ecx
+; CHECK-NEXT: subl %r9d, %ecx
+; CHECK-NEXT: shll $23, %ecx
+; CHECK-NEXT: orl %edx, %ecx
+; CHECK-NEXT: orl %r10d, %ecx
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: sete %dil
+; CHECK-NEXT: setne %r9b
+; CHECK-NEXT: testl %esi, %esi
+; CHECK-NEXT: sete %sil
+; CHECK-NEXT: testb %r9b, %sil
+; CHECK-NEXT: cmovel %r8d, %ecx
+; CHECK-NEXT: testb %dil, %sil
+; CHECK-NEXT: cmovnel %edx, %ecx
+; CHECK-NEXT: movd %ecx, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: movd %xmm0, %esi
+; CHECK-NEXT: movl %esi, %edi
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: movl %edi, %ecx
+; CHECK-NEXT: shll $22, %ecx
+; CHECK-NEXT: movl %esi, %edx
+; CHECK-NEXT: andl $-8, %edx
+; CHECK-NEXT: shll $28, %edx
+; CHECK-NEXT: shrl %esi
+; CHECK-NEXT: andl $3, %esi
+; CHECK-NEXT: movl %esi, %r8d
+; CHECK-NEXT: shll $23, %r8d
+; CHECK-NEXT: orl %edx, %r8d
+; CHECK-NEXT: leal 1056964608(%rcx,%r8), %r8d
+; CHECK-NEXT: bsrl %edi, %r9d
+; CHECK-NEXT: movl %edi, %r10d
+; CHECK-NEXT: btcl %r9d, %r10d
+; CHECK-NEXT: xorl $31, %r9d
+; CHECK-NEXT: leal -8(%r9), %ecx
+; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT: shll %cl, %r10d
+; CHECK-NEXT: subl %r9d, %eax
+; CHECK-NEXT: shll $23, %eax
+; CHECK-NEXT: orl %edx, %eax
+; CHECK-NEXT: orl %r10d, %eax
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: sete %cl
+; CHECK-NEXT: setne %dil
+; CHECK-NEXT: testl %esi, %esi
+; CHECK-NEXT: sete %sil
+; CHECK-NEXT: testb %dil, %sil
+; CHECK-NEXT: cmovel %r8d, %eax
+; CHECK-NEXT: testb %cl, %sil
+; CHECK-NEXT: cmovnel %edx, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %r = call <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4> %x, metadata !"Float4E2M1FN")
+ ret <4 x float> %r
+}
More information about the llvm-commits
mailing list