[llvm] 7299250 - DAG: Use fast variants of fast math libcalls (#147481)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 12 18:41:48 PDT 2025
Author: Matt Arsenault
Date: 2025-07-13T10:41:45+09:00
New Revision: 7299250c030546b6811f5a1e0c4fd86f4380192e
URL: https://github.com/llvm/llvm-project/commit/7299250c030546b6811f5a1e0c4fd86f4380192e
DIFF: https://github.com/llvm/llvm-project/commit/7299250c030546b6811f5a1e0c4fd86f4380192e.diff
LOG: DAG: Use fast variants of fast math libcalls (#147481)
Hexagon currently has an untested global flag to control fast-math
variants of libcalls. Add the fast variants as explicit libcall
options so this can be a flag-based lowering decision, and implement
it. I have no idea what fast-math flags the Hexagon case requires,
so I picked the maximal potentially relevant set of flags, although
this is probably refinable per call. Looking in compiler-rt, I'm not
sure whether the fast variants are anything more than aliases.
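For reference, the flag set the new lowering checks for is
nnan + ninf + nsz + afn (see canUseFastMathLibcall in the diff below).
A minimal IR sketch, mirroring the added test, that should select the
fast double-precision add on Hexagon:

  define double @example(double %x, double %y) {
    ; All four flags are required; dropping any one of them falls
    ; back to the standard __hexagon_adddf3 libcall.
    %r = fadd nnan ninf nsz afn double %x, %y
    ret double %r
  }

Compiled with llc -mtriple=hexagon -mcpu=hexagonv5, this lowers to a
call to __hexagon_fast_adddf3, as the new test checks.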
Added:
llvm/test/CodeGen/Hexagon/fast-math-libcalls.ll
Modified:
llvm/include/llvm/IR/RuntimeLibcalls.td
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/IR/RuntimeLibcalls.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td
index c236e698759cc..57f5d9fd6d3a6 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -62,13 +62,24 @@ foreach IntTy = ["I32", "I64", "I128"] in {
foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in {
def ADD_#FPTy : RuntimeLibcall;
+ def FAST_ADD_#FPTy : RuntimeLibcall;
+
def SUB_#FPTy : RuntimeLibcall;
+ def FAST_SUB_#FPTy : RuntimeLibcall;
+
def MUL_#FPTy : RuntimeLibcall;
+ def FAST_MUL_#FPTy : RuntimeLibcall;
+
def DIV_#FPTy : RuntimeLibcall;
+ def FAST_DIV_#FPTy : RuntimeLibcall;
+
def REM_#FPTy : RuntimeLibcall;
def FMA_#FPTy : RuntimeLibcall;
def POWI_#FPTy : RuntimeLibcall;
+
def SQRT_#FPTy : RuntimeLibcall;
+ def FAST_SQRT_#FPTy : RuntimeLibcall;
+
def CBRT_#FPTy : RuntimeLibcall;
def LOG_#FPTy : RuntimeLibcall;
def LOG_FINITE_#FPTy : RuntimeLibcall;
@@ -1470,27 +1481,26 @@ def __hexagon_moddi3 : RuntimeLibcallImpl<SREM_I64>;
def __hexagon_umodsi3 : RuntimeLibcallImpl<UREM_I32>;
def __hexagon_umoddi3 : RuntimeLibcallImpl<UREM_I64>;
-// FIXME: "Fast" versions should be treated as a separate RTLIB::FAST_* function
def __hexagon_adddf3 : RuntimeLibcallImpl<ADD_F64>;
-def __hexagon_fast_adddf3 : RuntimeLibcallImpl<ADD_F64>;
+def __hexagon_fast_adddf3 : RuntimeLibcallImpl<FAST_ADD_F64>;
def __hexagon_subdf3 : RuntimeLibcallImpl<SUB_F64>;
-def __hexagon_fast_subdf3 : RuntimeLibcallImpl<SUB_F64>;
+def __hexagon_fast_subdf3 : RuntimeLibcallImpl<FAST_SUB_F64>;
def __hexagon_muldf3 : RuntimeLibcallImpl<MUL_F64>;
-def __hexagon_fast_muldf3 : RuntimeLibcallImpl<MUL_F64>;
+def __hexagon_fast_muldf3 : RuntimeLibcallImpl<FAST_MUL_F64>;
def __hexagon_divdf3 : RuntimeLibcallImpl<DIV_F64>;
-def __hexagon_fast_divdf3 : RuntimeLibcallImpl<DIV_F64>;
+def __hexagon_fast_divdf3 : RuntimeLibcallImpl<FAST_DIV_F64>;
def __hexagon_divsf3 : RuntimeLibcallImpl<DIV_F32>;
-def __hexagon_fast_divsf3 : RuntimeLibcallImpl<DIV_F32>;
+def __hexagon_fast_divsf3 : RuntimeLibcallImpl<FAST_DIV_F32>;
def __hexagon_sqrtf : RuntimeLibcallImpl<SQRT_F32>;
-def __hexagon_fast2_sqrtf : RuntimeLibcallImpl<SQRT_F32>;
+def __hexagon_fast2_sqrtf : RuntimeLibcallImpl<FAST_SQRT_F32>;
// This is the only fast library function for sqrtd.
-def __hexagon_fast2_sqrtdf2 : RuntimeLibcallImpl<SQRT_F64>;
+def __hexagon_fast2_sqrtdf2 : RuntimeLibcallImpl<FAST_SQRT_F64>;
def __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
: RuntimeLibcallImpl<HEXAGON_MEMCPY_LIKELY_ALIGNED_MIN32BYTES_MULT8BYTES>;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 528136a55f14a..7266940c94bf1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -140,12 +140,19 @@ class SelectionDAGLegalize {
RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128,
SmallVectorImpl<SDValue> &Results);
- SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
- RTLIB::Libcall Call_I8,
- RTLIB::Libcall Call_I16,
- RTLIB::Libcall Call_I32,
- RTLIB::Libcall Call_I64,
- RTLIB::Libcall Call_I128);
+
+ void
+ ExpandFastFPLibCall(SDNode *Node, bool IsFast,
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32,
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64,
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80,
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128,
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results);
+
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128);
void ExpandArgFPLibCall(SDNode *Node,
RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
@@ -2228,6 +2235,37 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
ExpandFPLibCall(Node, LC, Results);
}
+void SelectionDAGLegalize::ExpandFastFPLibCall(
+ SDNode *Node, bool IsFast,
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32,
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64,
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80,
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128,
+ std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results) {
+
+ EVT VT = Node->getSimpleValueType(0);
+
+ RTLIB::Libcall LC;
+
+ // FIXME: Probably should define fast to respect nan/inf and only be
+ // approximate functions.
+
+ if (IsFast) {
+ LC = RTLIB::getFPLibCall(VT, Call_F32.first, Call_F64.first, Call_F80.first,
+ Call_F128.first, Call_PPCF128.first);
+ }
+
+ if (!IsFast || TLI.getLibcallImpl(LC) == RTLIB::Unsupported) {
+ // Fall back if we don't have a fast implementation.
+ LC = RTLIB::getFPLibCall(VT, Call_F32.second, Call_F64.second,
+ Call_F80.second, Call_F128.second,
+ Call_PPCF128.second);
+ }
+
+ ExpandFPLibCall(Node, LC, Results);
+}
+
SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
RTLIB::Libcall Call_I8,
RTLIB::Libcall Call_I16,
@@ -4514,6 +4552,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
return true;
}
+/// Return if we can use the FAST_* variant of a math libcall for the node.
+/// FIXME: This is just guessing, we probably should have unique specific sets
+/// flags required per libcall.
+static bool canUseFastMathLibcall(const SDNode *Node) {
+ // FIXME: Probably should define fast to respect nan/inf and only be
+ // approximate functions.
+
+ SDNodeFlags Flags = Node->getFlags();
+ return Flags.hasApproximateFuncs() && Flags.hasNoNaNs() &&
+ Flags.hasNoInfs() && Flags.hasNoSignedZeros();
+}
+
void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
LLVM_DEBUG(dbgs() << "Trying to convert node to libcall\n");
SmallVector<SDValue, 8> Results;
@@ -4634,11 +4684,18 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::FMAXIMUM_NUM_PPCF128, Results);
break;
case ISD::FSQRT:
- case ISD::STRICT_FSQRT:
- ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
- RTLIB::SQRT_F80, RTLIB::SQRT_F128,
- RTLIB::SQRT_PPCF128, Results);
+ case ISD::STRICT_FSQRT: {
+ // FIXME: Probably should define fast to respect nan/inf and only be
+ // approximate functions.
+ ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
+ {RTLIB::FAST_SQRT_F32, RTLIB::SQRT_F32},
+ {RTLIB::FAST_SQRT_F64, RTLIB::SQRT_F64},
+ {RTLIB::FAST_SQRT_F80, RTLIB::SQRT_F80},
+ {RTLIB::FAST_SQRT_F128, RTLIB::SQRT_F128},
+ {RTLIB::FAST_SQRT_PPCF128, RTLIB::SQRT_PPCF128},
+ Results);
break;
+ }
case ISD::FCBRT:
ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
RTLIB::CBRT_F80, RTLIB::CBRT_F128,
@@ -4875,11 +4932,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::LLRINT_PPCF128, Results);
break;
case ISD::FDIV:
- case ISD::STRICT_FDIV:
- ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
- RTLIB::DIV_F80, RTLIB::DIV_F128,
- RTLIB::DIV_PPCF128, Results);
+ case ISD::STRICT_FDIV: {
+ ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
+ {RTLIB::FAST_DIV_F32, RTLIB::DIV_F32},
+ {RTLIB::FAST_DIV_F64, RTLIB::DIV_F64},
+ {RTLIB::FAST_DIV_F80, RTLIB::DIV_F80},
+ {RTLIB::FAST_DIV_F128, RTLIB::DIV_F128},
+ {RTLIB::FAST_DIV_PPCF128, RTLIB::DIV_PPCF128}, Results);
break;
+ }
case ISD::FREM:
case ISD::STRICT_FREM:
ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
@@ -4893,17 +4954,25 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::FMA_PPCF128, Results);
break;
case ISD::FADD:
- case ISD::STRICT_FADD:
- ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
- RTLIB::ADD_F80, RTLIB::ADD_F128,
- RTLIB::ADD_PPCF128, Results);
+ case ISD::STRICT_FADD: {
+ ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
+ {RTLIB::FAST_ADD_F32, RTLIB::ADD_F32},
+ {RTLIB::FAST_ADD_F64, RTLIB::ADD_F64},
+ {RTLIB::FAST_ADD_F80, RTLIB::ADD_F80},
+ {RTLIB::FAST_ADD_F128, RTLIB::ADD_F128},
+ {RTLIB::FAST_ADD_PPCF128, RTLIB::ADD_PPCF128}, Results);
break;
+ }
case ISD::FMUL:
- case ISD::STRICT_FMUL:
- ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
- RTLIB::MUL_F80, RTLIB::MUL_F128,
- RTLIB::MUL_PPCF128, Results);
+ case ISD::STRICT_FMUL: {
+ ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
+ {RTLIB::FAST_MUL_F32, RTLIB::MUL_F32},
+ {RTLIB::FAST_MUL_F64, RTLIB::MUL_F64},
+ {RTLIB::FAST_MUL_F80, RTLIB::MUL_F80},
+ {RTLIB::FAST_MUL_F128, RTLIB::MUL_F128},
+ {RTLIB::FAST_MUL_PPCF128, RTLIB::MUL_PPCF128}, Results);
break;
+ }
case ISD::FP16_TO_FP:
if (Node->getValueType(0) == MVT::f32) {
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false).first);
@@ -5076,11 +5145,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
break;
}
case ISD::FSUB:
- case ISD::STRICT_FSUB:
- ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
- RTLIB::SUB_F80, RTLIB::SUB_F128,
- RTLIB::SUB_PPCF128, Results);
+ case ISD::STRICT_FSUB: {
+ ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
+ {RTLIB::FAST_SUB_F32, RTLIB::SUB_F32},
+ {RTLIB::FAST_SUB_F64, RTLIB::SUB_F64},
+ {RTLIB::FAST_SUB_F80, RTLIB::SUB_F80},
+ {RTLIB::FAST_SUB_F128, RTLIB::SUB_F128},
+ {RTLIB::FAST_SUB_PPCF128, RTLIB::SUB_PPCF128}, Results);
break;
+ }
case ISD::SREM:
Results.push_back(ExpandIntLibCall(Node, true,
RTLIB::SREM_I8,
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index 64c9415c54d4d..c4fd40f313077 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -18,10 +18,6 @@ using namespace RTLIB;
#undef GET_INIT_RUNTIME_LIBCALL_NAMES
#undef GET_SET_TARGET_RUNTIME_LIBCALL_SETS
-static cl::opt<bool>
- HexagonEnableFastMathRuntimeCalls("hexagon-fast-math", cl::Hidden,
- cl::desc("Enable Fast Math processing"));
-
static void setARMLibcallNames(RuntimeLibcallsInfo &Info, const Triple &TT,
FloatABI::ABIType FloatABIType,
EABI EABIVersion) {
@@ -268,32 +264,25 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT,
setLibcallImpl(RTLIB::UREM_I32, RTLIB::__hexagon_umodsi3);
setLibcallImpl(RTLIB::UREM_I64, RTLIB::__hexagon_umoddi3);
- const bool FastMath = HexagonEnableFastMathRuntimeCalls;
- // This is the only fast library function for sqrtd.
- if (FastMath)
- setLibcallImpl(RTLIB::SQRT_F64, RTLIB::__hexagon_fast2_sqrtdf2);
-
// Prefix is: nothing for "slow-math",
// "fast2_" for V5+ fast-math double-precision
// (actually, keep fast-math and fast-math2 separate for now)
- if (FastMath) {
- setLibcallImpl(RTLIB::ADD_F64, RTLIB::__hexagon_fast_adddf3);
- setLibcallImpl(RTLIB::SUB_F64, RTLIB::__hexagon_fast_subdf3);
- setLibcallImpl(RTLIB::MUL_F64, RTLIB::__hexagon_fast_muldf3);
- setLibcallImpl(RTLIB::DIV_F64, RTLIB::__hexagon_fast_divdf3);
- setLibcallImpl(RTLIB::DIV_F32, RTLIB::__hexagon_fast_divsf3);
- } else {
- setLibcallImpl(RTLIB::ADD_F64, RTLIB::__hexagon_adddf3);
- setLibcallImpl(RTLIB::SUB_F64, RTLIB::__hexagon_subdf3);
- setLibcallImpl(RTLIB::MUL_F64, RTLIB::__hexagon_muldf3);
- setLibcallImpl(RTLIB::DIV_F64, RTLIB::__hexagon_divdf3);
- setLibcallImpl(RTLIB::DIV_F32, RTLIB::__hexagon_divsf3);
- }
- if (FastMath)
- setLibcallImpl(RTLIB::SQRT_F32, RTLIB::__hexagon_fast2_sqrtf);
- else
- setLibcallImpl(RTLIB::SQRT_F32, RTLIB::__hexagon_sqrtf);
+ setLibcallImpl(RTLIB::FAST_ADD_F64, RTLIB::__hexagon_fast_adddf3);
+ setLibcallImpl(RTLIB::FAST_SUB_F64, RTLIB::__hexagon_fast_subdf3);
+ setLibcallImpl(RTLIB::FAST_MUL_F64, RTLIB::__hexagon_fast_muldf3);
+ setLibcallImpl(RTLIB::FAST_DIV_F64, RTLIB::__hexagon_fast_divdf3);
+ setLibcallImpl(RTLIB::FAST_DIV_F32, RTLIB::__hexagon_fast_divsf3);
+ setLibcallImpl(RTLIB::FAST_SQRT_F32, RTLIB::__hexagon_fast2_sqrtf);
+ // This is the only fast library function for sqrtd.
+ setLibcallImpl(RTLIB::FAST_SQRT_F64, RTLIB::__hexagon_fast2_sqrtdf2);
+
+ setLibcallImpl(RTLIB::ADD_F64, RTLIB::__hexagon_adddf3);
+ setLibcallImpl(RTLIB::SUB_F64, RTLIB::__hexagon_subdf3);
+ setLibcallImpl(RTLIB::MUL_F64, RTLIB::__hexagon_muldf3);
+ setLibcallImpl(RTLIB::DIV_F64, RTLIB::__hexagon_divdf3);
+ setLibcallImpl(RTLIB::DIV_F32, RTLIB::__hexagon_divsf3);
+ setLibcallImpl(RTLIB::SQRT_F32, RTLIB::__hexagon_sqrtf);
setLibcallImpl(
RTLIB::HEXAGON_MEMCPY_LIKELY_ALIGNED_MIN32BYTES_MULT8BYTES,
diff --git a/llvm/test/CodeGen/Hexagon/fast-math-libcalls.ll b/llvm/test/CodeGen/Hexagon/fast-math-libcalls.ll
new file mode 100644
index 0000000000000..6bc60132d3e6a
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/fast-math-libcalls.ll
@@ -0,0 +1,369 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+
+;---------------------------------------------------------------------
+; fast sqrt
+;---------------------------------------------------------------------
+
+define float @fast_sqrt_f32(float %x) {
+; CHECK-LABEL: fast_sqrt_f32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_fast2_sqrtf
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+ %result = call nnan ninf nsz afn float @llvm.sqrt.f32(float %x)
+ ret float %result
+}
+
+define double @fast_sqrt_f64(double %x) {
+; CHECK-LABEL: fast_sqrt_f64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_fast2_sqrtdf2
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+ %result = call nnan ninf nsz afn double @llvm.sqrt.f64(double %x)
+ ret double %result
+}
+
+;---------------------------------------------------------------------
+; fast fadd
+;---------------------------------------------------------------------
+
+define float @fast_add_f32(float %x, float %y) {
+; CHECK-LABEL: fast_add_f32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = sfadd(r0,r1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %result = fadd nnan ninf nsz afn float %x, %y
+ ret float %result
+}
+
+define double @fast_add_f64(double %x, double %y) {
+; CHECK-LABEL: fast_add_f64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_fast_adddf3
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+ %result = fadd nnan ninf nsz afn double %x, %y
+ ret double %result
+}
+
+;---------------------------------------------------------------------
+; fast fsub
+;---------------------------------------------------------------------
+
+define float @fast_sub_f32(float %x, float %y) {
+; CHECK-LABEL: fast_sub_f32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = sfsub(r0,r1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %result = fsub nnan ninf nsz afn float %x, %y
+ ret float %result
+}
+
+define double @fast_sub_f64(double %x, double %y) {
+; CHECK-LABEL: fast_sub_f64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_fast_subdf3
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+ %result = fsub nnan ninf nsz afn double %x, %y
+ ret double %result
+}
+
+;---------------------------------------------------------------------
+; fast fmul
+;---------------------------------------------------------------------
+
+define float @fast_mul_f32(float %x, float %y) {
+; CHECK-LABEL: fast_mul_f32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = sfmpy(r0,r1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %result = fmul nnan ninf nsz afn float %x, %y
+ ret float %result
+}
+
+define double @fast_mul_f64(double %x, double %y) {
+; CHECK-LABEL: fast_mul_f64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_fast_muldf3
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+ %result = fmul nnan ninf nsz afn double %x, %y
+ ret double %result
+}
+
+;---------------------------------------------------------------------
+; fast fdiv
+;---------------------------------------------------------------------
+
+define float @fast_div_f32(float %x, float %y) {
+; CHECK-LABEL: fast_div_f32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = sffixupn(r0,r1)
+; CHECK-NEXT: r4,p0 = sfrecipa(r0,r1)
+; CHECK-NEXT: r5 = ##1065353216
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = sffixupd(r0,r1)
+; CHECK-NEXT: r6 = ##1065353216
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r5 -= sfmpy(r1,r4):lib
+; CHECK-NEXT: r0 = and(r2,##-2147483648)
+; CHECK-NEXT: r3 = r2
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r4 += sfmpy(r5,r4):lib
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 += sfmpy(r2,r4):lib
+; CHECK-NEXT: r6 -= sfmpy(r1,r4):lib
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3 -= sfmpy(r1,r0):lib
+; CHECK-NEXT: r4 += sfmpy(r6,r4):lib
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 += sfmpy(r3,r4):lib
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 -= sfmpy(r0,r1):lib
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 += sfmpy(r2,r4,p0):scale
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %result = fdiv nnan ninf nsz afn float %x, %y
+ ret float %result
+}
+
+define double @fast_div_f64(double %x, double %y) {
+; CHECK-LABEL: fast_div_f64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_fast_divdf3
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+ %result = fdiv nnan ninf nsz afn double %x, %y
+ ret double %result
+}
+
+;---------------------------------------------------------------------
+; Negative tests sqrt
+;---------------------------------------------------------------------
+
+; TODO: What flags do we really need here?
+define float @sqrt_f32__afn(float %x) {
+; CHECK-LABEL: sqrt_f32__afn:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_sqrtf
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+ %result = call afn float @llvm.sqrt.f32(float %x)
+ ret float %result
+}
+
+define float @sqrt_f32__afn_ninf(float %x) {
+; CHECK-LABEL: sqrt_f32__afn_ninf:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_sqrtf
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+ %result = call afn ninf float @llvm.sqrt.f32(float %x)
+ ret float %result
+}
+
+define float @sqrt_f32__afn_nnan(float %x) {
+; CHECK-LABEL: sqrt_f32__afn_nnan:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_sqrtf
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+ %result = call afn nnan float @llvm.sqrt.f32(float %x)
+ ret float %result
+}
+
+define float @sqrt_f32__nnan(float %x) {
+; CHECK-LABEL: sqrt_f32__nnan:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_sqrtf
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+ %result = call nnan float @llvm.sqrt.f32(float %x)
+ ret float %result
+}
+
+define float @sqrt_f32_nnan_ninf_afn(float %x) {
+; CHECK-LABEL: sqrt_f32_nnan_ninf_afn:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: .cfi_def_cfa r30, 8
+; CHECK-NEXT: .cfi_offset r31, -4
+; CHECK-NEXT: .cfi_offset r30, -8
+; CHECK-NEXT: {
+; CHECK-NEXT: call __hexagon_sqrtf
+; CHECK-NEXT: allocframe(r29,#0):raw
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: }
+ %result = call nnan ninf afn float @llvm.sqrt.f32(float %x)
+ ret float %result
+}
+
+;---------------------------------------------------------------------
+; Negative tests fadd
+;---------------------------------------------------------------------
+
+; TODO: What flags do we really need here?
+define float @fadd_f32_afn(float %x, float %y) {
+; CHECK-LABEL: fadd_f32_afn:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = sfadd(r0,r1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %result = fadd afn float %x, %y
+ ret float %result
+}
+
+define float @fadd_f32__afn_ninf(float %x, float %y) {
+; CHECK-LABEL: fadd_f32__afn_ninf:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = sfadd(r0,r1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %result = fadd afn ninf float %x, %y
+ ret float %result
+}
+
+define float @fadd_f32__afn_nnan(float %x, float %y) {
+; CHECK-LABEL: fadd_f32__afn_nnan:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = sfadd(r0,r1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %result = fadd afn nnan float %x, %y
+ ret float %result
+}
+
+define float @fadd_f32__nnan(float %x, float %y) {
+; CHECK-LABEL: fadd_f32__nnan:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = sfadd(r0,r1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %result = fadd nnan float %x, %y
+ ret float %result
+}
+
+define float @fadd_f32__nnan_ninf_afn(float %x, float %y) {
+; CHECK-LABEL: fadd_f32__nnan_ninf_afn:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = sfadd(r0,r1)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %result = fadd nnan ninf afn float %x, %y
+ ret float %result
+}