[llvm] [IR] Add `llvm.modf` intrinsic (PR #121948)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 7 06:56:48 PST 2025
https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/121948
This adds the `llvm.modf` intrinsic, legalization, and lowering (mostly reusing the lowering for sincos and frexp).
The `llvm.modf` intrinsic takes a floating-point value and returns both the integral and fractional parts (as a struct).
```
declare { float, float } @llvm.modf.f32(float %Val)
declare { double, double } @llvm.modf.f64(double %Val)
declare { x86_fp80, x86_fp80 } @llvm.modf.f80(x86_fp80 %Val)
declare { fp128, fp128 } @llvm.modf.f128(fp128 %Val)
declare { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %Val)
declare { <4 x float>, <4 x float> } @llvm.modf.v4f32(<4 x float> %Val)
```
This corresponds to the libm `modf` function but returns multiple values in a struct (rather than taking output pointers), which makes it easier to vectorize.
From ae791351f9470e79f66473ee60110395ab6548a5 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 7 Jan 2025 13:50:46 +0000
Subject: [PATCH] [IR] Add `llvm.modf` intrinsic
This adds the `llvm.modf` intrinsic, legalization, and lowering.
The `llvm.modf` intrinsic takes a floating-point value and returns both
the integral and fractional parts (as a struct).
```
declare { float, float } @llvm.modf.f32(float %Val)
declare { double, double } @llvm.modf.f64(double %Val)
declare { x86_fp80, x86_fp80 } @llvm.modf.f80(x86_fp80 %Val)
declare { fp128, fp128 } @llvm.modf.f128(fp128 %Val)
declare { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %Val)
declare { <4 x float>, <4 x float> } @llvm.modf.v4f32(<4 x float> %Val)
```
This corresponds to the libm `modf` function but returns multiple values
in a struct (rather than taking output pointers), which makes it easier to
vectorize.
---
llvm/docs/LangRef.rst | 43 +++
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 3 +
llvm/include/llvm/CodeGen/ISDOpcodes.h | 4 +
.../include/llvm/CodeGen/RuntimeLibcallUtil.h | 4 +
llvm/include/llvm/IR/Intrinsics.td | 2 +
llvm/include/llvm/IR/RuntimeLibcalls.def | 5 +
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 10 +-
.../SelectionDAG/LegalizeFloatTypes.cpp | 3 +-
.../SelectionDAG/LegalizeVectorOps.cpp | 9 +
.../SelectionDAG/LegalizeVectorTypes.cpp | 3 +
.../SelectionDAG/SelectionDAGBuilder.cpp | 4 +
.../SelectionDAG/SelectionDAGDumper.cpp | 1 +
llvm/lib/CodeGen/TargetLoweringBase.cpp | 11 +-
.../Target/AArch64/AArch64ISelLowering.cpp | 26 +-
llvm/test/CodeGen/AArch64/llvm.modf.ll | 255 ++++++++++++++++++
llvm/test/CodeGen/AArch64/veclib-llvm.modf.ll | 57 ++++
16 files changed, 420 insertions(+), 20 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/llvm.modf.ll
create mode 100644 llvm/test/CodeGen/AArch64/veclib-llvm.modf.ll
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 7e01331b20c570..bd067fa5d1c4b4 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -16004,6 +16004,49 @@ of the argument.
When specified with the fast-math-flag 'afn', the result may be approximated
using a less accurate calculation.
+'``llvm.modf.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.modf`` on any floating-point
+or vector of floating-point type. However, not all targets support all types.
+
+::
+
+ declare { float, float } @llvm.modf.f32(float %Val)
+ declare { double, double } @llvm.modf.f64(double %Val)
+ declare { x86_fp80, x86_fp80 } @llvm.modf.f80(x86_fp80 %Val)
+ declare { fp128, fp128 } @llvm.modf.f128(fp128 %Val)
+ declare { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %Val)
+ declare { <4 x float>, <4 x float> } @llvm.modf.v4f32(<4 x float> %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.modf.*``' intrinsics return the operand's integral and fractional
+parts.
+
+Arguments:
+""""""""""
+
+The argument is a :ref:`floating-point <t_floating>` value or
+:ref:`vector <t_vector>` of floating-point values. Returns two values matching
+the argument type in a struct.
+
+Semantics:
+""""""""""
+
+Return the same values as a corresponding libm '``modf``' function without
+trapping or setting ``errno``.
+
+The first result is the fractional part of the operand and the second result is
+the integral part of the operand. Both results have the same sign as the operand.
+
+When specified with the fast-math-flag 'afn', the result may be approximated
+using a less accurate calculation.
+
'``llvm.pow.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index c9f142d64ae9e4..46778ca23aeed1 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2078,6 +2078,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::sincos:
ISD = ISD::FSINCOS;
break;
+ case Intrinsic::modf:
+ ISD = ISD::FMODF;
+ break;
case Intrinsic::tan:
ISD = ISD::FTAN;
break;
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 604dc9419025b0..9514cb399c7d81 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1058,6 +1058,10 @@ enum NodeType {
/// FSINCOS - Compute both fsin and fcos as a single operation.
FSINCOS,
+ /// FMODF - Decomposes the given arg into integral and fractional parts, each
+ /// having the same type and sign as the arg.
+ FMODF,
+
/// Gets the current floating-point environment. The first operand is a token
/// chain. The results are FP environment, represented by an integer value,
/// and a token chain.
diff --git a/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h b/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h
index 045ec7d3653119..59313520e0d831 100644
--- a/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h
+++ b/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h
@@ -66,6 +66,10 @@ Libcall getFREXP(EVT RetVT);
/// UNKNOWN_LIBCALL if there is none.
Libcall getFSINCOS(EVT RetVT);
+/// getMODF - Return the MODF_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+Libcall getMODF(EVT RetVT);
+
/// Return the SYNC_FETCH_AND_* value for the given opcode and type, or
/// UNKNOWN_LIBCALL if there is none.
Libcall getSYNC(unsigned Opc, MVT VT);
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index ee877349a33149..2c22060237faa6 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1063,6 +1063,8 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
def int_roundeven : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_sincos : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>],
[llvm_anyfloat_ty]>;
+ def int_modf : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>],
+ [llvm_anyfloat_ty]>;
// Truncate a floating point number with a specific rounding mode
def int_fptrunc_round : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 8153845b52c7ae..dc69b1ae19769e 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -354,6 +354,11 @@ HANDLE_LIBCALL(FREXP_F64, "frexp")
HANDLE_LIBCALL(FREXP_F80, "frexpl")
HANDLE_LIBCALL(FREXP_F128, "frexpl")
HANDLE_LIBCALL(FREXP_PPCF128, "frexpl")
+HANDLE_LIBCALL(MODF_F32, "modff")
+HANDLE_LIBCALL(MODF_F64, "modf")
+HANDLE_LIBCALL(MODF_F80, "modfl")
+HANDLE_LIBCALL(MODF_F128, "modfl")
+HANDLE_LIBCALL(MODF_PPCF128, "modfl")
// Floating point environment
HANDLE_LIBCALL(FEGETENV, "fegetenv")
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 595a410101eca1..f080c3aa562e4a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4609,12 +4609,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
ExpandFPLibCall(Node, RTLIB::LDEXP_F32, RTLIB::LDEXP_F64, RTLIB::LDEXP_F80,
RTLIB::LDEXP_F128, RTLIB::LDEXP_PPCF128, Results);
break;
+ case ISD::FMODF:
case ISD::FFREXP: {
- RTLIB::Libcall LC = RTLIB::getFREXP(Node->getValueType(0));
+ EVT VT = Node->getValueType(0);
+ RTLIB::Libcall LC = Node->getOpcode() == ISD::FMODF ? RTLIB::getMODF(VT)
+ : RTLIB::getFREXP(VT);
bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results,
/*CallRetResNo=*/0);
if (!Expanded)
- llvm_unreachable("Expected scalar FFREXP to expand to libcall!");
+ llvm_unreachable("Expected scalar FFREXP/FMODF to expand to libcall!");
break;
}
case ISD::FPOWI:
@@ -5509,9 +5512,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(Tmp2.getValue(1));
break;
}
+ case ISD::FMODF:
case ISD::FSINCOS: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
- Tmp2 = DAG.getNode(ISD::FSINCOS, dl, DAG.getVTList(NVT, NVT), Tmp1,
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, DAG.getVTList(NVT, NVT), Tmp1,
Node->getFlags());
Tmp3 = DAG.getIntPtrConstant(0, dl, /*isTarget=*/true);
for (unsigned ResNum = 0; ResNum < Node->getNumValues(); ResNum++)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 71f100bfa03434..2a4eed1ed527a8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2766,10 +2766,10 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FLDEXP: R = PromoteFloatRes_ExpOp(N); break;
case ISD::FFREXP: R = PromoteFloatRes_FFREXP(N); break;
+ case ISD::FMODF:
case ISD::FSINCOS:
R = PromoteFloatRes_UnaryWithTwoFPResults(N);
break;
-
case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break;
case ISD::STRICT_FP_ROUND:
R = PromoteFloatRes_STRICT_FP_ROUND(N);
@@ -3228,6 +3228,7 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FFREXP: R = SoftPromoteHalfRes_FFREXP(N); break;
+ case ISD::FMODF:
case ISD::FSINCOS:
R = SoftPromoteHalfRes_UnaryWithTwoFPResults(N);
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index e8404a13009a72..c4e282cf2dad29 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -454,6 +454,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::UMULO:
case ISD::FCANONICALIZE:
case ISD::FFREXP:
+ case ISD::FMODF:
case ISD::FSINCOS:
case ISD::SADDSAT:
case ISD::UADDSAT:
@@ -1205,6 +1206,14 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
break;
}
+ case ISD::FMODF: {
+ RTLIB::Libcall LC =
+ RTLIB::getMODF(Node->getValueType(0).getVectorElementType());
+ if (DAG.expandMultipleResultFPLibCall(LC, Node, Results,
+ /*CallRetResNo=*/0))
+ return;
+ break;
+ }
case ISD::VECTOR_COMPRESS:
Results.push_back(TLI.expandVECTOR_COMPRESS(Node, DAG));
return;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 780eba16c9c498..2d8676ffb66e3e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -129,6 +129,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ADDRSPACECAST:
R = ScalarizeVecRes_ADDRSPACECAST(N);
break;
+ case ISD::FMODF:
case ISD::FFREXP:
case ISD::FSINCOS:
R = ScalarizeVecRes_UnaryOpWithTwoResults(N, ResNo);
@@ -1257,6 +1258,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ADDRSPACECAST:
SplitVecRes_ADDRSPACECAST(N, Lo, Hi);
break;
+ case ISD::FMODF:
case ISD::FFREXP:
case ISD::FSINCOS:
SplitVecRes_UnaryOpWithTwoResults(N, ResNo, Lo, Hi);
@@ -4779,6 +4781,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_FSHR:
Res = WidenVecRes_Ternary(N);
break;
+ case ISD::FMODF:
case ISD::FFREXP:
case ISD::FSINCOS: {
if (!unrollExpandedOp())
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index f8d7c3ef7bbe71..036fff7a6cb6e4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6994,6 +6994,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
+ case Intrinsic::modf:
case Intrinsic::sincos:
case Intrinsic::frexp: {
unsigned Opcode;
@@ -7003,6 +7004,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::sincos:
Opcode = ISD::FSINCOS;
break;
+ case Intrinsic::modf:
+ Opcode = ISD::FMODF;
+ break;
case Intrinsic::frexp:
Opcode = ISD::FFREXP;
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 580ff19065557b..99ce0eb8a5ee4e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -219,6 +219,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FCOS: return "fcos";
case ISD::STRICT_FCOS: return "strict_fcos";
case ISD::FSINCOS: return "fsincos";
+ case ISD::FMODF: return "fmodf";
case ISD::FTAN: return "ftan";
case ISD::STRICT_FTAN: return "strict_ftan";
case ISD::FASIN: return "fasin";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 3b0e9c7526fd0a..8a6b66a0ef7f77 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -407,6 +407,11 @@ RTLIB::Libcall RTLIB::getFSINCOS(EVT RetVT) {
SINCOS_PPCF128);
}
+RTLIB::Libcall RTLIB::getMODF(EVT RetVT) {
+ return getFPLibCall(RetVT, MODF_F32, MODF_F64, MODF_F80, MODF_F128,
+ MODF_PPCF128);
+}
+
RTLIB::Libcall RTLIB::getOutlineAtomicHelper(const Libcall (&LC)[5][4],
AtomicOrdering Order,
uint64_t MemSize) {
@@ -775,9 +780,9 @@ void TargetLoweringBase::initActions() {
setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand);
// These library functions default to expand.
- setOperationAction(
- {ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP, ISD::FSINCOS}, VT,
- Expand);
+ setOperationAction({ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP,
+ ISD::FSINCOS, ISD::FMODF},
+ VT, Expand);
// These operations default to expand for vector types.
if (VT.isVector())
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e35ad524885015..86627929fc4336 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -732,19 +732,19 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Promote);
}
- for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
- ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
- ISD::FACOS, ISD::FASIN, ISD::FATAN,
- ISD::FATAN2, ISD::FCOSH, ISD::FSINH,
- ISD::FTANH, ISD::FTAN, ISD::FEXP,
- ISD::FEXP2, ISD::FEXP10, ISD::FLOG,
- ISD::FLOG2, ISD::FLOG10, ISD::STRICT_FREM,
- ISD::STRICT_FPOW, ISD::STRICT_FPOWI, ISD::STRICT_FCOS,
- ISD::STRICT_FSIN, ISD::STRICT_FACOS, ISD::STRICT_FASIN,
- ISD::STRICT_FATAN, ISD::STRICT_FATAN2, ISD::STRICT_FCOSH,
- ISD::STRICT_FSINH, ISD::STRICT_FTANH, ISD::STRICT_FEXP,
- ISD::STRICT_FEXP2, ISD::STRICT_FLOG, ISD::STRICT_FLOG2,
- ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) {
+ for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
+ ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
+ ISD::FMODF, ISD::FACOS, ISD::FASIN,
+ ISD::FATAN, ISD::FATAN2, ISD::FCOSH,
+ ISD::FSINH, ISD::FTANH, ISD::FTAN,
+ ISD::FEXP, ISD::FEXP2, ISD::FEXP10,
+ ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
+ ISD::STRICT_FREM, ISD::STRICT_FPOW, ISD::STRICT_FPOWI,
+ ISD::STRICT_FCOS, ISD::STRICT_FSIN, ISD::STRICT_FACOS,
+ ISD::STRICT_FASIN, ISD::STRICT_FATAN, ISD::STRICT_FATAN2,
+ ISD::STRICT_FCOSH, ISD::STRICT_FSINH, ISD::STRICT_FTANH,
+ ISD::STRICT_FEXP, ISD::STRICT_FEXP2, ISD::STRICT_FLOG,
+ ISD::STRICT_FLOG2, ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) {
setOperationAction(Op, MVT::f16, Promote);
setOperationAction(Op, MVT::v4f16, Expand);
setOperationAction(Op, MVT::v8f16, Expand);
diff --git a/llvm/test/CodeGen/AArch64/llvm.modf.ll b/llvm/test/CodeGen/AArch64/llvm.modf.ll
new file mode 100644
index 00000000000000..41fe796daca86c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/llvm.modf.ll
@@ -0,0 +1,255 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
+
+define { half, half } @test_modf_f16(half %a) {
+; CHECK-LABEL: test_modf_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: ldr s1, [sp, #12]
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: fcvt h1, s1
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %result = call { half, half } @llvm.modf.f16(half %a)
+ ret { half, half } %result
+}
+
+define half @test_modf_f16_only_use_fractional_part(half %a) {
+; CHECK-LABEL: test_modf_f16_only_use_fractional_part:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %result = call { half, half } @llvm.modf.f16(half %a)
+ %result.0 = extractvalue { half, half } %result, 0
+ ret half %result.0
+}
+
+define half @test_modf_f16_only_use_integral_part(half %a) {
+; CHECK-LABEL: test_modf_f16_only_use_integral_part:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: ldr s0, [sp, #12]
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %result = call { half, half } @llvm.modf.f16(half %a)
+ %result.1 = extractvalue { half, half } %result, 1
+ ret half %result.1
+}
+
+define { <2 x half>, <2 x half> } @test_modf_v2f16(<2 x half> %a) {
+; CHECK-LABEL: test_modf_v2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h1, v0.h[1]
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: fcvt s0, h1
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: add x0, sp, #40
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: fmov s0, s1
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: fcvt h2, s0
+; CHECK-NEXT: add x0, sp, #56
+; CHECK-NEXT: mov h1, v1.h[2]
+; CHECK-NEXT: fcvt s0, h1
+; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: mov v2.h[1], v1.h[0]
+; CHECK-NEXT: str q2, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: fcvt h2, s0
+; CHECK-NEXT: add x0, sp, #60
+; CHECK-NEXT: mov h1, v1.h[3]
+; CHECK-NEXT: fcvt s0, h1
+; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: ldp s2, s1, [sp, #40]
+; CHECK-NEXT: fcvt h4, s0
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: fcvt h3, s1
+; CHECK-NEXT: fcvt h1, s2
+; CHECK-NEXT: ldr s2, [sp, #56]
+; CHECK-NEXT: mov v0.h[3], v4.h[0]
+; CHECK-NEXT: fcvt h2, s2
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-NEXT: ldr s3, [sp, #60]
+; CHECK-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-NEXT: fcvt h2, s3
+; CHECK-NEXT: mov v1.h[3], v2.h[0]
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ %result = call { <2 x half>, <2 x half> } @llvm.modf.v2f16(<2 x half> %a)
+ ret { <2 x half>, <2 x half> } %result
+}
+
+define { float, float } @test_modf_f32(float %a) {
+; CHECK-LABEL: test_modf_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: ldr s1, [sp, #12]
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %result = call { float, float } @llvm.modf.f32(float %a)
+ ret { float, float } %result
+}
+
+define { <3 x float>, <3 x float> } @test_modf_v3f32(<3 x float> %a) {
+; CHECK-LABEL: test_modf_v3f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: add x0, sp, #56
+; CHECK-NEXT: add x19, sp, #56
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: add x0, sp, #60
+; CHECK-NEXT: add x20, sp, #60
+; CHECK-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: mov s0, v0.s[2]
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: ldr s1, [sp, #44]
+; CHECK-NEXT: ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: ld1 { v1.s }[1], [x19]
+; CHECK-NEXT: mov v2.s[2], v0.s[0]
+; CHECK-NEXT: ld1 { v1.s }[2], [x20]
+; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: ret
+ %result = call { <3 x float>, <3 x float> } @llvm.modf.v3f32(<3 x float> %a)
+ ret { <3 x float>, <3 x float> } %result
+}
+
+define { <2 x float>, <2 x float> } @test_modf_v2f32(<2 x float> %a) {
+; CHECK-LABEL: test_modf_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: add x0, sp, #40
+; CHECK-NEXT: add x19, sp, #40
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl modff
+; CHECK-NEXT: ldr s1, [sp, #44]
+; CHECK-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: ld1 { v1.s }[1], [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.s[1], v2.s[0]
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ %result = call { <2 x float>, <2 x float> } @llvm.modf.v2f32(<2 x float> %a)
+ ret { <2 x float>, <2 x float> } %result
+}
+
+define { double, double } @test_modf_f64(double %a) {
+; CHECK-LABEL: test_modf_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #8
+; CHECK-NEXT: bl modf
+; CHECK-NEXT: ldr d1, [sp, #8]
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %result = call { double, double } @llvm.modf.f64(double %a)
+ ret { double, double } %result
+}
+
+define { <2 x double>, <2 x double> } @test_modf_v2f64(<2 x double> %a) {
+; CHECK-LABEL: test_modf_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: mov d0, v0.d[1]
+; CHECK-NEXT: add x0, sp, #32
+; CHECK-NEXT: add x19, sp, #32
+; CHECK-NEXT: bl modf
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: add x0, sp, #40
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: bl modf
+; CHECK-NEXT: ldr d1, [sp, #40]
+; CHECK-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: ld1 { v1.d }[1], [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v2.d[0]
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ %result = call { <2 x double>, <2 x double> } @llvm.modf.v2f64(<2 x double> %a)
+ ret { <2 x double>, <2 x double> } %result
+}
diff --git a/llvm/test/CodeGen/AArch64/veclib-llvm.modf.ll b/llvm/test/CodeGen/AArch64/veclib-llvm.modf.ll
new file mode 100644
index 00000000000000..1874d265978d70
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/veclib-llvm.modf.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter "(bl|ptrue)" --version 5
+; RUN: llc -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -vector-library=sleefgnuabi < %s | FileCheck %s -check-prefix=SLEEF
+; RUN: llc -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -vector-library=ArmPL < %s | FileCheck %s -check-prefix=ARMPL
+
+define <4 x float> @test_modf_v4f32(<4 x float> %x, ptr %out_integral) {
+; SLEEF-LABEL: test_modf_v4f32:
+; SLEEF: bl _ZGVnN4vl4_modff
+;
+; ARMPL-LABEL: test_modf_v4f32:
+; ARMPL: bl armpl_vmodfq_f32
+ %result = call { <4 x float>, <4 x float> } @llvm.modf.v4f32(<4 x float> %x)
+ %result.0 = extractvalue { <4 x float>, <4 x float> } %result, 0
+ %result.1 = extractvalue { <4 x float>, <4 x float> } %result, 1
+ store <4 x float> %result.1, ptr %out_integral, align 4
+ ret <4 x float> %result.0
+}
+
+define <2 x double> @test_modf_v2f64(<2 x double> %x, ptr %out_integral) {
+; SLEEF-LABEL: test_modf_v2f64:
+; SLEEF: bl _ZGVnN2vl8_modf
+;
+; ARMPL-LABEL: test_modf_v2f64:
+; ARMPL: bl armpl_vmodfq_f64
+ %result = call { <2 x double>, <2 x double> } @llvm.modf.v2f64(<2 x double> %x)
+ %result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0
+ %result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1
+ store <2 x double> %result.1, ptr %out_integral, align 8
+ ret <2 x double> %result.0
+}
+
+define <vscale x 4 x float> @test_modf_nxv4f32(<vscale x 4 x float> %x, ptr %out_integral) {
+; SLEEF-LABEL: test_modf_nxv4f32:
+; SLEEF: bl _ZGVsNxvl4_modff
+;
+; ARMPL-LABEL: test_modf_nxv4f32:
+; ARMPL: ptrue p0.s
+; ARMPL: bl armpl_svmodf_f32_x
+ %result = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.modf.nxv4f32(<vscale x 4 x float> %x)
+ %result.0 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %result, 0
+ %result.1 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %result, 1
+ store <vscale x 4 x float> %result.1, ptr %out_integral, align 4
+ ret <vscale x 4 x float> %result.0
+}
+
+define <vscale x 2 x double> @test_modf_nxv2f64(<vscale x 2 x double> %x, ptr %out_integral) {
+; SLEEF-LABEL: test_modf_nxv2f64:
+; SLEEF: bl _ZGVsNxvl8_modf
+;
+; ARMPL-LABEL: test_modf_nxv2f64:
+; ARMPL: ptrue p0.d
+; ARMPL: bl armpl_svmodf_f64_x
+ %result = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.modf.nxv2f64(<vscale x 2 x double> %x)
+ %result.0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %result, 0
+ %result.1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %result, 1
+ store <vscale x 2 x double> %result.1, ptr %out_integral, align 8
+ ret <vscale x 2 x double> %result.0
+}
More information about the llvm-commits
mailing list