[llvm] dbe8def - [AArch64] Lower mathlib call ldexp into fscale when sve is enabled (#67552)
Author: huhu233
Date: 2023-10-24T10:17:04+08:00
New Revision: dbe8def9ccd1b8d50e12c8ee66ee509a86e72510
URL: https://github.com/llvm/llvm-project/commit/dbe8def9ccd1b8d50e12c8ee66ee509a86e72510
DIFF: https://github.com/llvm/llvm-project/commit/dbe8def9ccd1b8d50e12c8ee66ee509a86e72510.diff
LOG: [AArch64] Lower mathlib call ldexp into fscale when sve is enabled (#67552)
The SVE 'fscale' instruction is functionally equivalent to the mathlib
call ldexp but has better performance. This patch lowers ldexp to
fscale when SVE is enabled.
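For reference, ldexp(x, n) computes x * 2^n exactly by adjusting the
floating-point exponent, which is also what fscale does per vector lane.
A minimal C caller of the kind this patch affects (the function name is
illustrative only):

  #include <math.h>

  /* On AArch64 with SVE enabled, this call can now be compiled to a
     single fscale instruction instead of a libm call. */
  double scale_by_pow2(double x, int n) {
    return ldexp(x, n); /* x * 2^n, exact */
  }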
Added:
llvm/test/CodeGen/AArch64/ldexp.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f059e5d50a20924..565a9d55cce3ea1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1642,6 +1642,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ if (Subtarget->hasSVE()) {
+ setOperationAction(ISD::FLDEXP, MVT::f64, Custom);
+ setOperationAction(ISD::FLDEXP, MVT::f32, Custom);
+ setOperationAction(ISD::FLDEXP, MVT::f16, Custom);
+ }
+
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
IsStrictFPEnabled = true;
@@ -5895,6 +5901,49 @@ static SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) {
+ SDValue X = Op.getOperand(0);
+ EVT XScalarTy = X.getValueType();
+ SDValue Exp = Op.getOperand(1);
+
+ SDLoc DL(Op);
+ EVT XVT, ExpVT;
+ switch (Op.getSimpleValueType().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::f16:
+ X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X);
+ [[fallthrough]];
+ case MVT::f32:
+ XVT = MVT::nxv4f32;
+ ExpVT = MVT::nxv4i32;
+ break;
+ case MVT::f64:
+ XVT = MVT::nxv2f64;
+ ExpVT = MVT::nxv2i64;
+ Exp = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Exp);
+ break;
+ }
+
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+ SDValue VX =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, XVT, DAG.getUNDEF(XVT), X, Zero);
+ SDValue VExp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ExpVT,
+ DAG.getUNDEF(ExpVT), Exp, Zero);
+ SDValue VPg = getPTrue(DAG, DL, XVT.changeVectorElementType(MVT::i1),
+ AArch64SVEPredPattern::all);
+ SDValue FScale =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XVT,
+ DAG.getConstant(Intrinsic::aarch64_sve_fscale, DL, MVT::i64),
+ VPg, VX, VExp);
+ SDValue Final =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, X.getValueType(), FScale, Zero);
+ if (X.getValueType() != XScalarTy)
+ Final = DAG.getNode(ISD::FP_ROUND, DL, XScalarTy, Final,
+ DAG.getIntPtrConstant(1, SDLoc(Op)));
+ return Final;
+}
+
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
@@ -6215,6 +6264,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::FSHL:
case ISD::FSHR:
return LowerFunnelShift(Op, DAG);
+ case ISD::FLDEXP:
+ return LowerFLDEXP(Op, DAG);
}
}
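Conceptually, the new LowerFLDEXP hook moves the scalar operands into
SVE vectors, applies FSCALE under an all-active predicate, and extracts
the scalar result again; the f16 case is first extended to f32 and
rounded back afterwards. A rough ACLE-style C sketch of the f64
strategy (the helper is illustrative only; the patch builds the
equivalent SelectionDAG nodes directly, inserting into lane 0 of undef
vectors rather than broadcasting):

  #include <arm_sve.h>

  /* Sketch only: shows the fscale-based strategy, not the actual
     implementation, which works on SelectionDAG nodes. */
  static double ldexp_via_fscale(double x, long long n) {
    svbool_t pg = svptrue_b64();                    /* all-active predicate */
    svfloat64_t vx = svdup_n_f64(x);                /* value in every lane */
    svint64_t ve = svdup_n_s64(n);                  /* exponent in every lane */
    svfloat64_t scaled = svscale_f64_m(pg, vx, ve); /* vx * 2^ve per lane */
    return svlastb_f64(pg, scaled);                 /* read one lane back */
  }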
diff --git a/llvm/test/CodeGen/AArch64/ldexp.ll b/llvm/test/CodeGen/AArch64/ldexp.ll
new file mode 100644
index 000000000000000..4b491051a88aa75
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ldexp.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=aarch64 -mattr=+sve < %s -o - | FileCheck %s
+
+define double @testExp(double %val, i32 %a) {
+; CHECK-LABEL: testExp:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: fmov d1, x8
+; CHECK-NEXT: fscale z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: ret
+entry:
+ %call = tail call fast double @ldexp(double %val, i32 %a)
+ ret double %call
+}
+
+declare double @ldexp(double, i32) memory(none)
+
+define float @testExpf(float %val, i32 %a) {
+; CHECK-LABEL: testExpf:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: fscale z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT: ret
+entry:
+ %call = tail call fast float @ldexpf(float %val, i32 %a)
+ ret float %call
+}
+
+declare float @ldexpf(float, i32) memory(none)
+
+define fp128 @testExpl(fp128 %val, i32 %a) {
+; CHECK-LABEL: testExpl:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: b ldexpl
+entry:
+ %call = tail call fast fp128 @ldexpl(fp128 %val, i32 %a)
+ ret fp128 %call
+}
+
+declare fp128 @ldexpl(fp128, i32) memory(none)
+
+define half @testExpf16(half %val, i32 %a) {
+; CHECK-LABEL: testExpf16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: fscale z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call fast half @llvm.ldexp.f16.i32(half %val, i32 %a)
+ ret half %0
+}
+
+declare half @llvm.ldexp.f16.i32(half, i32) memory(none)
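The CHECK lines above were generated with utils/update_llc_test_checks.py;
the RUN line shows how to reproduce the assembly by hand:

  llc -mtriple=aarch64 -mattr=+sve < llvm/test/CodeGen/AArch64/ldexp.ll -o -

Note that the fp128 case still tail-calls the ldexpl libcall: the
lowering only registers f16, f32, and f64, the element types FSCALE
supports.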