[llvm] [AArch64] Lower mathlib call ldexp into fscale when SVE is enabled (PR #67552)

via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 23 04:38:39 PDT 2023


https://github.com/huhu233 updated https://github.com/llvm/llvm-project/pull/67552

From 9644a49219e1be5e07f07b5b796a7ae6c37d00db Mon Sep 17 00:00:00 2001
From: huhu233 <1293577861 at qq.com>
Date: Mon, 23 Oct 2023 19:36:08 +0800
Subject: [PATCH] [AArch64] Lower mathlib call ldexp into fscale when SVE is
 enabled

The SVE 'fscale' instruction computes x * 2^n, which matches the
semantics of the mathlib call ldexp but avoids the libcall. This patch
lowers ldexp (ISD::FLDEXP) into fscale when SVE is enabled.
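
For illustration, the identity being exploited is ldexp(x, n) == x * 2^n.
Below is a minimal scalar sketch of the same trick, written against the
ACLE SVE intrinsics rather than the SelectionDAG nodes the patch emits
(the helper name is hypothetical, illustration only):

  #include <arm_sve.h>

  // Sketch only, not part of the patch: scalar ldexp via SVE FSCALE.
  double ldexp_via_fscale(double x, int64_t n) {
    svbool_t pg = svptrue_b64();                  // all-true predicate
    svfloat64_t vx = svdup_n_f64(x);              // splat x into a vector
    svfloat64_t vr = svscale_n_f64_x(pg, vx, n);  // per-lane x * 2^n (FSCALE)
    return svlasta_f64(svpfalse_b(), vr);         // LASTA with an all-false
                                                  // predicate reads lane 0
  }

The lowering below is cheaper than this sketch in one respect: it inserts
the operands into lane 0 of undef vectors instead of splatting, since only
lane 0 of the fscale result is read back.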
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 55 ++++++++++++++++
 llvm/test/CodeGen/AArch64/ldexp.ll            | 63 +++++++++++++++++++
 2 files changed, 118 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/ldexp.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a16a102e472e709..6747bc2f9ea156f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1642,6 +1642,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
 
+  if (Subtarget->hasSVE()) {
+    setOperationAction(ISD::FLDEXP, MVT::f64, Custom);
+    setOperationAction(ISD::FLDEXP, MVT::f32, Custom);
+    setOperationAction(ISD::FLDEXP, MVT::f16, Custom);
+  }
+
   PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
 
   IsStrictFPEnabled = true;
@@ -5897,6 +5903,53 @@ static SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) {
   return SDValue();
 }
 
+static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) {
+  SDValue X = Op.getOperand(0);
+  EVT XScalarTy = X.getValueType();
+  SDValue Exp = Op.getOperand(1);
+
+  SDLoc DL(Op);
+  EVT XVT, ExpVT;
+  switch (Op.getSimpleValueType().SimpleTy) {
+  default:
+    return SDValue();
+  case MVT::f16:
+    X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X);
+    [[fallthrough]];
+  case MVT::f32:
+    XVT = MVT::nxv4f32;
+    ExpVT = MVT::nxv4i32;
+    break;
+  case MVT::f64:
+    XVT = MVT::nxv2f64;
+    ExpVT = MVT::nxv2i64;
+    Exp = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Exp);
+    break;
+  }
+
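+  // Move the scalar value and exponent into lane 0 of otherwise-undef SVE
+  // vectors, apply FSCALE to the whole vector under an all-true predicate,
+  // and extract lane 0 of the result.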
+  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+  SDValue VX =
+      DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, XVT, DAG.getUNDEF(XVT), X, Zero);
+  SDValue VExp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ExpVT,
+                             DAG.getUNDEF(ExpVT), Exp, Zero);
+  SDValue VPg = getPTrue(DAG, DL, XVT.changeVectorElementType(MVT::i1),
+                         AArch64SVEPredPattern::all);
+  SDValue FScale =
+      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XVT,
+                  DAG.getConstant(Intrinsic::aarch64_sve_fscale, DL, MVT::i64),
+                  VPg, VX, VExp);
+  SDValue Final =
+      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, X.getValueType(), FScale, Zero);
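+  // An f16 input was widened to f32 above, so narrow the result back down.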
+  if (X.getValueType() != XScalarTy)
+    Final = DAG.getNode(ISD::FP_ROUND, DL, XScalarTy, Final,
+                        DAG.getIntPtrConstant(1, DL));
+  return Final;
+}
+
 SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
                                               SelectionDAG &DAG) const {
   LLVM_DEBUG(dbgs() << "Custom lowering: ");
@@ -6217,6 +6270,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
   case ISD::FSHL:
   case ISD::FSHR:
     return LowerFunnelShift(Op, DAG);
+  case ISD::FLDEXP:
+    return LowerFLDEXP(Op, DAG);
   }
 }
 
diff --git a/llvm/test/CodeGen/AArch64/ldexp.ll b/llvm/test/CodeGen/AArch64/ldexp.ll
new file mode 100644
index 000000000000000..4b491051a88aa75
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ldexp.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=aarch64 -mattr=+sve < %s -o - | FileCheck %s
+
+define double @testExp(double %val, i32 %a) {
+; CHECK-LABEL: testExp:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sxtw x8, w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    fmov d1, x8
+; CHECK-NEXT:    fscale z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    ret
+entry:
+  %call = tail call fast double @ldexp(double %val, i32 %a)
+  ret double %call
+}
+
+declare double @ldexp(double, i32) memory(none)
+
+define float @testExpf(float %val, i32 %a) {
+; CHECK-LABEL: testExpf:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmov s1, w0
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT:    fscale z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
+; CHECK-NEXT:    ret
+entry:
+  %call = tail call fast float @ldexpf(float %val, i32 %a)
+  ret float %call
+}
+
+declare float @ldexpf(float, i32) memory(none)
+
+define fp128 @testExpl(fp128 %val, i32 %a) {
+; CHECK-LABEL: testExpl:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    b ldexpl
+entry:
+  %call = tail call fast fp128 @ldexpl(fp128 %val, i32 %a)
+  ret fp128 %call
+}
+
+declare fp128 @ldexpl(fp128, i32) memory(none)
+
+define half @testExpf16(half %val, i32 %a) {
+; CHECK-LABEL: testExpf16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    fmov s1, w0
+; CHECK-NEXT:    fscale z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call fast half @llvm.ldexp.f16.i32(half %val, i32 %a)
+  ret half %0
+}
+
+declare half @llvm.ldexp.f16.i32(half, i32) memory(none)



More information about the llvm-commits mailing list