[Openmp-commits] [libcxx] [lld] [mlir] [openmp] [llvm] [compiler-rt] [clang] [HEXAGON] Inlining Division (PR #79021)
via Openmp-commits
openmp-commits at lists.llvm.org
Mon Jan 22 09:44:53 PST 2024
https://github.com/quic-asaravan created https://github.com/llvm/llvm-project/pull/79021
This patch inlines float division function calls for hexagon.
>From e8beb77010becb9a9c82d875597bdedbfed9be9e Mon Sep 17 00:00:00 2001
From: Awanish Pandey <awanpand at codeaurora.org>
Date: Sun, 6 Sep 2020 14:39:57 +0530
Subject: [PATCH] [HEXAGON] Inlining division
This patch inlines float division function calls for hexagon.
---
.../Target/Hexagon/HexagonISelDAGToDAG.cpp | 94 +++++++++++++++++++
llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h | 3 +
.../Target/Hexagon/HexagonISelLowering.cpp | 11 +++
llvm/lib/Target/Hexagon/HexagonISelLowering.h | 1 +
.../CodeGen/Hexagon/inline-division-space.ll | 30 ++++++
llvm/test/CodeGen/Hexagon/inline-division.ll | 29 ++++++
6 files changed, 168 insertions(+)
create mode 100644 llvm/test/CodeGen/Hexagon/inline-division-space.ll
create mode 100644 llvm/test/CodeGen/Hexagon/inline-division.ll
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index e4127b0b94c625..6fe3fe0d36b9e7 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -904,6 +904,98 @@ void HexagonDAGToDAGISel::SelectQ2V(SDNode *N) {
ReplaceNode(N, T);
}
+/// Select the f32 ISD::FDIV node \p N as an inline machine-instruction
+/// sequence and replace \p N with the final node of that sequence.
+///
+/// Dataflow (a = operand 0, b = operand 1), written "acc +/- x*y" for the
+/// three-operand F2_sffma*/F2_sffms* nodes built from (acc, x, y):
+///   R0, P = sfrecipa(a, b);  D = sffixupd(a, b);  Nm = sffixupn(a, b)
+///   E0 = 1.0 - D*R0;   R1 = R0 + E0*R0
+///   E1 = 1.0 - D*R1;   R2 = R1 + E1*R1
+///   Q0 = (Nm & 0x80000000) + Nm*R1
+///   Q1 = Q0 + (Nm - D*Q0)*R2
+///   result = Q1 + (Nm - Q1*D)*R2, with P as the extra F2_sffma_sc operand
+/// NOTE(review): the "acc +/- x*y" reading assumes the usual Hexagon
+/// accumulate forms (Rx += sfmpy / Rx -= sfmpy) -- confirm against the PRM.
+void HexagonDAGToDAGISel::FDiv(SDNode *N) {
+  SDLoc dl(N);
+  SmallVector<SDValue, 2> Ops = {N->getOperand(0), N->getOperand(1)};
+
+  // F2_sfrecipa yields two results: value 0 is the initial reciprocal
+  // estimate, value 1 is consumed only by the final F2_sffma_sc below.
+  SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::f32);
+  SDNode *Recip = CurDAG->getMachineNode(Hexagon::F2_sfrecipa, dl, VTs, Ops);
+  SDValue Recip0(Recip, 0);
+  SDValue Scale(Recip, 1);
+
+  SDValue Denom(
+      CurDAG->getMachineNode(Hexagon::F2_sffixupd, dl, MVT::f32, Ops), 0);
+
+  // 0x3f800000 is the IEEE-754 single-precision bit pattern of 1.0.
+  SDValue OneBits = CurDAG->getTargetConstant(0x3f800000, dl, MVT::i32);
+  SDValue One(
+      CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, OneBits), 0);
+
+  SDValue Numer(
+      CurDAG->getMachineNode(Hexagon::F2_sffixupn, dl, MVT::f32, Ops), 0);
+
+  // First reciprocal refinement step.
+  SDValue Err0(CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32, One,
+                                      Denom, Recip0),
+               0);
+  SDValue Recip1(CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
+                                        Recip0, Err0, Recip0),
+                 0);
+  SDValue Err1(CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32, One,
+                                      Denom, Recip1),
+               0);
+
+  // Initial quotient: the accumulator is seeded with only the sign bit of the
+  // fixed-up numerator (mask 0x80000000).
+  SDValue SignOnly(
+      CurDAG->getMachineNode(
+          Hexagon::A2_andir, dl, MVT::f32, Numer,
+          CurDAG->getTargetConstant(0x80000000, dl, MVT::i32)),
+      0);
+  SDValue Quot0(CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
+                                       SignOnly, Numer, Recip1),
+                0);
+
+  // Second reciprocal refinement step.
+  SDValue Recip2(CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
+                                        Recip1, Err1, Recip1),
+                 0);
+
+  // Quotient refinement using the residual of the current quotient.
+  SDValue QuotErr0(CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
+                                          Numer, Denom, Quot0),
+                   0);
+  SDValue Quot1(CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
+                                       Quot0, QuotErr0, Recip2),
+                0);
+
+  // Final correction; F2_sffma_sc additionally takes sfrecipa's second result.
+  SDValue QuotErr1(CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
+                                          Numer, Quot1, Denom),
+                   0);
+  SDValue FinalOps[] = {Quot1, QuotErr1, Recip2, Scale};
+  SDNode *Result =
+      CurDAG->getMachineNode(Hexagon::F2_sffma_sc, dl, MVT::f32, FinalOps);
+  ReplaceNode(N, Result);
+}
+
+/// Select the f32 ISD::FDIV node \p N as a shorter inline sequence and
+/// replace \p N with the final node of that sequence.
+///
+/// Dataflow (a = operand 0, b = operand 1), written "acc +/- x*y" for the
+/// three-operand F2_sffma_lib/F2_sffms_lib nodes built from (acc, x, y):
+///   R0 = sfrecipa(a, b);  D = sffixupd(a, b);  Nm = sffixupn(a, b)
+///   E0 = 1.0 - D*R0;   R1 = R0 + E0*R0
+///   E1 = 1.0 - D*R1;   R2 = R1 + E1*R1
+///   result = sfmpy(R2, Nm)
+/// Unlike FDiv, there is no quotient refinement and the second result of
+/// F2_sfrecipa is not used.
+/// NOTE(review): the "acc +/- x*y" reading assumes the usual Hexagon
+/// accumulate forms (Rx += sfmpy / Rx -= sfmpy) -- confirm against the PRM.
+void HexagonDAGToDAGISel::FastFDiv(SDNode *N) {
+  SDLoc dl(N);
+  SmallVector<SDValue, 2> Ops = {N->getOperand(0), N->getOperand(1)};
+
+  // F2_sfrecipa is created with two f32 results; only value 0 is used here.
+  SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::f32);
+  SDValue Recip0(CurDAG->getMachineNode(Hexagon::F2_sfrecipa, dl, VTs, Ops),
+                 0);
+  SDValue Denom(
+      CurDAG->getMachineNode(Hexagon::F2_sffixupd, dl, MVT::f32, Ops), 0);
+
+  // 0x3f800000 is the IEEE-754 single-precision bit pattern of 1.0.
+  SDValue OneBits = CurDAG->getTargetConstant(0x3f800000, dl, MVT::i32);
+  SDValue One(
+      CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, OneBits), 0);
+
+  SDValue Numer(
+      CurDAG->getMachineNode(Hexagon::F2_sffixupn, dl, MVT::f32, Ops), 0);
+
+  // First reciprocal refinement step.
+  SDValue Err0(CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32, One,
+                                      Denom, Recip0),
+               0);
+  SDValue Recip1(CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
+                                        Recip0, Err0, Recip0),
+                 0);
+
+  // Second reciprocal refinement step.
+  SDValue Err1(CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32, One,
+                                      Denom, Recip1),
+               0);
+  SDValue Recip2(CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
+                                        Recip1, Err1, Recip1),
+                 0);
+
+  // Quotient = refined reciprocal * fixed-up numerator.
+  SDNode *Result = CurDAG->getMachineNode(Hexagon::F2_sfmpy, dl, MVT::f32,
+                                          Recip2, Numer);
+  ReplaceNode(N, Result);
+}
+
+/// Dispatch an ISD::FDIV node to one of the two inline expansions: FastFDiv
+/// when the node carries the allow-reassociation flag, FDiv otherwise.
+void HexagonDAGToDAGISel::SelectFDiv(SDNode *N) {
+  const bool UseFastExpansion = N->getFlags().hasAllowReassociation();
+  if (!UseFastExpansion) {
+    FDiv(N);
+    return;
+  }
+  FastFDiv(N);
+}
+
void HexagonDAGToDAGISel::Select(SDNode *N) {
if (N->isMachineOpcode())
return N->setNodeId(-1); // Already selected.
@@ -949,6 +1041,8 @@ void HexagonDAGToDAGISel::Select(SDNode *N) {
case HexagonISD::D2P: return SelectD2P(N);
case HexagonISD::Q2V: return SelectQ2V(N);
case HexagonISD::V2Q: return SelectV2Q(N);
+ case ISD::FDIV:
+ return SelectFDiv(N);
}
SelectCode(N);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
index 8fb1760936e819..50162b10799643 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
@@ -110,6 +110,9 @@ class HexagonDAGToDAGISel : public SelectionDAGISel {
void SelectD2P(SDNode *N);
void SelectQ2V(SDNode *N);
void SelectV2Q(SDNode *N);
+ void SelectFDiv(SDNode *N);
+ void FDiv(SDNode *N);
+ void FastFDiv(SDNode *N);
// Include the declarations autogenerated from the selection patterns.
#define GET_DAGISEL_DECL
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 51138091f4a558..13691053ddd707 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -653,6 +653,13 @@ bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, V);
}
+/// Custom-lowering hook for ISD::FDIV.
+///
+/// Returns an empty SDValue when hasOptSize() is true for the enclosing
+/// function, and \p Op unchanged otherwise.
+/// NOTE(review): the empty result presumably makes the legalizer fall back to
+/// its default handling (the optsize test in this patch expects a call to
+/// __hexagon_divsf3), while returning Op keeps the node for instruction
+/// selection -- confirm against the legalizer.
+SDValue HexagonTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
+  const bool OptForSize =
+      DAG.getMachineFunction().getFunction().hasOptSize();
+  return OptForSize ? SDValue() : Op;
+}
+
SDValue
HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -1765,6 +1772,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FADD, MVT::f64, Expand);
setOperationAction(ISD::FSUB, MVT::f64, Expand);
setOperationAction(ISD::FMUL, MVT::f64, Expand);
+ setOperationAction(ISD::FDIV, MVT::f32, Custom);
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
@@ -3341,6 +3349,9 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
errs() << "Error: check for a non-legal type in this operation\n";
#endif
llvm_unreachable("Should not custom lower this!");
+
+ case ISD::FDIV:
+ return LowerFDIV(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 8c7d0b70f38578..cb09e5b17843e0 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -204,6 +204,7 @@ class HexagonTargetLowering : public TargetLowering {
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/Hexagon/inline-division-space.ll b/llvm/test/CodeGen/Hexagon/inline-division-space.ll
new file mode 100644
index 00000000000000..9cf3c5c8b2b84b
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/inline-division-space.ll
@@ -0,0 +1,30 @@
+; Test that division is not inlined (library calls are kept) when the function is marked optsize.
+; RUN: llc -O2 -march=hexagon < %s | FileCheck %s
+
+; Function Attrs: optsize
+define dso_local i32 @testInt(i32 %a, i32 %b) local_unnamed_addr #0 {
+entry:
+;CHECK: call __hexagon_divsi3
+ %div = sdiv i32 %a, %b
+ %conv = sitofp i32 %div to float
+ %conv1 = fptosi float %conv to i32
+ ret i32 %conv1
+}
+
+; Function Attrs: optsize
+define dso_local float @testFloat(float %a, float %b) local_unnamed_addr #0 {
+entry:
+;CHECK: call __hexagon_divsf3
+ %div = fdiv float %a, %b
+ ret float %div
+}
+
+; Function Attrs: optsize
+define dso_local double @testDouble(double %a, double %b) local_unnamed_addr #0 {
+entry:
+;CHECK: call __hexagon_divdf3
+ %div = fdiv double %a, %b
+ ret double %div
+}
+
+attributes #0 = { optsize }
diff --git a/llvm/test/CodeGen/Hexagon/inline-division.ll b/llvm/test/CodeGen/Hexagon/inline-division.ll
new file mode 100644
index 00000000000000..7249a3f55e8683
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/inline-division.ll
@@ -0,0 +1,29 @@
+; Test that f32 division is inlined when the function is not optimized for size; integer and f64 division remain library calls.
+; RUN: llc -O2 -march=hexagon < %s | FileCheck %s
+
+define dso_local i32 @testInt(i32 %a, i32 %b) local_unnamed_addr {
+entry:
+;CHECK: call __hexagon_divsi3
+ %div = sdiv i32 %a, %b
+ %conv = sitofp i32 %div to float
+ %conv1 = fptosi float %conv to i32
+ ret i32 %conv1
+}
+
+define dso_local float @testFloat(float %a, float %b) local_unnamed_addr {
+entry:
+;CHECK-NOT: call __hexagon_divsf3
+;CHECK: sfrecipa
+;CHECK: sffixupn
+;CHECK: and
+;CHECK: sfmpy
+ %div = fdiv float %a, %b
+ ret float %div
+}
+
+define dso_local double @testDouble(double %a, double %b) local_unnamed_addr {
+entry:
+;CHECK: call __hexagon_divdf3
+ %div = fdiv double %a, %b
+ ret double %div
+}
More information about the Openmp-commits
mailing list