[llvm] Inlining division for hexagon (PR #78599)

Thu Jan 18 07:25:33 PST 2024

https://github.com/quic-asaravan created https://github.com/llvm/llvm-project/pull/78599

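Inlining single-precision float division is beneficial because it replaces the call to the `__hexagon_divsf3` runtime routine with an inline instruction sequence. (Still to be filled in by the author: a concrete description of what this improves.)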

This patch implements the float-division selection functions (`SelectFDiv`, `FDiv` and `FastFDiv`) that emit the inline sequence for single-precision float division.

Tests added to check the following:
1. The implemented sequence is inlined when the -fast-math option is used.
2. The splat of a constant is handled properly when it is used in an fp vector operation that needs to be expanded.
3. Single-precision float division is inlined.
4. Division is not inlined when optimizing for size (optsize).


>From 5a0adc342be8d100f6e58a0059d9f73d14519f05 Mon Sep 17 00:00:00 2001
From: Awanish Pandey <awanpand at codeaurora.org>
Date: Sun, 6 Sep 2020 14:39:57 +0530
Subject: [PATCH] [QTOOL-10056] Inlining division for hexagon

Change-Id: I2f0d361a7b24b86198a5e9cad908d5ecaa03e1e3
Reviewed-on: https://review-hexagon.quicinc.com/345447
Tested-by: hex build <hexbuild at quicinc.com>
Reviewed-by: Sergei Larin <slarin at quicinc.com>
---
 .../Target/Hexagon/HexagonISelDAGToDAG.cpp    |  93 ++++++++
 llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h |   3 +
 .../Target/Hexagon/HexagonISelLowering.cpp    |  11 +
 llvm/lib/Target/Hexagon/HexagonISelLowering.h |   1 +
 llvm/test/CodeGen/Hexagon/fast-math.ll        | 198 ++++++++++++++++++
 llvm/test/CodeGen/Hexagon/fp16-splat.ll       |  27 +++
 .../CodeGen/Hexagon/inline-division-space.ll  |  30 +++
 llvm/test/CodeGen/Hexagon/inline-division.ll  |  29 +++
 8 files changed, 392 insertions(+)
 create mode 100644 llvm/test/CodeGen/Hexagon/fast-math.ll
 create mode 100644 llvm/test/CodeGen/Hexagon/fp16-splat.ll
 create mode 100644 llvm/test/CodeGen/Hexagon/inline-division-space.ll
 create mode 100644 llvm/test/CodeGen/Hexagon/inline-division.ll

diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index e4127b0b94c625..56d35e5875381d 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -904,6 +904,98 @@ void HexagonDAGToDAGISel::SelectQ2V(SDNode *N) {
   ReplaceNode(N, T);
 }
 
+// Emit the full inline expansion of an f32 FDIV: an F2_sfrecipa estimate,
+// F2_sffms_lib/F2_sffma_lib updates of the reciprocal and of the quotient,
+// and a final F2_sffma_sc that also consumes F2_sfrecipa's second result.
+void HexagonDAGToDAGISel::FDiv(SDNode *N) {
+  const SDLoc &dl(N);
+  SmallVector<SDValue, 2> Ops = {N->getOperand(0), N->getOperand(1)};
+  SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::f32);
+  SDNode *Recip = CurDAG->getMachineNode(Hexagon::F2_sfrecipa, dl, VTs, Ops);
+  SDNode *Den = CurDAG->getMachineNode(Hexagon::F2_sffixupd, dl, MVT::f32, Ops);
+
+  // 0x3f800000 is the IEEE-754 single-precision encoding of 1.0.
+  SDValue C = CurDAG->getTargetConstant(0x3f800000, dl, MVT::i32);
+  SDNode *One = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, C);
+
+  SDNode *Num = CurDAG->getMachineNode(Hexagon::F2_sffixupn, dl, MVT::f32, Ops);
+  SDNode *Err0 = CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
+                                        SDValue(One, 0), SDValue(Den, 0),
+                                        SDValue(Recip, 0));
+  SDNode *Rec1 = CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
+                                        SDValue(Recip, 0), SDValue(Err0, 0),
+                                        SDValue(Recip, 0));
+  SDNode *Err1 = CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
+                                        SDValue(One, 0), SDValue(Den, 0),
+                                        SDValue(Rec1, 0));
+  // Masking with 0x80000000 keeps only bit 31 of the F2_sffixupn result.
+  SDNode *SignQ = CurDAG->getMachineNode(
+      Hexagon::A2_andir, dl, MVT::f32, SDValue(Num, 0),
+      CurDAG->getTargetConstant(0x80000000, dl, MVT::i32));
+  SDNode *Quo0 = CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
+                                        SDValue(SignQ, 0), SDValue(Num, 0),
+                                        SDValue(Rec1, 0));
+  SDNode *Rec2 = CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
+                                        SDValue(Rec1, 0), SDValue(Err1, 0),
+                                        SDValue(Rec1, 0));
+  SDNode *QErr0 = CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
+                                         SDValue(Num, 0), SDValue(Den, 0),
+                                         SDValue(Quo0, 0));
+  SDNode *Quo1 = CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
+                                        SDValue(Quo0, 0), SDValue(QErr0, 0),
+                                        SDValue(Rec2, 0));
+
+  SDNode *QErr1 = CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
+                                         SDValue(Num, 0), SDValue(Quo1, 0),
+                                         SDValue(Den, 0));
+  SDValue ScaleOps[] = {SDValue(Quo1, 0), SDValue(QErr1, 0), SDValue(Rec2, 0),
+                        SDValue(Recip, 1)};
+  SDNode *Result =
+      CurDAG->getMachineNode(Hexagon::F2_sffma_sc, dl, MVT::f32, ScaleOps);
+  ReplaceNode(N, Result);
+}
+
+// Emit the shorter inline expansion of an f32 FDIV: an F2_sfrecipa estimate,
+// two F2_sffms_lib/F2_sffma_lib update pairs, then a single F2_sfmpy.
+void HexagonDAGToDAGISel::FastFDiv(SDNode *N) {
+  const SDLoc &dl(N);
+  SmallVector<SDValue, 2> Ops = {N->getOperand(0), N->getOperand(1)};
+  SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::f32);
+  SDNode *Recip = CurDAG->getMachineNode(Hexagon::F2_sfrecipa, dl, VTs, Ops);
+  SDNode *Den = CurDAG->getMachineNode(Hexagon::F2_sffixupd, dl, MVT::f32, Ops);
+
+  // 0x3f800000 is the IEEE-754 single-precision encoding of 1.0.
+  SDValue C = CurDAG->getTargetConstant(0x3f800000, dl, MVT::i32);
+  SDNode *One = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, C);
+
+  SDNode *Num = CurDAG->getMachineNode(Hexagon::F2_sffixupn, dl, MVT::f32, Ops);
+  SDNode *Err0 = CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
+                                        SDValue(One, 0), SDValue(Den, 0),
+                                        SDValue(Recip, 0));
+  SDNode *Rec1 = CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
+                                        SDValue(Recip, 0), SDValue(Err0, 0),
+                                        SDValue(Recip, 0));
+  SDNode *Err1 = CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
+                                        SDValue(One, 0), SDValue(Den, 0),
+                                        SDValue(Rec1, 0));
+
+  SDNode *Rec2 = CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
+                                        SDValue(Rec1, 0), SDValue(Err1, 0),
+                                        SDValue(Rec1, 0));
+  // Only value 0 of F2_sfrecipa is used here; the result is a plain F2_sfmpy.
+  SDNode *Result = CurDAG->getMachineNode(
+      Hexagon::F2_sfmpy, dl, MVT::f32, SDValue(Rec2, 0), SDValue(Num, 0));
+  ReplaceNode(N, Result);
+}
+
+void HexagonDAGToDAGISel::SelectFDiv(SDNode *N) {
+  // 'reassoc' selects the shorter FastFDiv sequence, otherwise the full FDiv.
+  // NOTE(review): confirm 'reassoc' (not 'arcp'/'afn') is the intended trigger.
+  if (!N->getFlags().hasAllowReassociation())
+    return FDiv(N);
+  FastFDiv(N);
+}
+
 void HexagonDAGToDAGISel::Select(SDNode *N) {
   if (N->isMachineOpcode())
     return N->setNodeId(-1);  // Already selected.
@@ -949,6 +1041,7 @@ void HexagonDAGToDAGISel::Select(SDNode *N) {
   case HexagonISD::D2P:           return SelectD2P(N);
   case HexagonISD::Q2V:           return SelectQ2V(N);
   case HexagonISD::V2Q:           return SelectV2Q(N);
+  case ISD::FDIV:                 return SelectFDiv(N);
   }
 
   SelectCode(N);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
index 8fb1760936e819..50162b10799643 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
@@ -110,6 +110,9 @@ class HexagonDAGToDAGISel : public SelectionDAGISel {
   void SelectD2P(SDNode *N);
   void SelectQ2V(SDNode *N);
   void SelectV2Q(SDNode *N);
+  void SelectFDiv(SDNode *N);
+  void FDiv(SDNode *N);
+  void FastFDiv(SDNode *N);
 
   // Include the declarations autogenerated from the selection patterns.
   #define GET_DAGISEL_DECL
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 51138091f4a558..6a07cacc312e34 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -653,6 +653,14 @@ bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
   return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, V);
 }
 
+SDValue
+HexagonTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
+  // Size-optimized functions get an empty SDValue (no custom lowering here);
+  // all other functions get the FDIV node back unchanged.
+  bool OptSize = DAG.getMachineFunction().getFunction().hasOptSize();
+  return OptSize ? SDValue() : Op;
+}
+
 SDValue
 HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
@@ -1765,6 +1773,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FADD, MVT::f64, Expand);
   setOperationAction(ISD::FSUB, MVT::f64, Expand);
   setOperationAction(ISD::FMUL, MVT::f64, Expand);
+  setOperationAction(ISD::FDIV, MVT::f32, Custom);
 
   setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
   setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
@@ -3341,6 +3350,8 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
         errs() << "Error: check for a non-legal type in this operation\n";
 #endif
       llvm_unreachable("Should not custom lower this!");
+
+    case ISD::FDIV:                 return LowerFDIV(Op, DAG);
     case ISD::CONCAT_VECTORS:       return LowerCONCAT_VECTORS(Op, DAG);
     case ISD::INSERT_SUBVECTOR:     return LowerINSERT_SUBVECTOR(Op, DAG);
     case ISD::INSERT_VECTOR_ELT:    return LowerINSERT_VECTOR_ELT(Op, DAG);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 8c7d0b70f38578..cb09e5b17843e0 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -204,6 +204,7 @@ class HexagonTargetLowering : public TargetLowering {
 
   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/Hexagon/fast-math.ll b/llvm/test/CodeGen/Hexagon/fast-math.ll
new file mode 100644
index 00000000000000..3ed3a918f60d13
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/fast-math.ll
@@ -0,0 +1,198 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv65 -fast-math < %s | FileCheck %s --check-prefix=MATHV5
+
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+define double @ffoo0(double %a, double %b, double %c) nounwind readnone {
+entry:
+; MATHV5: __hexagon_fast2_muldf3
+
+  %mul = fmul double %a, %c
+  ret double %mul
+}
+
+define double @ffoo1(double %a, double %b, double %c) nounwind readnone {
+entry:
+; MATHV5: __hexagon_fast2_subdf3
+  %sub = fsub double %a, %c
+  ret double %sub
+}
+
+define double @ffoo2(double %a, double %b, double %c) nounwind readnone {
+entry:
+; MATHV5: __hexagon_fast2_divdf3
+  %div = fdiv double %a, %c
+  ret double %div
+}
+
+define double @ffoo3(double %a, double %b, double %c) nounwind readnone {
+entry:
+; MATHV5: __hexagon_fast2_adddf3
+  %add = fadd double %a, %c
+  ret double %add
+}
+
+define double @ffoo4(double %a, double %b, double %c) nounwind readnone {
+entry:
+; MATHV5: togglebit
+  %sub = fsub double -0.000000e+00, %c
+  ret double %sub
+}
+
+define double @ffoo5b(double %a, double %b, double %c) nounwind readnone {
+entry:
+; MATHV5: __hexagon_fast2_sqrtdf2
+  %call = tail call double @sqrt(double %c) nounwind readnone
+  ret double %call
+}
+
+declare double @sqrt(double) nounwind readnone
+
+define double @ffoo5c(double %a, double %b, double %c) nounwind readnone {
+entry:
+; MATHV5: __hexagon_fast2_sqrtdf2
+  %call = tail call double @sqrt(double %c)
+  ret double %call
+}
+
+define i32 @ffoo6(double %a, double %b, double %c) nounwind readnone {
+entry:
+; MATHV5-NOT: __hexagon_fast2_ltdf2
+  %cmp = fcmp olt double %a, %c
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @ffoo7(double %a, double %b, double %c) nounwind readnone {
+entry:
+; MATHV5-NOT: __hexagon_fast2_gtdf2
+  %cmp = fcmp ogt double %a, %c
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @ffoo8(double %a, double %b, double %c) nounwind readnone {
+entry:
+  %cmp = fcmp ole double %a, %c
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @ffoo9(double %a, double %b, double %c) nounwind readnone {
+entry:
+  %cmp = fcmp oge double %a, %c
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @ffoo10(double %a, double %b, double %c) nounwind readnone {
+entry:
+  %cmp = fcmp oeq double %a, %c
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @ffoo11(double %a, double %b, double %c) nounwind readnone {
+entry:
+  %cmp = fcmp une double %a, %c
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define float @fgoo0(float %a, float %b, float %c) nounwind readnone {
+entry:
+; MATHV5-NOT: __hexagon_fast2_mulsf3
+  %mul = fmul float %a, %c
+  ret float %mul
+}
+
+define float @fgoo1(float %a, float %b, float %c) nounwind readnone {
+entry:
+; MATHV5-NOT: __hexagon_fast2_subsf3
+  %sub = fsub float %a, %c
+  ret float %sub
+}
+
+define float @fgoo2(float %a, float %b, float %c) nounwind readnone {
+entry:
+; MATHV5: += sfmpy({{.*}}):lib
+; MATHV5: -= sfmpy({{.*}}):lib
+; MATHV5: -= sfmpy({{.*}}):lib
+; MATHV5: += sfmpy({{.*}}):lib
+; MATHV5: += sfmpy({{.*}}):lib
+; MATHV5: -= sfmpy({{.*}}):lib
+; MATHV5: += sfmpy{{.*}}:scale
+  %div = fdiv float %a, %c
+  ret float %div
+}
+
+define float @fgoo3(float %a, float %b, float %c) nounwind readnone {
+entry:
+; MATHV5-NOT: __hexagon_fast2_addsf3
+  %add = fadd float %a, %c
+  ret float %add
+}
+
+define float @fgoo4(float %a, float %b, float %c) nounwind readnone {
+entry:
+  %sub = fsub float -0.000000e+00, %c
+  ret float %sub
+}
+
+define float @fgoo5b(float %a, float %b, float %c) nounwind readnone {
+entry:
+  %call = tail call float @sqrtf(float %c) nounwind readnone
+  ret float %call
+}
+
+declare float @sqrtf(float) nounwind readnone
+
+define float @fgoo5c(float %a, float %b, float %c) nounwind readnone {
+entry:
+  %call = tail call float @sqrtf(float %c)
+  ret float %call
+}
+
+define i32 @fgoo6(float %a, float %b, float %c) nounwind readnone {
+entry:
+; MATHV5-NOT: __hexagon_fast2_ltsf2
+  %cmp = fcmp olt float %a, %c
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fgoo7(float %a, float %b, float %c) nounwind readnone {
+entry:
+; MATHV5-NOT: __hexagon_fast2_gtsf2
+  %cmp = fcmp ogt float %a, %c
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fgoo8(float %a, float %b, float %c) nounwind readnone {
+entry:
+  %cmp = fcmp ole float %a, %c
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fgoo9(float %a, float %b, float %c) nounwind readnone {
+entry:
+  %cmp = fcmp oge float %a, %c
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fgoo10(float %a, float %b, float %c) nounwind readnone {
+entry:
+  %cmp = fcmp oeq float %a, %c
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @fgoo11(float %a, float %b, float %c) nounwind readnone {
+entry:
+  %cmp = fcmp une float %a, %c
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
diff --git a/llvm/test/CodeGen/Hexagon/fp16-splat.ll b/llvm/test/CodeGen/Hexagon/fp16-splat.ll
new file mode 100644
index 00000000000000..9a27aba766455d
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/fp16-splat.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=hexagon -enable-qfloat-codegen < %s | FileCheck %s
+; REQUIRES: asserts
+
+; Check that the splat of a constant, when it's used in a fp vector operation that
+; needs to be expanded (such as 'fdiv' below), is handled properly.
+
+; CHECK-LABEL: test1
+; CHECK: [[VREG0:(v[0-9]+)]].h = vsplat(r0)
+; CHECK: vmpy({{.*}}[[VREG0]].hf
+
+define dllexport void @test1() local_unnamed_addr #0 {
+entry:
+  %0 = load half, half* undef, align 2
+  %1 = fadd half 0xH0000, %0
+  %2 = insertelement <2 x half> undef, half %1, i32 1
+  %3 = shufflevector <2 x half> %2, <2 x half> undef, <64 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  %4 = fdiv <64 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, %3
+  %5 = load i8*, i8** undef, align 4
+  %6 = bitcast i8* %5 to half*
+  %cgep109 = getelementptr half, half* %6, i32 undef
+  %7 = bitcast half* %cgep109 to <64 x half>*
+  %8 = fmul <64 x half> zeroinitializer, %4
+  store <64 x half> %8, <64 x half>* %7, align 128
+  ret void
+}
+
+attributes #0 = { "target-features"="+hvxv68,+hvx-length128b,+hmxv68,+hvx-qfloat" }
diff --git a/llvm/test/CodeGen/Hexagon/inline-division-space.ll b/llvm/test/CodeGen/Hexagon/inline-division-space.ll
new file mode 100644
index 00000000000000..9cf3c5c8b2b84b
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/inline-division-space.ll
@@ -0,0 +1,30 @@
+; Test that division is not inlined (runtime library calls are emitted) when optimizing for size (optsize).
+; RUN: llc -O2 -march=hexagon   < %s | FileCheck  %s
+
+; Function Attrs: optsize
+define dso_local i32 @testInt(i32 %a, i32 %b) local_unnamed_addr #0 {
+entry:
+;CHECK: call __hexagon_divsi3
+  %div = sdiv i32 %a, %b
+  %conv = sitofp i32 %div to float
+  %conv1 = fptosi float %conv to i32
+  ret i32 %conv1
+}
+
+; Function Attrs: optsize
+define dso_local float @testFloat(float %a, float %b) local_unnamed_addr #0 {
+entry:
+;CHECK: call __hexagon_divsf3
+  %div = fdiv float %a, %b
+  ret float %div
+}
+
+; Function Attrs: optsize
+define dso_local double @testDouble(double %a, double %b) local_unnamed_addr #0 {
+entry:
+;CHECK: call __hexagon_divdf3
+  %div = fdiv double %a, %b
+  ret double %div
+}
+
+attributes #0 = { optsize }
diff --git a/llvm/test/CodeGen/Hexagon/inline-division.ll b/llvm/test/CodeGen/Hexagon/inline-division.ll
new file mode 100644
index 00000000000000..7249a3f55e8683
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/inline-division.ll
@@ -0,0 +1,29 @@
+; Test that only single-precision float division is inlined when not optimizing for size; integer and double division still call the runtime library.
+; RUN: llc -O2 -march=hexagon   < %s | FileCheck  %s
+
+define dso_local i32 @testInt(i32 %a, i32 %b) local_unnamed_addr  {
+entry:
+;CHECK: call __hexagon_divsi3
+  %div = sdiv i32 %a, %b
+  %conv = sitofp i32 %div to float
+  %conv1 = fptosi float %conv to i32
+  ret i32 %conv1
+}
+
+define dso_local float @testFloat(float %a, float %b) local_unnamed_addr  {
+entry:
+;CHECK-NOT: call __hexagon_divsf3
+;CHECK: sfrecipa
+;CHECK: sffixupn
+;CHECK: and
+;CHECK: sfmpy
+  %div = fdiv float %a, %b
+  ret float %div
+}
+
+define dso_local double @testDouble(double %a, double %b) local_unnamed_addr  {
+entry:
+;CHECK: call __hexagon_divdf3
+  %div = fdiv double %a, %b
+  ret double %div
+}