[llvm] a284bdb - [DAG] Fold fdiv X, c2 -> fmul X, 1/c2 without AllowReciprocal if exact (#93882)

David Green via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 9 04:28:25 PDT 2024


Author: David Green
Date: 2024-06-09T12:28:20+01:00
New Revision: a284bdb31146160352da905a888da738f2661b50

URL: https://github.com/llvm/llvm-project/commit/a284bdb31146160352da905a888da738f2661b50
DIFF: https://github.com/llvm/llvm-project/commit/a284bdb31146160352da905a888da738f2661b50.diff

LOG: [DAG] Fold fdiv X, c2 -> fmul X, 1/c2 without AllowReciprocal if exact (#93882)

This moves the combine of fdiv by constant to fmul out of the
'if (Options.UnsafeFPMath || Flags.hasAllowReciprocal())' block,
so that it also triggers when the division is exact, i.e. the
reciprocal of the constant can be represented without losing
precision. An extra check for Recip.isDenormal() is added, as
several places note that denormal values can be unsafe or slow on
certain platforms.
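
For illustration (not part of the patch), here is a small standalone
program using the APFloat API, including the getOne factory added
below, that shows which divisors now qualify: 1/2.0 is exact in f32,
so the fdiv is folded to an fmul without any fast-math flags, while
1/3.0 is inexact, so the fold still requires arcp or unsafe-fp-math.

    #include "llvm/ADT/APFloat.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    // Report whether 1/Divisor is exact and not denormal, i.e. whether
    // the fold now applies without AllowReciprocal/UnsafeFPMath.
    static void classifyDivisor(StringRef Divisor) {
      APFloat Recip = APFloat::getOne(APFloat::IEEEsingle());
      APFloat::opStatus St =
          Recip.divide(APFloat(APFloat::IEEEsingle(), Divisor),
                       APFloat::rmNearestTiesToEven);
      bool Exact = St == APFloat::opOK && !Recip.isDenormal();
      outs() << "1/" << Divisor
             << (Exact ? ": exact, folded to fmul unconditionally\n"
                       : ": inexact, folded only with arcp/unsafe-fp-math\n");
    }

    int main() {
      classifyDivisor("2.0"); // reciprocal 0.5 is exact
      classifyDivisor("3.0"); // reciprocal 0.3333... is inexact
      return 0;
    }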

Added: 
    

Modified: 
    llvm/include/llvm/ADT/APFloat.h
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/AArch64/fcvt-fixed.ll
    llvm/test/CodeGen/AArch64/fdiv-const.ll
    llvm/test/CodeGen/AArch64/frem-power2.ll
    llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll
    llvm/test/CodeGen/ARM/frem-power2.ll
    llvm/test/CodeGen/ARM/vdiv_combine.ll
    llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
    llvm/test/CodeGen/X86/change-unsafe-fp-math.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 44a301ecc9928..78faadb30d9eb 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -964,6 +964,13 @@ class APFloat : public APFloatBase {
     return Val;
   }
 
+  /// Factory for Positive and Negative One.
+  ///
+  /// \param Negative True iff the number should be negative.
+  static APFloat getOne(const fltSemantics &Sem, bool Negative = false) {
+    return APFloat(Sem, Negative ? -1 : 1);
+  }
+
   /// Factory for Positive and Negative Infinity.
   ///
   /// \param Negative True iff the number should be negative.
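
A brief usage sketch for the new factory (illustrative only, not part
of the patch); visitFDIV below uses it the same way:

    #include "llvm/ADT/APFloat.h"
    using llvm::APFloat;

    const llvm::fltSemantics &Sem = APFloat::IEEEsingle();
    APFloat One    = APFloat::getOne(Sem);                    // +1.0
    APFloat NegOne = APFloat::getOne(Sem, /*Negative=*/true); // -1.0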

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e3bd4ea3ffd90..4fcbe08e4b2b9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17262,26 +17262,29 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
   if (SDValue V = combineRepeatedFPDivisors(N))
     return V;
 
-  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
-    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
-    if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
-      // Compute the reciprocal 1.0 / c2.
-      const APFloat &N1APF = N1CFP->getValueAPF();
-      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
-      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
-      // Only do the transform if the reciprocal is a legal fp immediate that
-      // isn't too nasty (eg NaN, denormal, ...).
-      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
-          (!LegalOperations ||
-           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
-           // backend)... we should handle this gracefully after Legalize.
-           // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
-           TLI.isOperationLegal(ISD::ConstantFP, VT) ||
-           TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
-        return DAG.getNode(ISD::FMUL, DL, VT, N0,
-                           DAG.getConstantFP(Recip, DL, VT));
-    }
+  // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
+  // the loss is acceptable with AllowReciprocal.
+  if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
+    // Compute the reciprocal 1.0 / c2.
+    const APFloat &N1APF = N1CFP->getValueAPF();
+    APFloat Recip = APFloat::getOne(N1APF.getSemantics());
+    APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
+    // Only do the transform if the reciprocal is a legal fp immediate that
+    // isn't too nasty (eg NaN, denormal, ...).
+    if (((st == APFloat::opOK && !Recip.isDenormal()) ||
+         (st == APFloat::opInexact &&
+          (Options.UnsafeFPMath || Flags.hasAllowReciprocal()))) &&
+        (!LegalOperations ||
+         // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
+         // backend)... we should handle this gracefully after Legalize.
+         // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
+         TLI.isOperationLegal(ISD::ConstantFP, VT) ||
+         TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
+      return DAG.getNode(ISD::FMUL, DL, VT, N0,
+                         DAG.getConstantFP(Recip, DL, VT));
+  }
 
+  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
     // If this FDIV is part of a reciprocal square root, it may be folded
     // into a target-specific square root estimate instruction.
     if (N1.getOpcode() == ISD::FSQRT) {
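
One effect of the new Recip.isDenormal() guard, visible in the
fdiv-const.ll changes below: 1/32768 is representable exactly in half
precision, but only as a denormal, so divide() returns opOK while
isDenormal() is true, neither arm of the condition holds, and that
divide is no longer turned into a multiply even under arcp. A minimal
sketch of that check, assuming the usual APFloat API:

    #include "llvm/ADT/APFloat.h"
    #include <cassert>

    int main() {
      using namespace llvm;
      APFloat Recip = APFloat::getOne(APFloat::IEEEhalf());
      APFloat::opStatus St =
          Recip.divide(APFloat(APFloat::IEEEhalf(), "32768.0"),
                       APFloat::rmNearestTiesToEven);
      // 2^-15 is exact in f16 but below the smallest normal value 2^-14.
      assert(St == APFloat::opOK && Recip.isDenormal());
      (void)St;
      return 0;
    }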

diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
index 296be831da762..7056a4d28fed3 100644
--- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -412,10 +412,10 @@ define half @scvtf_f16_i32_7(i32 %int) {
 ; CHECK-NO16-LABEL: scvtf_f16_i32_7:
 ; CHECK-NO16:       // %bb.0:
 ; CHECK-NO16-NEXT:    scvtf s1, w0
-; CHECK-NO16-NEXT:    movi v0.2s, #67, lsl #24
+; CHECK-NO16-NEXT:    movi v0.2s, #60, lsl #24
 ; CHECK-NO16-NEXT:    fcvt h1, s1
 ; CHECK-NO16-NEXT:    fcvt s1, h1
-; CHECK-NO16-NEXT:    fdiv s0, s1, s0
+; CHECK-NO16-NEXT:    fmul s0, s1, s0
 ; CHECK-NO16-NEXT:    fcvt h0, s0
 ; CHECK-NO16-NEXT:    ret
 ;
@@ -432,10 +432,10 @@ define half @scvtf_f16_i32_15(i32 %int) {
 ; CHECK-NO16-LABEL: scvtf_f16_i32_15:
 ; CHECK-NO16:       // %bb.0:
 ; CHECK-NO16-NEXT:    scvtf s1, w0
-; CHECK-NO16-NEXT:    movi v0.2s, #71, lsl #24
+; CHECK-NO16-NEXT:    movi v0.2s, #56, lsl #24
 ; CHECK-NO16-NEXT:    fcvt h1, s1
 ; CHECK-NO16-NEXT:    fcvt s1, h1
-; CHECK-NO16-NEXT:    fdiv s0, s1, s0
+; CHECK-NO16-NEXT:    fmul s0, s1, s0
 ; CHECK-NO16-NEXT:    fcvt h0, s0
 ; CHECK-NO16-NEXT:    ret
 ;
@@ -452,10 +452,10 @@ define half @scvtf_f16_i64_7(i64 %long) {
 ; CHECK-NO16-LABEL: scvtf_f16_i64_7:
 ; CHECK-NO16:       // %bb.0:
 ; CHECK-NO16-NEXT:    scvtf s1, x0
-; CHECK-NO16-NEXT:    movi v0.2s, #67, lsl #24
+; CHECK-NO16-NEXT:    movi v0.2s, #60, lsl #24
 ; CHECK-NO16-NEXT:    fcvt h1, s1
 ; CHECK-NO16-NEXT:    fcvt s1, h1
-; CHECK-NO16-NEXT:    fdiv s0, s1, s0
+; CHECK-NO16-NEXT:    fmul s0, s1, s0
 ; CHECK-NO16-NEXT:    fcvt h0, s0
 ; CHECK-NO16-NEXT:    ret
 ;
@@ -472,10 +472,10 @@ define half @scvtf_f16_i64_15(i64 %long) {
 ; CHECK-NO16-LABEL: scvtf_f16_i64_15:
 ; CHECK-NO16:       // %bb.0:
 ; CHECK-NO16-NEXT:    scvtf s1, x0
-; CHECK-NO16-NEXT:    movi v0.2s, #71, lsl #24
+; CHECK-NO16-NEXT:    movi v0.2s, #56, lsl #24
 ; CHECK-NO16-NEXT:    fcvt h1, s1
 ; CHECK-NO16-NEXT:    fcvt s1, h1
-; CHECK-NO16-NEXT:    fdiv s0, s1, s0
+; CHECK-NO16-NEXT:    fmul s0, s1, s0
 ; CHECK-NO16-NEXT:    fcvt h0, s0
 ; CHECK-NO16-NEXT:    ret
 ;
@@ -574,10 +574,10 @@ define half @ucvtf_f16_i32_7(i32 %int) {
 ; CHECK-NO16-LABEL: ucvtf_f16_i32_7:
 ; CHECK-NO16:       // %bb.0:
 ; CHECK-NO16-NEXT:    ucvtf s1, w0
-; CHECK-NO16-NEXT:    movi v0.2s, #67, lsl #24
+; CHECK-NO16-NEXT:    movi v0.2s, #60, lsl #24
 ; CHECK-NO16-NEXT:    fcvt h1, s1
 ; CHECK-NO16-NEXT:    fcvt s1, h1
-; CHECK-NO16-NEXT:    fdiv s0, s1, s0
+; CHECK-NO16-NEXT:    fmul s0, s1, s0
 ; CHECK-NO16-NEXT:    fcvt h0, s0
 ; CHECK-NO16-NEXT:    ret
 ;
@@ -594,10 +594,10 @@ define half @ucvtf_f16_i32_15(i32 %int) {
 ; CHECK-NO16-LABEL: ucvtf_f16_i32_15:
 ; CHECK-NO16:       // %bb.0:
 ; CHECK-NO16-NEXT:    ucvtf s1, w0
-; CHECK-NO16-NEXT:    movi v0.2s, #71, lsl #24
+; CHECK-NO16-NEXT:    movi v0.2s, #56, lsl #24
 ; CHECK-NO16-NEXT:    fcvt h1, s1
 ; CHECK-NO16-NEXT:    fcvt s1, h1
-; CHECK-NO16-NEXT:    fdiv s0, s1, s0
+; CHECK-NO16-NEXT:    fmul s0, s1, s0
 ; CHECK-NO16-NEXT:    fcvt h0, s0
 ; CHECK-NO16-NEXT:    ret
 ;
@@ -614,10 +614,10 @@ define half @ucvtf_f16_i64_7(i64 %long) {
 ; CHECK-NO16-LABEL: ucvtf_f16_i64_7:
 ; CHECK-NO16:       // %bb.0:
 ; CHECK-NO16-NEXT:    ucvtf s1, x0
-; CHECK-NO16-NEXT:    movi v0.2s, #67, lsl #24
+; CHECK-NO16-NEXT:    movi v0.2s, #60, lsl #24
 ; CHECK-NO16-NEXT:    fcvt h1, s1
 ; CHECK-NO16-NEXT:    fcvt s1, h1
-; CHECK-NO16-NEXT:    fdiv s0, s1, s0
+; CHECK-NO16-NEXT:    fmul s0, s1, s0
 ; CHECK-NO16-NEXT:    fcvt h0, s0
 ; CHECK-NO16-NEXT:    ret
 ;
@@ -634,10 +634,10 @@ define half @ucvtf_f16_i64_15(i64 %long) {
 ; CHECK-NO16-LABEL: ucvtf_f16_i64_15:
 ; CHECK-NO16:       // %bb.0:
 ; CHECK-NO16-NEXT:    ucvtf s1, x0
-; CHECK-NO16-NEXT:    movi v0.2s, #71, lsl #24
+; CHECK-NO16-NEXT:    movi v0.2s, #56, lsl #24
 ; CHECK-NO16-NEXT:    fcvt h1, s1
 ; CHECK-NO16-NEXT:    fcvt s1, h1
-; CHECK-NO16-NEXT:    fdiv s0, s1, s0
+; CHECK-NO16-NEXT:    fmul s0, s1, s0
 ; CHECK-NO16-NEXT:    fcvt h0, s0
 ; CHECK-NO16-NEXT:    ret
 ;

diff --git a/llvm/test/CodeGen/AArch64/fdiv-const.ll b/llvm/test/CodeGen/AArch64/fdiv-const.ll
index 5a8f733843401..7aa89db71adfe 100644
--- a/llvm/test/CodeGen/AArch64/fdiv-const.ll
+++ b/llvm/test/CodeGen/AArch64/fdiv-const.ll
@@ -4,8 +4,8 @@
 define float @divf32_2(float %a) nounwind {
 ; CHECK-LABEL: divf32_2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov s1, #2.00000000
-; CHECK-NEXT:    fdiv s0, s0, s1
+; CHECK-NEXT:    fmov s1, #0.50000000
+; CHECK-NEXT:    fmul s0, s0, s1
 ; CHECK-NEXT:    ret
   %r = fdiv float %a, 2.0
   ret float %r
@@ -46,8 +46,8 @@ define float @divf32_p75_arcp(float %a) nounwind {
 define half @divf16_2(half %a) nounwind {
 ; CHECK-LABEL: divf16_2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov h1, #2.00000000
-; CHECK-NEXT:    fdiv h0, h0, h1
+; CHECK-NEXT:    fmov h1, #0.50000000
+; CHECK-NEXT:    fmul h0, h0, h1
 ; CHECK-NEXT:    ret
   %r = fdiv half %a, 2.0
   ret half %r
@@ -67,9 +67,9 @@ define half @divf16_32768(half %a) nounwind {
 define half @divf16_32768_arcp(half %a) nounwind {
 ; CHECK-LABEL: divf16_32768_arcp:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #512 // =0x200
+; CHECK-NEXT:    mov w8, #30720 // =0x7800
 ; CHECK-NEXT:    fmov h1, w8
-; CHECK-NEXT:    fmul h0, h0, h1
+; CHECK-NEXT:    fdiv h0, h0, h1
 ; CHECK-NEXT:    ret
   %r = fdiv arcp half %a, 32768.0
   ret half %r
@@ -78,8 +78,8 @@ define half @divf16_32768_arcp(half %a) nounwind {
 define double @divf64_2(double %a) nounwind {
 ; CHECK-LABEL: divf64_2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov d1, #2.00000000
-; CHECK-NEXT:    fdiv d0, d0, d1
+; CHECK-NEXT:    fmov d1, #0.50000000
+; CHECK-NEXT:    fmul d0, d0, d1
 ; CHECK-NEXT:    ret
   %r = fdiv double %a, 2.0
   ret double %r
@@ -88,8 +88,8 @@ define double @divf64_2(double %a) nounwind {
 define <4 x float> @divv4f32_2(<4 x float> %a) nounwind {
 ; CHECK-LABEL: divv4f32_2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.4s, #64, lsl #24
-; CHECK-NEXT:    fdiv v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    movi v1.4s, #63, lsl #24
+; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %r = fdiv <4 x float> %a, <float 2.0, float 2.0, float 2.0, float 2.0>
   ret <4 x float> %r
@@ -141,9 +141,8 @@ define <4 x float> @divv4f32_24816(<4 x float> %a) nounwind {
 define <vscale x 4 x float> @divnxv4f32_2(<vscale x 4 x float> %a) nounwind {
 ; CHECK-LABEL: divnxv4f32_2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov z1.s, #2.00000000
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    fdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, #0.5
 ; CHECK-NEXT:    ret
   %r = fdiv <vscale x 4 x float> %a, splat (float 2.0)
   ret <vscale x 4 x float> %r

diff --git a/llvm/test/CodeGen/AArch64/frem-power2.ll b/llvm/test/CodeGen/AArch64/frem-power2.ll
index 4192745abd347..98276b68481a1 100644
--- a/llvm/test/CodeGen/AArch64/frem-power2.ll
+++ b/llvm/test/CodeGen/AArch64/frem-power2.ll
@@ -5,11 +5,12 @@
 define float @frem2(float %x) {
 ; CHECK-SD-LABEL: frem2:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    fmov s1, #2.00000000
+; CHECK-SD-NEXT:    fmov s1, #0.50000000
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT:    fdiv s2, s0, s1
-; CHECK-SD-NEXT:    frintz s2, s2
-; CHECK-SD-NEXT:    fmsub s1, s2, s1, s0
+; CHECK-SD-NEXT:    fmov s2, #-2.00000000
+; CHECK-SD-NEXT:    fmul s1, s0, s1
+; CHECK-SD-NEXT:    frintz s1, s1
+; CHECK-SD-NEXT:    fmadd s1, s1, s2, s0
 ; CHECK-SD-NEXT:    mvni v2.4s, #128, lsl #24
 ; CHECK-SD-NEXT:    bit v0.16b, v1.16b, v2.16b
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
@@ -27,10 +28,11 @@ entry:
 define float @frem2_nsz(float %x) {
 ; CHECK-SD-LABEL: frem2_nsz:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    fmov s1, #2.00000000
-; CHECK-SD-NEXT:    fdiv s2, s0, s1
-; CHECK-SD-NEXT:    frintz s2, s2
-; CHECK-SD-NEXT:    fmsub s0, s2, s1, s0
+; CHECK-SD-NEXT:    fmov s1, #0.50000000
+; CHECK-SD-NEXT:    fmov s2, #-2.00000000
+; CHECK-SD-NEXT:    fmul s1, s0, s1
+; CHECK-SD-NEXT:    frintz s1, s1
+; CHECK-SD-NEXT:    fmadd s0, s1, s2, s0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: frem2_nsz:
@@ -65,10 +67,11 @@ define float @frem2_abs(float %x) {
 ; CHECK-SD-LABEL: frem2_abs:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fabs s0, s0
-; CHECK-SD-NEXT:    fmov s1, #2.00000000
-; CHECK-SD-NEXT:    fdiv s2, s0, s1
-; CHECK-SD-NEXT:    frintz s2, s2
-; CHECK-SD-NEXT:    fmsub s0, s2, s1, s0
+; CHECK-SD-NEXT:    fmov s1, #0.50000000
+; CHECK-SD-NEXT:    fmov s2, #-2.00000000
+; CHECK-SD-NEXT:    fmul s1, s0, s1
+; CHECK-SD-NEXT:    frintz s1, s1
+; CHECK-SD-NEXT:    fmadd s0, s1, s2, s0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: frem2_abs:
@@ -85,9 +88,9 @@ entry:
 define half @hrem2_nsz(half %x) {
 ; CHECK-SD-LABEL: hrem2_nsz:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    fmov h1, #2.00000000
+; CHECK-SD-NEXT:    fmov h1, #0.50000000
 ; CHECK-SD-NEXT:    fmov h2, #-2.00000000
-; CHECK-SD-NEXT:    fdiv h1, h0, h1
+; CHECK-SD-NEXT:    fmul h1, h0, h1
 ; CHECK-SD-NEXT:    frintz h1, h1
 ; CHECK-SD-NEXT:    fmadd h0, h1, h2, h0
 ; CHECK-SD-NEXT:    ret
@@ -112,10 +115,11 @@ entry:
 define double @drem2_nsz(double %x) {
 ; CHECK-SD-LABEL: drem2_nsz:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    fmov d1, #2.00000000
-; CHECK-SD-NEXT:    fdiv d2, d0, d1
-; CHECK-SD-NEXT:    frintz d2, d2
-; CHECK-SD-NEXT:    fmsub d0, d2, d1, d0
+; CHECK-SD-NEXT:    fmov d1, #0.50000000
+; CHECK-SD-NEXT:    fmov d2, #-2.00000000
+; CHECK-SD-NEXT:    fmul d1, d0, d1
+; CHECK-SD-NEXT:    frintz d1, d1
+; CHECK-SD-NEXT:    fmadd d0, d1, d2, d0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: drem2_nsz:
@@ -176,10 +180,11 @@ entry:
 define float @fremm2_nsz(float %x) {
 ; CHECK-SD-LABEL: fremm2_nsz:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    fmov s1, #-2.00000000
-; CHECK-SD-NEXT:    fdiv s2, s0, s1
-; CHECK-SD-NEXT:    frintz s2, s2
-; CHECK-SD-NEXT:    fmsub s0, s2, s1, s0
+; CHECK-SD-NEXT:    fmov s1, #-0.50000000
+; CHECK-SD-NEXT:    fmov s2, #2.00000000
+; CHECK-SD-NEXT:    fmul s1, s0, s1
+; CHECK-SD-NEXT:    frintz s1, s1
+; CHECK-SD-NEXT:    fmadd s0, s1, s2, s0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fremm2_nsz:
@@ -195,10 +200,11 @@ define float @frem4_abs(float %x) {
 ; CHECK-SD-LABEL: frem4_abs:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fabs s0, s0
-; CHECK-SD-NEXT:    fmov s1, #4.00000000
-; CHECK-SD-NEXT:    fdiv s2, s0, s1
-; CHECK-SD-NEXT:    frintz s2, s2
-; CHECK-SD-NEXT:    fmsub s0, s2, s1, s0
+; CHECK-SD-NEXT:    fmov s1, #0.25000000
+; CHECK-SD-NEXT:    fmov s2, #-4.00000000
+; CHECK-SD-NEXT:    fmul s1, s0, s1
+; CHECK-SD-NEXT:    frintz s1, s1
+; CHECK-SD-NEXT:    fmadd s0, s1, s2, s0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: frem4_abs:
@@ -216,10 +222,12 @@ define float @frem16_abs(float %x) {
 ; CHECK-SD-LABEL: frem16_abs:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fabs s0, s0
-; CHECK-SD-NEXT:    fmov s1, #16.00000000
-; CHECK-SD-NEXT:    fdiv s2, s0, s1
-; CHECK-SD-NEXT:    frintz s2, s2
-; CHECK-SD-NEXT:    fmsub s0, s2, s1, s0
+; CHECK-SD-NEXT:    mov w8, #1031798784 // =0x3d800000
+; CHECK-SD-NEXT:    fmov s2, #-16.00000000
+; CHECK-SD-NEXT:    fmov s1, w8
+; CHECK-SD-NEXT:    fmul s1, s0, s1
+; CHECK-SD-NEXT:    frintz s1, s1
+; CHECK-SD-NEXT:    fmadd s0, s1, s2, s0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: frem16_abs:
@@ -237,11 +245,13 @@ define float @frem4294967296_abs(float %x) {
 ; CHECK-SD-LABEL: frem4294967296_abs:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fabs s0, s0
-; CHECK-SD-NEXT:    mov w8, #1333788672 // =0x4f800000
+; CHECK-SD-NEXT:    mov w8, #796917760 // =0x2f800000
 ; CHECK-SD-NEXT:    fmov s1, w8
-; CHECK-SD-NEXT:    fdiv s2, s0, s1
-; CHECK-SD-NEXT:    frintz s2, s2
-; CHECK-SD-NEXT:    fmsub s0, s2, s1, s0
+; CHECK-SD-NEXT:    mov w8, #-813694976 // =0xcf800000
+; CHECK-SD-NEXT:    fmov s2, w8
+; CHECK-SD-NEXT:    fmul s1, s0, s1
+; CHECK-SD-NEXT:    frintz s1, s1
+; CHECK-SD-NEXT:    fmadd s0, s1, s2, s0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: frem4294967296_abs:
@@ -260,11 +270,13 @@ define float @frem1152921504606846976_abs(float %x) {
 ; CHECK-SD-LABEL: frem1152921504606846976_abs:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fabs s0, s0
-; CHECK-SD-NEXT:    mov w8, #1568669696 // =0x5d800000
+; CHECK-SD-NEXT:    mov w8, #562036736 // =0x21800000
 ; CHECK-SD-NEXT:    fmov s1, w8
-; CHECK-SD-NEXT:    fdiv s2, s0, s1
-; CHECK-SD-NEXT:    frintz s2, s2
-; CHECK-SD-NEXT:    fmsub s0, s2, s1, s0
+; CHECK-SD-NEXT:    mov w8, #-578813952 // =0xdd800000
+; CHECK-SD-NEXT:    fmov s2, w8
+; CHECK-SD-NEXT:    fmul s1, s0, s1
+; CHECK-SD-NEXT:    frintz s1, s1
+; CHECK-SD-NEXT:    fmadd s0, s1, s2, s0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: frem1152921504606846976_abs:
@@ -283,11 +295,13 @@ define float @frem4611686018427387904_abs(float %x) {
 ; CHECK-SD-LABEL: frem4611686018427387904_abs:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fabs s0, s0
-; CHECK-SD-NEXT:    mov w8, #1585446912 // =0x5e800000
+; CHECK-SD-NEXT:    mov w8, #545259520 // =0x20800000
 ; CHECK-SD-NEXT:    fmov s1, w8
-; CHECK-SD-NEXT:    fdiv s2, s0, s1
-; CHECK-SD-NEXT:    frintz s2, s2
-; CHECK-SD-NEXT:    fmsub s0, s2, s1, s0
+; CHECK-SD-NEXT:    mov w8, #-562036736 // =0xde800000
+; CHECK-SD-NEXT:    fmov s2, w8
+; CHECK-SD-NEXT:    fmul s1, s0, s1
+; CHECK-SD-NEXT:    frintz s1, s1
+; CHECK-SD-NEXT:    fmadd s0, s1, s2, s0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: frem4611686018427387904_abs:
@@ -305,11 +319,12 @@ entry:
 define float @frem9223372036854775808_abs(float %x) {
 ; CHECK-SD-LABEL: frem9223372036854775808_abs:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    movi v1.2s, #95, lsl #24
+; CHECK-SD-NEXT:    movi v1.2s, #32, lsl #24
 ; CHECK-SD-NEXT:    fabs s0, s0
-; CHECK-SD-NEXT:    fdiv s2, s0, s1
-; CHECK-SD-NEXT:    frintz s2, s2
-; CHECK-SD-NEXT:    fmsub s0, s2, s1, s0
+; CHECK-SD-NEXT:    movi v2.2s, #223, lsl #24
+; CHECK-SD-NEXT:    fmul s1, s0, s1
+; CHECK-SD-NEXT:    frintz s1, s1
+; CHECK-SD-NEXT:    fmadd s0, s1, s2, s0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: frem9223372036854775808_abs:
@@ -326,11 +341,12 @@ entry:
 define <4 x float> @frem2_vec(<4 x float> %x) {
 ; CHECK-SD-LABEL: frem2_vec:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    movi v1.4s, #64, lsl #24
+; CHECK-SD-NEXT:    movi v1.4s, #63, lsl #24
+; CHECK-SD-NEXT:    movi v2.4s, #64, lsl #24
 ; CHECK-SD-NEXT:    mov v3.16b, v0.16b
-; CHECK-SD-NEXT:    fdiv v2.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT:    frintz v2.4s, v2.4s
-; CHECK-SD-NEXT:    fmls v3.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT:    fmul v1.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    frintz v1.4s, v1.4s
+; CHECK-SD-NEXT:    fmls v3.4s, v2.4s, v1.4s
 ; CHECK-SD-NEXT:    mvni v1.4s, #128, lsl #24
 ; CHECK-SD-NEXT:    bit v0.16b, v3.16b, v1.16b
 ; CHECK-SD-NEXT:    ret
@@ -387,10 +403,11 @@ entry:
 define <4 x float> @frem2_nsz_vec(<4 x float> %x) {
 ; CHECK-SD-LABEL: frem2_nsz_vec:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    movi v1.4s, #64, lsl #24
-; CHECK-SD-NEXT:    fdiv v2.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT:    frintz v2.4s, v2.4s
-; CHECK-SD-NEXT:    fmls v0.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT:    movi v1.4s, #63, lsl #24
+; CHECK-SD-NEXT:    movi v2.4s, #64, lsl #24
+; CHECK-SD-NEXT:    fmul v1.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    frintz v1.4s, v1.4s
+; CHECK-SD-NEXT:    fmls v0.4s, v2.4s, v1.4s
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: frem2_nsz_vec:
@@ -445,12 +462,14 @@ entry:
 define <4 x float> @frem1152921504606846976_absv(<4 x float> %x) {
 ; CHECK-SD-LABEL: frem1152921504606846976_absv:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    mov w8, #1568669696 // =0x5d800000
+; CHECK-SD-NEXT:    mov w8, #562036736 // =0x21800000
 ; CHECK-SD-NEXT:    fabs v0.4s, v0.4s
 ; CHECK-SD-NEXT:    dup v1.4s, w8
-; CHECK-SD-NEXT:    fdiv v2.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT:    frintz v2.4s, v2.4s
-; CHECK-SD-NEXT:    fmls v0.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT:    mov w8, #1568669696 // =0x5d800000
+; CHECK-SD-NEXT:    dup v2.4s, w8
+; CHECK-SD-NEXT:    fmul v1.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    frintz v1.4s, v1.4s
+; CHECK-SD-NEXT:    fmls v0.4s, v2.4s, v1.4s
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: frem1152921504606846976_absv:

diff --git a/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll b/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll
index 5386ef425dcb5..64d4a0cf78501 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll
@@ -17,7 +17,7 @@ define amdgpu_ps float @uniform_phi_with_undef(float inreg %c, float %v, i32 %x,
 ; GCN-NEXT:    s_mov_b32 exec_lo, s2
 ; GCN-NEXT:    s_cbranch_execz .LBB0_2
 ; GCN-NEXT:  ; %bb.1: ; %if
-; GCN-NEXT:    s_mov_b32 s2, 2.0
+; GCN-NEXT:    s_mov_b32 s2, 0x40400000
 ; GCN-NEXT:    v_div_scale_f32 v1, s3, s2, s2, v0
 ; GCN-NEXT:    v_rcp_f32_e64 v2, v1
 ; GCN-NEXT:    s_mov_b32 s3, 1.0
@@ -39,7 +39,7 @@ entry:
   br i1 %cc, label %if, label %end
 
 if:
-  %v.if = fdiv float %v, 2.0
+  %v.if = fdiv float %v, 3.0
   br label %end
 
 end:

diff --git a/llvm/test/CodeGen/ARM/frem-power2.ll b/llvm/test/CodeGen/ARM/frem-power2.ll
index 71c2c09c0105c..63ecd9fec7883 100644
--- a/llvm/test/CodeGen/ARM/frem-power2.ll
+++ b/llvm/test/CodeGen/ARM/frem-power2.ll
@@ -14,26 +14,28 @@ define float @frem4(float %x) {
 ;
 ; CHECK-FP-LABEL: frem4:
 ; CHECK-FP:       @ %bb.0: @ %entry
-; CHECK-FP-NEXT:    vmov.f32 s0, #4.000000e+00
-; CHECK-FP-NEXT:    vmov s2, r0
+; CHECK-FP-NEXT:    vmov.f32 s0, #2.500000e-01
+; CHECK-FP-NEXT:    vmov.f32 s2, #-4.000000e+00
+; CHECK-FP-NEXT:    vmov s4, r0
 ; CHECK-FP-NEXT:    lsrs r0, r0, #31
-; CHECK-FP-NEXT:    vdiv.f32 s4, s2, s0
-; CHECK-FP-NEXT:    vrintz.f32 s4, s4
-; CHECK-FP-NEXT:    vfms.f32 s2, s4, s0
-; CHECK-FP-NEXT:    vmov r1, s2
+; CHECK-FP-NEXT:    vmul.f32 s0, s4, s0
+; CHECK-FP-NEXT:    vrintz.f32 s0, s0
+; CHECK-FP-NEXT:    vfma.f32 s4, s0, s2
+; CHECK-FP-NEXT:    vmov r1, s4
 ; CHECK-FP-NEXT:    bfi r1, r0, #31, #1
 ; CHECK-FP-NEXT:    mov r0, r1
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-M33-LABEL: frem4:
 ; CHECK-M33:       @ %bb.0: @ %entry
-; CHECK-M33-NEXT:    vmov.f32 s0, #4.000000e+00
-; CHECK-M33-NEXT:    vmov s2, r0
+; CHECK-M33-NEXT:    vmov.f32 s0, #2.500000e-01
+; CHECK-M33-NEXT:    vmov.f32 s2, #-4.000000e+00
+; CHECK-M33-NEXT:    vmov s4, r0
 ; CHECK-M33-NEXT:    lsrs r0, r0, #31
-; CHECK-M33-NEXT:    vdiv.f32 s4, s2, s0
-; CHECK-M33-NEXT:    vrintz.f32 s4, s4
-; CHECK-M33-NEXT:    vmls.f32 s2, s4, s0
-; CHECK-M33-NEXT:    vmov r1, s2
+; CHECK-M33-NEXT:    vmul.f32 s0, s4, s0
+; CHECK-M33-NEXT:    vrintz.f32 s0, s0
+; CHECK-M33-NEXT:    vmla.f32 s4, s0, s2
+; CHECK-M33-NEXT:    vmov r1, s4
 ; CHECK-M33-NEXT:    bfi r1, r0, #31, #1
 ; CHECK-M33-NEXT:    mov r0, r1
 ; CHECK-M33-NEXT:    bx lr
@@ -53,22 +55,24 @@ define float @frem4_nsz(float %x) {
 ;
 ; CHECK-FP-LABEL: frem4_nsz:
 ; CHECK-FP:       @ %bb.0: @ %entry
-; CHECK-FP-NEXT:    vmov.f32 s0, #4.000000e+00
-; CHECK-FP-NEXT:    vmov s2, r0
-; CHECK-FP-NEXT:    vdiv.f32 s4, s2, s0
-; CHECK-FP-NEXT:    vrintz.f32 s4, s4
-; CHECK-FP-NEXT:    vfms.f32 s2, s4, s0
-; CHECK-FP-NEXT:    vmov r0, s2
+; CHECK-FP-NEXT:    vmov.f32 s0, #2.500000e-01
+; CHECK-FP-NEXT:    vmov.f32 s2, #-4.000000e+00
+; CHECK-FP-NEXT:    vmov s4, r0
+; CHECK-FP-NEXT:    vmul.f32 s0, s4, s0
+; CHECK-FP-NEXT:    vrintz.f32 s0, s0
+; CHECK-FP-NEXT:    vfma.f32 s4, s0, s2
+; CHECK-FP-NEXT:    vmov r0, s4
 ; CHECK-FP-NEXT:    bx lr
 ;
 ; CHECK-M33-LABEL: frem4_nsz:
 ; CHECK-M33:       @ %bb.0: @ %entry
-; CHECK-M33-NEXT:    vmov.f32 s0, #4.000000e+00
-; CHECK-M33-NEXT:    vmov s2, r0
-; CHECK-M33-NEXT:    vdiv.f32 s4, s2, s0
-; CHECK-M33-NEXT:    vrintz.f32 s4, s4
-; CHECK-M33-NEXT:    vmls.f32 s2, s4, s0
-; CHECK-M33-NEXT:    vmov r0, s2
+; CHECK-M33-NEXT:    vmov.f32 s0, #2.500000e-01
+; CHECK-M33-NEXT:    vmov.f32 s2, #-4.000000e+00
+; CHECK-M33-NEXT:    vmov s4, r0
+; CHECK-M33-NEXT:    vmul.f32 s0, s4, s0
+; CHECK-M33-NEXT:    vrintz.f32 s0, s0
+; CHECK-M33-NEXT:    vmla.f32 s4, s0, s2
+; CHECK-M33-NEXT:    vmov r0, s4
 ; CHECK-M33-NEXT:    bx lr
 entry:
   %fmod = frem nsz float %x, 4.0

diff --git a/llvm/test/CodeGen/ARM/vdiv_combine.ll b/llvm/test/CodeGen/ARM/vdiv_combine.ll
index 988844661085e..899487f9efb2c 100644
--- a/llvm/test/CodeGen/ARM/vdiv_combine.ll
+++ b/llvm/test/CodeGen/ARM/vdiv_combine.ll
@@ -5,10 +5,7 @@
 define arm_aapcs_vfpcc <2 x float> @t1(<2 x i32> %vecinit2.i) nounwind {
 ; CHECK-LABEL: t1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.f32 s2, #8.000000e+00
-; CHECK-NEXT:    vcvt.f32.s32 d2, d0
-; CHECK-NEXT:    vdiv.f32 s1, s5, s2
-; CHECK-NEXT:    vdiv.f32 s0, s4, s2
+; CHECK-NEXT:    vcvt.f32.s32 d0, d0, #3
 ; CHECK-NEXT:    bx lr
 entry:
   %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -20,10 +17,7 @@ entry:
 define arm_aapcs_vfpcc <2 x float> @t2(<2 x i32> %vecinit2.i) nounwind {
 ; CHECK-LABEL: t2:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.f32 s2, #8.000000e+00
-; CHECK-NEXT:    vcvt.f32.u32 d2, d0
-; CHECK-NEXT:    vdiv.f32 s1, s5, s2
-; CHECK-NEXT:    vdiv.f32 s0, s4, s2
+; CHECK-NEXT:    vcvt.f32.u32 d0, d0, #3
 ; CHECK-NEXT:    bx lr
 entry:
   %vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -56,17 +50,10 @@ entry:
 define arm_aapcs_vfpcc <2 x float> @t4(<2 x i32> %vecinit2.i) nounwind {
 ; CHECK-LABEL: t4:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vcvt.f32.s32 d2, d0
-; CHECK-NEXT:    vldr s2, LCPI3_0
-; CHECK-NEXT:    vdiv.f32 s1, s5, s2
-; CHECK-NEXT:    vdiv.f32 s0, s4, s2
+; CHECK-NEXT:    vcvt.f32.s32 d16, d0
+; CHECK-NEXT:    vmov.i32 d17, #0x2f000000
+; CHECK-NEXT:    vmul.f32 d0, d16, d17
 ; CHECK-NEXT:    bx lr
-; CHECK-NEXT:    .p2align 2
-; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:    .data_region
-; CHECK-NEXT:  LCPI3_0:
-; CHECK-NEXT:    .long 0x50000000 @ float 8.58993459E+9
-; CHECK-NEXT:    .end_data_region
 entry:
   %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
   %div.i = fdiv <2 x float> %vcvt.i, <float 0x4200000000000000, float 0x4200000000000000>
@@ -77,17 +64,8 @@ entry:
 define arm_aapcs_vfpcc <2 x float> @t5(<2 x i32> %vecinit2.i) nounwind {
 ; CHECK-LABEL: t5:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vcvt.f32.s32 d2, d0
-; CHECK-NEXT:    vldr s2, LCPI4_0
-; CHECK-NEXT:    vdiv.f32 s1, s5, s2
-; CHECK-NEXT:    vdiv.f32 s0, s4, s2
+; CHECK-NEXT:    vcvt.f32.s32 d0, d0, #32
 ; CHECK-NEXT:    bx lr
-; CHECK-NEXT:    .p2align 2
-; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:    .data_region
-; CHECK-NEXT:  LCPI4_0:
-; CHECK-NEXT:    .long 0x4f800000 @ float 4.2949673E+9
-; CHECK-NEXT:    .end_data_region
 entry:
   %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
   %div.i = fdiv <2 x float> %vcvt.i, <float 0x41F0000000000000, float 0x41F0000000000000>
@@ -98,12 +76,7 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @t6(<4 x i32> %vecinit6.i) nounwind {
 ; CHECK-LABEL: t6:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.f32 s4, #8.000000e+00
-; CHECK-NEXT:    vcvt.f32.s32 q2, q0
-; CHECK-NEXT:    vdiv.f32 s3, s11, s4
-; CHECK-NEXT:    vdiv.f32 s2, s10, s4
-; CHECK-NEXT:    vdiv.f32 s1, s9, s4
-; CHECK-NEXT:    vdiv.f32 s0, s8, s4
+; CHECK-NEXT:    vcvt.f32.s32 q0, q0, #3
 ; CHECK-NEXT:    bx lr
 entry:
   %vcvt.i = sitofp <4 x i32> %vecinit6.i to <4 x float>
@@ -115,12 +88,7 @@ define arm_aapcs_vfpcc <4 x float> @fix_unsigned_i16_to_float(<4 x i16> %in) {
 ; CHECK-LABEL: fix_unsigned_i16_to_float:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vmovl.u16 q8, d0
-; CHECK-NEXT:    vmov.f32 s4, #2.000000e+00
-; CHECK-NEXT:    vcvt.f32.u32 q2, q8
-; CHECK-NEXT:    vdiv.f32 s3, s11, s4
-; CHECK-NEXT:    vdiv.f32 s2, s10, s4
-; CHECK-NEXT:    vdiv.f32 s1, s9, s4
-; CHECK-NEXT:    vdiv.f32 s0, s8, s4
+; CHECK-NEXT:    vcvt.f32.u32 q0, q8, #1
 ; CHECK-NEXT:    bx lr
   %conv = uitofp <4 x i16> %in to <4 x float>
   %shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0>
@@ -131,12 +99,7 @@ define arm_aapcs_vfpcc <4 x float> @fix_signed_i16_to_float(<4 x i16> %in) {
 ; CHECK-LABEL: fix_signed_i16_to_float:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vmovl.s16 q8, d0
-; CHECK-NEXT:    vmov.f32 s4, #2.000000e+00
-; CHECK-NEXT:    vcvt.f32.s32 q2, q8
-; CHECK-NEXT:    vdiv.f32 s3, s11, s4
-; CHECK-NEXT:    vdiv.f32 s2, s10, s4
-; CHECK-NEXT:    vdiv.f32 s1, s9, s4
-; CHECK-NEXT:    vdiv.f32 s0, s8, s4
+; CHECK-NEXT:    vcvt.f32.s32 q0, q8, #1
 ; CHECK-NEXT:    bx lr
   %conv = sitofp <4 x i16> %in to <4 x float>
   %shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0>
@@ -152,13 +115,12 @@ define arm_aapcs_vfpcc <2 x float> @fix_i64_to_float(<2 x i64> %in) {
 ; CHECK-NEXT:    vmov r0, r1, d9
 ; CHECK-NEXT:    bl ___floatundisf
 ; CHECK-NEXT:    vmov r2, r1, d8
-; CHECK-NEXT:    vmov s18, r0
-; CHECK-NEXT:    vmov.f32 s16, #2.000000e+00
+; CHECK-NEXT:    vmov s19, r0
+; CHECK-NEXT:    vmov.i32 d8, #0x3f000000
 ; CHECK-NEXT:    mov r0, r2
 ; CHECK-NEXT:    bl ___floatundisf
-; CHECK-NEXT:    vmov s2, r0
-; CHECK-NEXT:    vdiv.f32 s1, s18, s16
-; CHECK-NEXT:    vdiv.f32 s0, s2, s16
+; CHECK-NEXT:    vmov s18, r0
+; CHECK-NEXT:    vmul.f32 d0, d9, d8
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    pop {lr}
 ; CHECK-NEXT:    bx lr
@@ -177,13 +139,13 @@ define arm_aapcs_vfpcc <2 x double> @fix_i64_to_double(<2 x i64> %in) {
 ; CHECK-NEXT:    bl ___floatundidf
 ; CHECK-NEXT:    vmov r2, r3, d8
 ; CHECK-NEXT:    vmov d9, r0, r1
-; CHECK-NEXT:    vmov.f64 d8, #2.000000e+00
+; CHECK-NEXT:    vmov.f64 d8, #5.000000e-01
 ; CHECK-NEXT:    mov r0, r2
 ; CHECK-NEXT:    mov r1, r3
 ; CHECK-NEXT:    bl ___floatundidf
 ; CHECK-NEXT:    vmov d16, r0, r1
-; CHECK-NEXT:    vdiv.f64 d1, d9, d8
-; CHECK-NEXT:    vdiv.f64 d0, d16, d8
+; CHECK-NEXT:    vmul.f64 d1, d9, d8
+; CHECK-NEXT:    vmul.f64 d0, d16, d8
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    pop {lr}
 ; CHECK-NEXT:    bx lr
@@ -196,19 +158,8 @@ define arm_aapcs_vfpcc <2 x double> @fix_i64_to_double(<2 x i64> %in) {
 define arm_aapcs_vfpcc <8 x float> @test7(<8 x i32> %in) nounwind {
 ; CHECK-LABEL: test7:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    vmov.f32 s12, #8.000000e+00
-; CHECK-NEXT:    vcvt.f32.s32 q4, q0
-; CHECK-NEXT:    vcvt.f32.s32 q2, q1
-; CHECK-NEXT:    vdiv.f32 s3, s19, s12
-; CHECK-NEXT:    vdiv.f32 s7, s11, s12
-; CHECK-NEXT:    vdiv.f32 s2, s18, s12
-; CHECK-NEXT:    vdiv.f32 s6, s10, s12
-; CHECK-NEXT:    vdiv.f32 s1, s17, s12
-; CHECK-NEXT:    vdiv.f32 s5, s9, s12
-; CHECK-NEXT:    vdiv.f32 s0, s16, s12
-; CHECK-NEXT:    vdiv.f32 s4, s8, s12
-; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    vcvt.f32.s32 q0, q0, #3
+; CHECK-NEXT:    vcvt.f32.s32 q1, q1, #3
 ; CHECK-NEXT:    bx lr
 entry:
   %vcvt.i = sitofp <8 x i32> %in to <8 x float>
@@ -220,19 +171,8 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @test8(<4 x i32> %in) {
 ; CHECK-LABEL: test8:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov.f32 s4, #2.000000e+00
-; CHECK-NEXT:    vcvt.f32.s32 q2, q0
-; CHECK-NEXT:    vdiv.f32 s2, s10, s4
-; CHECK-NEXT:    vdiv.f32 s1, s9, s4
-; CHECK-NEXT:    vdiv.f32 s0, s8, s4
-; CHECK-NEXT:    vldr s3, LCPI11_0
+; CHECK-NEXT:    vcvt.f32.s32 q0, q0, #1
 ; CHECK-NEXT:    bx lr
-; CHECK-NEXT:    .p2align 2
-; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:    .data_region
-; CHECK-NEXT:  LCPI11_0:
-; CHECK-NEXT:    .long 0x7fc00000 @ float NaN
-; CHECK-NEXT:    .end_data_region
   %vcvt.i = sitofp <4 x i32> %in to <4 x float>
   %div.i = fdiv <4 x float> %vcvt.i, <float 2.0, float 2.0, float 2.0, float undef>
   ret <4 x float> %div.i
@@ -241,19 +181,8 @@ define arm_aapcs_vfpcc <4 x float> @test8(<4 x i32> %in) {
 define arm_aapcs_vfpcc <3 x float> @test_illegal_int_to_fp(<3 x i32> %in) {
 ; CHECK-LABEL: test_illegal_int_to_fp:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov.f32 s4, #4.000000e+00
-; CHECK-NEXT:    vcvt.f32.s32 q2, q0
-; CHECK-NEXT:    vdiv.f32 s2, s10, s4
-; CHECK-NEXT:    vdiv.f32 s1, s9, s4
-; CHECK-NEXT:    vdiv.f32 s0, s8, s4
-; CHECK-NEXT:    vldr s3, LCPI12_0
+; CHECK-NEXT:    vcvt.f32.s32 q0, q0, #2
 ; CHECK-NEXT:    bx lr
-; CHECK-NEXT:    .p2align 2
-; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:    .data_region
-; CHECK-NEXT:  LCPI12_0:
-; CHECK-NEXT:    .long 0x7fc00000 @ float NaN
-; CHECK-NEXT:    .end_data_region
   %conv = sitofp <3 x i32> %in to <3 x float>
   %res = fdiv <3 x float> %conv, <float 4.0, float 4.0, float 4.0>
   ret <3 x float> %res

diff --git a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
index 47ac8848a437d..ac65a1112be43 100644
--- a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
+++ b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
@@ -1367,7 +1367,7 @@ define void @bcast_unfold_fdiv_v16f32(ptr nocapture %arg) {
 ; CHECK-LABEL: bcast_unfold_fdiv_v16f32:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    movq $-4096, %rax # imm = 0xF000
-; CHECK-NEXT:    vbroadcastss {{.*#+}} zmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; CHECK-NEXT:    vbroadcastss {{.*#+}} zmm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB42_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -1386,7 +1386,7 @@ bb1:                                              ; preds = %bb1, %bb
   %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ]
   %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp
   %tmp4 = load <16 x float>, ptr %tmp2, align 4
-  %tmp5 = fdiv <16 x float> %tmp4, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+  %tmp5 = fdiv <16 x float> %tmp4, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
   store <16 x float> %tmp5, ptr %tmp2, align 4
   %tmp7 = add i64 %tmp, 16
   %tmp8 = icmp eq i64 %tmp7, 1024
@@ -1400,7 +1400,7 @@ define void @bcast_unfold_fdiv_v8f32(ptr nocapture %arg) {
 ; CHECK-LABEL: bcast_unfold_fdiv_v8f32:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    movq $-4096, %rax # imm = 0xF000
-; CHECK-NEXT:    vbroadcastss {{.*#+}} ymm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; CHECK-NEXT:    vbroadcastss {{.*#+}} ymm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB43_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -1419,7 +1419,7 @@ bb1:                                              ; preds = %bb1, %bb
   %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ]
   %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp
   %tmp4 = load <8 x float>, ptr %tmp2, align 4
-  %tmp5 = fdiv <8 x float> %tmp4, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+  %tmp5 = fdiv <8 x float> %tmp4, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
   store <8 x float> %tmp5, ptr %tmp2, align 4
   %tmp7 = add i64 %tmp, 8
   %tmp8 = icmp eq i64 %tmp7, 1024
@@ -1433,7 +1433,7 @@ define void @bcast_unfold_fdiv_v4f32(ptr nocapture %arg) {
 ; CHECK-LABEL: bcast_unfold_fdiv_v4f32:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    movq $-4096, %rax # imm = 0xF000
-; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB44_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -1451,7 +1451,7 @@ bb1:                                              ; preds = %bb1, %bb
   %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ]
   %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp
   %tmp4 = load <4 x float>, ptr %tmp2, align 4
-  %tmp5 = fdiv <4 x float> %tmp4, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+  %tmp5 = fdiv <4 x float> %tmp4, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
   store <4 x float> %tmp5, ptr %tmp2, align 4
   %tmp7 = add i64 %tmp, 4
   %tmp8 = icmp eq i64 %tmp7, 1024
@@ -1465,7 +1465,7 @@ define void @bcast_unfold_fdiv_v8f64(ptr nocapture %arg) {
 ; CHECK-LABEL: bcast_unfold_fdiv_v8f64:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    movq $-8192, %rax # imm = 0xE000
-; CHECK-NEXT:    vbroadcastsd {{.*#+}} zmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; CHECK-NEXT:    vbroadcastsd {{.*#+}} zmm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB45_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -1484,7 +1484,7 @@ bb1:                                              ; preds = %bb1, %bb
   %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ]
   %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp
   %tmp4 = load <8 x double>, ptr %tmp2, align 8
-  %tmp5 = fdiv <8 x double> %tmp4, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+  %tmp5 = fdiv <8 x double> %tmp4, <double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00>
   store <8 x double> %tmp5, ptr %tmp2, align 8
   %tmp7 = add i64 %tmp, 8
   %tmp8 = icmp eq i64 %tmp7, 1024
@@ -1498,7 +1498,7 @@ define void @bcast_unfold_fdiv_v4f64(ptr nocapture %arg) {
 ; CHECK-LABEL: bcast_unfold_fdiv_v4f64:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    movq $-8192, %rax # imm = 0xE000
-; CHECK-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; CHECK-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB46_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -1517,7 +1517,7 @@ bb1:                                              ; preds = %bb1, %bb
   %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ]
   %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp
   %tmp4 = load <4 x double>, ptr %tmp2, align 8
-  %tmp5 = fdiv <4 x double> %tmp4, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+  %tmp5 = fdiv <4 x double> %tmp4, <double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00>
   store <4 x double> %tmp5, ptr %tmp2, align 8
   %tmp7 = add i64 %tmp, 4
   %tmp8 = icmp eq i64 %tmp7, 1024
@@ -1531,7 +1531,7 @@ define void @bcast_unfold_fdiv_v2f64(ptr nocapture %arg) {
 ; CHECK-LABEL: bcast_unfold_fdiv_v2f64:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    movq $-8192, %rax # imm = 0xE000
-; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = [2.0E+0,2.0E+0]
+; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = [3.0E+0,3.0E+0]
 ; CHECK-NEXT:    # xmm0 = mem[0,0]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB47_1: # %bb1
@@ -1550,7 +1550,7 @@ bb1:                                              ; preds = %bb1, %bb
   %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ]
   %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp
   %tmp4 = load <2 x double>, ptr %tmp2, align 8
-  %tmp5 = fdiv <2 x double> %tmp4, <double 2.000000e+00, double 2.000000e+00>
+  %tmp5 = fdiv <2 x double> %tmp4, <double 3.000000e+00, double 3.000000e+00>
   store <2 x double> %tmp5, ptr %tmp2, align 8
   %tmp7 = add i64 %tmp, 2
   %tmp8 = icmp eq i64 %tmp7, 1024

diff --git a/llvm/test/CodeGen/X86/change-unsafe-fp-math.ll b/llvm/test/CodeGen/X86/change-unsafe-fp-math.ll
index 33a7ec9bfc794..ba09ba8b6402b 100644
--- a/llvm/test/CodeGen/X86/change-unsafe-fp-math.ll
+++ b/llvm/test/CodeGen/X86/change-unsafe-fp-math.ll
@@ -14,7 +14,7 @@
 define double @unsafe_fp_math_default0(double %x) {
 ; SAFE:      divsd
 ; UNSAFE:    mulsd
-  %div = fdiv double %x, 2.0
+  %div = fdiv double %x, 3.0
   ret double %div
 }
 
@@ -22,7 +22,7 @@ define double @unsafe_fp_math_default0(double %x) {
 define double @unsafe_fp_math_off(double %x) #0 {
 ; SAFE:      divsd
 ; UNSAFE:    divsd
-  %div = fdiv double %x, 2.0
+  %div = fdiv double %x, 3.0
   ret double %div
 }
 
@@ -31,7 +31,7 @@ define double @unsafe_fp_math_default1(double %x) {
 ; With unsafe math enabled, can change this div to a mul.
 ; SAFE:      divsd
 ; UNSAFE:    mulsd
-  %div = fdiv double %x, 2.0
+  %div = fdiv double %x, 3.0
   ret double %div
 }
 
@@ -39,7 +39,7 @@ define double @unsafe_fp_math_default1(double %x) {
 define double @unsafe_fp_math_on(double %x) #1 {
 ; SAFE:      mulsd
 ; UNSAFE:    mulsd
-  %div = fdiv double %x, 2.0
+  %div = fdiv double %x, 3.0
   ret double %div
 }
 
@@ -48,7 +48,7 @@ define double @unsafe_fp_math_default2(double %x) {
 ; With unsafe math enabled, can change this div to a mul.
 ; SAFE:      divsd
 ; UNSAFE:    mulsd
-  %div = fdiv double %x, 2.0
+  %div = fdiv double %x, 3.0
   ret double %div
 }
 


        

