[llvm] a284bdb - [DAG] Fold fdiv X, c2 -> fmul X, 1/c2 without AllowReciprocal if exact (#93882)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 9 04:28:25 PDT 2024
Author: David Green
Date: 2024-06-09T12:28:20+01:00
New Revision: a284bdb31146160352da905a888da738f2661b50
URL: https://github.com/llvm/llvm-project/commit/a284bdb31146160352da905a888da738f2661b50
DIFF: https://github.com/llvm/llvm-project/commit/a284bdb31146160352da905a888da738f2661b50.diff
LOG: [DAG] Fold fdiv X, c2 -> fmul X, 1/c2 without AllowReciprocal if exact (#93882)
This moves the combine of fdiv by constant to fmul out of an
'if (Options.UnsafeFPMath || Flags.hasAllowReciprocal())' block,
so that it triggers if the divide is exact. An extra check for
Recip.isDenormal() is added as multiple places make reference
to it being unsafe or slow on certain platforms.
Added:
Modified:
llvm/include/llvm/ADT/APFloat.h
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/fcvt-fixed.ll
llvm/test/CodeGen/AArch64/fdiv-const.ll
llvm/test/CodeGen/AArch64/frem-power2.ll
llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll
llvm/test/CodeGen/ARM/frem-power2.ll
llvm/test/CodeGen/ARM/vdiv_combine.ll
llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
llvm/test/CodeGen/X86/change-unsafe-fp-math.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 44a301ecc9928..78faadb30d9eb 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -964,6 +964,13 @@ class APFloat : public APFloatBase {
return Val;
}
+ /// Factory for Positive and Negative One.
+ ///
+ /// \param Negative True iff the number should be negative.
+ static APFloat getOne(const fltSemantics &Sem, bool Negative = false) {
+ return APFloat(Sem, Negative ? -1 : 1);
+ }
+
/// Factory for Positive and Negative Infinity.
///
/// \param Negative True iff the number should be negative.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e3bd4ea3ffd90..4fcbe08e4b2b9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17262,26 +17262,29 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (SDValue V = combineRepeatedFPDivisors(N))
return V;
- if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
- // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
- if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
- // Compute the reciprocal 1.0 / c2.
- const APFloat &N1APF = N1CFP->getValueAPF();
- APFloat Recip(N1APF.getSemantics(), 1); // 1.0
- APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
- // Only do the transform if the reciprocal is a legal fp immediate that
- // isn't too nasty (eg NaN, denormal, ...).
- if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
- (!LegalOperations ||
- // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
- // backend)... we should handle this gracefully after Legalize.
- // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
- TLI.isOperationLegal(ISD::ConstantFP, VT) ||
- TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
- return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getConstantFP(Recip, DL, VT));
- }
+ // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
+ // the loss is acceptable with AllowReciprocal.
+ if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
+ // Compute the reciprocal 1.0 / c2.
+ const APFloat &N1APF = N1CFP->getValueAPF();
+ APFloat Recip = APFloat::getOne(N1APF.getSemantics());
+ APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
+ // Only do the transform if the reciprocal is a legal fp immediate that
+ // isn't too nasty (eg NaN, denormal, ...).
+ if (((st == APFloat::opOK && !Recip.isDenormal()) ||
+ (st == APFloat::opInexact &&
+ (Options.UnsafeFPMath || Flags.hasAllowReciprocal()))) &&
+ (!LegalOperations ||
+ // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
+ // backend)... we should handle this gracefully after Legalize.
+ // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
+ TLI.isOperationLegal(ISD::ConstantFP, VT) ||
+ TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getConstantFP(Recip, DL, VT));
+ }
+ if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) {
diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
index 296be831da762..7056a4d28fed3 100644
--- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -412,10 +412,10 @@ define half @scvtf_f16_i32_7(i32 %int) {
; CHECK-NO16-LABEL: scvtf_f16_i32_7:
; CHECK-NO16: // %bb.0:
; CHECK-NO16-NEXT: scvtf s1, w0
-; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24
+; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24
; CHECK-NO16-NEXT: fcvt h1, s1
; CHECK-NO16-NEXT: fcvt s1, h1
-; CHECK-NO16-NEXT: fdiv s0, s1, s0
+; CHECK-NO16-NEXT: fmul s0, s1, s0
; CHECK-NO16-NEXT: fcvt h0, s0
; CHECK-NO16-NEXT: ret
;
@@ -432,10 +432,10 @@ define half @scvtf_f16_i32_15(i32 %int) {
; CHECK-NO16-LABEL: scvtf_f16_i32_15:
; CHECK-NO16: // %bb.0:
; CHECK-NO16-NEXT: scvtf s1, w0
-; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24
+; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24
; CHECK-NO16-NEXT: fcvt h1, s1
; CHECK-NO16-NEXT: fcvt s1, h1
-; CHECK-NO16-NEXT: fdiv s0, s1, s0
+; CHECK-NO16-NEXT: fmul s0, s1, s0
; CHECK-NO16-NEXT: fcvt h0, s0
; CHECK-NO16-NEXT: ret
;
@@ -452,10 +452,10 @@ define half @scvtf_f16_i64_7(i64 %long) {
; CHECK-NO16-LABEL: scvtf_f16_i64_7:
; CHECK-NO16: // %bb.0:
; CHECK-NO16-NEXT: scvtf s1, x0
-; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24
+; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24
; CHECK-NO16-NEXT: fcvt h1, s1
; CHECK-NO16-NEXT: fcvt s1, h1
-; CHECK-NO16-NEXT: fdiv s0, s1, s0
+; CHECK-NO16-NEXT: fmul s0, s1, s0
; CHECK-NO16-NEXT: fcvt h0, s0
; CHECK-NO16-NEXT: ret
;
@@ -472,10 +472,10 @@ define half @scvtf_f16_i64_15(i64 %long) {
; CHECK-NO16-LABEL: scvtf_f16_i64_15:
; CHECK-NO16: // %bb.0:
; CHECK-NO16-NEXT: scvtf s1, x0
-; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24
+; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24
; CHECK-NO16-NEXT: fcvt h1, s1
; CHECK-NO16-NEXT: fcvt s1, h1
-; CHECK-NO16-NEXT: fdiv s0, s1, s0
+; CHECK-NO16-NEXT: fmul s0, s1, s0
; CHECK-NO16-NEXT: fcvt h0, s0
; CHECK-NO16-NEXT: ret
;
@@ -574,10 +574,10 @@ define half @ucvtf_f16_i32_7(i32 %int) {
; CHECK-NO16-LABEL: ucvtf_f16_i32_7:
; CHECK-NO16: // %bb.0:
; CHECK-NO16-NEXT: ucvtf s1, w0
-; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24
+; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24
; CHECK-NO16-NEXT: fcvt h1, s1
; CHECK-NO16-NEXT: fcvt s1, h1
-; CHECK-NO16-NEXT: fdiv s0, s1, s0
+; CHECK-NO16-NEXT: fmul s0, s1, s0
; CHECK-NO16-NEXT: fcvt h0, s0
; CHECK-NO16-NEXT: ret
;
@@ -594,10 +594,10 @@ define half @ucvtf_f16_i32_15(i32 %int) {
; CHECK-NO16-LABEL: ucvtf_f16_i32_15:
; CHECK-NO16: // %bb.0:
; CHECK-NO16-NEXT: ucvtf s1, w0
-; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24
+; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24
; CHECK-NO16-NEXT: fcvt h1, s1
; CHECK-NO16-NEXT: fcvt s1, h1
-; CHECK-NO16-NEXT: fdiv s0, s1, s0
+; CHECK-NO16-NEXT: fmul s0, s1, s0
; CHECK-NO16-NEXT: fcvt h0, s0
; CHECK-NO16-NEXT: ret
;
@@ -614,10 +614,10 @@ define half @ucvtf_f16_i64_7(i64 %long) {
; CHECK-NO16-LABEL: ucvtf_f16_i64_7:
; CHECK-NO16: // %bb.0:
; CHECK-NO16-NEXT: ucvtf s1, x0
-; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24
+; CHECK-NO16-NEXT: movi v0.2s, #60, lsl #24
; CHECK-NO16-NEXT: fcvt h1, s1
; CHECK-NO16-NEXT: fcvt s1, h1
-; CHECK-NO16-NEXT: fdiv s0, s1, s0
+; CHECK-NO16-NEXT: fmul s0, s1, s0
; CHECK-NO16-NEXT: fcvt h0, s0
; CHECK-NO16-NEXT: ret
;
@@ -634,10 +634,10 @@ define half @ucvtf_f16_i64_15(i64 %long) {
; CHECK-NO16-LABEL: ucvtf_f16_i64_15:
; CHECK-NO16: // %bb.0:
; CHECK-NO16-NEXT: ucvtf s1, x0
-; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24
+; CHECK-NO16-NEXT: movi v0.2s, #56, lsl #24
; CHECK-NO16-NEXT: fcvt h1, s1
; CHECK-NO16-NEXT: fcvt s1, h1
-; CHECK-NO16-NEXT: fdiv s0, s1, s0
+; CHECK-NO16-NEXT: fmul s0, s1, s0
; CHECK-NO16-NEXT: fcvt h0, s0
; CHECK-NO16-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/fdiv-const.ll b/llvm/test/CodeGen/AArch64/fdiv-const.ll
index 5a8f733843401..7aa89db71adfe 100644
--- a/llvm/test/CodeGen/AArch64/fdiv-const.ll
+++ b/llvm/test/CodeGen/AArch64/fdiv-const.ll
@@ -4,8 +4,8 @@
define float @divf32_2(float %a) nounwind {
; CHECK-LABEL: divf32_2:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s1, #2.00000000
-; CHECK-NEXT: fdiv s0, s0, s1
+; CHECK-NEXT: fmov s1, #0.50000000
+; CHECK-NEXT: fmul s0, s0, s1
; CHECK-NEXT: ret
%r = fdiv float %a, 2.0
ret float %r
@@ -46,8 +46,8 @@ define float @divf32_p75_arcp(float %a) nounwind {
define half @divf16_2(half %a) nounwind {
; CHECK-LABEL: divf16_2:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov h1, #2.00000000
-; CHECK-NEXT: fdiv h0, h0, h1
+; CHECK-NEXT: fmov h1, #0.50000000
+; CHECK-NEXT: fmul h0, h0, h1
; CHECK-NEXT: ret
%r = fdiv half %a, 2.0
ret half %r
@@ -67,9 +67,9 @@ define half @divf16_32768(half %a) nounwind {
define half @divf16_32768_arcp(half %a) nounwind {
; CHECK-LABEL: divf16_32768_arcp:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #512 // =0x200
+; CHECK-NEXT: mov w8, #30720 // =0x7800
; CHECK-NEXT: fmov h1, w8
-; CHECK-NEXT: fmul h0, h0, h1
+; CHECK-NEXT: fdiv h0, h0, h1
; CHECK-NEXT: ret
%r = fdiv arcp half %a, 32768.0
ret half %r
@@ -78,8 +78,8 @@ define half @divf16_32768_arcp(half %a) nounwind {
define double @divf64_2(double %a) nounwind {
; CHECK-LABEL: divf64_2:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d1, #2.00000000
-; CHECK-NEXT: fdiv d0, d0, d1
+; CHECK-NEXT: fmov d1, #0.50000000
+; CHECK-NEXT: fmul d0, d0, d1
; CHECK-NEXT: ret
%r = fdiv double %a, 2.0
ret double %r
@@ -88,8 +88,8 @@ define double @divf64_2(double %a) nounwind {
define <4 x float> @divv4f32_2(<4 x float> %a) nounwind {
; CHECK-LABEL: divv4f32_2:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.4s, #64, lsl #24
-; CHECK-NEXT: fdiv v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: movi v1.4s, #63, lsl #24
+; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%r = fdiv <4 x float> %a, <float 2.0, float 2.0, float 2.0, float 2.0>
ret <4 x float> %r
@@ -141,9 +141,8 @@ define <4 x float> @divv4f32_24816(<4 x float> %a) nounwind {
define <vscale x 4 x float> @divnxv4f32_2(<vscale x 4 x float> %a) nounwind {
; CHECK-LABEL: divnxv4f32_2:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov z1.s, #2.00000000
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: fmul z0.s, p0/m, z0.s, #0.5
; CHECK-NEXT: ret
%r = fdiv <vscale x 4 x float> %a, splat (float 2.0)
ret <vscale x 4 x float> %r
diff --git a/llvm/test/CodeGen/AArch64/frem-power2.ll b/llvm/test/CodeGen/AArch64/frem-power2.ll
index 4192745abd347..98276b68481a1 100644
--- a/llvm/test/CodeGen/AArch64/frem-power2.ll
+++ b/llvm/test/CodeGen/AArch64/frem-power2.ll
@@ -5,11 +5,12 @@
define float @frem2(float %x) {
; CHECK-SD-LABEL: frem2:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov s1, #2.00000000
+; CHECK-SD-NEXT: fmov s1, #0.50000000
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: fdiv s2, s0, s1
-; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmsub s1, s2, s1, s0
+; CHECK-SD-NEXT: fmov s2, #-2.00000000
+; CHECK-SD-NEXT: fmul s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fmadd s1, s1, s2, s0
; CHECK-SD-NEXT: mvni v2.4s, #128, lsl #24
; CHECK-SD-NEXT: bit v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
@@ -27,10 +28,11 @@ entry:
define float @frem2_nsz(float %x) {
; CHECK-SD-LABEL: frem2_nsz:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: fdiv s2, s0, s1
-; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
+; CHECK-SD-NEXT: fmov s1, #0.50000000
+; CHECK-SD-NEXT: fmov s2, #-2.00000000
+; CHECK-SD-NEXT: fmul s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_nsz:
@@ -65,10 +67,11 @@ define float @frem2_abs(float %x) {
; CHECK-SD-LABEL: frem2_abs:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fabs s0, s0
-; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: fdiv s2, s0, s1
-; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
+; CHECK-SD-NEXT: fmov s1, #0.50000000
+; CHECK-SD-NEXT: fmov s2, #-2.00000000
+; CHECK-SD-NEXT: fmul s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_abs:
@@ -85,9 +88,9 @@ entry:
define half @hrem2_nsz(half %x) {
; CHECK-SD-LABEL: hrem2_nsz:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov h1, #2.00000000
+; CHECK-SD-NEXT: fmov h1, #0.50000000
; CHECK-SD-NEXT: fmov h2, #-2.00000000
-; CHECK-SD-NEXT: fdiv h1, h0, h1
+; CHECK-SD-NEXT: fmul h1, h0, h1
; CHECK-SD-NEXT: frintz h1, h1
; CHECK-SD-NEXT: fmadd h0, h1, h2, h0
; CHECK-SD-NEXT: ret
@@ -112,10 +115,11 @@ entry:
define double @drem2_nsz(double %x) {
; CHECK-SD-LABEL: drem2_nsz:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov d1, #2.00000000
-; CHECK-SD-NEXT: fdiv d2, d0, d1
-; CHECK-SD-NEXT: frintz d2, d2
-; CHECK-SD-NEXT: fmsub d0, d2, d1, d0
+; CHECK-SD-NEXT: fmov d1, #0.50000000
+; CHECK-SD-NEXT: fmov d2, #-2.00000000
+; CHECK-SD-NEXT: fmul d1, d0, d1
+; CHECK-SD-NEXT: frintz d1, d1
+; CHECK-SD-NEXT: fmadd d0, d1, d2, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: drem2_nsz:
@@ -176,10 +180,11 @@ entry:
define float @fremm2_nsz(float %x) {
; CHECK-SD-LABEL: fremm2_nsz:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov s1, #-2.00000000
-; CHECK-SD-NEXT: fdiv s2, s0, s1
-; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
+; CHECK-SD-NEXT: fmov s1, #-0.50000000
+; CHECK-SD-NEXT: fmov s2, #2.00000000
+; CHECK-SD-NEXT: fmul s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fremm2_nsz:
@@ -195,10 +200,11 @@ define float @frem4_abs(float %x) {
; CHECK-SD-LABEL: frem4_abs:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fabs s0, s0
-; CHECK-SD-NEXT: fmov s1, #4.00000000
-; CHECK-SD-NEXT: fdiv s2, s0, s1
-; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
+; CHECK-SD-NEXT: fmov s1, #0.25000000
+; CHECK-SD-NEXT: fmov s2, #-4.00000000
+; CHECK-SD-NEXT: fmul s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem4_abs:
@@ -216,10 +222,12 @@ define float @frem16_abs(float %x) {
; CHECK-SD-LABEL: frem16_abs:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fabs s0, s0
-; CHECK-SD-NEXT: fmov s1, #16.00000000
-; CHECK-SD-NEXT: fdiv s2, s0, s1
-; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
+; CHECK-SD-NEXT: mov w8, #1031798784 // =0x3d800000
+; CHECK-SD-NEXT: fmov s2, #-16.00000000
+; CHECK-SD-NEXT: fmov s1, w8
+; CHECK-SD-NEXT: fmul s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem16_abs:
@@ -237,11 +245,13 @@ define float @frem4294967296_abs(float %x) {
; CHECK-SD-LABEL: frem4294967296_abs:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fabs s0, s0
-; CHECK-SD-NEXT: mov w8, #1333788672 // =0x4f800000
+; CHECK-SD-NEXT: mov w8, #796917760 // =0x2f800000
; CHECK-SD-NEXT: fmov s1, w8
-; CHECK-SD-NEXT: fdiv s2, s0, s1
-; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
+; CHECK-SD-NEXT: mov w8, #-813694976 // =0xcf800000
+; CHECK-SD-NEXT: fmov s2, w8
+; CHECK-SD-NEXT: fmul s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem4294967296_abs:
@@ -260,11 +270,13 @@ define float @frem1152921504606846976_abs(float %x) {
; CHECK-SD-LABEL: frem1152921504606846976_abs:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fabs s0, s0
-; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000
+; CHECK-SD-NEXT: mov w8, #562036736 // =0x21800000
; CHECK-SD-NEXT: fmov s1, w8
-; CHECK-SD-NEXT: fdiv s2, s0, s1
-; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
+; CHECK-SD-NEXT: mov w8, #-578813952 // =0xdd800000
+; CHECK-SD-NEXT: fmov s2, w8
+; CHECK-SD-NEXT: fmul s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem1152921504606846976_abs:
@@ -283,11 +295,13 @@ define float @frem4611686018427387904_abs(float %x) {
; CHECK-SD-LABEL: frem4611686018427387904_abs:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fabs s0, s0
-; CHECK-SD-NEXT: mov w8, #1585446912 // =0x5e800000
+; CHECK-SD-NEXT: mov w8, #545259520 // =0x20800000
; CHECK-SD-NEXT: fmov s1, w8
-; CHECK-SD-NEXT: fdiv s2, s0, s1
-; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
+; CHECK-SD-NEXT: mov w8, #-562036736 // =0xde800000
+; CHECK-SD-NEXT: fmov s2, w8
+; CHECK-SD-NEXT: fmul s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem4611686018427387904_abs:
@@ -305,11 +319,12 @@ entry:
define float @frem9223372036854775808_abs(float %x) {
; CHECK-SD-LABEL: frem9223372036854775808_abs:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v1.2s, #95, lsl #24
+; CHECK-SD-NEXT: movi v1.2s, #32, lsl #24
; CHECK-SD-NEXT: fabs s0, s0
-; CHECK-SD-NEXT: fdiv s2, s0, s1
-; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
+; CHECK-SD-NEXT: movi v2.2s, #223, lsl #24
+; CHECK-SD-NEXT: fmul s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem9223372036854775808_abs:
@@ -326,11 +341,12 @@ entry:
define <4 x float> @frem2_vec(<4 x float> %x) {
; CHECK-SD-LABEL: frem2_vec:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v1.4s, #64, lsl #24
+; CHECK-SD-NEXT: movi v1.4s, #63, lsl #24
+; CHECK-SD-NEXT: movi v2.4s, #64, lsl #24
; CHECK-SD-NEXT: mov v3.16b, v0.16b
-; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: frintz v2.4s, v2.4s
-; CHECK-SD-NEXT: fmls v3.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT: fmul v1.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: frintz v1.4s, v1.4s
+; CHECK-SD-NEXT: fmls v3.4s, v2.4s, v1.4s
; CHECK-SD-NEXT: mvni v1.4s, #128, lsl #24
; CHECK-SD-NEXT: bit v0.16b, v3.16b, v1.16b
; CHECK-SD-NEXT: ret
@@ -387,10 +403,11 @@ entry:
define <4 x float> @frem2_nsz_vec(<4 x float> %x) {
; CHECK-SD-LABEL: frem2_nsz_vec:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v1.4s, #64, lsl #24
-; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: frintz v2.4s, v2.4s
-; CHECK-SD-NEXT: fmls v0.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT: movi v1.4s, #63, lsl #24
+; CHECK-SD-NEXT: movi v2.4s, #64, lsl #24
+; CHECK-SD-NEXT: fmul v1.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: frintz v1.4s, v1.4s
+; CHECK-SD-NEXT: fmls v0.4s, v2.4s, v1.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_nsz_vec:
@@ -445,12 +462,14 @@ entry:
define <4 x float> @frem1152921504606846976_absv(<4 x float> %x) {
; CHECK-SD-LABEL: frem1152921504606846976_absv:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000
+; CHECK-SD-NEXT: mov w8, #562036736 // =0x21800000
; CHECK-SD-NEXT: fabs v0.4s, v0.4s
; CHECK-SD-NEXT: dup v1.4s, w8
-; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: frintz v2.4s, v2.4s
-; CHECK-SD-NEXT: fmls v0.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000
+; CHECK-SD-NEXT: dup v2.4s, w8
+; CHECK-SD-NEXT: fmul v1.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: frintz v1.4s, v1.4s
+; CHECK-SD-NEXT: fmls v0.4s, v2.4s, v1.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem1152921504606846976_absv:
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll b/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll
index 5386ef425dcb5..64d4a0cf78501 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-phi-with-undef.ll
@@ -17,7 +17,7 @@ define amdgpu_ps float @uniform_phi_with_undef(float inreg %c, float %v, i32 %x,
; GCN-NEXT: s_mov_b32 exec_lo, s2
; GCN-NEXT: s_cbranch_execz .LBB0_2
; GCN-NEXT: ; %bb.1: ; %if
-; GCN-NEXT: s_mov_b32 s2, 2.0
+; GCN-NEXT: s_mov_b32 s2, 0x40400000
; GCN-NEXT: v_div_scale_f32 v1, s3, s2, s2, v0
; GCN-NEXT: v_rcp_f32_e64 v2, v1
; GCN-NEXT: s_mov_b32 s3, 1.0
@@ -39,7 +39,7 @@ entry:
br i1 %cc, label %if, label %end
if:
- %v.if = fdiv float %v, 2.0
+ %v.if = fdiv float %v, 3.0
br label %end
end:
diff --git a/llvm/test/CodeGen/ARM/frem-power2.ll b/llvm/test/CodeGen/ARM/frem-power2.ll
index 71c2c09c0105c..63ecd9fec7883 100644
--- a/llvm/test/CodeGen/ARM/frem-power2.ll
+++ b/llvm/test/CodeGen/ARM/frem-power2.ll
@@ -14,26 +14,28 @@ define float @frem4(float %x) {
;
; CHECK-FP-LABEL: frem4:
; CHECK-FP: @ %bb.0: @ %entry
-; CHECK-FP-NEXT: vmov.f32 s0, #4.000000e+00
-; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vmov.f32 s0, #2.500000e-01
+; CHECK-FP-NEXT: vmov.f32 s2, #-4.000000e+00
+; CHECK-FP-NEXT: vmov s4, r0
; CHECK-FP-NEXT: lsrs r0, r0, #31
-; CHECK-FP-NEXT: vdiv.f32 s4, s2, s0
-; CHECK-FP-NEXT: vrintz.f32 s4, s4
-; CHECK-FP-NEXT: vfms.f32 s2, s4, s0
-; CHECK-FP-NEXT: vmov r1, s2
+; CHECK-FP-NEXT: vmul.f32 s0, s4, s0
+; CHECK-FP-NEXT: vrintz.f32 s0, s0
+; CHECK-FP-NEXT: vfma.f32 s4, s0, s2
+; CHECK-FP-NEXT: vmov r1, s4
; CHECK-FP-NEXT: bfi r1, r0, #31, #1
; CHECK-FP-NEXT: mov r0, r1
; CHECK-FP-NEXT: bx lr
;
; CHECK-M33-LABEL: frem4:
; CHECK-M33: @ %bb.0: @ %entry
-; CHECK-M33-NEXT: vmov.f32 s0, #4.000000e+00
-; CHECK-M33-NEXT: vmov s2, r0
+; CHECK-M33-NEXT: vmov.f32 s0, #2.500000e-01
+; CHECK-M33-NEXT: vmov.f32 s2, #-4.000000e+00
+; CHECK-M33-NEXT: vmov s4, r0
; CHECK-M33-NEXT: lsrs r0, r0, #31
-; CHECK-M33-NEXT: vdiv.f32 s4, s2, s0
-; CHECK-M33-NEXT: vrintz.f32 s4, s4
-; CHECK-M33-NEXT: vmls.f32 s2, s4, s0
-; CHECK-M33-NEXT: vmov r1, s2
+; CHECK-M33-NEXT: vmul.f32 s0, s4, s0
+; CHECK-M33-NEXT: vrintz.f32 s0, s0
+; CHECK-M33-NEXT: vmla.f32 s4, s0, s2
+; CHECK-M33-NEXT: vmov r1, s4
; CHECK-M33-NEXT: bfi r1, r0, #31, #1
; CHECK-M33-NEXT: mov r0, r1
; CHECK-M33-NEXT: bx lr
@@ -53,22 +55,24 @@ define float @frem4_nsz(float %x) {
;
; CHECK-FP-LABEL: frem4_nsz:
; CHECK-FP: @ %bb.0: @ %entry
-; CHECK-FP-NEXT: vmov.f32 s0, #4.000000e+00
-; CHECK-FP-NEXT: vmov s2, r0
-; CHECK-FP-NEXT: vdiv.f32 s4, s2, s0
-; CHECK-FP-NEXT: vrintz.f32 s4, s4
-; CHECK-FP-NEXT: vfms.f32 s2, s4, s0
-; CHECK-FP-NEXT: vmov r0, s2
+; CHECK-FP-NEXT: vmov.f32 s0, #2.500000e-01
+; CHECK-FP-NEXT: vmov.f32 s2, #-4.000000e+00
+; CHECK-FP-NEXT: vmov s4, r0
+; CHECK-FP-NEXT: vmul.f32 s0, s4, s0
+; CHECK-FP-NEXT: vrintz.f32 s0, s0
+; CHECK-FP-NEXT: vfma.f32 s4, s0, s2
+; CHECK-FP-NEXT: vmov r0, s4
; CHECK-FP-NEXT: bx lr
;
; CHECK-M33-LABEL: frem4_nsz:
; CHECK-M33: @ %bb.0: @ %entry
-; CHECK-M33-NEXT: vmov.f32 s0, #4.000000e+00
-; CHECK-M33-NEXT: vmov s2, r0
-; CHECK-M33-NEXT: vdiv.f32 s4, s2, s0
-; CHECK-M33-NEXT: vrintz.f32 s4, s4
-; CHECK-M33-NEXT: vmls.f32 s2, s4, s0
-; CHECK-M33-NEXT: vmov r0, s2
+; CHECK-M33-NEXT: vmov.f32 s0, #2.500000e-01
+; CHECK-M33-NEXT: vmov.f32 s2, #-4.000000e+00
+; CHECK-M33-NEXT: vmov s4, r0
+; CHECK-M33-NEXT: vmul.f32 s0, s4, s0
+; CHECK-M33-NEXT: vrintz.f32 s0, s0
+; CHECK-M33-NEXT: vmla.f32 s4, s0, s2
+; CHECK-M33-NEXT: vmov r0, s4
; CHECK-M33-NEXT: bx lr
entry:
%fmod = frem nsz float %x, 4.0
diff --git a/llvm/test/CodeGen/ARM/vdiv_combine.ll b/llvm/test/CodeGen/ARM/vdiv_combine.ll
index 988844661085e..899487f9efb2c 100644
--- a/llvm/test/CodeGen/ARM/vdiv_combine.ll
+++ b/llvm/test/CodeGen/ARM/vdiv_combine.ll
@@ -5,10 +5,7 @@
define arm_aapcs_vfpcc <2 x float> @t1(<2 x i32> %vecinit2.i) nounwind {
; CHECK-LABEL: t1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s2, #8.000000e+00
-; CHECK-NEXT: vcvt.f32.s32 d2, d0
-; CHECK-NEXT: vdiv.f32 s1, s5, s2
-; CHECK-NEXT: vdiv.f32 s0, s4, s2
+; CHECK-NEXT: vcvt.f32.s32 d0, d0, #3
; CHECK-NEXT: bx lr
entry:
%vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -20,10 +17,7 @@ entry:
define arm_aapcs_vfpcc <2 x float> @t2(<2 x i32> %vecinit2.i) nounwind {
; CHECK-LABEL: t2:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s2, #8.000000e+00
-; CHECK-NEXT: vcvt.f32.u32 d2, d0
-; CHECK-NEXT: vdiv.f32 s1, s5, s2
-; CHECK-NEXT: vdiv.f32 s0, s4, s2
+; CHECK-NEXT: vcvt.f32.u32 d0, d0, #3
; CHECK-NEXT: bx lr
entry:
%vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -56,17 +50,10 @@ entry:
define arm_aapcs_vfpcc <2 x float> @t4(<2 x i32> %vecinit2.i) nounwind {
; CHECK-LABEL: t4:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcvt.f32.s32 d2, d0
-; CHECK-NEXT: vldr s2, LCPI3_0
-; CHECK-NEXT: vdiv.f32 s1, s5, s2
-; CHECK-NEXT: vdiv.f32 s0, s4, s2
+; CHECK-NEXT: vcvt.f32.s32 d16, d0
+; CHECK-NEXT: vmov.i32 d17, #0x2f000000
+; CHECK-NEXT: vmul.f32 d0, d16, d17
; CHECK-NEXT: bx lr
-; CHECK-NEXT: .p2align 2
-; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .data_region
-; CHECK-NEXT: LCPI3_0:
-; CHECK-NEXT: .long 0x50000000 @ float 8.58993459E+9
-; CHECK-NEXT: .end_data_region
entry:
%vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
%div.i = fdiv <2 x float> %vcvt.i, <float 0x4200000000000000, float 0x4200000000000000>
@@ -77,17 +64,8 @@ entry:
define arm_aapcs_vfpcc <2 x float> @t5(<2 x i32> %vecinit2.i) nounwind {
; CHECK-LABEL: t5:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcvt.f32.s32 d2, d0
-; CHECK-NEXT: vldr s2, LCPI4_0
-; CHECK-NEXT: vdiv.f32 s1, s5, s2
-; CHECK-NEXT: vdiv.f32 s0, s4, s2
+; CHECK-NEXT: vcvt.f32.s32 d0, d0, #32
; CHECK-NEXT: bx lr
-; CHECK-NEXT: .p2align 2
-; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .data_region
-; CHECK-NEXT: LCPI4_0:
-; CHECK-NEXT: .long 0x4f800000 @ float 4.2949673E+9
-; CHECK-NEXT: .end_data_region
entry:
%vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
%div.i = fdiv <2 x float> %vcvt.i, <float 0x41F0000000000000, float 0x41F0000000000000>
@@ -98,12 +76,7 @@ entry:
define arm_aapcs_vfpcc <4 x float> @t6(<4 x i32> %vecinit6.i) nounwind {
; CHECK-LABEL: t6:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.f32 s4, #8.000000e+00
-; CHECK-NEXT: vcvt.f32.s32 q2, q0
-; CHECK-NEXT: vdiv.f32 s3, s11, s4
-; CHECK-NEXT: vdiv.f32 s2, s10, s4
-; CHECK-NEXT: vdiv.f32 s1, s9, s4
-; CHECK-NEXT: vdiv.f32 s0, s8, s4
+; CHECK-NEXT: vcvt.f32.s32 q0, q0, #3
; CHECK-NEXT: bx lr
entry:
%vcvt.i = sitofp <4 x i32> %vecinit6.i to <4 x float>
@@ -115,12 +88,7 @@ define arm_aapcs_vfpcc <4 x float> @fix_unsigned_i16_to_float(<4 x i16> %in) {
; CHECK-LABEL: fix_unsigned_i16_to_float:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovl.u16 q8, d0
-; CHECK-NEXT: vmov.f32 s4, #2.000000e+00
-; CHECK-NEXT: vcvt.f32.u32 q2, q8
-; CHECK-NEXT: vdiv.f32 s3, s11, s4
-; CHECK-NEXT: vdiv.f32 s2, s10, s4
-; CHECK-NEXT: vdiv.f32 s1, s9, s4
-; CHECK-NEXT: vdiv.f32 s0, s8, s4
+; CHECK-NEXT: vcvt.f32.u32 q0, q8, #1
; CHECK-NEXT: bx lr
%conv = uitofp <4 x i16> %in to <4 x float>
%shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0>
@@ -131,12 +99,7 @@ define arm_aapcs_vfpcc <4 x float> @fix_signed_i16_to_float(<4 x i16> %in) {
; CHECK-LABEL: fix_signed_i16_to_float:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovl.s16 q8, d0
-; CHECK-NEXT: vmov.f32 s4, #2.000000e+00
-; CHECK-NEXT: vcvt.f32.s32 q2, q8
-; CHECK-NEXT: vdiv.f32 s3, s11, s4
-; CHECK-NEXT: vdiv.f32 s2, s10, s4
-; CHECK-NEXT: vdiv.f32 s1, s9, s4
-; CHECK-NEXT: vdiv.f32 s0, s8, s4
+; CHECK-NEXT: vcvt.f32.s32 q0, q8, #1
; CHECK-NEXT: bx lr
%conv = sitofp <4 x i16> %in to <4 x float>
%shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0>
@@ -152,13 +115,12 @@ define arm_aapcs_vfpcc <2 x float> @fix_i64_to_float(<2 x i64> %in) {
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl ___floatundisf
; CHECK-NEXT: vmov r2, r1, d8
-; CHECK-NEXT: vmov s18, r0
-; CHECK-NEXT: vmov.f32 s16, #2.000000e+00
+; CHECK-NEXT: vmov s19, r0
+; CHECK-NEXT: vmov.i32 d8, #0x3f000000
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: bl ___floatundisf
-; CHECK-NEXT: vmov s2, r0
-; CHECK-NEXT: vdiv.f32 s1, s18, s16
-; CHECK-NEXT: vdiv.f32 s0, s2, s16
+; CHECK-NEXT: vmov s18, r0
+; CHECK-NEXT: vmul.f32 d0, d9, d8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {lr}
; CHECK-NEXT: bx lr
@@ -177,13 +139,13 @@ define arm_aapcs_vfpcc <2 x double> @fix_i64_to_double(<2 x i64> %in) {
; CHECK-NEXT: bl ___floatundidf
; CHECK-NEXT: vmov r2, r3, d8
; CHECK-NEXT: vmov d9, r0, r1
-; CHECK-NEXT: vmov.f64 d8, #2.000000e+00
+; CHECK-NEXT: vmov.f64 d8, #5.000000e-01
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: bl ___floatundidf
; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vdiv.f64 d1, d9, d8
-; CHECK-NEXT: vdiv.f64 d0, d16, d8
+; CHECK-NEXT: vmul.f64 d1, d9, d8
+; CHECK-NEXT: vmul.f64 d0, d16, d8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {lr}
; CHECK-NEXT: bx lr
@@ -196,19 +158,8 @@ define arm_aapcs_vfpcc <2 x double> @fix_i64_to_double(<2 x i64> %in) {
define arm_aapcs_vfpcc <8 x float> @test7(<8 x i32> %in) nounwind {
; CHECK-LABEL: test7:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: vmov.f32 s12, #8.000000e+00
-; CHECK-NEXT: vcvt.f32.s32 q4, q0
-; CHECK-NEXT: vcvt.f32.s32 q2, q1
-; CHECK-NEXT: vdiv.f32 s3, s19, s12
-; CHECK-NEXT: vdiv.f32 s7, s11, s12
-; CHECK-NEXT: vdiv.f32 s2, s18, s12
-; CHECK-NEXT: vdiv.f32 s6, s10, s12
-; CHECK-NEXT: vdiv.f32 s1, s17, s12
-; CHECK-NEXT: vdiv.f32 s5, s9, s12
-; CHECK-NEXT: vdiv.f32 s0, s16, s12
-; CHECK-NEXT: vdiv.f32 s4, s8, s12
-; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: vcvt.f32.s32 q0, q0, #3
+; CHECK-NEXT: vcvt.f32.s32 q1, q1, #3
; CHECK-NEXT: bx lr
entry:
%vcvt.i = sitofp <8 x i32> %in to <8 x float>
@@ -220,19 +171,8 @@ entry:
define arm_aapcs_vfpcc <4 x float> @test8(<4 x i32> %in) {
; CHECK-LABEL: test8:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.f32 s4, #2.000000e+00
-; CHECK-NEXT: vcvt.f32.s32 q2, q0
-; CHECK-NEXT: vdiv.f32 s2, s10, s4
-; CHECK-NEXT: vdiv.f32 s1, s9, s4
-; CHECK-NEXT: vdiv.f32 s0, s8, s4
-; CHECK-NEXT: vldr s3, LCPI11_0
+; CHECK-NEXT: vcvt.f32.s32 q0, q0, #1
; CHECK-NEXT: bx lr
-; CHECK-NEXT: .p2align 2
-; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .data_region
-; CHECK-NEXT: LCPI11_0:
-; CHECK-NEXT: .long 0x7fc00000 @ float NaN
-; CHECK-NEXT: .end_data_region
%vcvt.i = sitofp <4 x i32> %in to <4 x float>
%div.i = fdiv <4 x float> %vcvt.i, <float 2.0, float 2.0, float 2.0, float undef>
ret <4 x float> %div.i
@@ -241,19 +181,8 @@ define arm_aapcs_vfpcc <4 x float> @test8(<4 x i32> %in) {
define arm_aapcs_vfpcc <3 x float> @test_illegal_int_to_fp(<3 x i32> %in) {
; CHECK-LABEL: test_illegal_int_to_fp:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.f32 s4, #4.000000e+00
-; CHECK-NEXT: vcvt.f32.s32 q2, q0
-; CHECK-NEXT: vdiv.f32 s2, s10, s4
-; CHECK-NEXT: vdiv.f32 s1, s9, s4
-; CHECK-NEXT: vdiv.f32 s0, s8, s4
-; CHECK-NEXT: vldr s3, LCPI12_0
+; CHECK-NEXT: vcvt.f32.s32 q0, q0, #2
; CHECK-NEXT: bx lr
-; CHECK-NEXT: .p2align 2
-; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .data_region
-; CHECK-NEXT: LCPI12_0:
-; CHECK-NEXT: .long 0x7fc00000 @ float NaN
-; CHECK-NEXT: .end_data_region
%conv = sitofp <3 x i32> %in to <3 x float>
%res = fdiv <3 x float> %conv, <float 4.0, float 4.0, float 4.0>
ret <3 x float> %res
diff --git a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
index 47ac8848a437d..ac65a1112be43 100644
--- a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
+++ b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
@@ -1367,7 +1367,7 @@ define void @bcast_unfold_fdiv_v16f32(ptr nocapture %arg) {
; CHECK-LABEL: bcast_unfold_fdiv_v16f32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
-; CHECK-NEXT: vbroadcastss {{.*#+}} zmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; CHECK-NEXT: vbroadcastss {{.*#+}} zmm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB42_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -1386,7 +1386,7 @@ bb1: ; preds = %bb1, %bb
%tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ]
%tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp
%tmp4 = load <16 x float>, ptr %tmp2, align 4
- %tmp5 = fdiv <16 x float> %tmp4, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+ %tmp5 = fdiv <16 x float> %tmp4, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
store <16 x float> %tmp5, ptr %tmp2, align 4
%tmp7 = add i64 %tmp, 16
%tmp8 = icmp eq i64 %tmp7, 1024
@@ -1400,7 +1400,7 @@ define void @bcast_unfold_fdiv_v8f32(ptr nocapture %arg) {
; CHECK-LABEL: bcast_unfold_fdiv_v8f32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
-; CHECK-NEXT: vbroadcastss {{.*#+}} ymm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; CHECK-NEXT: vbroadcastss {{.*#+}} ymm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB43_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -1419,7 +1419,7 @@ bb1: ; preds = %bb1, %bb
%tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ]
%tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp
%tmp4 = load <8 x float>, ptr %tmp2, align 4
- %tmp5 = fdiv <8 x float> %tmp4, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+ %tmp5 = fdiv <8 x float> %tmp4, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
store <8 x float> %tmp5, ptr %tmp2, align 4
%tmp7 = add i64 %tmp, 8
%tmp8 = icmp eq i64 %tmp7, 1024
@@ -1433,7 +1433,7 @@ define void @bcast_unfold_fdiv_v4f32(ptr nocapture %arg) {
; CHECK-LABEL: bcast_unfold_fdiv_v4f32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
-; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB44_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -1451,7 +1451,7 @@ bb1: ; preds = %bb1, %bb
%tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ]
%tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp
%tmp4 = load <4 x float>, ptr %tmp2, align 4
- %tmp5 = fdiv <4 x float> %tmp4, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+ %tmp5 = fdiv <4 x float> %tmp4, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
store <4 x float> %tmp5, ptr %tmp2, align 4
%tmp7 = add i64 %tmp, 4
%tmp8 = icmp eq i64 %tmp7, 1024
@@ -1465,7 +1465,7 @@ define void @bcast_unfold_fdiv_v8f64(ptr nocapture %arg) {
; CHECK-LABEL: bcast_unfold_fdiv_v8f64:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
-; CHECK-NEXT: vbroadcastsd {{.*#+}} zmm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; CHECK-NEXT: vbroadcastsd {{.*#+}} zmm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB45_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -1484,7 +1484,7 @@ bb1: ; preds = %bb1, %bb
%tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ]
%tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp
%tmp4 = load <8 x double>, ptr %tmp2, align 8
- %tmp5 = fdiv <8 x double> %tmp4, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+ %tmp5 = fdiv <8 x double> %tmp4, <double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00>
store <8 x double> %tmp5, ptr %tmp2, align 8
%tmp7 = add i64 %tmp, 8
%tmp8 = icmp eq i64 %tmp7, 1024
@@ -1498,7 +1498,7 @@ define void @bcast_unfold_fdiv_v4f64(ptr nocapture %arg) {
; CHECK-LABEL: bcast_unfold_fdiv_v4f64:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
-; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; CHECK-NEXT: vbroadcastsd {{.*#+}} ymm0 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB46_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -1517,7 +1517,7 @@ bb1: ; preds = %bb1, %bb
%tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ]
%tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp
%tmp4 = load <4 x double>, ptr %tmp2, align 8
- %tmp5 = fdiv <4 x double> %tmp4, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+ %tmp5 = fdiv <4 x double> %tmp4, <double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00>
store <4 x double> %tmp5, ptr %tmp2, align 8
%tmp7 = add i64 %tmp, 4
%tmp8 = icmp eq i64 %tmp7, 1024
@@ -1531,7 +1531,7 @@ define void @bcast_unfold_fdiv_v2f64(ptr nocapture %arg) {
; CHECK-LABEL: bcast_unfold_fdiv_v2f64:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000
-; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [2.0E+0,2.0E+0]
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = [3.0E+0,3.0E+0]
; CHECK-NEXT: # xmm0 = mem[0,0]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB47_1: # %bb1
@@ -1550,7 +1550,7 @@ bb1: ; preds = %bb1, %bb
%tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ]
%tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp
%tmp4 = load <2 x double>, ptr %tmp2, align 8
- %tmp5 = fdiv <2 x double> %tmp4, <double 2.000000e+00, double 2.000000e+00>
+ %tmp5 = fdiv <2 x double> %tmp4, <double 3.000000e+00, double 3.000000e+00>
store <2 x double> %tmp5, ptr %tmp2, align 8
%tmp7 = add i64 %tmp, 2
%tmp8 = icmp eq i64 %tmp7, 1024
diff --git a/llvm/test/CodeGen/X86/change-unsafe-fp-math.ll b/llvm/test/CodeGen/X86/change-unsafe-fp-math.ll
index 33a7ec9bfc794..ba09ba8b6402b 100644
--- a/llvm/test/CodeGen/X86/change-unsafe-fp-math.ll
+++ b/llvm/test/CodeGen/X86/change-unsafe-fp-math.ll
@@ -14,7 +14,7 @@
define double @unsafe_fp_math_default0(double %x) {
; SAFE: divsd
; UNSAFE: mulsd
- %div = fdiv double %x, 2.0
+ %div = fdiv double %x, 3.0
ret double %div
}
@@ -22,7 +22,7 @@ define double @unsafe_fp_math_default0(double %x) {
define double @unsafe_fp_math_off(double %x) #0 {
; SAFE: divsd
; UNSAFE: divsd
- %div = fdiv double %x, 2.0
+ %div = fdiv double %x, 3.0
ret double %div
}
@@ -31,7 +31,7 @@ define double @unsafe_fp_math_default1(double %x) {
; With unsafe math enabled, can change this div to a mul.
; SAFE: divsd
; UNSAFE: mulsd
- %div = fdiv double %x, 2.0
+ %div = fdiv double %x, 3.0
ret double %div
}
@@ -39,7 +39,7 @@ define double @unsafe_fp_math_default1(double %x) {
define double @unsafe_fp_math_on(double %x) #1 {
; SAFE: mulsd
; UNSAFE: mulsd
- %div = fdiv double %x, 2.0
+ %div = fdiv double %x, 3.0
ret double %div
}
@@ -48,7 +48,7 @@ define double @unsafe_fp_math_default2(double %x) {
; With unsafe math enabled, can change this div to a mul.
; SAFE: divsd
; UNSAFE: mulsd
- %div = fdiv double %x, 2.0
+ %div = fdiv double %x, 3.0
ret double %div
}
More information about the llvm-commits
mailing list