[llvm] [DAG] Lower frem of power-2 using div/trunc/mul+sub (PR #91148)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri May 10 03:18:02 PDT 2024
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/91148
>From 532116aa3e1584930b299faa545d019d26a4391d Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Tue, 7 May 2024 13:50:50 +0100
Subject: [PATCH 1/5] [DAG] Lower frem of power-2 using div/trunc/mul+sub.
If we are lowering a frem and the divisor is known to be an integer power-2, we
can use the formula 'frem = x - trunc(x / d) * d'. This avoids the more
expensive call to fmod. The results are identical to fmod so long as d is a
power-2 (so the mul does not round incorrectly), and the sign of the return is
either always positive, or the sign of zero is not important (nsz).
Unfortunately Alive2 does not handle this well at the moment. I was using
exhaustive checking to test this, hopefully I didn't make a mistake in it
(https://gist.github.com/davemgreen/6078015f30d3bacd1e9572f8db5d4b64).
I found this in CPython's implementation of float_pow. I have currently added it
as a DAG combine for frem with power-2 fp constants, with some extra utility
functions for checking if a floating-point value is known non-negative or an
integer power-2.
---
llvm/include/llvm/CodeGen/SelectionDAG.h | 8 +
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 17 +-
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 17 +
llvm/test/CodeGen/AArch64/frem-power2.ll | 388 +++++++++++-------
llvm/test/CodeGen/ARM/frem-power2.ll | 18 +-
5 files changed, 285 insertions(+), 163 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 4b1b58d4af0bb..bc4200a68f2d8 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1996,6 +1996,10 @@ class SelectionDAG {
/// is set.
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth = 0) const;
+ /// Test if the given _fp_ value is known to be an integer power-of-2, either
+ /// positive or negative.
+ bool isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth = 0) const;
+
/// Return the number of times the sign bit of the register is replicated into
/// the other bits. We know that at least 1 bit is always equal to the sign
/// bit (itself), but other cases can give us information. For example,
@@ -2111,6 +2115,10 @@ class SelectionDAG {
/// Test whether the given SDValue is known to contain non-zero value(s).
bool isKnownNeverZero(SDValue Op, unsigned Depth = 0) const;
+ /// Test whether the given float value is known to not be negative. 0.0 is
+ /// considered non-negative, -0.0 is considered negative.
+ bool isKnownNonNegativeFP(SDValue Op) const;
+
/// Test whether two SDValues are known to compare equal. This
/// is true if they are the same value, or if one is negative zero and the
/// other positive zero.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fddc97d8901a2..5fe7ab0188254 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17365,17 +17365,32 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
EVT VT = N->getValueType(0);
SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+ SDLoc DL(N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
// fold (frem c1, c2) -> fmod(c1,c2)
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
+ // Lower frem N0, N1 => x - trunc(N0 / N1) * N1, providing N1 is an integer
+ // power of 2.
+ if (DAG.isKnownToBeAPowerOfTwoFP(N1) &&
+ (Flags.hasNoSignedZeros() || DAG.isKnownNonNegativeFP(N0)) &&
+ !TLI.isOperationLegal(ISD::FREM, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FDIV, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT)) {
+ SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
+ SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
+ SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
+ }
+
return SDValue();
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index eef5acd032345..af5a3c8fde3a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4373,6 +4373,16 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
return false;
}
+bool SelectionDAG::isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth) const {
+ if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Val, true))
+ return C1->getValueAPF().getExactLog2Abs() >= 0;
+
+ if (Val.getOpcode() == ISD::UINT_TO_FP || Val.getOpcode() == ISD::SINT_TO_FP)
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);
+
+ return false;
+}
+
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
@@ -5555,6 +5565,13 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
return computeKnownBits(Op, Depth).isNonZero();
}
+bool SelectionDAG::isKnownNonNegativeFP(SDValue Op) const {
+ if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Op, true))
+ return !C1->isNegative();
+
+ return Op.getOpcode() == ISD::FABS;
+}
+
bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
// Check the obvious case.
if (A == B) return true;
diff --git a/llvm/test/CodeGen/AArch64/frem-power2.ll b/llvm/test/CodeGen/AArch64/frem-power2.ll
index 5d627fcd6b656..94a9f701e1f57 100644
--- a/llvm/test/CodeGen/AArch64/frem-power2.ll
+++ b/llvm/test/CodeGen/AArch64/frem-power2.ll
@@ -13,31 +13,59 @@ entry:
}
define float @frem2_nsz(float %x) {
-; CHECK-LABEL: frem2_nsz:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s1, #2.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem2_nsz:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s1, #2.00000000
+; CHECK-SD-NEXT: fdiv s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fadd s1, s1, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem2_nsz:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%fmod = frem nsz float %x, 2.0
ret float %fmod
}
define float @frem2_fast(float %x) {
-; CHECK-LABEL: frem2_fast:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s1, #2.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem2_fast:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s1, #0.50000000
+; CHECK-SD-NEXT: fmov s2, #-2.00000000
+; CHECK-SD-NEXT: fmul s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem2_fast:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%fmod = frem fast float %x, 2.0
ret float %fmod
}
define float @frem2_abs(float %x) {
-; CHECK-LABEL: frem2_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: fmov s1, #2.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem2_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: fmov s1, #2.00000000
+; CHECK-SD-NEXT: fdiv s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fadd s1, s1, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem2_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 2.0
@@ -47,14 +75,11 @@ entry:
define half @hrem2_nsz(half %x) {
; CHECK-SD-LABEL: hrem2_nsz:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
-; CHECK-SD-NEXT: .cfi_offset w30, -16
-; CHECK-SD-NEXT: fcvt s0, h0
-; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: fcvt h0, s0
-; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: fmov h1, #2.00000000
+; CHECK-SD-NEXT: fdiv h1, h0, h1
+; CHECK-SD-NEXT: frintz h1, h1
+; CHECK-SD-NEXT: fadd h1, h1, h1
+; CHECK-SD-NEXT: fsub h0, h0, h1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hrem2_nsz:
@@ -75,10 +100,19 @@ entry:
}
define double @drem2_nsz(double %x) {
-; CHECK-LABEL: drem2_nsz:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov d1, #2.00000000
-; CHECK-NEXT: b fmod
+; CHECK-SD-LABEL: drem2_nsz:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov d1, #2.00000000
+; CHECK-SD-NEXT: fdiv d1, d0, d1
+; CHECK-SD-NEXT: frintz d1, d1
+; CHECK-SD-NEXT: fadd d1, d1, d1
+; CHECK-SD-NEXT: fsub d0, d0, d1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: drem2_nsz:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov d1, #2.00000000
+; CHECK-GI-NEXT: b fmod
entry:
%fmod = frem nsz double %x, 2.0
ret double %fmod
@@ -105,10 +139,16 @@ entry:
}
define float @frem1_nsz(float %x) {
-; CHECK-LABEL: frem1_nsz:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s1, #1.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem1_nsz:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: frintz s1, s0
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem1_nsz:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s1, #1.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%fmod = frem nsz float %x, 1.0
ret float %fmod
@@ -125,21 +165,40 @@ entry:
}
define float @fremm2_nsz(float %x) {
-; CHECK-LABEL: fremm2_nsz:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s1, #-2.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: fremm2_nsz:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s1, #-2.00000000
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fremm2_nsz:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s1, #-2.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%fmod = frem nsz float %x, -2.0
ret float %fmod
}
define float @frem4_abs(float %x) {
-; CHECK-LABEL: frem4_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: fmov s1, #4.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem4_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: fmov s1, #4.00000000
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem4_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: fmov s1, #4.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 4.0
@@ -147,11 +206,21 @@ entry:
}
define float @frem16_abs(float %x) {
-; CHECK-LABEL: frem16_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: fmov s1, #16.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem16_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: fmov s1, #16.00000000
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem16_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: fmov s1, #16.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 16.0
@@ -159,12 +228,23 @@ entry:
}
define float @frem4294967296_abs(float %x) {
-; CHECK-LABEL: frem4294967296_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: mov w8, #1333788672 // =0x4f800000
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem4294967296_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: mov w8, #1333788672 // =0x4f800000
+; CHECK-SD-NEXT: fmov s1, w8
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem4294967296_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: mov w8, #1333788672 // =0x4f800000
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 4294967296.0
@@ -172,12 +252,23 @@ entry:
}
define float @frem1152921504606846976_abs(float %x) {
-; CHECK-LABEL: frem1152921504606846976_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: mov w8, #1568669696 // =0x5d800000
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem1152921504606846976_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000
+; CHECK-SD-NEXT: fmov s1, w8
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem1152921504606846976_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: mov w8, #1568669696 // =0x5d800000
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 1152921504606846976.0
@@ -185,12 +276,23 @@ entry:
}
define float @frem4611686018427387904_abs(float %x) {
-; CHECK-LABEL: frem4611686018427387904_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: mov w8, #1585446912 // =0x5e800000
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem4611686018427387904_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: mov w8, #1585446912 // =0x5e800000
+; CHECK-SD-NEXT: fmov s1, w8
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem4611686018427387904_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: mov w8, #1585446912 // =0x5e800000
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 4611686018427387904.0
@@ -198,11 +300,21 @@ entry:
}
define float @frem9223372036854775808_abs(float %x) {
-; CHECK-LABEL: frem9223372036854775808_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: movi v1.2s, #95, lsl #24
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem9223372036854775808_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi v1.2s, #95, lsl #24
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem9223372036854775808_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: movi v1.2s, #95, lsl #24
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 9223372036854775808.0
@@ -212,42 +324,11 @@ entry:
define <4 x float> @frem2_nsz_vec(<4 x float> %x) {
; CHECK-SD-LABEL: frem2_nsz_vec:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sub sp, sp, #48
-; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
-; CHECK-SD-NEXT: .cfi_offset w30, -16
-; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov s0, v0.s[1]
-; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov s0, v0.s[2]
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: mov v1.s[2], v0.s[0]
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov s0, v0.s[3]
-; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: movi v1.4s, #64, lsl #24
+; CHECK-SD-NEXT: fdiv v1.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: frintz v1.4s, v1.4s
+; CHECK-SD-NEXT: fadd v1.4s, v1.4s, v1.4s
+; CHECK-SD-NEXT: fsub v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_nsz_vec:
@@ -302,48 +383,13 @@ entry:
define <4 x float> @frem1152921504606846976_absv(<4 x float> %x) {
; CHECK-SD-LABEL: frem1152921504606846976_absv:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sub sp, sp, #48
-; CHECK-SD-NEXT: str d8, [sp, #32] // 8-byte Folded Spill
-; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
-; CHECK-SD-NEXT: .cfi_offset w30, -8
-; CHECK-SD-NEXT: .cfi_offset b8, -16
-; CHECK-SD-NEXT: fabs v0.4s, v0.4s
; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000
-; CHECK-SD-NEXT: fmov s8, w8
-; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov s0, v0.s[1]
-; CHECK-SD-NEXT: fmov s1, s8
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: fmov s1, s8
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT: fmov s1, s8
-; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov s0, v0.s[2]
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: mov v1.s[2], v0.s[0]
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov s0, v0.s[3]
-; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: fmov s1, s8
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-SD-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: fabs v0.4s, v0.4s
+; CHECK-SD-NEXT: dup v1.4s, w8
+; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: frintz v2.4s, v2.4s
+; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v1.4s
+; CHECK-SD-NEXT: fsub v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem1152921504606846976_absv:
@@ -401,12 +447,23 @@ entry:
}
define float @frem2_nsz_sitofp(float %x, i32 %sa) {
-; CHECK-LABEL: frem2_nsz_sitofp:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: lsl w8, w8, w0
-; CHECK-NEXT: scvtf s1, w8
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem2_nsz_sitofp:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w8, #1 // =0x1
+; CHECK-SD-NEXT: lsl w8, w8, w0
+; CHECK-SD-NEXT: scvtf s1, w8
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem2_nsz_sitofp:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: lsl w8, w8, w0
+; CHECK-GI-NEXT: scvtf s1, w8
+; CHECK-GI-NEXT: b fmodf
entry:
%s = shl i32 1, %sa
%y = sitofp i32 %s to float
@@ -415,12 +472,23 @@ entry:
}
define float @frem2_nsz_uitofp(float %x, i32 %sa) {
-; CHECK-LABEL: frem2_nsz_uitofp:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: lsl w8, w8, w0
-; CHECK-NEXT: ucvtf s1, w8
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem2_nsz_uitofp:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w8, #1 // =0x1
+; CHECK-SD-NEXT: lsl w8, w8, w0
+; CHECK-SD-NEXT: ucvtf s1, w8
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem2_nsz_uitofp:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: lsl w8, w8, w0
+; CHECK-GI-NEXT: ucvtf s1, w8
+; CHECK-GI-NEXT: b fmodf
entry:
%s = shl i32 1, %sa
%y = uitofp i32 %s to float
@@ -432,10 +500,14 @@ define float @frem2_const_sitofp(float %x, i32 %sa) {
; CHECK-SD-LABEL: frem2_const_sitofp:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #1 // =0x1
-; CHECK-SD-NEXT: fmov s0, #12.50000000
+; CHECK-SD-NEXT: fmov s1, #12.50000000
; CHECK-SD-NEXT: lsl w8, w8, w0
-; CHECK-SD-NEXT: scvtf s1, w8
-; CHECK-SD-NEXT: b fmodf
+; CHECK-SD-NEXT: scvtf s0, w8
+; CHECK-SD-NEXT: fdiv s2, s1, s0
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s0, s2, s0
+; CHECK-SD-NEXT: fsub s0, s1, s0
+; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_const_sitofp:
; CHECK-GI: // %bb.0: // %entry
diff --git a/llvm/test/CodeGen/ARM/frem-power2.ll b/llvm/test/CodeGen/ARM/frem-power2.ll
index 8052c8c35bcfb..efce515af439c 100644
--- a/llvm/test/CodeGen/ARM/frem-power2.ll
+++ b/llvm/test/CodeGen/ARM/frem-power2.ll
@@ -37,13 +37,23 @@ define float @frem4_nsz(float %x) {
;
; CHECK-FP-LABEL: frem4_nsz:
; CHECK-FP: @ %bb.0: @ %entry
-; CHECK-FP-NEXT: mov.w r1, #1082130432
-; CHECK-FP-NEXT: b fmodf
+; CHECK-FP-NEXT: vmov.f32 s0, #4.000000e+00
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vdiv.f32 s4, s2, s0
+; CHECK-FP-NEXT: vrintz.f32 s4, s4
+; CHECK-FP-NEXT: vmls.f32 s2, s4, s0
+; CHECK-FP-NEXT: vmov r0, s2
+; CHECK-FP-NEXT: bx lr
;
; CHECK-M33-LABEL: frem4_nsz:
; CHECK-M33: @ %bb.0: @ %entry
-; CHECK-M33-NEXT: mov.w r1, #1082130432
-; CHECK-M33-NEXT: b fmodf
+; CHECK-M33-NEXT: vmov.f32 s0, #4.000000e+00
+; CHECK-M33-NEXT: vmov s2, r0
+; CHECK-M33-NEXT: vdiv.f32 s4, s2, s0
+; CHECK-M33-NEXT: vrintz.f32 s4, s4
+; CHECK-M33-NEXT: vmls.f32 s2, s4, s0
+; CHECK-M33-NEXT: vmov r0, s2
+; CHECK-M33-NEXT: bx lr
entry:
%fmod = frem nsz float %x, 4.0
ret float %fmod
>From f3a99cc901732fe4a824d54375b93b0d2a04054a Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Tue, 7 May 2024 16:12:59 +0100
Subject: [PATCH 2/5] Do the legality checks first
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5fe7ab0188254..beee8e8b949c7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17379,12 +17379,12 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
// Lower frem N0, N1 => x - trunc(N0 / N1) * N1, providing N1 is an integer
// power of 2.
- if (DAG.isKnownToBeAPowerOfTwoFP(N1) &&
- (Flags.hasNoSignedZeros() || DAG.isKnownNonNegativeFP(N0)) &&
- !TLI.isOperationLegal(ISD::FREM, VT) &&
+ if (!TLI.isOperationLegal(ISD::FREM, VT) &&
TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
TLI.isOperationLegalOrCustom(ISD::FDIV, VT) &&
- TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT)) {
+ TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT) &&
+ DAG.isKnownToBeAPowerOfTwoFP(N1) &&
+ (Flags.hasNoSignedZeros() || DAG.isKnownNonNegativeFP(N0))) {
SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
>From 52ac9860b8f3a8c73ea6401e9fcb09fbac94e864 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 9 May 2024 11:53:42 +0100
Subject: [PATCH 3/5] Use FMA
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 9 ++-
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +-
llvm/test/CodeGen/AArch64/frem-power2.ll | 65 +++++++------------
llvm/test/CodeGen/ARM/frem-power2.ll | 2 +-
4 files changed, 34 insertions(+), 44 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index beee8e8b949c7..9f5dc24e2376c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17387,8 +17387,13 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
(Flags.hasNoSignedZeros() || DAG.isKnownNonNegativeFP(N0))) {
SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
- SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
- return DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
+ if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
+ return DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
+ N1, N0);
+ } else {
+ SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
+ }
}
return SDValue();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index af5a3c8fde3a7..3b950f8ace5da 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5567,7 +5567,7 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
bool SelectionDAG::isKnownNonNegativeFP(SDValue Op) const {
if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Op, true))
- return !C1->isNegative();
+ return !C1->isNegative() && !C1->isNaN();
return Op.getOpcode() == ISD::FABS;
}
diff --git a/llvm/test/CodeGen/AArch64/frem-power2.ll b/llvm/test/CodeGen/AArch64/frem-power2.ll
index 94a9f701e1f57..402e03c5e2652 100644
--- a/llvm/test/CodeGen/AArch64/frem-power2.ll
+++ b/llvm/test/CodeGen/AArch64/frem-power2.ll
@@ -16,10 +16,9 @@ define float @frem2_nsz(float %x) {
; CHECK-SD-LABEL: frem2_nsz:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: fdiv s1, s0, s1
-; CHECK-SD-NEXT: frintz s1, s1
-; CHECK-SD-NEXT: fadd s1, s1, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_nsz:
@@ -55,10 +54,9 @@ define float @frem2_abs(float %x) {
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fabs s0, s0
; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: fdiv s1, s0, s1
-; CHECK-SD-NEXT: frintz s1, s1
-; CHECK-SD-NEXT: fadd s1, s1, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_abs:
@@ -76,10 +74,10 @@ define half @hrem2_nsz(half %x) {
; CHECK-SD-LABEL: hrem2_nsz:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmov h1, #2.00000000
+; CHECK-SD-NEXT: fmov h2, #-2.00000000
; CHECK-SD-NEXT: fdiv h1, h0, h1
; CHECK-SD-NEXT: frintz h1, h1
-; CHECK-SD-NEXT: fadd h1, h1, h1
-; CHECK-SD-NEXT: fsub h0, h0, h1
+; CHECK-SD-NEXT: fmadd h0, h1, h2, h0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hrem2_nsz:
@@ -103,10 +101,9 @@ define double @drem2_nsz(double %x) {
; CHECK-SD-LABEL: drem2_nsz:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmov d1, #2.00000000
-; CHECK-SD-NEXT: fdiv d1, d0, d1
-; CHECK-SD-NEXT: frintz d1, d1
-; CHECK-SD-NEXT: fadd d1, d1, d1
-; CHECK-SD-NEXT: fsub d0, d0, d1
+; CHECK-SD-NEXT: fdiv d2, d0, d1
+; CHECK-SD-NEXT: frintz d2, d2
+; CHECK-SD-NEXT: fmsub d0, d2, d1, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: drem2_nsz:
@@ -170,8 +167,7 @@ define float @fremm2_nsz(float %x) {
; CHECK-SD-NEXT: fmov s1, #-2.00000000
; CHECK-SD-NEXT: fdiv s2, s0, s1
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s1, s2, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fremm2_nsz:
@@ -190,8 +186,7 @@ define float @frem4_abs(float %x) {
; CHECK-SD-NEXT: fmov s1, #4.00000000
; CHECK-SD-NEXT: fdiv s2, s0, s1
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s1, s2, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem4_abs:
@@ -212,8 +207,7 @@ define float @frem16_abs(float %x) {
; CHECK-SD-NEXT: fmov s1, #16.00000000
; CHECK-SD-NEXT: fdiv s2, s0, s1
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s1, s2, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem16_abs:
@@ -235,8 +229,7 @@ define float @frem4294967296_abs(float %x) {
; CHECK-SD-NEXT: fmov s1, w8
; CHECK-SD-NEXT: fdiv s2, s0, s1
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s1, s2, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem4294967296_abs:
@@ -259,8 +252,7 @@ define float @frem1152921504606846976_abs(float %x) {
; CHECK-SD-NEXT: fmov s1, w8
; CHECK-SD-NEXT: fdiv s2, s0, s1
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s1, s2, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem1152921504606846976_abs:
@@ -283,8 +275,7 @@ define float @frem4611686018427387904_abs(float %x) {
; CHECK-SD-NEXT: fmov s1, w8
; CHECK-SD-NEXT: fdiv s2, s0, s1
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s1, s2, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem4611686018427387904_abs:
@@ -306,8 +297,7 @@ define float @frem9223372036854775808_abs(float %x) {
; CHECK-SD-NEXT: fabs s0, s0
; CHECK-SD-NEXT: fdiv s2, s0, s1
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s1, s2, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem9223372036854775808_abs:
@@ -325,10 +315,9 @@ define <4 x float> @frem2_nsz_vec(<4 x float> %x) {
; CHECK-SD-LABEL: frem2_nsz_vec:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi v1.4s, #64, lsl #24
-; CHECK-SD-NEXT: fdiv v1.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: frintz v1.4s, v1.4s
-; CHECK-SD-NEXT: fadd v1.4s, v1.4s, v1.4s
-; CHECK-SD-NEXT: fsub v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: frintz v2.4s, v2.4s
+; CHECK-SD-NEXT: fmls v0.4s, v1.4s, v2.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_nsz_vec:
@@ -388,8 +377,7 @@ define <4 x float> @frem1152921504606846976_absv(<4 x float> %x) {
; CHECK-SD-NEXT: dup v1.4s, w8
; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: frintz v2.4s, v2.4s
-; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v1.4s
-; CHECK-SD-NEXT: fsub v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: fmls v0.4s, v1.4s, v2.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem1152921504606846976_absv:
@@ -454,8 +442,7 @@ define float @frem2_nsz_sitofp(float %x, i32 %sa) {
; CHECK-SD-NEXT: scvtf s1, w8
; CHECK-SD-NEXT: fdiv s2, s0, s1
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s1, s2, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_nsz_sitofp:
@@ -479,8 +466,7 @@ define float @frem2_nsz_uitofp(float %x, i32 %sa) {
; CHECK-SD-NEXT: ucvtf s1, w8
; CHECK-SD-NEXT: fdiv s2, s0, s1
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s1, s2, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_nsz_uitofp:
@@ -505,8 +491,7 @@ define float @frem2_const_sitofp(float %x, i32 %sa) {
; CHECK-SD-NEXT: scvtf s0, w8
; CHECK-SD-NEXT: fdiv s2, s1, s0
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s0, s2, s0
-; CHECK-SD-NEXT: fsub s0, s1, s0
+; CHECK-SD-NEXT: fmsub s0, s2, s0, s1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_const_sitofp:
diff --git a/llvm/test/CodeGen/ARM/frem-power2.ll b/llvm/test/CodeGen/ARM/frem-power2.ll
index efce515af439c..7f52943175ac9 100644
--- a/llvm/test/CodeGen/ARM/frem-power2.ll
+++ b/llvm/test/CodeGen/ARM/frem-power2.ll
@@ -41,7 +41,7 @@ define float @frem4_nsz(float %x) {
; CHECK-FP-NEXT: vmov s2, r0
; CHECK-FP-NEXT: vdiv.f32 s4, s2, s0
; CHECK-FP-NEXT: vrintz.f32 s4, s4
-; CHECK-FP-NEXT: vmls.f32 s2, s4, s0
+; CHECK-FP-NEXT: vfms.f32 s2, s4, s0
; CHECK-FP-NEXT: vmov r0, s2
; CHECK-FP-NEXT: bx lr
;
>From b6bbf9cd086cc36376f85cfd953fdee515de30ab Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 9 May 2024 12:37:41 +0100
Subject: [PATCH 4/5] No else after return
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 +++-----
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +-
2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9f5dc24e2376c..002576e43e3be 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17387,13 +17387,11 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
(Flags.hasNoSignedZeros() || DAG.isKnownNonNegativeFP(N0))) {
SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
- if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
+ if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT))
return DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
N1, N0);
- } else {
- SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
- return DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
- }
+ SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
}
return SDValue();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 3b950f8ace5da..af5a3c8fde3a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5567,7 +5567,7 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
bool SelectionDAG::isKnownNonNegativeFP(SDValue Op) const {
if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Op, true))
- return !C1->isNegative() && !C1->isNaN();
+ return !C1->isNegative();
return Op.getOpcode() == ISD::FABS;
}
>From d5013f18bffdf7f1ba3e85e839b887cbb051e339 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 10 May 2024 11:17:50 +0100
Subject: [PATCH 5/5] isKnownPositiveOrNaNFP
---
llvm/include/llvm/CodeGen/SelectionDAG.h | 6 +++---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +-
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +-
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index bc4200a68f2d8..3e4f0e1f6d3a0 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -2115,9 +2115,9 @@ class SelectionDAG {
/// Test whether the given SDValue is known to contain non-zero value(s).
bool isKnownNeverZero(SDValue Op, unsigned Depth = 0) const;
- /// Test whether the given float value is known to not be negative. 0.0 is
- /// considered non-negative, -0.0 is considered negative.
- bool isKnownNonNegativeFP(SDValue Op) const;
+ /// Test whether the given float value is known to be positive. +0.0, +inf and
+ /// +nan are considered positive, -0.0, -inf and -nan are not.
+ bool isKnownPositiveOrNaNFP(SDValue Op) const;
/// Test whether two SDValues are known to compare equal. This
/// is true if they are the same value, or if one is negative zero and the
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 002576e43e3be..394de9203e5ab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17384,7 +17384,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::FDIV, VT) &&
TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT) &&
DAG.isKnownToBeAPowerOfTwoFP(N1) &&
- (Flags.hasNoSignedZeros() || DAG.isKnownNonNegativeFP(N0))) {
+ (Flags.hasNoSignedZeros() || DAG.isKnownPositiveOrNaNFP(N0))) {
SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT))
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index af5a3c8fde3a7..04584f40547db 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5565,7 +5565,7 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
return computeKnownBits(Op, Depth).isNonZero();
}
-bool SelectionDAG::isKnownNonNegativeFP(SDValue Op) const {
+bool SelectionDAG::isKnownPositiveOrNaNFP(SDValue Op) const {
if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Op, true))
return !C1->isNegative();
More information about the llvm-commits mailing list