[llvm] [DAG] Lower frem of power-2 using div/trunc/mul+sub (PR #91148)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu May 9 06:10:51 PDT 2024
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/91148
>From bb658ae13cb2f64c562acde29b7ea273fd0091a4 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Sun, 5 May 2024 21:47:07 +0100
Subject: [PATCH 1/6] [AArch64] Add tests for frem power2 lowering. NFC
---
llvm/test/CodeGen/AArch64/frem-power2.ll | 479 +++++++++++++++++++++++
llvm/test/CodeGen/ARM/frem-power2.ll | 50 +++
2 files changed, 529 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/frem-power2.ll
create mode 100644 llvm/test/CodeGen/ARM/frem-power2.ll
diff --git a/llvm/test/CodeGen/AArch64/frem-power2.ll b/llvm/test/CodeGen/AArch64/frem-power2.ll
new file mode 100644
index 0000000000000..5d627fcd6b656
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/frem-power2.ll
@@ -0,0 +1,479 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+define float @frem2(float %x) {
+; CHECK-LABEL: frem2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, #2.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %fmod = frem float %x, 2.0
+ ret float %fmod
+}
+
+define float @frem2_nsz(float %x) {
+; CHECK-LABEL: frem2_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, #2.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %fmod = frem nsz float %x, 2.0
+ ret float %fmod
+}
+
+define float @frem2_fast(float %x) {
+; CHECK-LABEL: frem2_fast:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, #2.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %fmod = frem fast float %x, 2.0
+ ret float %fmod
+}
+
+define float @frem2_abs(float %x) {
+; CHECK-LABEL: frem2_abs:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: fmov s1, #2.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 2.0
+ ret float %fmod
+}
+
+define half @hrem2_nsz(half %x) {
+; CHECK-SD-LABEL: hrem2_nsz:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: fcvt s0, h0
+; CHECK-SD-NEXT: fmov s1, #2.00000000
+; CHECK-SD-NEXT: bl fmodf
+; CHECK-SD-NEXT: fcvt h0, s0
+; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: hrem2_nsz:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: fmov h1, #2.00000000
+; CHECK-GI-NEXT: fcvt s0, h0
+; CHECK-GI-NEXT: fcvt s1, h1
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: fcvt h0, s0
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+entry:
+ %fmod = frem nsz half %x, 2.0
+ ret half %fmod
+}
+
+define double @drem2_nsz(double %x) {
+; CHECK-LABEL: drem2_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d1, #2.00000000
+; CHECK-NEXT: b fmod
+entry:
+ %fmod = frem nsz double %x, 2.0
+ ret double %fmod
+}
+
+define float @frem3_nsz(float %x) {
+; CHECK-LABEL: frem3_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, #3.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %fmod = frem nsz float %x, 3.0
+ ret float %fmod
+}
+
+define float @frem05_nsz(float %x) {
+; CHECK-LABEL: frem05_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, #0.50000000
+; CHECK-NEXT: b fmodf
+entry:
+ %fmod = frem nsz float %x, 0.5
+ ret float %fmod
+}
+
+define float @frem1_nsz(float %x) {
+; CHECK-LABEL: frem1_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, #1.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %fmod = frem nsz float %x, 1.0
+ ret float %fmod
+}
+
+define float @frem0_nsz(float %x) {
+; CHECK-LABEL: frem0_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi d1, #0000000000000000
+; CHECK-NEXT: b fmodf
+entry:
+ %fmod = frem nsz float %x, 0.0
+ ret float %fmod
+}
+
+define float @fremm2_nsz(float %x) {
+; CHECK-LABEL: fremm2_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, #-2.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %fmod = frem nsz float %x, -2.0
+ ret float %fmod
+}
+
+define float @frem4_abs(float %x) {
+; CHECK-LABEL: frem4_abs:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: fmov s1, #4.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 4.0
+ ret float %fmod
+}
+
+define float @frem16_abs(float %x) {
+; CHECK-LABEL: frem16_abs:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: fmov s1, #16.00000000
+; CHECK-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 16.0
+ ret float %fmod
+}
+
+define float @frem4294967296_abs(float %x) {
+; CHECK-LABEL: frem4294967296_abs:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: mov w8, #1333788672 // =0x4f800000
+; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 4294967296.0
+ ret float %fmod
+}
+
+define float @frem1152921504606846976_abs(float %x) {
+; CHECK-LABEL: frem1152921504606846976_abs:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: mov w8, #1568669696 // =0x5d800000
+; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 1152921504606846976.0
+ ret float %fmod
+}
+
+define float @frem4611686018427387904_abs(float %x) {
+; CHECK-LABEL: frem4611686018427387904_abs:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: mov w8, #1585446912 // =0x5e800000
+; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 4611686018427387904.0
+ ret float %fmod
+}
+
+define float @frem9223372036854775808_abs(float %x) {
+; CHECK-LABEL: frem9223372036854775808_abs:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: movi v1.2s, #95, lsl #24
+; CHECK-NEXT: b fmodf
+entry:
+ %a = tail call float @llvm.fabs.f32(float %x)
+ %fmod = frem float %a, 9223372036854775808.0
+ ret float %fmod
+}
+
+define <4 x float> @frem2_nsz_vec(<4 x float> %x) {
+; CHECK-SD-LABEL: frem2_nsz_vec:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: mov s0, v0.s[1]
+; CHECK-SD-NEXT: fmov s1, #2.00000000
+; CHECK-SD-NEXT: bl fmodf
+; CHECK-SD-NEXT: fmov s1, #2.00000000
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT: bl fmodf
+; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT: fmov s1, #2.00000000
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov s0, v0.s[2]
+; CHECK-SD-NEXT: bl fmodf
+; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: mov v1.s[2], v0.s[0]
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov s0, v0.s[3]
+; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: fmov s1, #2.00000000
+; CHECK-SD-NEXT: bl fmodf
+; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem2_nsz_vec:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #80
+; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill
+; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 80
+; CHECK-GI-NEXT: .cfi_offset w30, -8
+; CHECK-GI-NEXT: .cfi_offset b8, -16
+; CHECK-GI-NEXT: .cfi_offset b9, -24
+; CHECK-GI-NEXT: .cfi_offset b10, -32
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: mov s8, v0.s[1]
+; CHECK-GI-NEXT: mov s9, v0.s[2]
+; CHECK-GI-NEXT: mov s10, v0.s[3]
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: fmov s0, s8
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: fmov s0, s9
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: fmov s0, s10
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload
+; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NEXT: add sp, sp, #80
+; CHECK-GI-NEXT: ret
+entry:
+ %fmod = frem nsz <4 x float> %x, <float 2.0, float 2.0, float 2.0, float 2.0>
+ ret <4 x float> %fmod
+}
+
+define <4 x float> @frem1152921504606846976_absv(<4 x float> %x) {
+; CHECK-SD-LABEL: frem1152921504606846976_absv:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str d8, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -8
+; CHECK-SD-NEXT: .cfi_offset b8, -16
+; CHECK-SD-NEXT: fabs v0.4s, v0.4s
+; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000
+; CHECK-SD-NEXT: fmov s8, w8
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: mov s0, v0.s[1]
+; CHECK-SD-NEXT: fmov s1, s8
+; CHECK-SD-NEXT: bl fmodf
+; CHECK-SD-NEXT: fmov s1, s8
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT: bl fmodf
+; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT: fmov s1, s8
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov s0, v0.s[2]
+; CHECK-SD-NEXT: bl fmodf
+; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: mov v1.s[2], v0.s[0]
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov s0, v0.s[3]
+; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: fmov s1, s8
+; CHECK-SD-NEXT: bl fmodf
+; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-SD-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem1152921504606846976_absv:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #96
+; CHECK-GI-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 96
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: .cfi_offset b8, -24
+; CHECK-GI-NEXT: .cfi_offset b9, -32
+; CHECK-GI-NEXT: .cfi_offset b10, -40
+; CHECK-GI-NEXT: .cfi_offset b11, -48
+; CHECK-GI-NEXT: mov w8, #1568669696 // =0x5d800000
+; CHECK-GI-NEXT: fabs v0.4s, v0.4s
+; CHECK-GI-NEXT: fmov s11, w8
+; CHECK-GI-NEXT: fmov s1, s11
+; CHECK-GI-NEXT: mov s8, v0.s[1]
+; CHECK-GI-NEXT: mov s9, v0.s[2]
+; CHECK-GI-NEXT: mov s10, v0.s[3]
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s1, s11
+; CHECK-GI-NEXT: fmov s0, s8
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s1, s11
+; CHECK-GI-NEXT: fmov s0, s9
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s1, s11
+; CHECK-GI-NEXT: fmov s0, s10
+; CHECK-GI-NEXT: bl fmodf
+; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-GI-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NEXT: add sp, sp, #96
+; CHECK-GI-NEXT: ret
+entry:
+ %a = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
+ %fmod = frem <4 x float> %a, <float 1152921504606846976.0, float 1152921504606846976.0, float 1152921504606846976.0, float 1152921504606846976.0>
+ ret <4 x float> %fmod
+}
+
+define float @frem2_nsz_sitofp(float %x, i32 %sa) {
+; CHECK-LABEL: frem2_nsz_sitofp:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: lsl w8, w8, w0
+; CHECK-NEXT: scvtf s1, w8
+; CHECK-NEXT: b fmodf
+entry:
+ %s = shl i32 1, %sa
+ %y = sitofp i32 %s to float
+ %fmod = frem nsz float %x, %y
+ ret float %fmod
+}
+
+define float @frem2_nsz_uitofp(float %x, i32 %sa) {
+; CHECK-LABEL: frem2_nsz_uitofp:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: lsl w8, w8, w0
+; CHECK-NEXT: ucvtf s1, w8
+; CHECK-NEXT: b fmodf
+entry:
+ %s = shl i32 1, %sa
+ %y = uitofp i32 %s to float
+ %fmod = frem nsz float %x, %y
+ ret float %fmod
+}
+
+define float @frem2_const_sitofp(float %x, i32 %sa) {
+; CHECK-SD-LABEL: frem2_const_sitofp:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w8, #1 // =0x1
+; CHECK-SD-NEXT: fmov s0, #12.50000000
+; CHECK-SD-NEXT: lsl w8, w8, w0
+; CHECK-SD-NEXT: scvtf s1, w8
+; CHECK-SD-NEXT: b fmodf
+;
+; CHECK-GI-LABEL: frem2_const_sitofp:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: and w9, w0, #0x1f
+; CHECK-GI-NEXT: fmov s0, #12.50000000
+; CHECK-GI-NEXT: lsl w8, w8, w9
+; CHECK-GI-NEXT: scvtf s1, w8
+; CHECK-GI-NEXT: b fmodf
+entry:
+ %sa2 = and i32 %sa, 31
+ %s = shl i32 1, %sa2
+ %y = sitofp i32 %s to float
+ %fmod = frem float 12.50, %y
+ ret float %fmod
+}
+
+define float @frem2_constneg_sitofp(float %x, i32 %sa) {
+; CHECK-SD-LABEL: frem2_constneg_sitofp:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w8, #1 // =0x1
+; CHECK-SD-NEXT: fmov s0, #-12.50000000
+; CHECK-SD-NEXT: lsl w8, w8, w0
+; CHECK-SD-NEXT: scvtf s1, w8
+; CHECK-SD-NEXT: b fmodf
+;
+; CHECK-GI-LABEL: frem2_constneg_sitofp:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: and w9, w0, #0x1f
+; CHECK-GI-NEXT: fmov s0, #-12.50000000
+; CHECK-GI-NEXT: lsl w8, w8, w9
+; CHECK-GI-NEXT: scvtf s1, w8
+; CHECK-GI-NEXT: b fmodf
+entry:
+ %sa2 = and i32 %sa, 31
+ %s = shl i32 1, %sa2
+ %y = sitofp i32 %s to float
+ %fmod = frem float -12.50, %y
+ ret float %fmod
+}
diff --git a/llvm/test/CodeGen/ARM/frem-power2.ll b/llvm/test/CodeGen/ARM/frem-power2.ll
new file mode 100644
index 0000000000000..8052c8c35bcfb
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/frem-power2.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=thumbv8m.main-none-eabi %s -o - | FileCheck %s --check-prefix=CHECK-SOFT
+; RUN: llc -mtriple=thumbv8m.main-none-eabi -mattr=+fp-armv8 %s -o - | FileCheck %s --check-prefix=CHECK-FP
+; RUN: llc -mtriple=thumbv8m.main-none-eabi -mattr=+fp-armv8,+slowfpvfmx %s -o - | FileCheck %s --check-prefix=CHECK-M33
+
+define float @frem4(float %x) {
+; CHECK-SOFT-LABEL: frem4:
+; CHECK-SOFT: @ %bb.0: @ %entry
+; CHECK-SOFT-NEXT: .save {r7, lr}
+; CHECK-SOFT-NEXT: push {r7, lr}
+; CHECK-SOFT-NEXT: mov.w r1, #1082130432
+; CHECK-SOFT-NEXT: bl fmodf
+; CHECK-SOFT-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: frem4:
+; CHECK-FP: @ %bb.0: @ %entry
+; CHECK-FP-NEXT: mov.w r1, #1082130432
+; CHECK-FP-NEXT: b fmodf
+;
+; CHECK-M33-LABEL: frem4:
+; CHECK-M33: @ %bb.0: @ %entry
+; CHECK-M33-NEXT: mov.w r1, #1082130432
+; CHECK-M33-NEXT: b fmodf
+entry:
+ %fmod = frem float %x, 4.0
+ ret float %fmod
+}
+
+define float @frem4_nsz(float %x) {
+; CHECK-SOFT-LABEL: frem4_nsz:
+; CHECK-SOFT: @ %bb.0: @ %entry
+; CHECK-SOFT-NEXT: .save {r7, lr}
+; CHECK-SOFT-NEXT: push {r7, lr}
+; CHECK-SOFT-NEXT: mov.w r1, #1082130432
+; CHECK-SOFT-NEXT: bl fmodf
+; CHECK-SOFT-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: frem4_nsz:
+; CHECK-FP: @ %bb.0: @ %entry
+; CHECK-FP-NEXT: mov.w r1, #1082130432
+; CHECK-FP-NEXT: b fmodf
+;
+; CHECK-M33-LABEL: frem4_nsz:
+; CHECK-M33: @ %bb.0: @ %entry
+; CHECK-M33-NEXT: mov.w r1, #1082130432
+; CHECK-M33-NEXT: b fmodf
+entry:
+ %fmod = frem nsz float %x, 4.0
+ ret float %fmod
+}
>From e1a6571ca3557f1d542dd0a8fdff4c1ec073aed4 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Tue, 7 May 2024 13:50:50 +0100
Subject: [PATCH 2/6] [DAG] Lower frem of power-2 using div/trunc/mul+sub.
If we are lowering a frem and the divisor is known to be an integer power-2, we
can use the formula 'frem = x - trunc(x / d) * d'. This avoids the more
expensive call to fmod. The results are identical to fmod so long as d is a
power-2 (so the mul does not round incorrectly), and the sign of the return is
either always positive or the sign of a zero result is not important (nsz).
Unfortunately Alive2 does not handle this well at the moment. I was using
exhaustive checking to test this, hopefully I didn't make a mistake in it
(https://gist.github.com/davemgreen/6078015f30d3bacd1e9572f8db5d4b64).
I found this in CPython's implementation of float_pow. I currently added it as a
DAG combine for frem with power-2 fp constants, with some extra utility
functions for checking if a floating-point value is known non-negative or an
integer power-2.
---
llvm/include/llvm/CodeGen/SelectionDAG.h | 8 +
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 17 +-
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 17 +
llvm/test/CodeGen/AArch64/frem-power2.ll | 388 +++++++++++-------
llvm/test/CodeGen/ARM/frem-power2.ll | 18 +-
5 files changed, 285 insertions(+), 163 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 4b1b58d4af0bb..bc4200a68f2d8 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1996,6 +1996,10 @@ class SelectionDAG {
/// is set.
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth = 0) const;
+ /// Test if the given _fp_ value is known to be an integer power-of-2, either
+ /// positive or negative.
+ bool isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth = 0) const;
+
/// Return the number of times the sign bit of the register is replicated into
/// the other bits. We know that at least 1 bit is always equal to the sign
/// bit (itself), but other cases can give us information. For example,
@@ -2111,6 +2115,10 @@ class SelectionDAG {
/// Test whether the given SDValue is known to contain non-zero value(s).
bool isKnownNeverZero(SDValue Op, unsigned Depth = 0) const;
+ /// Test whether the given float value is known to not be negative. 0.0 is
+ /// considered non-negative, -0.0 is considered negative.
+ bool isKnownNonNegativeFP(SDValue Op) const;
+
/// Test whether two SDValues are known to compare equal. This
/// is true if they are the same value, or if one is negative zero and the
/// other positive zero.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4589d201d6203..fb71b074f48ec 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17327,17 +17327,32 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
EVT VT = N->getValueType(0);
SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+ SDLoc DL(N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
// fold (frem c1, c2) -> fmod(c1,c2)
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
+ // Lower frem N0, N1 => x - trunc(N0 / N1) * N1, providing N1 is an integer
+ // power of 2.
+ if (DAG.isKnownToBeAPowerOfTwoFP(N1) &&
+ (Flags.hasNoSignedZeros() || DAG.isKnownNonNegativeFP(N0)) &&
+ !TLI.isOperationLegal(ISD::FREM, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FDIV, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT)) {
+ SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
+ SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
+ SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
+ }
+
return SDValue();
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index eef5acd032345..af5a3c8fde3a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4373,6 +4373,16 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
return false;
}
+bool SelectionDAG::isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth) const {
+ if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Val, true))
+ return C1->getValueAPF().getExactLog2Abs() >= 0;
+
+ if (Val.getOpcode() == ISD::UINT_TO_FP || Val.getOpcode() == ISD::SINT_TO_FP)
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);
+
+ return false;
+}
+
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
@@ -5555,6 +5565,13 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
return computeKnownBits(Op, Depth).isNonZero();
}
+bool SelectionDAG::isKnownNonNegativeFP(SDValue Op) const {
+ if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Op, true))
+ return !C1->isNegative();
+
+ return Op.getOpcode() == ISD::FABS;
+}
+
bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
// Check the obvious case.
if (A == B) return true;
diff --git a/llvm/test/CodeGen/AArch64/frem-power2.ll b/llvm/test/CodeGen/AArch64/frem-power2.ll
index 5d627fcd6b656..94a9f701e1f57 100644
--- a/llvm/test/CodeGen/AArch64/frem-power2.ll
+++ b/llvm/test/CodeGen/AArch64/frem-power2.ll
@@ -13,31 +13,59 @@ entry:
}
define float @frem2_nsz(float %x) {
-; CHECK-LABEL: frem2_nsz:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s1, #2.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem2_nsz:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s1, #2.00000000
+; CHECK-SD-NEXT: fdiv s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fadd s1, s1, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem2_nsz:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%fmod = frem nsz float %x, 2.0
ret float %fmod
}
define float @frem2_fast(float %x) {
-; CHECK-LABEL: frem2_fast:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s1, #2.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem2_fast:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s1, #0.50000000
+; CHECK-SD-NEXT: fmov s2, #-2.00000000
+; CHECK-SD-NEXT: fmul s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fmadd s0, s1, s2, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem2_fast:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%fmod = frem fast float %x, 2.0
ret float %fmod
}
define float @frem2_abs(float %x) {
-; CHECK-LABEL: frem2_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: fmov s1, #2.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem2_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: fmov s1, #2.00000000
+; CHECK-SD-NEXT: fdiv s1, s0, s1
+; CHECK-SD-NEXT: frintz s1, s1
+; CHECK-SD-NEXT: fadd s1, s1, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem2_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: fmov s1, #2.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 2.0
@@ -47,14 +75,11 @@ entry:
define half @hrem2_nsz(half %x) {
; CHECK-SD-LABEL: hrem2_nsz:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
-; CHECK-SD-NEXT: .cfi_offset w30, -16
-; CHECK-SD-NEXT: fcvt s0, h0
-; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: fcvt h0, s0
-; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: fmov h1, #2.00000000
+; CHECK-SD-NEXT: fdiv h1, h0, h1
+; CHECK-SD-NEXT: frintz h1, h1
+; CHECK-SD-NEXT: fadd h1, h1, h1
+; CHECK-SD-NEXT: fsub h0, h0, h1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hrem2_nsz:
@@ -75,10 +100,19 @@ entry:
}
define double @drem2_nsz(double %x) {
-; CHECK-LABEL: drem2_nsz:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov d1, #2.00000000
-; CHECK-NEXT: b fmod
+; CHECK-SD-LABEL: drem2_nsz:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov d1, #2.00000000
+; CHECK-SD-NEXT: fdiv d1, d0, d1
+; CHECK-SD-NEXT: frintz d1, d1
+; CHECK-SD-NEXT: fadd d1, d1, d1
+; CHECK-SD-NEXT: fsub d0, d0, d1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: drem2_nsz:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov d1, #2.00000000
+; CHECK-GI-NEXT: b fmod
entry:
%fmod = frem nsz double %x, 2.0
ret double %fmod
@@ -105,10 +139,16 @@ entry:
}
define float @frem1_nsz(float %x) {
-; CHECK-LABEL: frem1_nsz:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s1, #1.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem1_nsz:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: frintz s1, s0
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem1_nsz:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s1, #1.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%fmod = frem nsz float %x, 1.0
ret float %fmod
@@ -125,21 +165,40 @@ entry:
}
define float @fremm2_nsz(float %x) {
-; CHECK-LABEL: fremm2_nsz:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s1, #-2.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: fremm2_nsz:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s1, #-2.00000000
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fremm2_nsz:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s1, #-2.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%fmod = frem nsz float %x, -2.0
ret float %fmod
}
define float @frem4_abs(float %x) {
-; CHECK-LABEL: frem4_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: fmov s1, #4.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem4_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: fmov s1, #4.00000000
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem4_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: fmov s1, #4.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 4.0
@@ -147,11 +206,21 @@ entry:
}
define float @frem16_abs(float %x) {
-; CHECK-LABEL: frem16_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: fmov s1, #16.00000000
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem16_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: fmov s1, #16.00000000
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem16_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: fmov s1, #16.00000000
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 16.0
@@ -159,12 +228,23 @@ entry:
}
define float @frem4294967296_abs(float %x) {
-; CHECK-LABEL: frem4294967296_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: mov w8, #1333788672 // =0x4f800000
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem4294967296_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: mov w8, #1333788672 // =0x4f800000
+; CHECK-SD-NEXT: fmov s1, w8
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem4294967296_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: mov w8, #1333788672 // =0x4f800000
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 4294967296.0
@@ -172,12 +252,23 @@ entry:
}
define float @frem1152921504606846976_abs(float %x) {
-; CHECK-LABEL: frem1152921504606846976_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: mov w8, #1568669696 // =0x5d800000
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem1152921504606846976_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000
+; CHECK-SD-NEXT: fmov s1, w8
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem1152921504606846976_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: mov w8, #1568669696 // =0x5d800000
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 1152921504606846976.0
@@ -185,12 +276,23 @@ entry:
}
define float @frem4611686018427387904_abs(float %x) {
-; CHECK-LABEL: frem4611686018427387904_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: mov w8, #1585446912 // =0x5e800000
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem4611686018427387904_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: mov w8, #1585446912 // =0x5e800000
+; CHECK-SD-NEXT: fmov s1, w8
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem4611686018427387904_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: mov w8, #1585446912 // =0x5e800000
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 4611686018427387904.0
@@ -198,11 +300,21 @@ entry:
}
define float @frem9223372036854775808_abs(float %x) {
-; CHECK-LABEL: frem9223372036854775808_abs:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fabs s0, s0
-; CHECK-NEXT: movi v1.2s, #95, lsl #24
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem9223372036854775808_abs:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: movi v1.2s, #95, lsl #24
+; CHECK-SD-NEXT: fabs s0, s0
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem9223372036854775808_abs:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fabs s0, s0
+; CHECK-GI-NEXT: movi v1.2s, #95, lsl #24
+; CHECK-GI-NEXT: b fmodf
entry:
%a = tail call float @llvm.fabs.f32(float %x)
%fmod = frem float %a, 9223372036854775808.0
@@ -212,42 +324,11 @@ entry:
define <4 x float> @frem2_nsz_vec(<4 x float> %x) {
; CHECK-SD-LABEL: frem2_nsz_vec:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sub sp, sp, #48
-; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
-; CHECK-SD-NEXT: .cfi_offset w30, -16
-; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov s0, v0.s[1]
-; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov s0, v0.s[2]
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: mov v1.s[2], v0.s[0]
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov s0, v0.s[3]
-; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: movi v1.4s, #64, lsl #24
+; CHECK-SD-NEXT: fdiv v1.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: frintz v1.4s, v1.4s
+; CHECK-SD-NEXT: fadd v1.4s, v1.4s, v1.4s
+; CHECK-SD-NEXT: fsub v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_nsz_vec:
@@ -302,48 +383,13 @@ entry:
define <4 x float> @frem1152921504606846976_absv(<4 x float> %x) {
; CHECK-SD-LABEL: frem1152921504606846976_absv:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sub sp, sp, #48
-; CHECK-SD-NEXT: str d8, [sp, #32] // 8-byte Folded Spill
-; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
-; CHECK-SD-NEXT: .cfi_offset w30, -8
-; CHECK-SD-NEXT: .cfi_offset b8, -16
-; CHECK-SD-NEXT: fabs v0.4s, v0.4s
; CHECK-SD-NEXT: mov w8, #1568669696 // =0x5d800000
-; CHECK-SD-NEXT: fmov s8, w8
-; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov s0, v0.s[1]
-; CHECK-SD-NEXT: fmov s1, s8
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: fmov s1, s8
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
-; CHECK-SD-NEXT: fmov s1, s8
-; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov s0, v0.s[2]
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: mov v1.s[2], v0.s[0]
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov s0, v0.s[3]
-; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: fmov s1, s8
-; CHECK-SD-NEXT: bl fmodf
-; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-SD-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v1.s[3], v0.s[0]
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: fabs v0.4s, v0.4s
+; CHECK-SD-NEXT: dup v1.4s, w8
+; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: frintz v2.4s, v2.4s
+; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v1.4s
+; CHECK-SD-NEXT: fsub v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem1152921504606846976_absv:
@@ -401,12 +447,23 @@ entry:
}
define float @frem2_nsz_sitofp(float %x, i32 %sa) {
-; CHECK-LABEL: frem2_nsz_sitofp:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: lsl w8, w8, w0
-; CHECK-NEXT: scvtf s1, w8
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem2_nsz_sitofp:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w8, #1 // =0x1
+; CHECK-SD-NEXT: lsl w8, w8, w0
+; CHECK-SD-NEXT: scvtf s1, w8
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem2_nsz_sitofp:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: lsl w8, w8, w0
+; CHECK-GI-NEXT: scvtf s1, w8
+; CHECK-GI-NEXT: b fmodf
entry:
%s = shl i32 1, %sa
%y = sitofp i32 %s to float
@@ -415,12 +472,23 @@ entry:
}
define float @frem2_nsz_uitofp(float %x, i32 %sa) {
-; CHECK-LABEL: frem2_nsz_uitofp:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: lsl w8, w8, w0
-; CHECK-NEXT: ucvtf s1, w8
-; CHECK-NEXT: b fmodf
+; CHECK-SD-LABEL: frem2_nsz_uitofp:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w8, #1 // =0x1
+; CHECK-SD-NEXT: lsl w8, w8, w0
+; CHECK-SD-NEXT: ucvtf s1, w8
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s1, s2, s1
+; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: frem2_nsz_uitofp:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: lsl w8, w8, w0
+; CHECK-GI-NEXT: ucvtf s1, w8
+; CHECK-GI-NEXT: b fmodf
entry:
%s = shl i32 1, %sa
%y = uitofp i32 %s to float
@@ -432,10 +500,14 @@ define float @frem2_const_sitofp(float %x, i32 %sa) {
; CHECK-SD-LABEL: frem2_const_sitofp:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov w8, #1 // =0x1
-; CHECK-SD-NEXT: fmov s0, #12.50000000
+; CHECK-SD-NEXT: fmov s1, #12.50000000
; CHECK-SD-NEXT: lsl w8, w8, w0
-; CHECK-SD-NEXT: scvtf s1, w8
-; CHECK-SD-NEXT: b fmodf
+; CHECK-SD-NEXT: scvtf s0, w8
+; CHECK-SD-NEXT: fdiv s2, s1, s0
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmul s0, s2, s0
+; CHECK-SD-NEXT: fsub s0, s1, s0
+; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_const_sitofp:
; CHECK-GI: // %bb.0: // %entry
diff --git a/llvm/test/CodeGen/ARM/frem-power2.ll b/llvm/test/CodeGen/ARM/frem-power2.ll
index 8052c8c35bcfb..efce515af439c 100644
--- a/llvm/test/CodeGen/ARM/frem-power2.ll
+++ b/llvm/test/CodeGen/ARM/frem-power2.ll
@@ -37,13 +37,23 @@ define float @frem4_nsz(float %x) {
;
; CHECK-FP-LABEL: frem4_nsz:
; CHECK-FP: @ %bb.0: @ %entry
-; CHECK-FP-NEXT: mov.w r1, #1082130432
-; CHECK-FP-NEXT: b fmodf
+; CHECK-FP-NEXT: vmov.f32 s0, #4.000000e+00
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vdiv.f32 s4, s2, s0
+; CHECK-FP-NEXT: vrintz.f32 s4, s4
+; CHECK-FP-NEXT: vmls.f32 s2, s4, s0
+; CHECK-FP-NEXT: vmov r0, s2
+; CHECK-FP-NEXT: bx lr
;
; CHECK-M33-LABEL: frem4_nsz:
; CHECK-M33: @ %bb.0: @ %entry
-; CHECK-M33-NEXT: mov.w r1, #1082130432
-; CHECK-M33-NEXT: b fmodf
+; CHECK-M33-NEXT: vmov.f32 s0, #4.000000e+00
+; CHECK-M33-NEXT: vmov s2, r0
+; CHECK-M33-NEXT: vdiv.f32 s4, s2, s0
+; CHECK-M33-NEXT: vrintz.f32 s4, s4
+; CHECK-M33-NEXT: vmls.f32 s2, s4, s0
+; CHECK-M33-NEXT: vmov r0, s2
+; CHECK-M33-NEXT: bx lr
entry:
%fmod = frem nsz float %x, 4.0
ret float %fmod
>From db0b8a1a15a1785329d80a8e5dd95fe12516dcbe Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Tue, 7 May 2024 16:12:59 +0100
Subject: [PATCH 3/6] Do the legality checks first
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fb71b074f48ec..36aa55206cf59 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17341,12 +17341,12 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
// Lower frem N0, N1 => x - trunc(N0 / N1) * N1, providing N1 is an integer
// power of 2.
- if (DAG.isKnownToBeAPowerOfTwoFP(N1) &&
- (Flags.hasNoSignedZeros() || DAG.isKnownNonNegativeFP(N0)) &&
- !TLI.isOperationLegal(ISD::FREM, VT) &&
+ if (!TLI.isOperationLegal(ISD::FREM, VT) &&
TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
TLI.isOperationLegalOrCustom(ISD::FDIV, VT) &&
- TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT)) {
+ TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT) &&
+ DAG.isKnownToBeAPowerOfTwoFP(N1) &&
+ (Flags.hasNoSignedZeros() || DAG.isKnownNonNegativeFP(N0))) {
SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
>From 5042aea0c21e03c118adf484a911b4a45435d169 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 9 May 2024 11:53:42 +0100
Subject: [PATCH 4/6] Use FMA
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 9 ++-
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +-
llvm/test/CodeGen/AArch64/frem-power2.ll | 65 +++++++------------
llvm/test/CodeGen/ARM/frem-power2.ll | 2 +-
4 files changed, 34 insertions(+), 44 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 36aa55206cf59..f2a9de933696c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17349,8 +17349,13 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
(Flags.hasNoSignedZeros() || DAG.isKnownNonNegativeFP(N0))) {
SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
- SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
- return DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
+ if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
+ return DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
+ N1, N0);
+ } else {
+ SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
+ }
}
return SDValue();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index af5a3c8fde3a7..3b950f8ace5da 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5567,7 +5567,7 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
bool SelectionDAG::isKnownNonNegativeFP(SDValue Op) const {
if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Op, true))
- return !C1->isNegative();
+ return !C1->isNegative() && !C1->isNaN();
return Op.getOpcode() == ISD::FABS;
}
diff --git a/llvm/test/CodeGen/AArch64/frem-power2.ll b/llvm/test/CodeGen/AArch64/frem-power2.ll
index 94a9f701e1f57..402e03c5e2652 100644
--- a/llvm/test/CodeGen/AArch64/frem-power2.ll
+++ b/llvm/test/CodeGen/AArch64/frem-power2.ll
@@ -16,10 +16,9 @@ define float @frem2_nsz(float %x) {
; CHECK-SD-LABEL: frem2_nsz:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: fdiv s1, s0, s1
-; CHECK-SD-NEXT: frintz s1, s1
-; CHECK-SD-NEXT: fadd s1, s1, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_nsz:
@@ -55,10 +54,9 @@ define float @frem2_abs(float %x) {
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fabs s0, s0
; CHECK-SD-NEXT: fmov s1, #2.00000000
-; CHECK-SD-NEXT: fdiv s1, s0, s1
-; CHECK-SD-NEXT: frintz s1, s1
-; CHECK-SD-NEXT: fadd s1, s1, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fdiv s2, s0, s1
+; CHECK-SD-NEXT: frintz s2, s2
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_abs:
@@ -76,10 +74,10 @@ define half @hrem2_nsz(half %x) {
; CHECK-SD-LABEL: hrem2_nsz:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmov h1, #2.00000000
+; CHECK-SD-NEXT: fmov h2, #-2.00000000
; CHECK-SD-NEXT: fdiv h1, h0, h1
; CHECK-SD-NEXT: frintz h1, h1
-; CHECK-SD-NEXT: fadd h1, h1, h1
-; CHECK-SD-NEXT: fsub h0, h0, h1
+; CHECK-SD-NEXT: fmadd h0, h1, h2, h0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hrem2_nsz:
@@ -103,10 +101,9 @@ define double @drem2_nsz(double %x) {
; CHECK-SD-LABEL: drem2_nsz:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmov d1, #2.00000000
-; CHECK-SD-NEXT: fdiv d1, d0, d1
-; CHECK-SD-NEXT: frintz d1, d1
-; CHECK-SD-NEXT: fadd d1, d1, d1
-; CHECK-SD-NEXT: fsub d0, d0, d1
+; CHECK-SD-NEXT: fdiv d2, d0, d1
+; CHECK-SD-NEXT: frintz d2, d2
+; CHECK-SD-NEXT: fmsub d0, d2, d1, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: drem2_nsz:
@@ -170,8 +167,7 @@ define float @fremm2_nsz(float %x) {
; CHECK-SD-NEXT: fmov s1, #-2.00000000
; CHECK-SD-NEXT: fdiv s2, s0, s1
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s1, s2, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fremm2_nsz:
@@ -190,8 +186,7 @@ define float @frem4_abs(float %x) {
; CHECK-SD-NEXT: fmov s1, #4.00000000
; CHECK-SD-NEXT: fdiv s2, s0, s1
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s1, s2, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem4_abs:
@@ -212,8 +207,7 @@ define float @frem16_abs(float %x) {
; CHECK-SD-NEXT: fmov s1, #16.00000000
; CHECK-SD-NEXT: fdiv s2, s0, s1
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s1, s2, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem16_abs:
@@ -235,8 +229,7 @@ define float @frem4294967296_abs(float %x) {
; CHECK-SD-NEXT: fmov s1, w8
; CHECK-SD-NEXT: fdiv s2, s0, s1
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s1, s2, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem4294967296_abs:
@@ -259,8 +252,7 @@ define float @frem1152921504606846976_abs(float %x) {
; CHECK-SD-NEXT: fmov s1, w8
; CHECK-SD-NEXT: fdiv s2, s0, s1
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s1, s2, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem1152921504606846976_abs:
@@ -283,8 +275,7 @@ define float @frem4611686018427387904_abs(float %x) {
; CHECK-SD-NEXT: fmov s1, w8
; CHECK-SD-NEXT: fdiv s2, s0, s1
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s1, s2, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem4611686018427387904_abs:
@@ -306,8 +297,7 @@ define float @frem9223372036854775808_abs(float %x) {
; CHECK-SD-NEXT: fabs s0, s0
; CHECK-SD-NEXT: fdiv s2, s0, s1
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s1, s2, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem9223372036854775808_abs:
@@ -325,10 +315,9 @@ define <4 x float> @frem2_nsz_vec(<4 x float> %x) {
; CHECK-SD-LABEL: frem2_nsz_vec:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi v1.4s, #64, lsl #24
-; CHECK-SD-NEXT: fdiv v1.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: frintz v1.4s, v1.4s
-; CHECK-SD-NEXT: fadd v1.4s, v1.4s, v1.4s
-; CHECK-SD-NEXT: fsub v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: frintz v2.4s, v2.4s
+; CHECK-SD-NEXT: fmls v0.4s, v1.4s, v2.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_nsz_vec:
@@ -388,8 +377,7 @@ define <4 x float> @frem1152921504606846976_absv(<4 x float> %x) {
; CHECK-SD-NEXT: dup v1.4s, w8
; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: frintz v2.4s, v2.4s
-; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v1.4s
-; CHECK-SD-NEXT: fsub v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: fmls v0.4s, v1.4s, v2.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem1152921504606846976_absv:
@@ -454,8 +442,7 @@ define float @frem2_nsz_sitofp(float %x, i32 %sa) {
; CHECK-SD-NEXT: scvtf s1, w8
; CHECK-SD-NEXT: fdiv s2, s0, s1
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s1, s2, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_nsz_sitofp:
@@ -479,8 +466,7 @@ define float @frem2_nsz_uitofp(float %x, i32 %sa) {
; CHECK-SD-NEXT: ucvtf s1, w8
; CHECK-SD-NEXT: fdiv s2, s0, s1
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s1, s2, s1
-; CHECK-SD-NEXT: fsub s0, s0, s1
+; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_nsz_uitofp:
@@ -505,8 +491,7 @@ define float @frem2_const_sitofp(float %x, i32 %sa) {
; CHECK-SD-NEXT: scvtf s0, w8
; CHECK-SD-NEXT: fdiv s2, s1, s0
; CHECK-SD-NEXT: frintz s2, s2
-; CHECK-SD-NEXT: fmul s0, s2, s0
-; CHECK-SD-NEXT: fsub s0, s1, s0
+; CHECK-SD-NEXT: fmsub s0, s2, s0, s1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: frem2_const_sitofp:
diff --git a/llvm/test/CodeGen/ARM/frem-power2.ll b/llvm/test/CodeGen/ARM/frem-power2.ll
index efce515af439c..7f52943175ac9 100644
--- a/llvm/test/CodeGen/ARM/frem-power2.ll
+++ b/llvm/test/CodeGen/ARM/frem-power2.ll
@@ -41,7 +41,7 @@ define float @frem4_nsz(float %x) {
; CHECK-FP-NEXT: vmov s2, r0
; CHECK-FP-NEXT: vdiv.f32 s4, s2, s0
; CHECK-FP-NEXT: vrintz.f32 s4, s4
-; CHECK-FP-NEXT: vmls.f32 s2, s4, s0
+; CHECK-FP-NEXT: vfms.f32 s2, s4, s0
; CHECK-FP-NEXT: vmov r0, s2
; CHECK-FP-NEXT: bx lr
;
>From e6367fb1a5c38b6835962d4d730f007248dc24a9 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 9 May 2024 12:37:41 +0100
Subject: [PATCH 5/6] No else after return
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 +++-----
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +-
2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f2a9de933696c..276b7d4e2846d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17349,13 +17349,11 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
(Flags.hasNoSignedZeros() || DAG.isKnownNonNegativeFP(N0))) {
SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
- if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
+ if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT))
return DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
N1, N0);
- } else {
- SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
- return DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
- }
+ SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
}
return SDValue();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 3b950f8ace5da..af5a3c8fde3a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5567,7 +5567,7 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
bool SelectionDAG::isKnownNonNegativeFP(SDValue Op) const {
if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Op, true))
- return !C1->isNegative() && !C1->isNaN();
+ return !C1->isNegative();
return Op.getOpcode() == ISD::FABS;
}
>From 6e9c19971d5f8945b9262e536e1dd6bccfd9b00b Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 9 May 2024 14:07:33 +0100
Subject: [PATCH 6/6] isKnownPositiveOrNaNFP
---
llvm/include/llvm/CodeGen/SelectionDAG.h | 6 +++---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +-
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +-
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index bc4200a68f2d8..3e4f0e1f6d3a0 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -2115,9 +2115,9 @@ class SelectionDAG {
/// Test whether the given SDValue is known to contain non-zero value(s).
bool isKnownNeverZero(SDValue Op, unsigned Depth = 0) const;
- /// Test whether the given float value is known to not be negative. 0.0 is
- /// considered non-negative, -0.0 is considered negative.
- bool isKnownNonNegativeFP(SDValue Op) const;
+ /// Test whether the given float value is known to be positive. +0.0, +inf and
+ /// +nan are considered positive, -0.0, -inf and -nan are not.
+ bool isKnownPositiveOrNaNFP(SDValue Op) const;
/// Test whether two SDValues are known to compare equal. This
/// is true if they are the same value, or if one is negative zero and the
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 276b7d4e2846d..aa4396f7bcbb1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17346,7 +17346,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::FDIV, VT) &&
TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT) &&
DAG.isKnownToBeAPowerOfTwoFP(N1) &&
- (Flags.hasNoSignedZeros() || DAG.isKnownNonNegativeFP(N0))) {
+ (Flags.hasNoSignedZeros() || DAG.isKnownPositiveOrNaNFP(N0))) {
SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT))
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index af5a3c8fde3a7..04584f40547db 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5565,7 +5565,7 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
return computeKnownBits(Op, Depth).isNonZero();
}
-bool SelectionDAG::isKnownNonNegativeFP(SDValue Op) const {
+bool SelectionDAG::isKnownPositiveOrNaNFP(SDValue Op) const {
if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Op, true))
return !C1->isNegative();
More information about the llvm-commits mailing list